aboutsummaryrefslogtreecommitdiff
path: root/v_windows/v/vlib/encoding
diff options
context:
space:
mode:
authorIndrajith K L2022-12-03 17:00:20 +0530
committerIndrajith K L2022-12-03 17:00:20 +0530
commitf5c4671bfbad96bf346bd7e9a21fc4317b4959df (patch)
tree2764fc62da58f2ba8da7ed341643fc359873142f /v_windows/v/vlib/encoding
downloadcli-tools-windows-f5c4671bfbad96bf346bd7e9a21fc4317b4959df.tar.gz
cli-tools-windows-f5c4671bfbad96bf346bd7e9a21fc4317b4959df.tar.bz2
cli-tools-windows-f5c4671bfbad96bf346bd7e9a21fc4317b4959df.zip
Adds most of the toolsHEADmaster
Diffstat (limited to 'v_windows/v/vlib/encoding')
-rw-r--r--v_windows/v/vlib/encoding/base58/alphabet.v65
-rw-r--r--v_windows/v/vlib/encoding/base58/base58.v181
-rw-r--r--v_windows/v/vlib/encoding/base58/base58_test.v89
-rw-r--r--v_windows/v/vlib/encoding/base64/base64.v308
-rw-r--r--v_windows/v/vlib/encoding/base64/base64_memory_test.v59
-rw-r--r--v_windows/v/vlib/encoding/base64/base64_test.v150
-rw-r--r--v_windows/v/vlib/encoding/binary/binary.v100
-rw-r--r--v_windows/v/vlib/encoding/csv/README.md19
-rw-r--r--v_windows/v/vlib/encoding/csv/reader.v196
-rw-r--r--v_windows/v/vlib/encoding/csv/reader_test.v253
-rw-r--r--v_windows/v/vlib/encoding/csv/writer.v80
-rw-r--r--v_windows/v/vlib/encoding/csv/writer_test.v11
-rw-r--r--v_windows/v/vlib/encoding/hex/hex.v62
-rw-r--r--v_windows/v/vlib/encoding/hex/hex_test.v54
-rw-r--r--v_windows/v/vlib/encoding/utf8/east_asian/east_asian_width.v1204
-rw-r--r--v_windows/v/vlib/encoding/utf8/east_asian/east_asian_width_test.v23
-rw-r--r--v_windows/v/vlib/encoding/utf8/encoding_utf8_test.v9
-rw-r--r--v_windows/v/vlib/encoding/utf8/utf8.v88
-rw-r--r--v_windows/v/vlib/encoding/utf8/utf8_util.v1161
-rw-r--r--v_windows/v/vlib/encoding/utf8/utf8_util_test.v66
20 files changed, 4178 insertions, 0 deletions
diff --git a/v_windows/v/vlib/encoding/base58/alphabet.v b/v_windows/v/vlib/encoding/base58/alphabet.v
new file mode 100644
index 0000000..44d4fc3
--- /dev/null
+++ b/v_windows/v/vlib/encoding/base58/alphabet.v
@@ -0,0 +1,65 @@
+module base58
+
+// alphabets is a map of common base58 alphabets, keyed by a short name.
+// It is filled by `init_alphabets()`, which registers 'btc', 'flickr' and 'ripple'.
+pub const alphabets = init_alphabets()
+
+// init_alphabets instantiates the preconfigured `Alphabet`s and returns them as `map[string]Alphabet`,
+// keyed by 'btc', 'flickr' and 'ripple'.
+// It panics if `new_alphabet` rejects one of the hard-coded alphabets.
+// This is a temporary function. Setting const alphabets to the value returned in this function
+// causes a C error right now.
+// NOTE(review): the sentence above presumably refers to assigning the map literal to the
+// const directly — confirm whether the workaround is still needed.
+fn init_alphabets() map[string]Alphabet {
+ return {
+ 'btc': new_alphabet('123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz') or {
+ panic(@MOD + '.' + @FN + ': this should never happen')
+ }
+ 'flickr': new_alphabet('123456789abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ') or {
+ panic(@MOD + '.' + @FN + ': this should never happen')
+ }
+ 'ripple': new_alphabet('rpshnaf39wBUDNEGHJKLM4PQRST7VWXYZ2bcdeCg65jkm8oFqi1tuvAxyz') or {
+ panic(@MOD + '.' + @FN + ': this should never happen')
+ }
+ }
+}
+
+// Alphabet is the series of characters that an input
+// will be encoded to and a decode table.
+struct Alphabet {
+mut:
+ // decode maps a byte value (0..127) to its position in `encode`;
+ // -1 marks byte values that are not part of the alphabet
+ decode []i8 = []i8{len: 128, init: -1}
+ // encode holds the 58 alphabet characters, indexed by base58 digit value
+ encode []byte = []byte{len: 58}
+}
+
+// str renders the alphabet as a string built from the bytes of its
+// encode table, i.e. the alphabet characters in digit order.
+pub fn (alphabet Alphabet) str() string {
+	table := alphabet.encode
+	return table.bytestr()
+}
+
+// new_alphabet instantiates an Alphabet object based on
+// the provided characters.
+// `str` must consist of exactly 58 distinct ASCII characters; an error is
+// returned when it has a different length, contains a byte outside of the
+// ASCII range, or repeats a character.
+pub fn new_alphabet(str string) ?Alphabet {
+	if str.len != 58 {
+		return error(@MOD + '.' + @FN + ': string must be 58 characters in length')
+	}
+
+	mut ret := Alphabet{}
+	copy(ret.encode, str.bytes())
+
+	mut distinct := 0
+	for i, b in ret.encode {
+		// the decode table only has 128 entries, so a byte above 127 (e.g. part
+		// of a multi-byte UTF-8 sequence) would index it out of range
+		if b > 127 {
+			return error(@MOD + '.' + @FN + ': string must only contain ASCII characters')
+		}
+		// a slot that is still -1 means this character was not seen before
+		if ret.decode[b] == -1 {
+			distinct++
+		}
+		ret.decode[b] = i8(i)
+	}
+
+	if distinct != 58 {
+		return error(@MOD + '.' + @FN + ': string must not contain repeating characters')
+	}
+
+	return ret
+}
diff --git a/v_windows/v/vlib/encoding/base58/base58.v b/v_windows/v/vlib/encoding/base58/base58.v
new file mode 100644
index 0000000..fb2ff72
--- /dev/null
+++ b/v_windows/v/vlib/encoding/base58/base58.v
@@ -0,0 +1,181 @@
+// algorithm is adapted from https://github.com/mr-tron/base58 under the MIT license
+
+module base58
+
+import math
+
+// encode_int encodes a positive `int` to a base58 string with the Bitcoin ('btc') alphabet.
+// It forwards to `encode_int_walpha`, which returns an error for inputs that are zero or negative.
+pub fn encode_int(input int) ?string {
+ return encode_int_walpha(input, alphabets['btc'])
+}
+
+// encode_int_walpha encodes a positive `int` to a base58 string using a custom alphabet.
+// An error is returned when `input` is zero or negative.
+pub fn encode_int_walpha(input int, alphabet Alphabet) ?string {
+	if input <= 0 {
+		return error(@MOD + '.' + @FN + ': input must be greater than zero')
+	}
+
+	// digits come out least significant first and are reversed at the end
+	mut digits := []byte{}
+	for rest := input; rest > 0; rest /= 58 {
+		// the remainder is always below 58, so it fits the i8 index
+		digits << alphabet.encode[i8(rest % 58)]
+	}
+
+	return digits.reverse().bytestr()
+}
+
+// encode encodes the bytes of the `input` string to base58 with the Bitcoin ('btc') alphabet
+pub fn encode(input string) string {
+ return encode_walpha(input, alphabets['btc'])
+}
+
+// encode_walpha encodes the bytes of the `input` string to base58 with a custom alphabet.
+// An empty input yields an empty string. Every leading zero byte of the input is
+// represented by one leading `alphabet.encode[0]` character in the result.
+pub fn encode_walpha(input string, alphabet Alphabet) string {
+	if input.len == 0 {
+		return ''
+	}
+
+	bin := input.bytes()
+	mut sz := bin.len
+
+	mut zcount := 0
+	for zcount < sz && bin[zcount] == 0 {
+		zcount++
+	}
+
+	// It is crucial to make this as short as possible, especially for
+	// the usual case of Bitcoin addresses.
+	// 555 / 406 is an integer approximation (rounded up) of log(256) / log(58).
+	sz = zcount + (sz - zcount) * 555 / 406 + 1
+
+	mut out := []byte{len: sz}
+	mut i := 0
+	mut carry := u32(0)
+
+	// `high` is the index just below the most significant base58 digit written so far.
+	// Digits at or below `high` are still zero, so each pass only has to revisit the
+	// digits above it and then continue for as long as there is a carry.
+	mut high := sz - 1
+	for b in bin {
+		i = sz - 1
+		for carry = u32(b); i > high || carry != 0; i-- {
+			carry = carry + 256 * u32(out[i])
+			out[i] = byte(carry % 58)
+			carry /= 58
+		}
+		// remember how far this pass got; using a constant here would force every
+		// pass to rescan almost the whole buffer
+		high = i
+	}
+
+	// determine additional "zero-gap" in the buffer, aside from zcount
+	for i = zcount; i < sz && out[i] == 0; i++ {}
+
+	// now encode the values with actual alphabet in-place
+	val := out[i - zcount..]
+	sz = val.len
+	for i = 0; i < sz; i++ {
+		out[i] = alphabet.encode[val[i]]
+	}
+
+	return out[..sz].bytestr()
+}
+
+// decode_int decodes base58 string to an integer with Bitcoin alphabet.
+// It forwards to `decode_int_walpha` using the 'btc' entry of `alphabets`.
+pub fn decode_int(input string) ?int {
+ return decode_int_walpha(input, alphabets['btc'])
+}
+
+// decode_int_walpha decodes base58 string to an integer with custom alphabet.
+// An error is returned when `input` contains a character that is not part of
+// `alphabet`, or when the decoded value does not fit in an `int`.
+// An empty `input` decodes to 0.
+pub fn decode_int_walpha(input string, alphabet Alphabet) ?int {
+	mut total := i64(0) // wider than the result, so that an overflow can be detected
+	for ch in input {
+		// the decode table only covers 7 bit ASCII; anything above cannot be in the alphabet
+		digit := if ch < 128 { int(alphabet.decode[ch]) } else { -1 }
+		if digit == -1 {
+			return error(@MOD + '.' + @FN +
+				': input string contains values not found in the provided alphabet')
+		}
+
+		// the most significant digit comes first: shift the value accumulated
+		// so far by one base58 position and add the new digit
+		total = total * 58 + i64(digit)
+		if total > i64(math.max_i32) {
+			return error(@MOD + '.' + @FN + ': decoded value does not fit in an int')
+		}
+	}
+
+	return int(total)
+}
+
+// decode decodes base58 string using the Bitcoin alphabet.
+// It forwards to `decode_walpha` using the 'btc' entry of `alphabets`.
+pub fn decode(str string) ?string {
+ return decode_walpha(str, alphabets['btc'])
+}
+
+// decode_walpha decodes base58 string using custom alphabet.
+// An empty `str` decodes to an empty string. An error is returned when `str`
+// contains a byte that is not a digit of `alphabet`; this includes every byte
+// outside of the ASCII range.
+pub fn decode_walpha(str string, alphabet Alphabet) ?string {
+	if str.len == 0 {
+		return ''
+	}
+
+	zero := alphabet.encode[0]
+	b58sz := str.len
+
+	// count the leading "zero" digits of the input
+	mut zcount := 0
+	for i := 0; i < b58sz && str[i] == zero; i++ {
+		zcount++
+	}
+
+	mut t := u64(0)
+	mut c := u64(0)
+
+	// the 32-bit algorithm stretches the result up to 2x
+	mut binu := []byte{len: 2 * ((b58sz * 406 / 555) + 1)}
+	mut outi := []u32{len: (b58sz + 3) / 4}
+
+	for _, r in str {
+		// the decode table has 128 entries, so bytes above 127 must be rejected
+		// before they are used as an index; they can never be valid digits
+		if r > 127 || alphabet.decode[r] == -1 {
+			return error(@MOD + '.' + @FN + ': invalid base58 digit ($r)')
+		}
+
+		c = u64(alphabet.decode[r])
+
+		// multiply the big number held in `outi` by 58 and add the new digit,
+		// propagating the carry from the least significant word upwards
+		for j := outi.len - 1; j >= 0; j-- {
+			t = u64(outi[j]) * 58 + c
+			c = t >> 32
+			outi[j] = u32(t & 0xffffffff)
+		}
+	}
+
+	// initial mask depend on b58sz, on further loops it always starts at 24 bits
+	mut mask := (u32(b58sz % 4) * 8)
+	if mask == 0 {
+		mask = 32
+	}
+	mask -= 8
+
+	mut out_len := 0
+	for j := 0; j < outi.len; j++ {
+		// loop relies on u32 overflow: subtracting 8 from a mask of 0 wraps
+		// around to a value above 32, which ends the loop
+		for mask < 32 {
+			binu[out_len] = byte(outi[j] >> mask)
+			mask -= 8
+			out_len++
+		}
+		mask = 24
+	}
+
+	// find the most significant byte post-decode, if any
+	for msb := zcount; msb < binu.len; msb++ {
+		if binu[msb] > 0 {
+			return binu[msb - zcount..out_len].bytestr()
+		}
+	}
+
+	// it's all zeroes
+	return binu[..out_len].bytestr()
+}
diff --git a/v_windows/v/vlib/encoding/base58/base58_test.v b/v_windows/v/vlib/encoding/base58/base58_test.v
new file mode 100644
index 0000000..5cbd37b
--- /dev/null
+++ b/v_windows/v/vlib/encoding/base58/base58_test.v
@@ -0,0 +1,89 @@
+module base58
+
+// main calls a subset of the tests below by hand. Errors returned by the
+// optional-returning tests are discarded by the empty `or {}` blocks.
+// NOTE(review): `test_decode_string` is not called from here; presumably the V test
+// runner discovers the `test_` functions on its own — confirm whether this function is needed.
+fn main() {
+ test_encode_int() or {}
+ test_decode_int() or {}
+ test_encode_string()
+ test_fails() or {}
+}
+
+// test_encode_int checks `encode_int` with the Bitcoin alphabet and then
+// runs the custom alphabet variant of the test.
+fn test_encode_int() ? {
+ a := 0x24 // should be 'd' in base58
+ assert encode_int(a) ? == 'd'
+
+ test_encode_int_walpha() ?
+}
+
+// test_encode_int_walpha checks `encode_int_walpha` with a custom alphabet.
+fn test_encode_int_walpha() ? {
+ // random alphabet
+ abc := new_alphabet('abcdefghij\$lmnopqrstuvwxyz0123456789_ABCDEFGHIJLMNOPQRSTUV') or {
+ panic(@MOD + '.' + @FN + ': this should never happen')
+ }
+ a := 0x24 // should be '_' in base58 with our custom alphabet
+ assert encode_int_walpha(a, abc) ? == '_'
+}
+
+// test_decode_int checks `decode_int` with the Bitcoin alphabet and then
+// runs the custom alphabet variant of the test.
+fn test_decode_int() ? {
+ a := 'd'
+ assert decode_int(a) ? == 0x24
+
+ test_decode_int_walpha() ?
+}
+
+// test_decode_int_walpha checks `decode_int_walpha` with a custom alphabet.
+fn test_decode_int_walpha() ? {
+ abc := new_alphabet('abcdefghij\$lmnopqrstuvwxyz0123456789_ABCDEFGHIJLMNOPQRSTUV') or {
+ panic(@MOD + '.' + @FN + ': this should never happen')
+ }
+ a := '_'
+ assert decode_int_walpha(a, abc) ? == 0x24
+}
+
+// test_encode_string checks `encode` and `encode_walpha` against known
+// encodings of the same input string.
+fn test_encode_string() {
+ // should be 'TtaR6twpTGu8VpY' in base58 and '0P7yfPSL0pQh2L5' with our custom alphabet
+ a := 'lorem ipsum'
+ assert encode(a) == 'TtaR6twpTGu8VpY'
+
+ abc := new_alphabet('abcdefghij\$lmnopqrstuvwxyz0123456789_ABCDEFGHIJLMNOPQRSTUV') or {
+ panic(@MOD + '.' + @FN + ': this should never happen')
+ }
+ assert encode_walpha(a, abc) == '0P7yfPSL0pQh2L5'
+}
+
+// test_decode_string checks that `decode` and `decode_walpha` restore
+// 'lorem ipsum' from its Bitcoin and custom alphabet encodings.
+fn test_decode_string() ? {
+ a := 'TtaR6twpTGu8VpY'
+ assert decode(a) ? == 'lorem ipsum'
+
+ abc := new_alphabet('abcdefghij\$lmnopqrstuvwxyz0123456789_ABCDEFGHIJLMNOPQRSTUV') or {
+ panic(@MOD + '.' + @FN + ': this should never happen')
+ }
+ b := '0P7yfPSL0pQh2L5'
+ assert decode_walpha(b, abc) ? == 'lorem ipsum'
+}
+
+// test_fails checks the error paths: every call below is expected to fail, so
+// the test itself returns an error as soon as one of them succeeds.
+fn test_fails() ? {
+ // encode_int only accepts values greater than zero
+ a := -238
+ b := 0
+ if z := encode_int(a) {
+ return error(@MOD + '.' + @FN + ': expected encode_int to fail, got $z')
+ }
+ if z := encode_int(b) {
+ return error(@MOD + '.' + @FN + ': expected encode_int to fail, got $z')
+ }
+
+ // '!' is not part of the Bitcoin alphabet
+ c := '!'
+ if z := decode_int(c) {
+ return error(@MOD + '.' + @FN + ': expected decode_int to fail, got $z')
+ }
+ if z := decode(c) {
+ return error(@MOD + '.' + @FN + ': expected decode to fail, got $z')
+ }
+
+ // repeating character
+ if abc := new_alphabet('aaaaafghij\$lmnopqrstuvwxyz0123456789_ABCDEFGHIJLMNOPQRSTUV') {
+ return error(@MOD + '.' + @FN + ': expected new_alphabet to fail, got $abc')
+ }
+ // more than 58 characters long
+ if abc := new_alphabet('abcdefghij\$lmnopqrstuvwxyz0123456789_ABCDEFGHIJLMNOPQRSTUVWXYZ') {
+ return error(@MOD + '.' + @FN + ': expected new_alphabet to fail, got $abc')
+ }
+}
diff --git a/v_windows/v/vlib/encoding/base64/base64.v b/v_windows/v/vlib/encoding/base64/base64.v
new file mode 100644
index 0000000..ad06722
--- /dev/null
+++ b/v_windows/v/vlib/encoding/base64/base64.v
@@ -0,0 +1,308 @@
+// Copyright (c) 2019-2021 Alexander Medvednikov. All rights reserved.
+// Use of this source code is governed by an MIT license
+// that can be found in the LICENSE file.
+// Based off: https://github.com/golang/go/blob/master/src/encoding/base64/base64.go
+// Last commit: https://github.com/golang/go/commit/9a93baf4d7d13d7d5c67388c93960d78abc8e11e
+module base64
+
+const (
+ // index is the decode table: it is indexed with the ASCII code of a base64 digit and
+ // yields the 6 bit value of that digit. It holds entries for the standard digits
+ // (`+` -> 62, `/` -> 63) as well as for the URL safe ones (`-` -> 62, `_` -> 63).
+ // The padding character `=` maps to 0. The last entry is the one for `z`.
+ index = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 63, 62, 62, 63, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0, 0, 0, 0, 63, 0, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51]!
+ // NOTE(review): ending_table is not referenced by the code in this part of the file;
+ // presumably it maps `len % 3` to the number of padding characters — confirm.
+ ending_table = [0, 2, 1]!
+ // enc_table is the encode table: the character at position n is the base64 digit for the value n
+ enc_table = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
+)
+
+// B64_64_datablock gives byte-wise access to a u64. The decoder stores an assembled
+// group in `data` and reads the decoded bytes back through `data_byte`.
+union B64_64_datablock {
+mut:
+ data u64
+ data_byte [8]byte
+}
+
+// B64_32_datablock gives byte-wise access to a u32. The decoder stores an assembled
+// group in `data` and reads the decoded bytes back through `data_byte`.
+union B64_32_datablock {
+mut:
+ data u32
+ data_byte [4]byte
+}
+
+// decode decodes the base64 encoded `string` value passed in `data`.
+// It returns an empty array when `data` is empty or when its length is not a multiple of 4.
+// Please note: If you need to decode many strings repeatedly, take a look at `decode_in_buffer`.
+// Example: assert base64.decode('ViBpbiBiYXNlIDY0') == 'V in base 64'.bytes()
+pub fn decode(data string) []byte {
+ mut size := i64(data.len) * 3 / 4
+ if size <= 0 || data.len % 4 != 0 {
+ return []
+ }
+ size = (size + 3) & ~0x03 // round to the next multiple of 4 (the decoding loop writes multiples of 4 bytes)
+ unsafe {
+ buffer := malloc(int(size))
+ n := decode_in_buffer(data, buffer)
+ // only the first n bytes of the buffer are decoded data
+ return buffer.vbytes(n)
+ }
+}
+
+// decode_str is the string variant of decode.
+// It returns an empty string when `data` is empty or when its length is not a multiple of 4.
+pub fn decode_str(data string) string {
+ size := data.len * 3 / 4
+ if size <= 0 || data.len % 4 != 0 {
+ return ''
+ }
+ unsafe {
+ // one extra byte for the terminating 0
+ buffer := malloc_noscan(size + 1)
+ buffer[size] = 0
+ return tos(buffer, decode_in_buffer(data, buffer))
+ }
+}
+
+// encode encodes the `[]byte` value passed in `data` to base64.
+// Please note: base64 encoding returns a `string` that is ~ 4/3 larger than the input.
+// Please note: If you need to encode many strings repeatedly, take a look at `encode_in_buffer`.
+// Example: assert base64.encode('V in base 64'.bytes()) == 'ViBpbiBiYXNlIDY0'
+pub fn encode(data []byte) string {
+ return alloc_and_encode(data.data, data.len)
+}
+
+// encode_str is the string variant of encode.
+// Example: assert base64.encode_str('V in base 64') == 'ViBpbiBiYXNlIDY0'
+pub fn encode_str(data string) string {
+ return alloc_and_encode(data.str, data.len)
+}
+
+// alloc_and_encode is a private function that allocates and encodes data into a string
+// Used by encode and encode_str
+// It encodes `len` bytes starting at `src` and returns an empty string when `len` is 0.
+fn alloc_and_encode(src &byte, len int) string {
+ // every started group of 3 input bytes becomes 4 output characters
+ size := 4 * ((len + 2) / 3)
+ if size <= 0 {
+ return ''
+ }
+ unsafe {
+ // one extra byte for the terminating 0
+ buffer := malloc_noscan(size + 1)
+ buffer[size] = 0
+ return tos(buffer, encode_from_buffer(buffer, src, len))
+ }
+}
+
+// url_decode decodes the base64 URL encoded `string` passed in `data`
+// and returns the decoded bytes.
+// The URL safe digits `-` and `_` are mapped back to `+` and `/`, and the
+// `=` padding that URL encoding strips is restored before decoding.
+pub fn url_decode(data string) []byte {
+	mut padded := data.replace_each(['-', '+', '_', '/'])
+	missing := padded.len % 4
+	if missing == 2 {
+		padded += '==' // 2 pad chars
+	} else if missing == 3 {
+		padded += '=' // 1 pad char
+	}
+	return decode(padded)
+}
+
+// url_decode_str is the string variant of url_decode: it decodes the base64
+// URL encoded `string` passed in `data` and returns the result as a string.
+pub fn url_decode_str(data string) string {
+	mut padded := data.replace_each(['-', '+', '_', '/'])
+	missing := padded.len % 4
+	if missing == 2 {
+		padded += '==' // 2 pad chars
+	} else if missing == 3 {
+		padded += '=' // 1 pad char
+	}
+	return decode_str(padded)
+}
+
+// url_encode returns the base64 URL encoding of the bytes passed in `data`:
+// the regular base64 encoding with `+` replaced by `-`, `/` replaced by `_`
+// and the `=` padding removed.
+pub fn url_encode(data []byte) string {
+	standard := encode(data)
+	return standard.replace_each(['+', '-', '/', '_', '=', ''])
+}
+
+// url_encode_str is the string variant of url_encode: it returns the base64
+// URL encoding of the bytes of the string passed in `data`.
+pub fn url_encode_str(data string) string {
+	standard := encode_str(data)
+	return standard.replace_each(['+', '-', '/', '_', '=', ''])
+}
+
+// assemble64 packs 8 base64 digit values (6 bits each) into the upper
+// 48 bits of a u64; the lower 16 bits of the result are zero.
+// Each digit comes from the decode map.
+// Please note: Invalid base64 digits are not expected and not handled.
+fn assemble64(n1 byte, n2 byte, n3 byte, n4 byte, n5 byte, n6 byte, n7 byte, n8 byte) u64 {
+	mut packed := u64(n1) << 58
+	packed |= u64(n2) << 52
+	packed |= u64(n3) << 46
+	packed |= u64(n4) << 40
+	packed |= u64(n5) << 34
+	packed |= u64(n6) << 28
+	packed |= u64(n7) << 22
+	packed |= u64(n8) << 16
+	return packed
+}
+
+// assemble32 packs 4 base64 digit values (6 bits each) into the upper
+// 24 bits of a u32; the lower 8 bits of the result are zero.
+// Each digit comes from the decode map.
+// Please note: Invalid base64 digits are not expected and not handled.
+fn assemble32(n1 byte, n2 byte, n3 byte, n4 byte) u32 {
+	mut packed := u32(n1) << 26
+	packed |= u32(n2) << 20
+	packed |= u32(n3) << 14
+	packed |= u32(n4) << 8
+	return packed
+}
+
+// decode_in_buffer decodes the base64 encoded `string` reference passed in `data` into `buffer`.
+// decode_in_buffer returns the size of the decoded data in the buffer.
+// Please note: The `buffer` should be large enough (i.e. 3/4 of the data.len, or larger)
+// to hold the decoded data.
+// Please note: This function does NOT allocate new memory, and is thus suitable for handling very large strings.
+// Please note: Invalid base64 digits in `data` are not detected.
+pub fn decode_in_buffer(data &string, buffer &byte) int {
+ return decode_from_buffer(buffer, data.str, data.len)
+}
+
+// decode_in_buffer_bytes decodes the base64 encoded ASCII bytes from `data` into `buffer`.
+// decode_in_buffer_bytes returns the size of the decoded data in the buffer.
+// Please note: The `buffer` should be large enough (i.e. 3/4 of the data.len, or larger)
+// to hold the decoded data.
+// Please note: This function does NOT allocate new memory, and is thus suitable for handling very large strings.
+pub fn decode_in_buffer_bytes(data []byte, buffer &byte) int {
+ return decode_from_buffer(buffer, data.data, data.len)
+}
+
+// decode_from_buffer decodes the base64 encoded ASCII bytes from `src` into `dest`.
+// decode_from_buffer returns the size of the decoded data in the buffer.
+// Inputs shorter than 4 bytes decode to nothing (0 is returned), and trailing
+// bytes that do not form a complete group of 4 are ignored.
+// Please note: The `dest` buffer should be large enough (i.e. 3/4 of the `src_len`, or larger)
+// to hold the decoded data.
+// Please note: This function does NOT allocate new memory, and is thus suitable for handling very large strings.
+// Please note: Invalid base64 digits are not expected and not handled.
+// Please note: This function is for internal base64 decoding
+fn decode_from_buffer(dest &byte, src &byte, src_len int) int {
+	if src_len < 4 {
+		return 0
+	}
+
+	// count the trailing `=` characters; they decode to bytes that are not part of the data
+	mut padding := 0
+	if unsafe { src[src_len - 1] == `=` } {
+		if unsafe { src[src_len - 2] == `=` } {
+			padding = 2
+		} else {
+			padding = 1
+		}
+	}
+
+	mut d := unsafe { src }
+	mut b := unsafe { dest }
+
+	unsafe {
+		mut n_decoded_bytes := 0 // padding bytes are also counted towards this.
+		mut si := 0
+
+		mut datablock_64 := B64_64_datablock{
+			data: 0
+		}
+		mut datablock_32 := B64_32_datablock{
+			data: 0
+		}
+
+		for src_len - si >= 8 {
+			// Converting 8 bytes of input into 6 bytes of output. Storing these in the upper bytes of an u64.
+			datablock_64.data = assemble64(byte(base64.index[d[si + 0]]), byte(base64.index[d[si + 1]]),
+				byte(base64.index[d[si + 2]]), byte(base64.index[d[si + 3]]), byte(base64.index[d[
+				si + 4]]), byte(base64.index[d[si + 5]]), byte(base64.index[d[si + 6]]),
+				byte(base64.index[d[si + 7]]))
+
+			// Reading out the individual bytes from the u64. Watch out with endianess.
+			$if little_endian {
+				b[n_decoded_bytes + 0] = datablock_64.data_byte[7]
+				b[n_decoded_bytes + 1] = datablock_64.data_byte[6]
+				b[n_decoded_bytes + 2] = datablock_64.data_byte[5]
+				b[n_decoded_bytes + 3] = datablock_64.data_byte[4]
+				b[n_decoded_bytes + 4] = datablock_64.data_byte[3]
+				b[n_decoded_bytes + 5] = datablock_64.data_byte[2]
+			} $else {
+				b[n_decoded_bytes + 0] = datablock_64.data_byte[0]
+				b[n_decoded_bytes + 1] = datablock_64.data_byte[1]
+				b[n_decoded_bytes + 2] = datablock_64.data_byte[2]
+				b[n_decoded_bytes + 3] = datablock_64.data_byte[3]
+				b[n_decoded_bytes + 4] = datablock_64.data_byte[4]
+				b[n_decoded_bytes + 5] = datablock_64.data_byte[5]
+			}
+
+			n_decoded_bytes += 6
+			si += 8
+		}
+
+		for src_len - si >= 4 {
+			// Converting 4 bytes of input into 3 bytes of output, stored in the upper bytes of an u32.
+			datablock_32.data = assemble32(byte(base64.index[d[si + 0]]), byte(base64.index[d[si + 1]]),
+				byte(base64.index[d[si + 2]]), byte(base64.index[d[si + 3]]))
+			// Only the 3 data bytes are copied. The lowest byte of the u32 is always 0 and
+			// carries no data; writing it as well would touch one byte past a `dest`
+			// buffer that is exactly 3/4 of `src_len` in size.
+			$if little_endian {
+				b[n_decoded_bytes + 0] = datablock_32.data_byte[3]
+				b[n_decoded_bytes + 1] = datablock_32.data_byte[2]
+				b[n_decoded_bytes + 2] = datablock_32.data_byte[1]
+			} $else {
+				b[n_decoded_bytes + 0] = datablock_32.data_byte[0]
+				b[n_decoded_bytes + 1] = datablock_32.data_byte[1]
+				b[n_decoded_bytes + 2] = datablock_32.data_byte[2]
+			}
+
+			n_decoded_bytes += 3
+			si += 4
+		}
+
+		return n_decoded_bytes - padding
+	}
+}
+
+// encode_in_buffer base64 encodes the `[]byte` passed in `data` into `buffer`.
+// encode_in_buffer returns the size of the encoded data in the buffer.
+// Please note: The buffer should be large enough (i.e. 4/3 of the data.len, or larger) to hold the encoded data.
+// More precisely, `4 * ((data.len + 2) / 3)` bytes are written, since the output is padded with `=`.
+// Please note: The function does NOT allocate new memory, and is suitable for handling very large strings.
+pub fn encode_in_buffer(data []byte, buffer &byte) int {
+ return encode_from_buffer(buffer, data.data, data.len)
+}
+
+// encode_from_buffer will perform encoding from any type of src buffer
+// and write the bytes into `dest`.
+// It returns the number of bytes written, which is `4 * ((src_len + 2) / 3)`,
+// or 0 when `src_len` is 0. No terminating 0 byte is written.
+// Please note: The `dest` buffer should be large enough (i.e. 4/3 of the src_len, or larger) to hold the encoded data.
+// Please note: This function is for internal base64 encoding
+fn encode_from_buffer(dest &byte, src &byte, src_len int) int {
+ if src_len == 0 {
+ return 0
+ }
+ output_length := 4 * ((src_len + 2) / 3)
+
+ mut d := unsafe { src }
+ mut b := unsafe { dest }
+ etable := base64.enc_table.str
+
+ mut di := 0
+ mut si := 0
+ // n is the length of the part of the input that consists of complete 3 byte groups
+ n := (src_len / 3) * 3
+ for si < n {
+ // Convert 3x 8bit source bytes into 4 bytes
+ unsafe {
+ val := u32(d[si + 0]) << 16 | u32(d[si + 1]) << 8 | u32(d[si + 2])
+
+ b[di + 0] = etable[val >> 18 & 0x3F]
+ b[di + 1] = etable[val >> 12 & 0x3F]
+ b[di + 2] = etable[val >> 6 & 0x3F]
+ b[di + 3] = etable[val & 0x3F]
+ }
+ si += 3
+ di += 4
+ }
+
+ remain := src_len - si
+ if remain == 0 {
+ return output_length
+ }
+
+ // Add the remaining small block and padding
+ unsafe {
+ mut val := u32(d[si + 0]) << 16
+ if remain == 2 {
+ val |= u32(d[si + 1]) << 8
+ }
+
+ b[di + 0] = etable[val >> 18 & 0x3F]
+ b[di + 1] = etable[val >> 12 & 0x3F]
+
+ match remain {
+ 2 {
+ // 2 leftover bytes: 3 digits and 1 pad char
+ b[di + 2] = etable[val >> 6 & 0x3F]
+ b[di + 3] = byte(`=`)
+ }
+ 1 {
+ // 1 leftover byte: 2 digits and 2 pad chars
+ b[di + 2] = byte(`=`)
+ b[di + 3] = byte(`=`)
+ }
+ else {
+ panic('base64: This case should never occur.')
+ }
+ }
+ }
+ return output_length
+}
diff --git a/v_windows/v/vlib/encoding/base64/base64_memory_test.v b/v_windows/v/vlib/encoding/base64/base64_memory_test.v
new file mode 100644
index 0000000..be543af
--- /dev/null
+++ b/v_windows/v/vlib/encoding/base64/base64_memory_test.v
@@ -0,0 +1,59 @@
+import encoding.base64
+
+// test_long_encoding round-trips a 3000 byte input through the allocating and the
+// in-buffer variants of encode/decode, then repeats the in-buffer calls `repeats` times
+// on the same two buffers.
+fn test_long_encoding() {
+ repeats := 1000
+ input_size := 3000
+
+ s_original := []byte{len: input_size, init: `a`}
+ s_encoded := base64.encode(s_original)
+ s_encoded_bytes := s_encoded.bytes()
+ s_decoded := base64.decode(s_encoded)
+
+ assert s_encoded.len > s_original.len
+ assert s_original == s_decoded
+
+ // the buffers are sized to exactly the encoded and decoded lengths
+ ebuffer := unsafe { malloc(s_encoded.len) }
+ dbuffer := unsafe { malloc(s_decoded.len) }
+ defer {
+ unsafe { free(ebuffer) }
+ unsafe { free(dbuffer) }
+ }
+ //
+ encoded_size := base64.encode_in_buffer(s_original, ebuffer)
+ mut encoded_in_buf := []byte{len: encoded_size}
+ unsafe { C.memcpy(encoded_in_buf.data, ebuffer, encoded_size) }
+ assert input_size * 4 / 3 == encoded_size
+ // 'aaa' encodes to 'YWFh', so the output both starts and ends with that group
+ assert encoded_in_buf[0] == `Y`
+ assert encoded_in_buf[1] == `W`
+ assert encoded_in_buf[2] == `F`
+ assert encoded_in_buf[3] == `h`
+
+ assert encoded_in_buf[encoded_size - 4] == `Y`
+ assert encoded_in_buf[encoded_size - 3] == `W`
+ assert encoded_in_buf[encoded_size - 2] == `F`
+ assert encoded_in_buf[encoded_size - 1] == `h`
+
+ assert encoded_in_buf == s_encoded_bytes
+
+ decoded_size := base64.decode_in_buffer(s_encoded, dbuffer)
+ assert decoded_size == input_size
+ mut decoded_in_buf := []byte{len: decoded_size}
+ unsafe { C.memcpy(decoded_in_buf.data, dbuffer, decoded_size) }
+ assert decoded_in_buf == s_original
+
+ // s accumulates the returned sizes of all repeated calls
+ mut s := 0
+ for _ in 0 .. repeats {
+ resultsize := base64.encode_in_buffer(s_original, ebuffer)
+ s += resultsize
+ assert resultsize == s_encoded.len
+ }
+
+ for _ in 0 .. repeats {
+ resultsize := base64.decode_in_buffer(s_encoded, dbuffer)
+ s += resultsize
+ assert resultsize == s_decoded.len
+ }
+
+ println('Final s: $s')
+ // assert s == 39147008
+}
diff --git a/v_windows/v/vlib/encoding/base64/base64_test.v b/v_windows/v/vlib/encoding/base64/base64_test.v
new file mode 100644
index 0000000..8d08de2
--- /dev/null
+++ b/v_windows/v/vlib/encoding/base64/base64_test.v
@@ -0,0 +1,150 @@
+import encoding.base64
+
+// TestPair couples a plain text value with its expected base64 encoding.
+struct TestPair {
+ decoded string
+ encoded string
+}
+
+const (
+ pairs = [
+ // RFC 3548 examples
+ TestPair{'\x14\xfb\x9c\x03\xd9\x7e', 'FPucA9l+'},
+ TestPair{'\x14\xfb\x9c\x03\xd9', 'FPucA9k='},
+ TestPair{'\x14\xfb\x9c\x03', 'FPucAw=='},
+ // RFC 4648 examples
+ TestPair{'', ''},
+ TestPair{'f', 'Zg=='},
+ TestPair{'fo', 'Zm8='},
+ TestPair{'foo', 'Zm9v'},
+ TestPair{'foob', 'Zm9vYg=='},
+ TestPair{'fooba', 'Zm9vYmE='},
+ TestPair{'foobar', 'Zm9vYmFy'},
+ // Wikipedia examples
+ TestPair{'sure.', 'c3VyZS4='},
+ TestPair{'sure', 'c3VyZQ=='},
+ TestPair{'sur', 'c3Vy'},
+ TestPair{'su', 'c3U='},
+ TestPair{'leasure.', 'bGVhc3VyZS4='},
+ TestPair{'easure.', 'ZWFzdXJlLg=='},
+ TestPair{'asure.', 'YXN1cmUu'},
+ TestPair{'sure.', 'c3VyZS4='},
+ ]
+
+ man_pair = TestPair{'Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure.', 'TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0aGlzIHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1c3Qgb2YgdGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0aGUgY29udGludWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdlLCBleGNlZWRzIHRoZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4='}
+)
+
+// test_decode checks `base64.decode` against the long sample text, inputs with
+// incorrect padding and every entry of `pairs`.
+fn test_decode() {
+ assert base64.decode(man_pair.encoded) == man_pair.decoded.bytes()
+
+ // Test for incorrect padding.
+ assert base64.decode('aGk') == ''.bytes()
+ assert base64.decode('aGk=') == 'hi'.bytes()
+ assert base64.decode('aGk==') == ''.bytes()
+
+ for i, p in pairs {
+ got := base64.decode(p.encoded)
+ if got != p.decoded.bytes() {
+ eprintln('pairs[$i]: expected = $p.decoded, got = $got')
+ assert false
+ }
+ }
+}
+
+// test_decode_str checks `base64.decode_str` against the long sample text, inputs with
+// incorrect padding and every entry of `pairs`.
+fn test_decode_str() {
+ assert base64.decode_str(man_pair.encoded) == man_pair.decoded
+
+ // Test for incorrect padding.
+ assert base64.decode_str('aGk') == ''
+ assert base64.decode_str('aGk=') == 'hi'
+ assert base64.decode_str('aGk==') == ''
+
+ for i, p in pairs {
+ got := base64.decode_str(p.encoded)
+ if got != p.decoded {
+ eprintln('pairs[$i]: expected = $p.decoded, got = $got')
+ assert false
+ }
+ }
+}
+
+// test_encode checks `base64.encode` against the long sample text and every entry of `pairs`.
+fn test_encode() {
+ assert base64.encode(man_pair.decoded.bytes()) == man_pair.encoded
+
+ for i, p in pairs {
+ got := base64.encode(p.decoded.bytes())
+ if got != p.encoded {
+ eprintln('pairs[$i]: expected = $p.encoded, got = $got')
+ assert false
+ }
+ }
+}
+
+// test_encode_str checks `base64.encode_str` against the long sample text and every entry of `pairs`.
+fn test_encode_str() {
+ assert base64.encode_str(man_pair.decoded) == man_pair.encoded
+
+ for i, p in pairs {
+ got := base64.encode_str(p.decoded)
+ if got != p.encoded {
+ eprintln('pairs[$i]: expected = $p.encoded, got = $got')
+ assert false
+ }
+ }
+}
+
+// test_url_encode checks that `base64.url_encode` produces the expected unpadded output.
+fn test_url_encode() {
+ test := base64.url_encode('Hello Base64Url encoding!'.bytes())
+ assert test == 'SGVsbG8gQmFzZTY0VXJsIGVuY29kaW5nIQ'
+}
+
+// test_url_encode_str checks that `base64.url_encode_str` produces the expected unpadded output.
+fn test_url_encode_str() {
+ test := base64.url_encode_str('Hello Base64Url encoding!')
+ assert test == 'SGVsbG8gQmFzZTY0VXJsIGVuY29kaW5nIQ'
+}
+
+// test_url_decode checks that `base64.url_decode` accepts input without padding.
+fn test_url_decode() {
+ test := base64.url_decode('SGVsbG8gQmFzZTY0VXJsIGVuY29kaW5nIQ')
+ assert test == 'Hello Base64Url encoding!'.bytes()
+}
+
+// test_url_decode_str checks that `base64.url_decode_str` accepts input without padding.
+fn test_url_decode_str() {
+ test := base64.url_decode_str('SGVsbG8gQmFzZTY0VXJsIGVuY29kaW5nIQ')
+ assert test == 'Hello Base64Url encoding!'
+}
+
+// test_encode_null_byte checks that a 0 byte inside the input does not cut `encode` short.
+fn test_encode_null_byte() {
+ assert base64.encode([byte(`A`), 0, `C`]) == 'QQBD'
+}
+
+// test_encode_null_byte_str checks that a 0 byte inside the input does not cut `encode_str` short.
+fn test_encode_null_byte_str() {
+ // While this works, bytestr() does a memcpy
+ s := [byte(`A`), 0, `C`].bytestr()
+ assert base64.encode_str(s) == 'QQBD'
+}
+
+// test_decode_null_byte checks that `decode` returns a 0 byte that is part of the data.
+fn test_decode_null_byte() {
+ assert base64.decode('QQBD') == [byte(`A`), 0, `C`]
+}
+
+// test_decode_null_byte_str checks that `decode_str` returns a 0 byte that is part of the data.
+fn test_decode_null_byte_str() {
+ // While this works, bytestr() does a memcpy
+ s := [byte(`A`), 0, `C`].bytestr()
+ assert base64.decode_str('QQBD') == s
+}
+
+// test_decode_in_buffer_bytes decodes three RFC 4648 samples into the same
+// 8 byte output buffer and compares the first `n` bytes with the expected text.
+fn test_decode_in_buffer_bytes() {
+ rfc4648_pairs := [
+ TestPair{'foob', 'Zm9vYg=='},
+ TestPair{'fooba', 'Zm9vYmE='},
+ TestPair{'foobar', 'Zm9vYmFy'},
+ ]
+ // the two source buffers are replaced by new arrays inside the loop
+ mut src_dec_buf := []byte{len: 8}
+ mut src_enc_buf := []byte{len: 8}
+ mut out_buf := []byte{len: 8}
+
+ for p in rfc4648_pairs {
+ src_dec_buf = p.decoded.bytes()
+ src_enc_buf = p.encoded.bytes()
+ n := base64.decode_in_buffer_bytes(src_enc_buf, out_buf.data)
+ assert src_dec_buf == out_buf[..n]
+ }
+}
diff --git a/v_windows/v/vlib/encoding/binary/binary.v b/v_windows/v/vlib/encoding/binary/binary.v
new file mode 100644
index 0000000..d7fe298
--- /dev/null
+++ b/v_windows/v/vlib/encoding/binary/binary.v
@@ -0,0 +1,100 @@
+// Copyright (c) 2019-2021 Alexander Medvednikov. All rights reserved.
+// Use of this source code is governed by an MIT license
+// that can be found in the LICENSE file.
+module binary
+
+// Little Endian
+// little_endian_u16 reads the first 2 bytes of `b` as a little endian u16.
+// `b` must hold at least 2 bytes.
+[inline]
+pub fn little_endian_u16(b []byte) u16 {
+	_ = b[1] // bounds check
+	low := u16(b[0])
+	high := u16(b[1])
+	return (high << u16(8)) | low
+}
+
+// little_endian_put_u16 writes `v` into the first 2 bytes of `b` in little endian order.
+// `b` must hold at least 2 bytes.
+[inline]
+pub fn little_endian_put_u16(mut b []byte, v u16) {
+	_ = b[1] // bounds check
+	for i in 0 .. 2 {
+		b[i] = byte(v >> u16(8 * i))
+	}
+}
+
+// little_endian_u32 reads the first 4 bytes of `b` as a little endian u32.
+// `b` must hold at least 4 bytes.
+[inline]
+pub fn little_endian_u32(b []byte) u32 {
+	_ = b[3] // bounds check
+	mut value := u32(0)
+	for i in 0 .. 4 {
+		value |= u32(b[i]) << u32(8 * i)
+	}
+	return value
+}
+
+// little_endian_put_u32 writes `v` into the first 4 bytes of `b` in little endian order.
+// `b` must hold at least 4 bytes.
+[inline]
+pub fn little_endian_put_u32(mut b []byte, v u32) {
+	_ = b[3] // bounds check
+	for i in 0 .. 4 {
+		b[i] = byte(v >> u32(8 * i))
+	}
+}
+
+// little_endian_u64 reads the first 8 bytes of `b` as a little endian u64.
+// `b` must hold at least 8 bytes.
+[inline]
+pub fn little_endian_u64(b []byte) u64 {
+	_ = b[7] // bounds check
+	mut value := u64(0)
+	for i in 0 .. 8 {
+		value |= u64(b[i]) << u64(8 * i)
+	}
+	return value
+}
+
+// little_endian_put_u64 writes `v` into the first 8 bytes of `b` in little endian order.
+// `b` must hold at least 8 bytes.
+[inline]
+pub fn little_endian_put_u64(mut b []byte, v u64) {
+	_ = b[7] // bounds check
+	for i in 0 .. 8 {
+		b[i] = byte(v >> u64(8 * i))
+	}
+}
+
+// Big Endian
+// big_endian_u16 reads the first 2 bytes of `b` as a big endian u16.
+// `b` must hold at least 2 bytes.
+[inline]
+pub fn big_endian_u16(b []byte) u16 {
+	_ = b[1] // bounds check
+	high := u16(b[0])
+	low := u16(b[1])
+	return (high << u16(8)) | low
+}
+
+// big_endian_put_u16 writes `v` into the first 2 bytes of `b` in big endian order.
+// `b` must hold at least 2 bytes.
+[inline]
+pub fn big_endian_put_u16(mut b []byte, v u16) {
+	_ = b[1] // bounds check
+	for i in 0 .. 2 {
+		b[i] = byte(v >> u16(8 * (1 - i)))
+	}
+}
+
+// big_endian_u32 reads the first 4 bytes of `b` as a big endian u32.
+// `b` must hold at least 4 bytes.
+[inline]
+pub fn big_endian_u32(b []byte) u32 {
+	_ = b[3] // bounds check
+	mut value := u32(0)
+	for i in 0 .. 4 {
+		// make room for the next, less significant byte
+		value = (value << u32(8)) | u32(b[i])
+	}
+	return value
+}
+
+// big_endian_put_u32 writes `v` into the first 4 bytes of `b` in big endian order.
+// `b` must hold at least 4 bytes.
+[inline]
+pub fn big_endian_put_u32(mut b []byte, v u32) {
+	_ = b[3] // bounds check
+	for i in 0 .. 4 {
+		b[i] = byte(v >> u32(8 * (3 - i)))
+	}
+}
+
+// big_endian_u64 reads the first 8 bytes of `b` as a big endian u64.
+// `b` must hold at least 8 bytes.
+[inline]
+pub fn big_endian_u64(b []byte) u64 {
+	_ = b[7] // bounds check
+	mut value := u64(0)
+	for i in 0 .. 8 {
+		// make room for the next, less significant byte
+		value = (value << u64(8)) | u64(b[i])
+	}
+	return value
+}
+
+// big_endian_put_u64 writes `v` into the first 8 bytes of `b` in big endian order.
+// `b` must hold at least 8 bytes.
+[inline]
+pub fn big_endian_put_u64(mut b []byte, v u64) {
+	_ = b[7] // bounds check
+	for i in 0 .. 8 {
+		b[i] = byte(v >> u64(8 * (7 - i)))
+	}
+}
diff --git a/v_windows/v/vlib/encoding/csv/README.md b/v_windows/v/vlib/encoding/csv/README.md
new file mode 100644
index 0000000..01f3e4e
--- /dev/null
+++ b/v_windows/v/vlib/encoding/csv/README.md
@@ -0,0 +1,19 @@
+## Reader example
+
+```v
+import encoding.csv
+
+data := 'x,y\na,b,c\n'
+mut parser := csv.new_reader(data)
+// read each line
+for {
+ items := parser.read() or { break }
+ println(items)
+}
+```
+
+It prints:
+```
+['x', 'y']
+['a', 'b', 'c']
+```
diff --git a/v_windows/v/vlib/encoding/csv/reader.v b/v_windows/v/vlib/encoding/csv/reader.v
new file mode 100644
index 0000000..dafd022
--- /dev/null
+++ b/v_windows/v/vlib/encoding/csv/reader.v
@@ -0,0 +1,196 @@
+// Copyright (c) 2019-2021 Alexander Medvednikov. All rights reserved.
+// Use of this source code is governed by an MIT license
+// that can be found in the LICENSE file.
+module csv
+
+// Once interfaces are further along, the idea is to have something similar to
+// Go's io.Reader & bufio.Reader rather than reading the whole file into a string;
+// this Reader would then satisfy that interface. It is designed to be easily adapted.
+// ErrCommentIsDelimiter is the error returned by `read_record` when the
+// reader's `comment` byte is equal to its `delimiter` byte.
+struct ErrCommentIsDelimiter {
+ msg string = 'encoding.csv: comment cannot be the same as delimiter'
+ code int
+}
+
+// ErrInvalidDelimiter is the error returned when the configured delimiter is
+// rejected by `valid_delim`, and by `read_record` when it finishes a pass
+// without having found a delimiter or stored any field.
+struct ErrInvalidDelimiter {
+ msg string = 'encoding.csv: invalid delimiter'
+ code int
+}
+
+// ErrEndOfFile is the error returned by `read_line` once the read position
+// has reached the end of the data.
+struct ErrEndOfFile {
+ msg string = 'encoding.csv: end of file'
+ code int
+}
+
+// ErrInvalidLineEnding is the error returned by `read_line` when, reading
+// from the very start of the data, neither `\n` nor `\r` can be found.
+struct ErrInvalidLineEnding {
+ msg string = 'encoding.csv: could not find any valid line endings'
+ code int
+}
+
+// Reader parses CSV records out of a string that is held completely in memory.
+struct Reader {
+ // not used yet
+ // has_header bool
+ // headings []string
+ data string // the complete CSV text to parse
+pub mut:
+ delimiter byte // field separator; new_reader sets it to `,`
+ comment byte // marks comment lines, which read_record skips outside of a multi-line quoted field; new_reader sets it to `#`
+ is_mac_pre_osx_le bool // set by read_line when it falls back to bare `\r` line endings
+ row_pos int // byte offset into data at which the next read_line call starts
+}
+
+// new_reader creates a Reader over the CSV text in `data`.
+// The reader starts out with `,` as field delimiter and `#` as comment marker.
+pub fn new_reader(data string) &Reader {
+	return &Reader{
+		data: data
+		comment: `#`
+		delimiter: `,`
+	}
+}
+
+// read returns the next row of the CSV data as an array with one string per column.
+// Any error produced while reading the record is passed on to the caller.
+pub fn (mut r Reader) read() ?[]string {
+	return r.read_record()
+}
+
+// Once we have multi dimensional array
+// pub fn (mut r Reader) read_all() ?[][]string {
+// mut records := []string{}
+// for {
+// record := r.read_record() or {
+// if err.error == err_eof.error {
+// return records
+// } else {
+// return err
+// }
+// }
+// records << record
+// }
+// return records
+// }
+// read_line returns the next line of `r.data`, starting at `r.row_pos`, without
+// its line terminator, and moves `r.row_pos` just past that terminator.
+// Lines end with `\n` (a preceding `\r` is stripped as well) or, when no `\n`
+// exists on the first read, with a bare `\r`.
+// It returns ErrEndOfFile once all data has been consumed and
+// ErrInvalidLineEnding when the data contains no line terminator at all.
+fn (mut r Reader) read_line() ?string {
+	// `>=` and not `==`: after an unterminated last line `row_pos` ends up one
+	// past `r.data.len`
+	if r.row_pos >= r.data.len {
+		return IError(&ErrEndOfFile{})
+	}
+	le := if r.is_mac_pre_osx_le { '\r' } else { '\n' }
+	mut i := r.data.index_after(le, r.row_pos)
+	if i == -1 {
+		if r.row_pos == 0 {
+			// check for pre osx mac line endings
+			i = r.data.index_after('\r', r.row_pos)
+			if i != -1 {
+				r.is_mac_pre_osx_le = true
+			} else {
+				// no valid line endings found
+				return IError(&ErrInvalidLineEnding{})
+			}
+		} else {
+			// No line ending at the end of the data: the last line extends up to
+			// the very end (using `len - 1` here would cut off its last character).
+			i = r.data.len
+		}
+	}
+	mut line := r.data[r.row_pos..i]
+	r.row_pos = i + 1
+	// normalize win line endings (remove extra \r)
+	if !r.is_mac_pre_osx_le && (line.len >= 1 && line[line.len - 1] == `\r`) {
+		line = line[..line.len - 1]
+	}
+	return line
+}
+
+// read_record parses the next record and returns its fields.
+// It fails with ErrCommentIsDelimiter or ErrInvalidDelimiter when the reader
+// is misconfigured, and propagates any error from read_line (such as the end
+// of the data). Outside of a quoted field, empty lines and lines starting with
+// the comment byte are skipped. A quoted field may span several lines, and a
+// doubled quote (`""`) inside it stands for one literal quote.
+fn (mut r Reader) read_record() ?[]string {
+ if r.delimiter == r.comment {
+ return IError(&ErrCommentIsDelimiter{})
+ }
+ if !valid_delim(r.delimiter) {
+ return IError(&ErrInvalidDelimiter{})
+ }
+ // need_read: another input line has to be fetched before parsing can go on
+ mut need_read := true
+ // keep_raw: we are inside an unterminated quoted field, so the following
+ // lines belong to that field and are appended together with their newline
+ mut keep_raw := false
+ // line: the not yet parsed remainder of the current record
+ mut line := ''
+ mut fields := []string{}
+ // i: position of the last delimiter found in an unquoted field, -1 if none yet
+ mut i := -1
+ for {
+ if need_read {
+ l := r.read_line() ?
+ if l.len <= 0 {
+ // empty line: part of a quoted field when keep_raw, skipped otherwise
+ if keep_raw {
+ line += '\n'
+ }
+ continue
+ } else if l[0] == r.comment {
+ // comment line: kept verbatim inside a quoted field, skipped otherwise
+ if keep_raw {
+ line += '\n' + l
+ }
+ continue
+ } else {
+ if keep_raw {
+ line += '\n'
+ }
+ line += l
+ }
+ need_read = false
+ keep_raw = false
+ }
+ if line.len == 0 || line[0] != `"` { // not quoted
+ j := line.index(r.delimiter.ascii_str()) or {
+ // last
+ fields << line[..line.len]
+ break
+ }
+ i = j
+ fields << line[..i]
+ // continue with whatever follows the delimiter
+ line = line[i + 1..]
+ continue
+ } else { // quoted
+ mut need_more := true
+ mut has_double_quotes := false
+ mut j := 0
+ // scan for the closing quote, starting right after the opening one
+ mut n := 1
+ for n < line.len {
+ if line[n] == `"` {
+ if n == line.len - 1 || line[n + 1] != `"` {
+ // a quote that is not followed by another quote closes the field
+ need_more = false
+ // j is the closing quote's index once the opening quote is cut off below
+ j = n - 1
+ break
+ } else {
+ // `""` is an escaped quote: remember it and step over the pair
+ has_double_quotes = true
+ n++
+ }
+ }
+ n++
+ }
+ if need_more {
+ // no closing quote on this line: the field continues on the next line
+ need_read = true
+ keep_raw = true
+ continue
+ }
+ // drop the opening quote; line[..j] is now the raw field content
+ line = line[1..]
+ if j + 1 == line.len {
+ // last record
+ fields << if has_double_quotes { line[..j].replace('""', '"') } else { line[..j] }
+ break
+ }
+ next := line[j + 1]
+ if next == r.delimiter {
+ fields << if has_double_quotes { line[..j].replace('""', '"') } else { line[..j] }
+ if j + 2 == line.len {
+ // the delimiter was the last byte; the next pass adds an empty last field
+ line = ''
+ } else {
+ line = line[j + 2..]
+ }
+ continue
+ }
+ }
+ // only reached when a closing quote is followed by something other than the delimiter
+ if i <= -1 && fields.len == 0 {
+ return IError(&ErrInvalidDelimiter{})
+ }
+ }
+ return fields
+}
+
+// valid_delim reports whether `b` may be used as a field delimiter.
+// The NUL byte, the double quote, `\r` and `\n` are rejected.
+fn valid_delim(b byte) bool {
+	is_reserved := b == 0 || b == `"` || b == `\r` || b == `\n`
+	return !is_reserved
+}
diff --git a/v_windows/v/vlib/encoding/csv/reader_test.v b/v_windows/v/vlib/encoding/csv/reader_test.v
new file mode 100644
index 0000000..cd54827
--- /dev/null
+++ b/v_windows/v/vlib/encoding/csv/reader_test.v
@@ -0,0 +1,253 @@
+import encoding.csv
+
+// Reads four-column rows, one of them ending in a quoted field, and checks
+// that the row starting with `#` is not returned (3 rows in total).
+fn test_encoding_csv_reader() {
+ data := 'name,email,phone,other\njoe,joe@blow.com,0400000000,test\nsam,sam@likesham.com,0433000000,"test quoted field"\n#chris,chris@nomail.com,94444444,"commented row"\n'
+ mut csv_reader := csv.new_reader(data)
+ mut row_count := 0
+ for {
+ row := csv_reader.read() or { break }
+ row_count++
+ if row_count == 1 {
+ assert row[0] == 'name'
+ assert row[1] == 'email'
+ assert row[2] == 'phone'
+ assert row[3] == 'other'
+ } else if row_count == 2 {
+ assert row[0] == 'joe'
+ assert row[1] == 'joe@blow.com'
+ assert row[2] == '0400000000'
+ assert row[3] == 'test'
+ } else if row_count == 3 {
+ assert row[0] == 'sam'
+ assert row[1] == 'sam@likesham.com'
+ assert row[2] == '0433000000'
+ // quoted field
+ assert row[3] == 'test quoted field'
+ }
+ }
+ assert row_count == 3
+}
+
+// Checks that rows separated by `\n` are split correctly.
+fn test_line_break_lf() {
+ lf_data := 'name,email\njoe,joe@blow.com\n'
+ mut csv_reader := csv.new_reader(lf_data)
+ mut row_count := 0
+ for {
+ row := csv_reader.read() or { break }
+ row_count++
+ if row_count == 1 {
+ assert row[0] == 'name'
+ assert row[1] == 'email'
+ } else if row_count == 2 {
+ assert row[0] == 'joe'
+ assert row[1] == 'joe@blow.com'
+ }
+ }
+ assert row_count == 2
+}
+
+// Checks that rows separated by a bare `\r` are split correctly.
+fn test_line_break_cr() {
+ cr_data := 'name,email\rjoe,joe@blow.com\r'
+ mut csv_reader := csv.new_reader(cr_data)
+ mut row_count := 0
+ for {
+ row := csv_reader.read() or { break }
+ row_count++
+ if row_count == 1 {
+ assert row[0] == 'name'
+ assert row[1] == 'email'
+ } else if row_count == 2 {
+ assert row[0] == 'joe'
+ assert row[1] == 'joe@blow.com'
+ }
+ }
+ assert row_count == 2
+}
+
+// Checks that rows separated by `\r\n` are split correctly and that the `\r`
+// does not end up in the last field.
+fn test_line_break_crlf() {
+ crlf_data := 'name,email\r\njoe,joe@blow.com\r\n'
+ mut csv_reader := csv.new_reader(crlf_data)
+ mut row_count := 0
+ for {
+ row := csv_reader.read() or { break }
+ row_count++
+ if row_count == 1 {
+ assert row[0] == 'name'
+ assert row[1] == 'email'
+ } else if row_count == 2 {
+ assert row[0] == 'joe'
+ assert row[1] == 'joe@blow.com'
+ }
+ }
+ assert row_count == 2
+}
+
+// Checks that a last row without a trailing line ending is still counted.
+// Only the number of rows is asserted here, not their content.
+fn test_no_line_ending() {
+ data := 'name,email,phone,other\njoe,joe@blow.com,0400000000,test'
+ mut csv_reader := csv.new_reader(data)
+ mut row_count := 0
+ for {
+ csv_reader.read() or { break }
+ row_count++
+ }
+ assert row_count == 2
+}
+
+// Checks that a row ending in a delimiter yields an empty last field, with
+// all other fields being quoted.
+fn test_last_field_empty() {
+ data := '"name","description","value"\n"one","first","1"\n"two","second",\n'
+ mut csv_reader := csv.new_reader(data)
+ mut row_count := 0
+ for {
+ row := csv_reader.read() or { break }
+ row_count++
+ if row_count == 1 {
+ assert row[0] == 'name'
+ assert row[1] == 'description'
+ assert row[2] == 'value'
+ } else if row_count == 2 {
+ assert row[0] == 'one'
+ assert row[1] == 'first'
+ assert row[2] == '1'
+ } else if row_count == 3 {
+ assert row[0] == 'two'
+ assert row[1] == 'second'
+ assert row[2] == ''
+ }
+ }
+ assert row_count == 3
+}
+
+// Checks that an empty unquoted field is returned as '' in every column
+// position (first, middle and last).
+fn test_empty_fields_no_quotes() {
+ data := '1,2,3,4\n,6,7,8\n9,,11,12\n13,14,,16\n17,18,19,\n'
+
+ mut csv_reader := csv.new_reader(data)
+ mut row_count := 0
+ for {
+ row := csv_reader.read() or { break }
+ row_count++
+ if row_count == 1 {
+ assert row[0] == '1'
+ assert row[1] == '2'
+ assert row[2] == '3'
+ assert row[3] == '4'
+ } else if row_count == 2 {
+ assert row[0] == ''
+ assert row[1] == '6'
+ assert row[2] == '7'
+ assert row[3] == '8'
+ } else if row_count == 3 {
+ assert row[0] == '9'
+ assert row[1] == ''
+ assert row[2] == '11'
+ assert row[3] == '12'
+ } else if row_count == 4 {
+ assert row[0] == '13'
+ assert row[1] == '14'
+ assert row[2] == ''
+ assert row[3] == '16'
+ } else if row_count == 5 {
+ assert row[0] == '17'
+ assert row[1] == '18'
+ assert row[2] == '19'
+ assert row[3] == ''
+ }
+ }
+ assert row_count == 5
+}
+
+// Checks that empty lines between rows are skipped and not reported as rows.
+fn test_empty_line() {
+ data := '"name","description","value"\n\n\n"one","first","1"\n\n"two","second",\n'
+ mut csv_reader := csv.new_reader(data)
+ mut row_count := 0
+ for {
+ row := csv_reader.read() or { break }
+ row_count++
+ if row_count == 1 {
+ assert row[0] == 'name'
+ assert row[1] == 'description'
+ assert row[2] == 'value'
+ } else if row_count == 2 {
+ assert row[0] == 'one'
+ assert row[1] == 'first'
+ assert row[2] == '1'
+ } else if row_count == 3 {
+ assert row[0] == 'two'
+ assert row[1] == 'second'
+ }
+ }
+ assert row_count == 3
+}
+
+// Checks that a quoted field spanning several lines, including an empty one,
+// is returned as a single field with its newlines preserved.
+// The string literal below contains real line breaks on purpose.
+fn test_field_multiple_line() {
+ data := '"name","multiple
+
+ line","value"\n"one","first","1"\n'
+ mut csv_reader := csv.new_reader(data)
+ mut row_count := 0
+ for {
+ row := csv_reader.read() or { break }
+ row_count++
+ if row_count == 1 {
+ assert row[0] == 'name'
+ assert row[1] == 'multiple\n\n line'
+ assert row[2] == 'value'
+ } else if row_count == 2 {
+ assert row[0] == 'one'
+ assert row[1] == 'first'
+ assert row[2] == '1'
+ }
+ }
+ assert row_count == 2
+}
+
+// Checks rows in which only some fields are quoted, with the quoted field in
+// the first, middle or last position.
+fn test_field_quotes_for_parts() {
+ data := 'a1,"b1",c1\n"a2",b2,c2\na3,b3,"c3"\na4,b4,c4\n'
+ mut csv_reader := csv.new_reader(data)
+ mut row_count := 0
+ for {
+ row := csv_reader.read() or { break }
+ row_count++
+ if row_count == 1 {
+ assert row[0] == 'a1'
+ assert row[1] == 'b1'
+ assert row[2] == 'c1'
+ } else if row_count == 2 {
+ assert row[0] == 'a2'
+ assert row[1] == 'b2'
+ assert row[2] == 'c2'
+ } else if row_count == 3 {
+ assert row[0] == 'a3'
+ assert row[1] == 'b3'
+ assert row[2] == 'c3'
+ } else if row_count == 4 {
+ assert row[0] == 'a4'
+ assert row[1] == 'b4'
+ assert row[2] == 'c4'
+ }
+ }
+ assert row_count == 4
+}
+
+// Checks that doubled quotes inside quoted fields are returned as single
+// quotes, also in combination with a line break inside the field.
+fn test_field_double_quotes() {
+ row1 := '11,"12\n13"\n'
+ row2 := '21,"2""2""\n23"\n'
+ row3 := '"3""1""",32\n'
+ data := row1 + row2 + row3
+ mut csv_reader := csv.new_reader(data)
+ mut row_count := 0
+ for {
+ row := csv_reader.read() or { break }
+ row_count++
+ if row_count == 1 {
+ assert row[0] == '11'
+ assert row[1] == '12\n13'
+ } else if row_count == 2 {
+ assert row[0] == '21'
+ assert row[1] == '2"2"\n23'
+ } else if row_count == 3 {
+ assert row[0] == '3"1"'
+ assert row[1] == '32'
+ }
+ }
+ assert row_count == 3
+}
diff --git a/v_windows/v/vlib/encoding/csv/writer.v b/v_windows/v/vlib/encoding/csv/writer.v
new file mode 100644
index 0000000..735ca20
--- /dev/null
+++ b/v_windows/v/vlib/encoding/csv/writer.v
@@ -0,0 +1,80 @@
+// Copyright (c) 2019-2021 Alexander Medvednikov. All rights reserved.
+// Use of this source code is governed by an MIT license
+// that can be found in the LICENSE file.
+module csv
+
+import strings
+
+// Writer collects CSV encoded records in an in-memory string builder.
+struct Writer {
+mut:
+ sb strings.Builder // receives the encoded output of every write call
+pub mut:
+ use_crlf bool // when true, write uses `\r\n` as line ending instead of `\n`
+ delimiter byte // field separator; new_writer sets it to `,`
+}
+
+// new_writer creates a Writer that separates fields with `,` and whose
+// string builder is created with an initial size of 200.
+pub fn new_writer() &Writer {
+	return &Writer{
+		sb: strings.new_builder(200)
+		delimiter: `,`
+	}
+}
+
+// write appends `record` as one CSV line to the writer's internal buffer.
+// Fields are separated by `w.delimiter`; fields for which
+// `field_needs_quotes` is true are wrapped in double quotes, with every `"`
+// inside them doubled. The line is terminated with `\r\n` when `w.use_crlf`
+// is set and with `\n` otherwise.
+// It returns ErrInvalidDelimiter when the delimiter is rejected by
+// `valid_delim`, and `true` otherwise.
+pub fn (mut w Writer) write(record []string) ?bool {
+ if !valid_delim(w.delimiter) {
+ return IError(&ErrInvalidDelimiter{})
+ }
+ le := if w.use_crlf { '\r\n' } else { '\n' }
+ for n, field_ in record {
+ mut field := field_
+ if n > 0 {
+ w.sb.write_string(w.delimiter.ascii_str())
+ }
+ if !w.field_needs_quotes(field) {
+ w.sb.write_string(field)
+ continue
+ }
+ w.sb.write_string('"')
+ // consume the field piece by piece: copy everything up to the next
+ // special byte as is, then emit the replacement for that byte
+ for field.len > 0 {
+ mut i := field.index_any('"\r\n')
+ if i < 0 {
+ i = field.len
+ }
+ w.sb.write_string(field[..i])
+ field = field[i..]
+ if field.len > 0 {
+ z := field[0]
+ match z {
+ `"` { w.sb.write_string('""') }
+ // each `\r` and each `\n` byte is replaced by the configured line ending,
+ // so a `\r\n` pair in the field produces two line endings
+ // NOTE(review): confirm that this is intended
+ `\r`, `\n` { w.sb.write_string(le) }
+ else {}
+ }
+ field = field[1..]
+ }
+ }
+ w.sb.write_string('"')
+ }
+ w.sb.write_string(le)
+ return true
+}
+
+// Once we have multi dimensional array
+// pub fn (w &Writer) write_all(records [][]string) {
+// for _, record in records {
+// w.write(record)
+// }
+// }
+// field_needs_quotes reports whether `field` has to be wrapped in quotes when
+// written: that is the case when it contains the delimiter, a double quote,
+// `\r` or `\n`. An empty field never needs quotes.
+fn (w &Writer) field_needs_quotes(field string) bool {
+	if field.len == 0 {
+		return false
+	}
+	has_delimiter := field.contains(w.delimiter.ascii_str())
+	has_special := field.index_any('"\r\n') != -1
+	return has_delimiter || has_special
+}
+
+// str returns the CSV text produced by the internal string builder.
+pub fn (mut w Writer) str() string {
+	output := w.sb.str()
+	return output
+}
diff --git a/v_windows/v/vlib/encoding/csv/writer_test.v b/v_windows/v/vlib/encoding/csv/writer_test.v
new file mode 100644
index 0000000..92882dd
--- /dev/null
+++ b/v_windows/v/vlib/encoding/csv/writer_test.v
@@ -0,0 +1,11 @@
+import encoding.csv
+
+// Writes three records and checks the resulting text: fields are joined with
+// `,`, lines end with `\n`, and only the field containing the delimiter is quoted.
+fn test_encoding_csv_writer() {
+ mut csv_writer := csv.new_writer()
+
+ csv_writer.write(['name', 'email', 'phone', 'other']) or {}
+ csv_writer.write(['joe', 'joe@blow.com', '0400000000', 'test']) or {}
+ csv_writer.write(['sam', 'sam@likesham.com', '0433000000', 'needs, quoting']) or {}
+
+ assert csv_writer.str() == 'name,email,phone,other\njoe,joe@blow.com,0400000000,test\nsam,sam@likesham.com,0433000000,"needs, quoting"\n'
+}
diff --git a/v_windows/v/vlib/encoding/hex/hex.v b/v_windows/v/vlib/encoding/hex/hex.v
new file mode 100644
index 0000000..99387f1
--- /dev/null
+++ b/v_windows/v/vlib/encoding/hex/hex.v
@@ -0,0 +1,62 @@
+module hex
+
+import strings
+
+// decode converts a hex string into an array of bytes, reading 2 ASCII hex
+// characters per output byte. A leading `0x` or `0X` is skipped.
+// If the number of hex characters is odd, the string is treated as if a `0`
+// had been prepended to it. An error is returned for the first character that
+// is not a hex digit.
+pub fn decode(s string) ?[]byte {
+	mut hex_str := s
+	if hex_str.len >= 2 && s[0] == `0` && (s[1] == `x` || s[1] == `X`) {
+		hex_str = s[2..]
+	}
+	if hex_str.len == 0 {
+		return []byte{}
+	}
+	// 1 when a lone leading nibble forms the first byte, 0 otherwise
+	odd := hex_str.len & 1
+	// hex_str.len / 2, rounded up
+	mut bytes := []byte{cap: (hex_str.len + 1) >> 1}
+	if odd == 1 {
+		first := char2nibble(hex_str[0]) ?
+		bytes << first
+	}
+	// all remaining characters come in pairs
+	for i := odd; i < hex_str.len; i += 2 {
+		high := char2nibble(hex_str[i]) ?
+		low := char2nibble(hex_str[i + 1]) ?
+		bytes << (high << 4) | low
+	}
+	return bytes
+}
+
+// encode converts an array of bytes into a string of ASCII hex characters,
+// appending the result of `hex()` for every input byte.
+[manualfree]
+pub fn encode(bytes []byte) string {
+	mut sb := strings.new_builder(bytes.len * 2)
+	for i in 0 .. bytes.len {
+		sb.write_string(bytes[i].hex())
+	}
+	encoded := sb.str()
+	// the function is marked [manualfree], so the builder is released by hand
+	unsafe { sb.free() }
+	return encoded
+}
+
+// char2nibble converts an ASCII hex character to its numeric value (0-15).
+// Both upper and lower case letters are accepted; any other character
+// results in an error.
+fn char2nibble(b byte) ?byte {
+	if b >= `0` && b <= `9` {
+		return b - byte(`0`)
+	}
+	if b >= `A` && b <= `F` {
+		return b - byte(`A`) + 10
+	}
+	if b >= `a` && b <= `f` {
+		return b - byte(`a`) + 10
+	}
+	return error('invalid hex char $b.ascii_str()')
+}
diff --git a/v_windows/v/vlib/encoding/hex/hex_test.v b/v_windows/v/vlib/encoding/hex/hex_test.v
new file mode 100644
index 0000000..62501e9
--- /dev/null
+++ b/v_windows/v/vlib/encoding/hex/hex_test.v
@@ -0,0 +1,54 @@
+module hex
+
+// Checks decode for empty input, single digits, even and odd lengths, and
+// both lower and upper case digits.
+fn test_decode() ? {
+ assert decode('') ? == []
+ assert decode('0') ? == [byte(0x0)]
+ assert decode('f') ? == [byte(0xf)]
+ assert decode('0f') ? == [byte(0x0f)]
+ assert decode('ff') ? == [byte(0xff)]
+ assert decode('123') ? == [byte(0x1), 0x23]
+ assert decode('1234') ? == [byte(0x12), 0x34]
+ assert decode('12345') ? == [byte(0x1), 0x23, 0x45]
+ assert decode('0123456789abcdef') ? == [byte(0x01), 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef]
+ assert decode('123456789ABCDEF') ? == [byte(0x01), 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef]
+}
+
+// Checks that decode returns an error for inputs containing a character that
+// is not a hex digit; the test fails if any of them decodes successfully.
+fn test_decode_fails() ? {
+ if x := decode('foo') {
+ return error('expected decode to fail, got $x')
+ }
+ if x := decode('g') {
+ return error('expected decode to fail, got $x')
+ }
+ if x := decode('000000000g') {
+ return error('expected decode to fail, got $x')
+ }
+ if x := decode('_') {
+ return error('expected decode to fail, got $x')
+ }
+ if x := decode('!') {
+ return error('expected decode to fail, got $x')
+ }
+}
+
+// Checks encode on the output of decode: the result always has an even
+// length (odd inputs gain a leading `0`) and uses lower case digits.
+fn test_encode() ? {
+ assert encode(decode('') ?) == ''
+ assert encode(decode('0') ?) == '00'
+ assert encode(decode('f') ?) == '0f'
+ assert encode(decode('0f') ?) == '0f'
+ assert encode(decode('ff') ?) == 'ff'
+ assert encode(decode('123') ?) == '0123'
+ assert encode(decode('1234') ?) == '1234'
+ assert encode(decode('12345') ?) == '012345'
+ assert encode(decode('abcdef') ?) == 'abcdef'
+ assert encode(decode('ABCDEF') ?) == 'abcdef'
+}
+
+// Checks that decode ignores a leading `0x` or `0X` prefix.
+fn test_decode_0x() ? {
+ assert decode('0x') ? == []
+ assert decode('0x0') ? == [byte(0x0)]
+ assert decode('0X1234') ? == [byte(0x12), 0x34]
+ assert decode('0x12345') ? == [byte(0x1), 0x23, 0x45]
+ assert decode('0x0123456789abcdef') ? == [byte(0x01), 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef]
+ assert decode('0X123456789ABCDEF') ? == [byte(0x01), 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef]
+}
diff --git a/v_windows/v/vlib/encoding/utf8/east_asian/east_asian_width.v b/v_windows/v/vlib/encoding/utf8/east_asian/east_asian_width.v
new file mode 100644
index 0000000..d1ac547
--- /dev/null
+++ b/v_windows/v/vlib/encoding/utf8/east_asian/east_asian_width.v
@@ -0,0 +1,1204 @@
+// Copyright (c) 2021 Takahiro Yaota, a.k.a. zakuro. All rights reserved.
+// Use of this source code is governed by an MIT license
+// that can be found in the LICENSE file.
+
+module east_asian
+
+import encoding.utf8
+
+// EastAsianWidthProperty represents the East_Asian_Width informative property.
+pub enum EastAsianWidthProperty {
+ full
+ half
+ wide
+ narrow
+ ambiguous
+ neutral
+}
+
+// display_width returns the display width of `s`, summed over its characters:
+// a character counts as `ambiguous_width` when its East Asian Width property
+// is ambiguous, as 2 when it is full or wide, and as 1 otherwise.
+pub fn display_width(s string, ambiguous_width int) int {
+	mut total := 0
+	mut pos := 0
+	for pos < s.len {
+		property := east_asian_width_property_at(s, pos)
+		if property == .ambiguous {
+			total += ambiguous_width
+		} else if property == .full || property == .wide {
+			total += 2
+		} else {
+			total += 1
+		}
+		// step over all bytes of the current character
+		pos += utf8_char_len(s[pos])
+	}
+	return total
+}
+
+// east_asian_width_property_at returns the East Asian Width property of the
+// code point that `utf8.get_uchar` reads at `s[index]`.
+// It runs a binary search over `east_asian_width_data`, in which every entry
+// covers the `len` code points starting at `point`. Code points that are not
+// covered by any entry are reported as `.neutral`.
+pub fn east_asian_width_property_at(s string, index int) EastAsianWidthProperty {
+	codepoint := utf8.get_uchar(s, index)
+	mut left, mut right := 0, east_asian.east_asian_width_data.len - 1
+	for left <= right {
+		middle := left + ((right - left) / 2)
+		entry := east_asian.east_asian_width_data[middle]
+		if codepoint < entry.point {
+			right = middle - 1
+			continue
+		}
+
+		// `end` is the first code point AFTER the entry's range, so it has to be
+		// excluded; otherwise the first code point of the following range could
+		// be attributed to this entry.
+		end := entry.point + entry.len
+		if codepoint >= end {
+			left = middle + 1
+			continue
+		}
+
+		return entry.property
+	}
+	return .neutral
+}
+
+// EAWEntry describes one run of consecutive code points that share the same
+// East Asian Width property.
+struct EAWEntry {
+ property EastAsianWidthProperty // property of every code point in the run
+ point int // first code point of the run
+ len int // number of code points in the run (the table's first entry, 0x0000 with len 32, is followed by 0x0020)
+}
+
+// EastAsianWidth-13.0.0.txt
+const (
+ east_asian_width_data = [
+ EAWEntry{.neutral, 0x0000, 32},
+ EAWEntry{.narrow, 0x0020, 95},
+ EAWEntry{.neutral, 0x007f, 34},
+ EAWEntry{.ambiguous, 0x00a1, 1},
+ EAWEntry{.narrow, 0x00a2, 2},
+ EAWEntry{.ambiguous, 0x00a4, 1},
+ EAWEntry{.narrow, 0x00a5, 2},
+ EAWEntry{.ambiguous, 0x00a7, 2},
+ EAWEntry{.neutral, 0x00a9, 1},
+ EAWEntry{.ambiguous, 0x00aa, 1},
+ EAWEntry{.neutral, 0x00ab, 1},
+ EAWEntry{.narrow, 0x00ac, 1},
+ EAWEntry{.ambiguous, 0x00ad, 2},
+ EAWEntry{.narrow, 0x00af, 1},
+ EAWEntry{.ambiguous, 0x00b0, 5},
+ EAWEntry{.neutral, 0x00b5, 1},
+ EAWEntry{.ambiguous, 0x00b6, 5},
+ EAWEntry{.neutral, 0x00bb, 1},
+ EAWEntry{.ambiguous, 0x00bc, 4},
+ EAWEntry{.neutral, 0x00c0, 6},
+ EAWEntry{.ambiguous, 0x00c6, 1},
+ EAWEntry{.neutral, 0x00c7, 9},
+ EAWEntry{.ambiguous, 0x00d0, 1},
+ EAWEntry{.neutral, 0x00d1, 6},
+ EAWEntry{.ambiguous, 0x00d7, 2},
+ EAWEntry{.neutral, 0x00d9, 5},
+ EAWEntry{.ambiguous, 0x00de, 4},
+ EAWEntry{.neutral, 0x00e2, 4},
+ EAWEntry{.ambiguous, 0x00e6, 1},
+ EAWEntry{.neutral, 0x00e7, 1},
+ EAWEntry{.ambiguous, 0x00e8, 3},
+ EAWEntry{.neutral, 0x00eb, 1},
+ EAWEntry{.ambiguous, 0x00ec, 2},
+ EAWEntry{.neutral, 0x00ee, 2},
+ EAWEntry{.ambiguous, 0x00f0, 1},
+ EAWEntry{.neutral, 0x00f1, 1},
+ EAWEntry{.ambiguous, 0x00f2, 2},
+ EAWEntry{.neutral, 0x00f4, 3},
+ EAWEntry{.ambiguous, 0x00f7, 4},
+ EAWEntry{.neutral, 0x00fb, 1},
+ EAWEntry{.ambiguous, 0x00fc, 1},
+ EAWEntry{.neutral, 0x00fd, 1},
+ EAWEntry{.ambiguous, 0x00fe, 1},
+ EAWEntry{.neutral, 0x00ff, 2},
+ EAWEntry{.ambiguous, 0x0101, 1},
+ EAWEntry{.neutral, 0x0102, 15},
+ EAWEntry{.ambiguous, 0x0111, 1},
+ EAWEntry{.neutral, 0x0112, 1},
+ EAWEntry{.ambiguous, 0x0113, 1},
+ EAWEntry{.neutral, 0x0114, 7},
+ EAWEntry{.ambiguous, 0x011b, 1},
+ EAWEntry{.neutral, 0x011c, 10},
+ EAWEntry{.ambiguous, 0x0126, 2},
+ EAWEntry{.neutral, 0x0128, 3},
+ EAWEntry{.ambiguous, 0x012b, 1},
+ EAWEntry{.neutral, 0x012c, 5},
+ EAWEntry{.ambiguous, 0x0131, 3},
+ EAWEntry{.neutral, 0x0134, 4},
+ EAWEntry{.ambiguous, 0x0138, 1},
+ EAWEntry{.neutral, 0x0139, 6},
+ EAWEntry{.ambiguous, 0x013f, 4},
+ EAWEntry{.neutral, 0x0143, 1},
+ EAWEntry{.ambiguous, 0x0144, 1},
+ EAWEntry{.neutral, 0x0145, 3},
+ EAWEntry{.ambiguous, 0x0148, 4},
+ EAWEntry{.neutral, 0x014c, 1},
+ EAWEntry{.ambiguous, 0x014d, 1},
+ EAWEntry{.neutral, 0x014e, 4},
+ EAWEntry{.ambiguous, 0x0152, 2},
+ EAWEntry{.neutral, 0x0154, 18},
+ EAWEntry{.ambiguous, 0x0166, 2},
+ EAWEntry{.neutral, 0x0168, 3},
+ EAWEntry{.ambiguous, 0x016b, 1},
+ EAWEntry{.neutral, 0x016c, 98},
+ EAWEntry{.ambiguous, 0x01ce, 1},
+ EAWEntry{.neutral, 0x01cf, 1},
+ EAWEntry{.ambiguous, 0x01d0, 1},
+ EAWEntry{.neutral, 0x01d1, 1},
+ EAWEntry{.ambiguous, 0x01d2, 1},
+ EAWEntry{.neutral, 0x01d3, 1},
+ EAWEntry{.ambiguous, 0x01d4, 1},
+ EAWEntry{.neutral, 0x01d5, 1},
+ EAWEntry{.ambiguous, 0x01d6, 1},
+ EAWEntry{.neutral, 0x01d7, 1},
+ EAWEntry{.ambiguous, 0x01d8, 1},
+ EAWEntry{.neutral, 0x01d9, 1},
+ EAWEntry{.ambiguous, 0x01da, 1},
+ EAWEntry{.neutral, 0x01db, 1},
+ EAWEntry{.ambiguous, 0x01dc, 1},
+ EAWEntry{.neutral, 0x01dd, 116},
+ EAWEntry{.ambiguous, 0x0251, 1},
+ EAWEntry{.neutral, 0x0252, 15},
+ EAWEntry{.ambiguous, 0x0261, 1},
+ EAWEntry{.neutral, 0x0262, 98},
+ EAWEntry{.ambiguous, 0x02c4, 1},
+ EAWEntry{.neutral, 0x02c5, 2},
+ EAWEntry{.ambiguous, 0x02c7, 1},
+ EAWEntry{.neutral, 0x02c8, 1},
+ EAWEntry{.ambiguous, 0x02c9, 3},
+ EAWEntry{.neutral, 0x02cc, 1},
+ EAWEntry{.ambiguous, 0x02cd, 1},
+ EAWEntry{.neutral, 0x02ce, 2},
+ EAWEntry{.ambiguous, 0x02d0, 1},
+ EAWEntry{.neutral, 0x02d1, 7},
+ EAWEntry{.ambiguous, 0x02d8, 4},
+ EAWEntry{.neutral, 0x02dc, 1},
+ EAWEntry{.ambiguous, 0x02dd, 1},
+ EAWEntry{.neutral, 0x02de, 1},
+ EAWEntry{.ambiguous, 0x02df, 1},
+ EAWEntry{.neutral, 0x02e0, 32},
+ EAWEntry{.ambiguous, 0x0300, 112},
+ EAWEntry{.neutral, 0x0370, 8},
+ EAWEntry{.neutral, 0x037a, 6},
+ EAWEntry{.neutral, 0x0384, 7},
+ EAWEntry{.neutral, 0x038c, 1},
+ EAWEntry{.neutral, 0x038e, 3},
+ EAWEntry{.ambiguous, 0x0391, 17},
+ EAWEntry{.ambiguous, 0x03a3, 7},
+ EAWEntry{.neutral, 0x03aa, 7},
+ EAWEntry{.ambiguous, 0x03b1, 17},
+ EAWEntry{.neutral, 0x03c2, 1},
+ EAWEntry{.ambiguous, 0x03c3, 7},
+ EAWEntry{.neutral, 0x03ca, 55},
+ EAWEntry{.ambiguous, 0x0401, 1},
+ EAWEntry{.neutral, 0x0402, 14},
+ EAWEntry{.ambiguous, 0x0410, 64},
+ EAWEntry{.neutral, 0x0450, 1},
+ EAWEntry{.ambiguous, 0x0451, 1},
+ EAWEntry{.neutral, 0x0452, 222},
+ EAWEntry{.neutral, 0x0531, 38},
+ EAWEntry{.neutral, 0x0559, 50},
+ EAWEntry{.neutral, 0x058d, 3},
+ EAWEntry{.neutral, 0x0591, 55},
+ EAWEntry{.neutral, 0x05d0, 27},
+ EAWEntry{.neutral, 0x05ef, 6},
+ EAWEntry{.neutral, 0x0600, 29},
+ EAWEntry{.neutral, 0x061e, 240},
+ EAWEntry{.neutral, 0x070f, 60},
+ EAWEntry{.neutral, 0x074d, 101},
+ EAWEntry{.neutral, 0x07c0, 59},
+ EAWEntry{.neutral, 0x07fd, 49},
+ EAWEntry{.neutral, 0x0830, 15},
+ EAWEntry{.neutral, 0x0840, 28},
+ EAWEntry{.neutral, 0x085e, 1},
+ EAWEntry{.neutral, 0x0860, 11},
+ EAWEntry{.neutral, 0x08a0, 21},
+ EAWEntry{.neutral, 0x08b6, 18},
+ EAWEntry{.neutral, 0x08d3, 177},
+ EAWEntry{.neutral, 0x0985, 8},
+ EAWEntry{.neutral, 0x098f, 2},
+ EAWEntry{.neutral, 0x0993, 22},
+ EAWEntry{.neutral, 0x09aa, 7},
+ EAWEntry{.neutral, 0x09b2, 1},
+ EAWEntry{.neutral, 0x09b6, 4},
+ EAWEntry{.neutral, 0x09bc, 9},
+ EAWEntry{.neutral, 0x09c7, 2},
+ EAWEntry{.neutral, 0x09cb, 4},
+ EAWEntry{.neutral, 0x09d7, 1},
+ EAWEntry{.neutral, 0x09dc, 2},
+ EAWEntry{.neutral, 0x09df, 5},
+ EAWEntry{.neutral, 0x09e6, 25},
+ EAWEntry{.neutral, 0x0a01, 3},
+ EAWEntry{.neutral, 0x0a05, 6},
+ EAWEntry{.neutral, 0x0a0f, 2},
+ EAWEntry{.neutral, 0x0a13, 22},
+ EAWEntry{.neutral, 0x0a2a, 7},
+ EAWEntry{.neutral, 0x0a32, 2},
+ EAWEntry{.neutral, 0x0a35, 2},
+ EAWEntry{.neutral, 0x0a38, 2},
+ EAWEntry{.neutral, 0x0a3c, 1},
+ EAWEntry{.neutral, 0x0a3e, 5},
+ EAWEntry{.neutral, 0x0a47, 2},
+ EAWEntry{.neutral, 0x0a4b, 3},
+ EAWEntry{.neutral, 0x0a51, 1},
+ EAWEntry{.neutral, 0x0a59, 4},
+ EAWEntry{.neutral, 0x0a5e, 1},
+ EAWEntry{.neutral, 0x0a66, 17},
+ EAWEntry{.neutral, 0x0a81, 3},
+ EAWEntry{.neutral, 0x0a85, 9},
+ EAWEntry{.neutral, 0x0a8f, 3},
+ EAWEntry{.neutral, 0x0a93, 22},
+ EAWEntry{.neutral, 0x0aaa, 7},
+ EAWEntry{.neutral, 0x0ab2, 2},
+ EAWEntry{.neutral, 0x0ab5, 5},
+ EAWEntry{.neutral, 0x0abc, 10},
+ EAWEntry{.neutral, 0x0ac7, 3},
+ EAWEntry{.neutral, 0x0acb, 3},
+ EAWEntry{.neutral, 0x0ad0, 1},
+ EAWEntry{.neutral, 0x0ae0, 4},
+ EAWEntry{.neutral, 0x0ae6, 12},
+ EAWEntry{.neutral, 0x0af9, 7},
+ EAWEntry{.neutral, 0x0b01, 3},
+ EAWEntry{.neutral, 0x0b05, 8},
+ EAWEntry{.neutral, 0x0b0f, 2},
+ EAWEntry{.neutral, 0x0b13, 22},
+ EAWEntry{.neutral, 0x0b2a, 7},
+ EAWEntry{.neutral, 0x0b32, 2},
+ EAWEntry{.neutral, 0x0b35, 5},
+ EAWEntry{.neutral, 0x0b3c, 9},
+ EAWEntry{.neutral, 0x0b47, 2},
+ EAWEntry{.neutral, 0x0b4b, 3},
+ EAWEntry{.neutral, 0x0b55, 3},
+ EAWEntry{.neutral, 0x0b5c, 2},
+ EAWEntry{.neutral, 0x0b5f, 5},
+ EAWEntry{.neutral, 0x0b66, 18},
+ EAWEntry{.neutral, 0x0b82, 2},
+ EAWEntry{.neutral, 0x0b85, 6},
+ EAWEntry{.neutral, 0x0b8e, 3},
+ EAWEntry{.neutral, 0x0b92, 4},
+ EAWEntry{.neutral, 0x0b99, 2},
+ EAWEntry{.neutral, 0x0b9c, 1},
+ EAWEntry{.neutral, 0x0b9e, 2},
+ EAWEntry{.neutral, 0x0ba3, 2},
+ EAWEntry{.neutral, 0x0ba8, 3},
+ EAWEntry{.neutral, 0x0bae, 12},
+ EAWEntry{.neutral, 0x0bbe, 5},
+ EAWEntry{.neutral, 0x0bc6, 3},
+ EAWEntry{.neutral, 0x0bca, 4},
+ EAWEntry{.neutral, 0x0bd0, 1},
+ EAWEntry{.neutral, 0x0bd7, 1},
+ EAWEntry{.neutral, 0x0be6, 21},
+ EAWEntry{.neutral, 0x0c00, 13},
+ EAWEntry{.neutral, 0x0c0e, 3},
+ EAWEntry{.neutral, 0x0c12, 23},
+ EAWEntry{.neutral, 0x0c2a, 16},
+ EAWEntry{.neutral, 0x0c3d, 8},
+ EAWEntry{.neutral, 0x0c46, 3},
+ EAWEntry{.neutral, 0x0c4a, 4},
+ EAWEntry{.neutral, 0x0c55, 2},
+ EAWEntry{.neutral, 0x0c58, 3},
+ EAWEntry{.neutral, 0x0c60, 4},
+ EAWEntry{.neutral, 0x0c66, 10},
+ EAWEntry{.neutral, 0x0c77, 22},
+ EAWEntry{.neutral, 0x0c8e, 3},
+ EAWEntry{.neutral, 0x0c92, 23},
+ EAWEntry{.neutral, 0x0caa, 10},
+ EAWEntry{.neutral, 0x0cb5, 5},
+ EAWEntry{.neutral, 0x0cbc, 9},
+ EAWEntry{.neutral, 0x0cc6, 3},
+ EAWEntry{.neutral, 0x0cca, 4},
+ EAWEntry{.neutral, 0x0cd5, 2},
+ EAWEntry{.neutral, 0x0cde, 1},
+ EAWEntry{.neutral, 0x0ce0, 4},
+ EAWEntry{.neutral, 0x0ce6, 10},
+ EAWEntry{.neutral, 0x0cf1, 2},
+ EAWEntry{.neutral, 0x0d00, 13},
+ EAWEntry{.neutral, 0x0d0e, 3},
+ EAWEntry{.neutral, 0x0d12, 51},
+ EAWEntry{.neutral, 0x0d46, 3},
+ EAWEntry{.neutral, 0x0d4a, 6},
+ EAWEntry{.neutral, 0x0d54, 16},
+ EAWEntry{.neutral, 0x0d66, 26},
+ EAWEntry{.neutral, 0x0d81, 3},
+ EAWEntry{.neutral, 0x0d85, 18},
+ EAWEntry{.neutral, 0x0d9a, 24},
+ EAWEntry{.neutral, 0x0db3, 9},
+ EAWEntry{.neutral, 0x0dbd, 1},
+ EAWEntry{.neutral, 0x0dc0, 7},
+ EAWEntry{.neutral, 0x0dca, 1},
+ EAWEntry{.neutral, 0x0dcf, 6},
+ EAWEntry{.neutral, 0x0dd6, 1},
+ EAWEntry{.neutral, 0x0dd8, 8},
+ EAWEntry{.neutral, 0x0de6, 10},
+ EAWEntry{.neutral, 0x0df2, 3},
+ EAWEntry{.neutral, 0x0e01, 58},
+ EAWEntry{.neutral, 0x0e3f, 29},
+ EAWEntry{.neutral, 0x0e81, 2},
+ EAWEntry{.neutral, 0x0e84, 1},
+ EAWEntry{.neutral, 0x0e86, 5},
+ EAWEntry{.neutral, 0x0e8c, 24},
+ EAWEntry{.neutral, 0x0ea5, 1},
+ EAWEntry{.neutral, 0x0ea7, 23},
+ EAWEntry{.neutral, 0x0ec0, 5},
+ EAWEntry{.neutral, 0x0ec6, 1},
+ EAWEntry{.neutral, 0x0ec8, 6},
+ EAWEntry{.neutral, 0x0ed0, 10},
+ EAWEntry{.neutral, 0x0edc, 4},
+ EAWEntry{.neutral, 0x0f00, 72},
+ EAWEntry{.neutral, 0x0f49, 36},
+ EAWEntry{.neutral, 0x0f71, 39},
+ EAWEntry{.neutral, 0x0f99, 36},
+ EAWEntry{.neutral, 0x0fbe, 15},
+ EAWEntry{.neutral, 0x0fce, 13},
+ EAWEntry{.neutral, 0x1000, 198},
+ EAWEntry{.neutral, 0x10c7, 1},
+ EAWEntry{.neutral, 0x10cd, 1},
+ EAWEntry{.neutral, 0x10d0, 48},
+ EAWEntry{.wide, 0x1100, 96},
+ EAWEntry{.neutral, 0x1160, 233},
+ EAWEntry{.neutral, 0x124a, 4},
+ EAWEntry{.neutral, 0x1250, 7},
+ EAWEntry{.neutral, 0x1258, 1},
+ EAWEntry{.neutral, 0x125a, 4},
+ EAWEntry{.neutral, 0x1260, 41},
+ EAWEntry{.neutral, 0x128a, 4},
+ EAWEntry{.neutral, 0x1290, 33},
+ EAWEntry{.neutral, 0x12b2, 4},
+ EAWEntry{.neutral, 0x12b8, 7},
+ EAWEntry{.neutral, 0x12c0, 1},
+ EAWEntry{.neutral, 0x12c2, 4},
+ EAWEntry{.neutral, 0x12c8, 15},
+ EAWEntry{.neutral, 0x12d8, 57},
+ EAWEntry{.neutral, 0x1312, 4},
+ EAWEntry{.neutral, 0x1318, 67},
+ EAWEntry{.neutral, 0x135d, 32},
+ EAWEntry{.neutral, 0x1380, 26},
+ EAWEntry{.neutral, 0x13a0, 86},
+ EAWEntry{.neutral, 0x13f8, 6},
+ EAWEntry{.neutral, 0x1400, 669},
+ EAWEntry{.neutral, 0x16a0, 89},
+ EAWEntry{.neutral, 0x1700, 13},
+ EAWEntry{.neutral, 0x170e, 7},
+ EAWEntry{.neutral, 0x1720, 23},
+ EAWEntry{.neutral, 0x1740, 20},
+ EAWEntry{.neutral, 0x1760, 13},
+ EAWEntry{.neutral, 0x176e, 3},
+ EAWEntry{.neutral, 0x1772, 2},
+ EAWEntry{.neutral, 0x1780, 94},
+ EAWEntry{.neutral, 0x17e0, 10},
+ EAWEntry{.neutral, 0x17f0, 10},
+ EAWEntry{.neutral, 0x1800, 15},
+ EAWEntry{.neutral, 0x1810, 10},
+ EAWEntry{.neutral, 0x1820, 89},
+ EAWEntry{.neutral, 0x1880, 43},
+ EAWEntry{.neutral, 0x18b0, 70},
+ EAWEntry{.neutral, 0x1900, 31},
+ EAWEntry{.neutral, 0x1920, 12},
+ EAWEntry{.neutral, 0x1930, 12},
+ EAWEntry{.neutral, 0x1940, 1},
+ EAWEntry{.neutral, 0x1944, 42},
+ EAWEntry{.neutral, 0x1970, 5},
+ EAWEntry{.neutral, 0x1980, 44},
+ EAWEntry{.neutral, 0x19b0, 26},
+ EAWEntry{.neutral, 0x19d0, 11},
+ EAWEntry{.neutral, 0x19de, 62},
+ EAWEntry{.neutral, 0x1a1e, 65},
+ EAWEntry{.neutral, 0x1a60, 29},
+ EAWEntry{.neutral, 0x1a7f, 11},
+ EAWEntry{.neutral, 0x1a90, 10},
+ EAWEntry{.neutral, 0x1aa0, 14},
+ EAWEntry{.neutral, 0x1ab0, 17},
+ EAWEntry{.neutral, 0x1b00, 76},
+ EAWEntry{.neutral, 0x1b50, 45},
+ EAWEntry{.neutral, 0x1b80, 116},
+ EAWEntry{.neutral, 0x1bfc, 60},
+ EAWEntry{.neutral, 0x1c3b, 15},
+ EAWEntry{.neutral, 0x1c4d, 60},
+ EAWEntry{.neutral, 0x1c90, 43},
+ EAWEntry{.neutral, 0x1cbd, 11},
+ EAWEntry{.neutral, 0x1cd0, 43},
+ EAWEntry{.neutral, 0x1d00, 250},
+ EAWEntry{.neutral, 0x1dfb, 283},
+ EAWEntry{.neutral, 0x1f18, 6},
+ EAWEntry{.neutral, 0x1f20, 38},
+ EAWEntry{.neutral, 0x1f48, 6},
+ EAWEntry{.neutral, 0x1f50, 8},
+ EAWEntry{.neutral, 0x1f59, 1},
+ EAWEntry{.neutral, 0x1f5b, 1},
+ EAWEntry{.neutral, 0x1f5d, 1},
+ EAWEntry{.neutral, 0x1f5f, 31},
+ EAWEntry{.neutral, 0x1f80, 53},
+ EAWEntry{.neutral, 0x1fb6, 15},
+ EAWEntry{.neutral, 0x1fc6, 14},
+ EAWEntry{.neutral, 0x1fd6, 6},
+ EAWEntry{.neutral, 0x1fdd, 19},
+ EAWEntry{.neutral, 0x1ff2, 3},
+ EAWEntry{.neutral, 0x1ff6, 9},
+ EAWEntry{.neutral, 0x2000, 16},
+ EAWEntry{.ambiguous, 0x2010, 1},
+ EAWEntry{.neutral, 0x2011, 2},
+ EAWEntry{.ambiguous, 0x2013, 4},
+ EAWEntry{.neutral, 0x2017, 1},
+ EAWEntry{.ambiguous, 0x2018, 2},
+ EAWEntry{.neutral, 0x201a, 2},
+ EAWEntry{.ambiguous, 0x201c, 2},
+ EAWEntry{.neutral, 0x201e, 2},
+ EAWEntry{.ambiguous, 0x2020, 3},
+ EAWEntry{.neutral, 0x2023, 1},
+ EAWEntry{.ambiguous, 0x2024, 4},
+ EAWEntry{.neutral, 0x2028, 8},
+ EAWEntry{.ambiguous, 0x2030, 1},
+ EAWEntry{.neutral, 0x2031, 1},
+ EAWEntry{.ambiguous, 0x2032, 2},
+ EAWEntry{.neutral, 0x2034, 1},
+ EAWEntry{.ambiguous, 0x2035, 1},
+ EAWEntry{.neutral, 0x2036, 5},
+ EAWEntry{.ambiguous, 0x203b, 1},
+ EAWEntry{.neutral, 0x203c, 2},
+ EAWEntry{.ambiguous, 0x203e, 1},
+ EAWEntry{.neutral, 0x203f, 38},
+ EAWEntry{.neutral, 0x2066, 12},
+ EAWEntry{.ambiguous, 0x2074, 1},
+ EAWEntry{.neutral, 0x2075, 10},
+ EAWEntry{.ambiguous, 0x207f, 1},
+ EAWEntry{.neutral, 0x2080, 1},
+ EAWEntry{.ambiguous, 0x2081, 4},
+ EAWEntry{.neutral, 0x2085, 10},
+ EAWEntry{.neutral, 0x2090, 13},
+ EAWEntry{.neutral, 0x20a0, 9},
+ EAWEntry{.half, 0x20a9, 1},
+ EAWEntry{.neutral, 0x20aa, 2},
+ EAWEntry{.ambiguous, 0x20ac, 1},
+ EAWEntry{.neutral, 0x20ad, 19},
+ EAWEntry{.neutral, 0x20d0, 33},
+ EAWEntry{.neutral, 0x2100, 3},
+ EAWEntry{.ambiguous, 0x2103, 1},
+ EAWEntry{.neutral, 0x2104, 1},
+ EAWEntry{.ambiguous, 0x2105, 1},
+ EAWEntry{.neutral, 0x2106, 3},
+ EAWEntry{.ambiguous, 0x2109, 1},
+ EAWEntry{.neutral, 0x210a, 9},
+ EAWEntry{.ambiguous, 0x2113, 1},
+ EAWEntry{.neutral, 0x2114, 2},
+ EAWEntry{.ambiguous, 0x2116, 1},
+ EAWEntry{.neutral, 0x2117, 10},
+ EAWEntry{.ambiguous, 0x2121, 2},
+ EAWEntry{.neutral, 0x2123, 3},
+ EAWEntry{.ambiguous, 0x2126, 1},
+ EAWEntry{.neutral, 0x2127, 4},
+ EAWEntry{.ambiguous, 0x212b, 1},
+ EAWEntry{.neutral, 0x212c, 39},
+ EAWEntry{.ambiguous, 0x2153, 2},
+ EAWEntry{.neutral, 0x2155, 6},
+ EAWEntry{.ambiguous, 0x215b, 4},
+ EAWEntry{.neutral, 0x215f, 1},
+ EAWEntry{.ambiguous, 0x2160, 12},
+ EAWEntry{.neutral, 0x216c, 4},
+ EAWEntry{.ambiguous, 0x2170, 10},
+ EAWEntry{.neutral, 0x217a, 15},
+ EAWEntry{.ambiguous, 0x2189, 1},
+ EAWEntry{.neutral, 0x218a, 2},
+ EAWEntry{.ambiguous, 0x2190, 10},
+ EAWEntry{.neutral, 0x219a, 30},
+ EAWEntry{.ambiguous, 0x21b8, 2},
+ EAWEntry{.neutral, 0x21ba, 24},
+ EAWEntry{.ambiguous, 0x21d2, 1},
+ EAWEntry{.neutral, 0x21d3, 1},
+ EAWEntry{.ambiguous, 0x21d4, 1},
+ EAWEntry{.neutral, 0x21d5, 18},
+ EAWEntry{.ambiguous, 0x21e7, 1},
+ EAWEntry{.neutral, 0x21e8, 24},
+ EAWEntry{.ambiguous, 0x2200, 1},
+ EAWEntry{.neutral, 0x2201, 1},
+ EAWEntry{.ambiguous, 0x2202, 2},
+ EAWEntry{.neutral, 0x2204, 3},
+ EAWEntry{.ambiguous, 0x2207, 2},
+ EAWEntry{.neutral, 0x2209, 2},
+ EAWEntry{.ambiguous, 0x220b, 1},
+ EAWEntry{.neutral, 0x220c, 3},
+ EAWEntry{.ambiguous, 0x220f, 1},
+ EAWEntry{.neutral, 0x2210, 1},
+ EAWEntry{.ambiguous, 0x2211, 1},
+ EAWEntry{.neutral, 0x2212, 3},
+ EAWEntry{.ambiguous, 0x2215, 1},
+ EAWEntry{.neutral, 0x2216, 4},
+ EAWEntry{.ambiguous, 0x221a, 1},
+ EAWEntry{.neutral, 0x221b, 2},
+ EAWEntry{.ambiguous, 0x221d, 4},
+ EAWEntry{.neutral, 0x2221, 2},
+ EAWEntry{.ambiguous, 0x2223, 1},
+ EAWEntry{.neutral, 0x2224, 1},
+ EAWEntry{.ambiguous, 0x2225, 1},
+ EAWEntry{.neutral, 0x2226, 1},
+ EAWEntry{.ambiguous, 0x2227, 6},
+ EAWEntry{.neutral, 0x222d, 1},
+ EAWEntry{.ambiguous, 0x222e, 1},
+ EAWEntry{.neutral, 0x222f, 5},
+ EAWEntry{.ambiguous, 0x2234, 4},
+ EAWEntry{.neutral, 0x2238, 4},
+ EAWEntry{.ambiguous, 0x223c, 2},
+ EAWEntry{.neutral, 0x223e, 10},
+ EAWEntry{.ambiguous, 0x2248, 1},
+ EAWEntry{.neutral, 0x2249, 3},
+ EAWEntry{.ambiguous, 0x224c, 1},
+ EAWEntry{.neutral, 0x224d, 5},
+ EAWEntry{.ambiguous, 0x2252, 1},
+ EAWEntry{.neutral, 0x2253, 13},
+ EAWEntry{.ambiguous, 0x2260, 2},
+ EAWEntry{.neutral, 0x2262, 2},
+ EAWEntry{.ambiguous, 0x2264, 4},
+ EAWEntry{.neutral, 0x2268, 2},
+ EAWEntry{.ambiguous, 0x226a, 2},
+ EAWEntry{.neutral, 0x226c, 2},
+ EAWEntry{.ambiguous, 0x226e, 2},
+ EAWEntry{.neutral, 0x2270, 18},
+ EAWEntry{.ambiguous, 0x2282, 2},
+ EAWEntry{.neutral, 0x2284, 2},
+ EAWEntry{.ambiguous, 0x2286, 2},
+ EAWEntry{.neutral, 0x2288, 13},
+ EAWEntry{.ambiguous, 0x2295, 1},
+ EAWEntry{.neutral, 0x2296, 3},
+ EAWEntry{.ambiguous, 0x2299, 1},
+ EAWEntry{.neutral, 0x229a, 11},
+ EAWEntry{.ambiguous, 0x22a5, 1},
+ EAWEntry{.neutral, 0x22a6, 25},
+ EAWEntry{.ambiguous, 0x22bf, 1},
+ EAWEntry{.neutral, 0x22c0, 82},
+ EAWEntry{.ambiguous, 0x2312, 1},
+ EAWEntry{.neutral, 0x2313, 7},
+ EAWEntry{.wide, 0x231a, 2},
+ EAWEntry{.neutral, 0x231c, 13},
+ EAWEntry{.wide, 0x2329, 2},
+ EAWEntry{.neutral, 0x232b, 190},
+ EAWEntry{.wide, 0x23e9, 4},
+ EAWEntry{.neutral, 0x23ed, 3},
+ EAWEntry{.wide, 0x23f0, 1},
+ EAWEntry{.neutral, 0x23f1, 2},
+ EAWEntry{.wide, 0x23f3, 1},
+ EAWEntry{.neutral, 0x23f4, 51},
+ EAWEntry{.neutral, 0x2440, 11},
+ EAWEntry{.ambiguous, 0x2460, 138},
+ EAWEntry{.neutral, 0x24ea, 1},
+ EAWEntry{.ambiguous, 0x24eb, 97},
+ EAWEntry{.neutral, 0x254c, 4},
+ EAWEntry{.ambiguous, 0x2550, 36},
+ EAWEntry{.neutral, 0x2574, 12},
+ EAWEntry{.ambiguous, 0x2580, 16},
+ EAWEntry{.neutral, 0x2590, 2},
+ EAWEntry{.ambiguous, 0x2592, 4},
+ EAWEntry{.neutral, 0x2596, 10},
+ EAWEntry{.ambiguous, 0x25a0, 2},
+ EAWEntry{.neutral, 0x25a2, 1},
+ EAWEntry{.ambiguous, 0x25a3, 7},
+ EAWEntry{.neutral, 0x25aa, 8},
+ EAWEntry{.ambiguous, 0x25b2, 2},
+ EAWEntry{.neutral, 0x25b4, 2},
+ EAWEntry{.ambiguous, 0x25b6, 2},
+ EAWEntry{.neutral, 0x25b8, 4},
+ EAWEntry{.ambiguous, 0x25bc, 2},
+ EAWEntry{.neutral, 0x25be, 2},
+ EAWEntry{.ambiguous, 0x25c0, 2},
+ EAWEntry{.neutral, 0x25c2, 4},
+ EAWEntry{.ambiguous, 0x25c6, 3},
+ EAWEntry{.neutral, 0x25c9, 2},
+ EAWEntry{.ambiguous, 0x25cb, 1},
+ EAWEntry{.neutral, 0x25cc, 2},
+ EAWEntry{.ambiguous, 0x25ce, 4},
+ EAWEntry{.neutral, 0x25d2, 16},
+ EAWEntry{.ambiguous, 0x25e2, 4},
+ EAWEntry{.neutral, 0x25e6, 9},
+ EAWEntry{.ambiguous, 0x25ef, 1},
+ EAWEntry{.neutral, 0x25f0, 13},
+ EAWEntry{.wide, 0x25fd, 2},
+ EAWEntry{.neutral, 0x25ff, 6},
+ EAWEntry{.ambiguous, 0x2605, 2},
+ EAWEntry{.neutral, 0x2607, 2},
+ EAWEntry{.ambiguous, 0x2609, 1},
+ EAWEntry{.neutral, 0x260a, 4},
+ EAWEntry{.ambiguous, 0x260e, 2},
+ EAWEntry{.neutral, 0x2610, 4},
+ EAWEntry{.wide, 0x2614, 2},
+ EAWEntry{.neutral, 0x2616, 6},
+ EAWEntry{.ambiguous, 0x261c, 1},
+ EAWEntry{.neutral, 0x261d, 1},
+ EAWEntry{.ambiguous, 0x261e, 1},
+ EAWEntry{.neutral, 0x261f, 33},
+ EAWEntry{.ambiguous, 0x2640, 1},
+ EAWEntry{.neutral, 0x2641, 1},
+ EAWEntry{.ambiguous, 0x2642, 1},
+ EAWEntry{.neutral, 0x2643, 5},
+ EAWEntry{.wide, 0x2648, 12},
+ EAWEntry{.neutral, 0x2654, 12},
+ EAWEntry{.ambiguous, 0x2660, 2},
+ EAWEntry{.neutral, 0x2662, 1},
+ EAWEntry{.ambiguous, 0x2663, 3},
+ EAWEntry{.neutral, 0x2666, 1},
+ EAWEntry{.ambiguous, 0x2667, 4},
+ EAWEntry{.neutral, 0x266b, 1},
+ EAWEntry{.ambiguous, 0x266c, 2},
+ EAWEntry{.neutral, 0x266e, 1},
+ EAWEntry{.ambiguous, 0x266f, 1},
+ EAWEntry{.neutral, 0x2670, 15},
+ EAWEntry{.wide, 0x267f, 1},
+ EAWEntry{.neutral, 0x2680, 19},
+ EAWEntry{.wide, 0x2693, 1},
+ EAWEntry{.neutral, 0x2694, 10},
+ EAWEntry{.ambiguous, 0x269e, 2},
+ EAWEntry{.neutral, 0x26a0, 1},
+ EAWEntry{.wide, 0x26a1, 1},
+ EAWEntry{.neutral, 0x26a2, 8},
+ EAWEntry{.wide, 0x26aa, 2},
+ EAWEntry{.neutral, 0x26ac, 17},
+ EAWEntry{.wide, 0x26bd, 2},
+ EAWEntry{.ambiguous, 0x26bf, 1},
+ EAWEntry{.neutral, 0x26c0, 4},
+ EAWEntry{.wide, 0x26c4, 2},
+ EAWEntry{.ambiguous, 0x26c6, 8},
+ EAWEntry{.wide, 0x26ce, 1},
+ EAWEntry{.ambiguous, 0x26cf, 5},
+ EAWEntry{.wide, 0x26d4, 1},
+ EAWEntry{.ambiguous, 0x26d5, 13},
+ EAWEntry{.neutral, 0x26e2, 1},
+ EAWEntry{.ambiguous, 0x26e3, 1},
+ EAWEntry{.neutral, 0x26e4, 4},
+ EAWEntry{.ambiguous, 0x26e8, 2},
+ EAWEntry{.wide, 0x26ea, 1},
+ EAWEntry{.ambiguous, 0x26eb, 7},
+ EAWEntry{.wide, 0x26f2, 2},
+ EAWEntry{.ambiguous, 0x26f4, 1},
+ EAWEntry{.wide, 0x26f5, 1},
+ EAWEntry{.ambiguous, 0x26f6, 4},
+ EAWEntry{.wide, 0x26fa, 1},
+ EAWEntry{.ambiguous, 0x26fb, 2},
+ EAWEntry{.wide, 0x26fd, 1},
+ EAWEntry{.ambiguous, 0x26fe, 2},
+ EAWEntry{.neutral, 0x2700, 5},
+ EAWEntry{.wide, 0x2705, 1},
+ EAWEntry{.neutral, 0x2706, 4},
+ EAWEntry{.wide, 0x270a, 2},
+ EAWEntry{.neutral, 0x270c, 28},
+ EAWEntry{.wide, 0x2728, 1},
+ EAWEntry{.neutral, 0x2729, 20},
+ EAWEntry{.ambiguous, 0x273d, 1},
+ EAWEntry{.neutral, 0x273e, 14},
+ EAWEntry{.wide, 0x274c, 1},
+ EAWEntry{.neutral, 0x274d, 1},
+ EAWEntry{.wide, 0x274e, 1},
+ EAWEntry{.neutral, 0x274f, 4},
+ EAWEntry{.wide, 0x2753, 3},
+ EAWEntry{.neutral, 0x2756, 1},
+ EAWEntry{.wide, 0x2757, 1},
+ EAWEntry{.neutral, 0x2758, 30},
+ EAWEntry{.ambiguous, 0x2776, 10},
+ EAWEntry{.neutral, 0x2780, 21},
+ EAWEntry{.wide, 0x2795, 3},
+ EAWEntry{.neutral, 0x2798, 24},
+ EAWEntry{.wide, 0x27b0, 1},
+ EAWEntry{.neutral, 0x27b1, 14},
+ EAWEntry{.wide, 0x27bf, 1},
+ EAWEntry{.neutral, 0x27c0, 38},
+ EAWEntry{.narrow, 0x27e6, 8},
+ EAWEntry{.neutral, 0x27ee, 407},
+ EAWEntry{.narrow, 0x2985, 2},
+ EAWEntry{.neutral, 0x2987, 404},
+ EAWEntry{.wide, 0x2b1b, 2},
+ EAWEntry{.neutral, 0x2b1d, 51},
+ EAWEntry{.wide, 0x2b50, 1},
+ EAWEntry{.neutral, 0x2b51, 4},
+ EAWEntry{.wide, 0x2b55, 1},
+ EAWEntry{.ambiguous, 0x2b56, 4},
+ EAWEntry{.neutral, 0x2b5a, 26},
+ EAWEntry{.neutral, 0x2b76, 32},
+ EAWEntry{.neutral, 0x2b97, 152},
+ EAWEntry{.neutral, 0x2c30, 47},
+ EAWEntry{.neutral, 0x2c60, 148},
+ EAWEntry{.neutral, 0x2cf9, 45},
+ EAWEntry{.neutral, 0x2d27, 1},
+ EAWEntry{.neutral, 0x2d2d, 1},
+ EAWEntry{.neutral, 0x2d30, 56},
+ EAWEntry{.neutral, 0x2d6f, 2},
+ EAWEntry{.neutral, 0x2d7f, 24},
+ EAWEntry{.neutral, 0x2da0, 7},
+ EAWEntry{.neutral, 0x2da8, 7},
+ EAWEntry{.neutral, 0x2db0, 7},
+ EAWEntry{.neutral, 0x2db8, 7},
+ EAWEntry{.neutral, 0x2dc0, 7},
+ EAWEntry{.neutral, 0x2dc8, 7},
+ EAWEntry{.neutral, 0x2dd0, 7},
+ EAWEntry{.neutral, 0x2dd8, 7},
+ EAWEntry{.neutral, 0x2de0, 115},
+ EAWEntry{.wide, 0x2e80, 26},
+ EAWEntry{.wide, 0x2e9b, 89},
+ EAWEntry{.wide, 0x2f00, 214},
+ EAWEntry{.wide, 0x2ff0, 12},
+ EAWEntry{.full, 0x3000, 1},
+ EAWEntry{.wide, 0x3001, 62},
+ EAWEntry{.neutral, 0x303f, 1},
+ EAWEntry{.wide, 0x3041, 86},
+ EAWEntry{.wide, 0x3099, 103},
+ EAWEntry{.wide, 0x3105, 43},
+ EAWEntry{.wide, 0x3131, 94},
+ EAWEntry{.wide, 0x3190, 84},
+ EAWEntry{.wide, 0x31f0, 47},
+ EAWEntry{.wide, 0x3220, 40},
+ EAWEntry{.ambiguous, 0x3248, 8},
+ EAWEntry{.wide, 0x3250, 7024},
+ EAWEntry{.neutral, 0x4dc0, 64},
+ EAWEntry{.wide, 0x4e00, 22157},
+ EAWEntry{.wide, 0xa490, 55},
+ EAWEntry{.neutral, 0xa4d0, 348},
+ EAWEntry{.neutral, 0xa640, 184},
+ EAWEntry{.neutral, 0xa700, 192},
+ EAWEntry{.neutral, 0xa7c2, 9},
+ EAWEntry{.neutral, 0xa7f5, 56},
+ EAWEntry{.neutral, 0xa830, 10},
+ EAWEntry{.neutral, 0xa840, 56},
+ EAWEntry{.neutral, 0xa880, 70},
+ EAWEntry{.neutral, 0xa8ce, 12},
+ EAWEntry{.neutral, 0xa8e0, 116},
+ EAWEntry{.neutral, 0xa95f, 1},
+ EAWEntry{.wide, 0xa960, 29},
+ EAWEntry{.neutral, 0xa980, 78},
+ EAWEntry{.neutral, 0xa9cf, 11},
+ EAWEntry{.neutral, 0xa9de, 33},
+ EAWEntry{.neutral, 0xaa00, 55},
+ EAWEntry{.neutral, 0xaa40, 14},
+ EAWEntry{.neutral, 0xaa50, 10},
+ EAWEntry{.neutral, 0xaa5c, 103},
+ EAWEntry{.neutral, 0xaadb, 28},
+ EAWEntry{.neutral, 0xab01, 6},
+ EAWEntry{.neutral, 0xab09, 6},
+ EAWEntry{.neutral, 0xab11, 6},
+ EAWEntry{.neutral, 0xab20, 7},
+ EAWEntry{.neutral, 0xab28, 7},
+ EAWEntry{.neutral, 0xab30, 60},
+ EAWEntry{.neutral, 0xab70, 126},
+ EAWEntry{.neutral, 0xabf0, 10},
+ EAWEntry{.wide, 0xac00, 11172},
+ EAWEntry{.neutral, 0xd7b0, 23},
+ EAWEntry{.neutral, 0xd7cb, 49},
+ EAWEntry{.neutral, 0xd800, 2048},
+ EAWEntry{.ambiguous, 0xe000, 6400},
+ EAWEntry{.wide, 0xf900, 512},
+ EAWEntry{.neutral, 0xfb00, 7},
+ EAWEntry{.neutral, 0xfb13, 5},
+ EAWEntry{.neutral, 0xfb1d, 26},
+ EAWEntry{.neutral, 0xfb38, 5},
+ EAWEntry{.neutral, 0xfb3e, 1},
+ EAWEntry{.neutral, 0xfb40, 2},
+ EAWEntry{.neutral, 0xfb43, 2},
+ EAWEntry{.neutral, 0xfb46, 124},
+ EAWEntry{.neutral, 0xfbd3, 365},
+ EAWEntry{.neutral, 0xfd50, 64},
+ EAWEntry{.neutral, 0xfd92, 54},
+ EAWEntry{.neutral, 0xfdf0, 14},
+ EAWEntry{.ambiguous, 0xfe00, 16},
+ EAWEntry{.wide, 0xfe10, 10},
+ EAWEntry{.neutral, 0xfe20, 16},
+ EAWEntry{.wide, 0xfe30, 35},
+ EAWEntry{.wide, 0xfe54, 19},
+ EAWEntry{.wide, 0xfe68, 4},
+ EAWEntry{.neutral, 0xfe70, 5},
+ EAWEntry{.neutral, 0xfe76, 135},
+ EAWEntry{.neutral, 0xfeff, 1},
+ EAWEntry{.full, 0xff01, 96},
+ EAWEntry{.half, 0xff61, 94},
+ EAWEntry{.half, 0xffc2, 6},
+ EAWEntry{.half, 0xffca, 6},
+ EAWEntry{.half, 0xffd2, 6},
+ EAWEntry{.half, 0xffda, 3},
+ EAWEntry{.full, 0xffe0, 7},
+ EAWEntry{.half, 0xffe8, 7},
+ EAWEntry{.neutral, 0xfff9, 4},
+ EAWEntry{.ambiguous, 0xfffd, 1},
+ EAWEntry{.neutral, 0x10000, 12},
+ EAWEntry{.neutral, 0x1000d, 26},
+ EAWEntry{.neutral, 0x10028, 19},
+ EAWEntry{.neutral, 0x1003c, 2},
+ EAWEntry{.neutral, 0x1003f, 15},
+ EAWEntry{.neutral, 0x10050, 14},
+ EAWEntry{.neutral, 0x10080, 123},
+ EAWEntry{.neutral, 0x10100, 3},
+ EAWEntry{.neutral, 0x10107, 45},
+ EAWEntry{.neutral, 0x10137, 88},
+ EAWEntry{.neutral, 0x10190, 13},
+ EAWEntry{.neutral, 0x101a0, 1},
+ EAWEntry{.neutral, 0x101d0, 46},
+ EAWEntry{.neutral, 0x10280, 29},
+ EAWEntry{.neutral, 0x102a0, 49},
+ EAWEntry{.neutral, 0x102e0, 28},
+ EAWEntry{.neutral, 0x10300, 36},
+ EAWEntry{.neutral, 0x1032d, 30},
+ EAWEntry{.neutral, 0x10350, 43},
+ EAWEntry{.neutral, 0x10380, 30},
+ EAWEntry{.neutral, 0x1039f, 37},
+ EAWEntry{.neutral, 0x103c8, 14},
+ EAWEntry{.neutral, 0x10400, 158},
+ EAWEntry{.neutral, 0x104a0, 10},
+ EAWEntry{.neutral, 0x104b0, 36},
+ EAWEntry{.neutral, 0x104d8, 36},
+ EAWEntry{.neutral, 0x10500, 40},
+ EAWEntry{.neutral, 0x10530, 52},
+ EAWEntry{.neutral, 0x1056f, 1},
+ EAWEntry{.neutral, 0x10600, 311},
+ EAWEntry{.neutral, 0x10740, 22},
+ EAWEntry{.neutral, 0x10760, 8},
+ EAWEntry{.neutral, 0x10800, 6},
+ EAWEntry{.neutral, 0x10808, 1},
+ EAWEntry{.neutral, 0x1080a, 44},
+ EAWEntry{.neutral, 0x10837, 2},
+ EAWEntry{.neutral, 0x1083c, 1},
+ EAWEntry{.neutral, 0x1083f, 23},
+ EAWEntry{.neutral, 0x10857, 72},
+ EAWEntry{.neutral, 0x108a7, 9},
+ EAWEntry{.neutral, 0x108e0, 19},
+ EAWEntry{.neutral, 0x108f4, 2},
+ EAWEntry{.neutral, 0x108fb, 33},
+ EAWEntry{.neutral, 0x1091f, 27},
+ EAWEntry{.neutral, 0x1093f, 1},
+ EAWEntry{.neutral, 0x10980, 56},
+ EAWEntry{.neutral, 0x109bc, 20},
+ EAWEntry{.neutral, 0x109d2, 50},
+ EAWEntry{.neutral, 0x10a05, 2},
+ EAWEntry{.neutral, 0x10a0c, 8},
+ EAWEntry{.neutral, 0x10a15, 3},
+ EAWEntry{.neutral, 0x10a19, 29},
+ EAWEntry{.neutral, 0x10a38, 3},
+ EAWEntry{.neutral, 0x10a3f, 10},
+ EAWEntry{.neutral, 0x10a50, 9},
+ EAWEntry{.neutral, 0x10a60, 64},
+ EAWEntry{.neutral, 0x10ac0, 39},
+ EAWEntry{.neutral, 0x10aeb, 12},
+ EAWEntry{.neutral, 0x10b00, 54},
+ EAWEntry{.neutral, 0x10b39, 29},
+ EAWEntry{.neutral, 0x10b58, 27},
+ EAWEntry{.neutral, 0x10b78, 26},
+ EAWEntry{.neutral, 0x10b99, 4},
+ EAWEntry{.neutral, 0x10ba9, 7},
+ EAWEntry{.neutral, 0x10c00, 73},
+ EAWEntry{.neutral, 0x10c80, 51},
+ EAWEntry{.neutral, 0x10cc0, 51},
+ EAWEntry{.neutral, 0x10cfa, 46},
+ EAWEntry{.neutral, 0x10d30, 10},
+ EAWEntry{.neutral, 0x10e60, 31},
+ EAWEntry{.neutral, 0x10e80, 42},
+ EAWEntry{.neutral, 0x10eab, 3},
+ EAWEntry{.neutral, 0x10eb0, 2},
+ EAWEntry{.neutral, 0x10f00, 40},
+ EAWEntry{.neutral, 0x10f30, 42},
+ EAWEntry{.neutral, 0x10fb0, 28},
+ EAWEntry{.neutral, 0x10fe0, 23},
+ EAWEntry{.neutral, 0x11000, 78},
+ EAWEntry{.neutral, 0x11052, 30},
+ EAWEntry{.neutral, 0x1107f, 67},
+ EAWEntry{.neutral, 0x110cd, 1},
+ EAWEntry{.neutral, 0x110d0, 25},
+ EAWEntry{.neutral, 0x110f0, 10},
+ EAWEntry{.neutral, 0x11100, 53},
+ EAWEntry{.neutral, 0x11136, 18},
+ EAWEntry{.neutral, 0x11150, 39},
+ EAWEntry{.neutral, 0x11180, 96},
+ EAWEntry{.neutral, 0x111e1, 20},
+ EAWEntry{.neutral, 0x11200, 18},
+ EAWEntry{.neutral, 0x11213, 44},
+ EAWEntry{.neutral, 0x11280, 7},
+ EAWEntry{.neutral, 0x11288, 1},
+ EAWEntry{.neutral, 0x1128a, 4},
+ EAWEntry{.neutral, 0x1128f, 15},
+ EAWEntry{.neutral, 0x1129f, 11},
+ EAWEntry{.neutral, 0x112b0, 59},
+ EAWEntry{.neutral, 0x112f0, 10},
+ EAWEntry{.neutral, 0x11300, 4},
+ EAWEntry{.neutral, 0x11305, 8},
+ EAWEntry{.neutral, 0x1130f, 2},
+ EAWEntry{.neutral, 0x11313, 22},
+ EAWEntry{.neutral, 0x1132a, 7},
+ EAWEntry{.neutral, 0x11332, 2},
+ EAWEntry{.neutral, 0x11335, 5},
+ EAWEntry{.neutral, 0x1133b, 10},
+ EAWEntry{.neutral, 0x11347, 2},
+ EAWEntry{.neutral, 0x1134b, 3},
+ EAWEntry{.neutral, 0x11350, 1},
+ EAWEntry{.neutral, 0x11357, 1},
+ EAWEntry{.neutral, 0x1135d, 7},
+ EAWEntry{.neutral, 0x11366, 7},
+ EAWEntry{.neutral, 0x11370, 5},
+ EAWEntry{.neutral, 0x11400, 92},
+ EAWEntry{.neutral, 0x1145d, 5},
+ EAWEntry{.neutral, 0x11480, 72},
+ EAWEntry{.neutral, 0x114d0, 10},
+ EAWEntry{.neutral, 0x11580, 54},
+ EAWEntry{.neutral, 0x115b8, 38},
+ EAWEntry{.neutral, 0x11600, 69},
+ EAWEntry{.neutral, 0x11650, 10},
+ EAWEntry{.neutral, 0x11660, 13},
+ EAWEntry{.neutral, 0x11680, 57},
+ EAWEntry{.neutral, 0x116c0, 10},
+ EAWEntry{.neutral, 0x11700, 27},
+ EAWEntry{.neutral, 0x1171d, 15},
+ EAWEntry{.neutral, 0x11730, 16},
+ EAWEntry{.neutral, 0x11800, 60},
+ EAWEntry{.neutral, 0x118a0, 83},
+ EAWEntry{.neutral, 0x118ff, 8},
+ EAWEntry{.neutral, 0x11909, 1},
+ EAWEntry{.neutral, 0x1190c, 8},
+ EAWEntry{.neutral, 0x11915, 2},
+ EAWEntry{.neutral, 0x11918, 30},
+ EAWEntry{.neutral, 0x11937, 2},
+ EAWEntry{.neutral, 0x1193b, 12},
+ EAWEntry{.neutral, 0x11950, 10},
+ EAWEntry{.neutral, 0x119a0, 8},
+ EAWEntry{.neutral, 0x119aa, 46},
+ EAWEntry{.neutral, 0x119da, 11},
+ EAWEntry{.neutral, 0x11a00, 72},
+ EAWEntry{.neutral, 0x11a50, 83},
+ EAWEntry{.neutral, 0x11ac0, 57},
+ EAWEntry{.neutral, 0x11c00, 9},
+ EAWEntry{.neutral, 0x11c0a, 45},
+ EAWEntry{.neutral, 0x11c38, 14},
+ EAWEntry{.neutral, 0x11c50, 29},
+ EAWEntry{.neutral, 0x11c70, 32},
+ EAWEntry{.neutral, 0x11c92, 22},
+ EAWEntry{.neutral, 0x11ca9, 14},
+ EAWEntry{.neutral, 0x11d00, 7},
+ EAWEntry{.neutral, 0x11d08, 2},
+ EAWEntry{.neutral, 0x11d0b, 44},
+ EAWEntry{.neutral, 0x11d3a, 1},
+ EAWEntry{.neutral, 0x11d3c, 2},
+ EAWEntry{.neutral, 0x11d3f, 9},
+ EAWEntry{.neutral, 0x11d50, 10},
+ EAWEntry{.neutral, 0x11d60, 6},
+ EAWEntry{.neutral, 0x11d67, 2},
+ EAWEntry{.neutral, 0x11d6a, 37},
+ EAWEntry{.neutral, 0x11d90, 2},
+ EAWEntry{.neutral, 0x11d93, 6},
+ EAWEntry{.neutral, 0x11da0, 10},
+ EAWEntry{.neutral, 0x11ee0, 25},
+ EAWEntry{.neutral, 0x11fb0, 1},
+ EAWEntry{.neutral, 0x11fc0, 50},
+ EAWEntry{.neutral, 0x11fff, 923},
+ EAWEntry{.neutral, 0x12400, 111},
+ EAWEntry{.neutral, 0x12470, 5},
+ EAWEntry{.neutral, 0x12480, 196},
+ EAWEntry{.neutral, 0x13000, 1071},
+ EAWEntry{.neutral, 0x13430, 9},
+ EAWEntry{.neutral, 0x14400, 583},
+ EAWEntry{.neutral, 0x16800, 569},
+ EAWEntry{.neutral, 0x16a40, 31},
+ EAWEntry{.neutral, 0x16a60, 10},
+ EAWEntry{.neutral, 0x16a6e, 2},
+ EAWEntry{.neutral, 0x16ad0, 30},
+ EAWEntry{.neutral, 0x16af0, 6},
+ EAWEntry{.neutral, 0x16b00, 70},
+ EAWEntry{.neutral, 0x16b50, 10},
+ EAWEntry{.neutral, 0x16b5b, 7},
+ EAWEntry{.neutral, 0x16b63, 21},
+ EAWEntry{.neutral, 0x16b7d, 19},
+ EAWEntry{.neutral, 0x16e40, 91},
+ EAWEntry{.neutral, 0x16f00, 75},
+ EAWEntry{.neutral, 0x16f4f, 57},
+ EAWEntry{.neutral, 0x16f8f, 17},
+ EAWEntry{.wide, 0x16fe0, 5},
+ EAWEntry{.wide, 0x16ff0, 2},
+ EAWEntry{.wide, 0x17000, 6136},
+ EAWEntry{.wide, 0x18800, 1238},
+ EAWEntry{.wide, 0x18d00, 9},
+ EAWEntry{.wide, 0x1b000, 287},
+ EAWEntry{.wide, 0x1b150, 3},
+ EAWEntry{.wide, 0x1b164, 4},
+ EAWEntry{.wide, 0x1b170, 396},
+ EAWEntry{.neutral, 0x1bc00, 107},
+ EAWEntry{.neutral, 0x1bc70, 13},
+ EAWEntry{.neutral, 0x1bc80, 9},
+ EAWEntry{.neutral, 0x1bc90, 10},
+ EAWEntry{.neutral, 0x1bc9c, 8},
+ EAWEntry{.neutral, 0x1d000, 246},
+ EAWEntry{.neutral, 0x1d100, 39},
+ EAWEntry{.neutral, 0x1d129, 192},
+ EAWEntry{.neutral, 0x1d200, 70},
+ EAWEntry{.neutral, 0x1d2e0, 20},
+ EAWEntry{.neutral, 0x1d300, 87},
+ EAWEntry{.neutral, 0x1d360, 25},
+ EAWEntry{.neutral, 0x1d400, 85},
+ EAWEntry{.neutral, 0x1d456, 71},
+ EAWEntry{.neutral, 0x1d49e, 2},
+ EAWEntry{.neutral, 0x1d4a2, 1},
+ EAWEntry{.neutral, 0x1d4a5, 2},
+ EAWEntry{.neutral, 0x1d4a9, 4},
+ EAWEntry{.neutral, 0x1d4ae, 12},
+ EAWEntry{.neutral, 0x1d4bb, 1},
+ EAWEntry{.neutral, 0x1d4bd, 7},
+ EAWEntry{.neutral, 0x1d4c5, 65},
+ EAWEntry{.neutral, 0x1d507, 4},
+ EAWEntry{.neutral, 0x1d50d, 8},
+ EAWEntry{.neutral, 0x1d516, 7},
+ EAWEntry{.neutral, 0x1d51e, 28},
+ EAWEntry{.neutral, 0x1d53b, 4},
+ EAWEntry{.neutral, 0x1d540, 5},
+ EAWEntry{.neutral, 0x1d546, 1},
+ EAWEntry{.neutral, 0x1d54a, 7},
+ EAWEntry{.neutral, 0x1d552, 340},
+ EAWEntry{.neutral, 0x1d6a8, 292},
+ EAWEntry{.neutral, 0x1d7ce, 702},
+ EAWEntry{.neutral, 0x1da9b, 5},
+ EAWEntry{.neutral, 0x1daa1, 15},
+ EAWEntry{.neutral, 0x1e000, 7},
+ EAWEntry{.neutral, 0x1e008, 17},
+ EAWEntry{.neutral, 0x1e01b, 7},
+ EAWEntry{.neutral, 0x1e023, 2},
+ EAWEntry{.neutral, 0x1e026, 5},
+ EAWEntry{.neutral, 0x1e100, 45},
+ EAWEntry{.neutral, 0x1e130, 14},
+ EAWEntry{.neutral, 0x1e140, 10},
+ EAWEntry{.neutral, 0x1e14e, 2},
+ EAWEntry{.neutral, 0x1e2c0, 58},
+ EAWEntry{.neutral, 0x1e2ff, 1},
+ EAWEntry{.neutral, 0x1e800, 197},
+ EAWEntry{.neutral, 0x1e8c7, 16},
+ EAWEntry{.neutral, 0x1e900, 76},
+ EAWEntry{.neutral, 0x1e950, 10},
+ EAWEntry{.neutral, 0x1e95e, 2},
+ EAWEntry{.neutral, 0x1ec71, 68},
+ EAWEntry{.neutral, 0x1ed01, 61},
+ EAWEntry{.neutral, 0x1ee00, 4},
+ EAWEntry{.neutral, 0x1ee05, 27},
+ EAWEntry{.neutral, 0x1ee21, 2},
+ EAWEntry{.neutral, 0x1ee24, 1},
+ EAWEntry{.neutral, 0x1ee27, 1},
+ EAWEntry{.neutral, 0x1ee29, 10},
+ EAWEntry{.neutral, 0x1ee34, 4},
+ EAWEntry{.neutral, 0x1ee39, 1},
+ EAWEntry{.neutral, 0x1ee3b, 1},
+ EAWEntry{.neutral, 0x1ee42, 1},
+ EAWEntry{.neutral, 0x1ee47, 1},
+ EAWEntry{.neutral, 0x1ee49, 1},
+ EAWEntry{.neutral, 0x1ee4b, 1},
+ EAWEntry{.neutral, 0x1ee4d, 3},
+ EAWEntry{.neutral, 0x1ee51, 2},
+ EAWEntry{.neutral, 0x1ee54, 1},
+ EAWEntry{.neutral, 0x1ee57, 1},
+ EAWEntry{.neutral, 0x1ee59, 1},
+ EAWEntry{.neutral, 0x1ee5b, 1},
+ EAWEntry{.neutral, 0x1ee5d, 1},
+ EAWEntry{.neutral, 0x1ee5f, 1},
+ EAWEntry{.neutral, 0x1ee61, 2},
+ EAWEntry{.neutral, 0x1ee64, 1},
+ EAWEntry{.neutral, 0x1ee67, 4},
+ EAWEntry{.neutral, 0x1ee6c, 7},
+ EAWEntry{.neutral, 0x1ee74, 4},
+ EAWEntry{.neutral, 0x1ee79, 4},
+ EAWEntry{.neutral, 0x1ee7e, 1},
+ EAWEntry{.neutral, 0x1ee80, 10},
+ EAWEntry{.neutral, 0x1ee8b, 17},
+ EAWEntry{.neutral, 0x1eea1, 3},
+ EAWEntry{.neutral, 0x1eea5, 5},
+ EAWEntry{.neutral, 0x1eeab, 17},
+ EAWEntry{.neutral, 0x1eef0, 2},
+ EAWEntry{.neutral, 0x1f000, 4},
+ EAWEntry{.wide, 0x1f004, 1},
+ EAWEntry{.neutral, 0x1f005, 39},
+ EAWEntry{.neutral, 0x1f030, 100},
+ EAWEntry{.neutral, 0x1f0a0, 15},
+ EAWEntry{.neutral, 0x1f0b1, 15},
+ EAWEntry{.neutral, 0x1f0c1, 14},
+ EAWEntry{.wide, 0x1f0cf, 1},
+ EAWEntry{.neutral, 0x1f0d1, 37},
+ EAWEntry{.ambiguous, 0x1f100, 11},
+ EAWEntry{.neutral, 0x1f10b, 5},
+ EAWEntry{.ambiguous, 0x1f110, 30},
+ EAWEntry{.neutral, 0x1f12e, 2},
+ EAWEntry{.ambiguous, 0x1f130, 58},
+ EAWEntry{.neutral, 0x1f16a, 6},
+ EAWEntry{.ambiguous, 0x1f170, 30},
+ EAWEntry{.wide, 0x1f18e, 1},
+ EAWEntry{.ambiguous, 0x1f18f, 2},
+ EAWEntry{.wide, 0x1f191, 10},
+ EAWEntry{.ambiguous, 0x1f19b, 18},
+ EAWEntry{.neutral, 0x1f1ad, 1},
+ EAWEntry{.neutral, 0x1f1e6, 26},
+ EAWEntry{.wide, 0x1f200, 3},
+ EAWEntry{.wide, 0x1f210, 44},
+ EAWEntry{.wide, 0x1f240, 9},
+ EAWEntry{.wide, 0x1f250, 2},
+ EAWEntry{.wide, 0x1f260, 6},
+ EAWEntry{.wide, 0x1f300, 33},
+ EAWEntry{.neutral, 0x1f321, 12},
+ EAWEntry{.wide, 0x1f32d, 9},
+ EAWEntry{.neutral, 0x1f336, 1},
+ EAWEntry{.wide, 0x1f337, 70},
+ EAWEntry{.neutral, 0x1f37d, 1},
+ EAWEntry{.wide, 0x1f37e, 22},
+ EAWEntry{.neutral, 0x1f394, 12},
+ EAWEntry{.wide, 0x1f3a0, 43},
+ EAWEntry{.neutral, 0x1f3cb, 4},
+ EAWEntry{.wide, 0x1f3cf, 5},
+ EAWEntry{.neutral, 0x1f3d4, 12},
+ EAWEntry{.wide, 0x1f3e0, 17},
+ EAWEntry{.neutral, 0x1f3f1, 3},
+ EAWEntry{.wide, 0x1f3f4, 1},
+ EAWEntry{.neutral, 0x1f3f5, 3},
+ EAWEntry{.wide, 0x1f3f8, 71},
+ EAWEntry{.neutral, 0x1f43f, 1},
+ EAWEntry{.wide, 0x1f440, 1},
+ EAWEntry{.neutral, 0x1f441, 1},
+ EAWEntry{.wide, 0x1f442, 187},
+ EAWEntry{.neutral, 0x1f4fd, 2},
+ EAWEntry{.wide, 0x1f4ff, 63},
+ EAWEntry{.neutral, 0x1f53e, 13},
+ EAWEntry{.wide, 0x1f54b, 4},
+ EAWEntry{.neutral, 0x1f54f, 1},
+ EAWEntry{.wide, 0x1f550, 24},
+ EAWEntry{.neutral, 0x1f568, 18},
+ EAWEntry{.wide, 0x1f57a, 1},
+ EAWEntry{.neutral, 0x1f57b, 26},
+ EAWEntry{.wide, 0x1f595, 2},
+ EAWEntry{.neutral, 0x1f597, 13},
+ EAWEntry{.wide, 0x1f5a4, 1},
+ EAWEntry{.neutral, 0x1f5a5, 86},
+ EAWEntry{.wide, 0x1f5fb, 85},
+ EAWEntry{.neutral, 0x1f650, 48},
+ EAWEntry{.wide, 0x1f680, 70},
+ EAWEntry{.neutral, 0x1f6c6, 6},
+ EAWEntry{.wide, 0x1f6cc, 1},
+ EAWEntry{.neutral, 0x1f6cd, 3},
+ EAWEntry{.wide, 0x1f6d0, 3},
+ EAWEntry{.neutral, 0x1f6d3, 2},
+ EAWEntry{.wide, 0x1f6d5, 3},
+ EAWEntry{.neutral, 0x1f6e0, 11},
+ EAWEntry{.wide, 0x1f6eb, 2},
+ EAWEntry{.neutral, 0x1f6f0, 4},
+ EAWEntry{.wide, 0x1f6f4, 9},
+ EAWEntry{.neutral, 0x1f700, 116},
+ EAWEntry{.neutral, 0x1f780, 89},
+ EAWEntry{.wide, 0x1f7e0, 12},
+ EAWEntry{.neutral, 0x1f800, 12},
+ EAWEntry{.neutral, 0x1f810, 56},
+ EAWEntry{.neutral, 0x1f850, 10},
+ EAWEntry{.neutral, 0x1f860, 40},
+ EAWEntry{.neutral, 0x1f890, 30},
+ EAWEntry{.neutral, 0x1f8b0, 2},
+ EAWEntry{.neutral, 0x1f900, 12},
+ EAWEntry{.wide, 0x1f90c, 47},
+ EAWEntry{.neutral, 0x1f93b, 1},
+ EAWEntry{.wide, 0x1f93c, 10},
+ EAWEntry{.neutral, 0x1f946, 1},
+ EAWEntry{.wide, 0x1f947, 50},
+ EAWEntry{.wide, 0x1f97a, 82},
+ EAWEntry{.wide, 0x1f9cd, 51},
+ EAWEntry{.neutral, 0x1fa00, 84},
+ EAWEntry{.neutral, 0x1fa60, 14},
+ EAWEntry{.wide, 0x1fa70, 5},
+ EAWEntry{.wide, 0x1fa78, 3},
+ EAWEntry{.wide, 0x1fa80, 7},
+ EAWEntry{.wide, 0x1fa90, 25},
+ EAWEntry{.wide, 0x1fab0, 7},
+ EAWEntry{.wide, 0x1fac0, 3},
+ EAWEntry{.wide, 0x1fad0, 7},
+ EAWEntry{.neutral, 0x1fb00, 147},
+ EAWEntry{.neutral, 0x1fb94, 55},
+ EAWEntry{.neutral, 0x1fbf0, 10},
+ EAWEntry{.wide, 0x20000, 65534},
+ EAWEntry{.wide, 0x30000, 65534},
+ EAWEntry{.neutral, 0xe0001, 1},
+ EAWEntry{.neutral, 0xe0020, 96},
+ EAWEntry{.ambiguous, 0xe0100, 240},
+ EAWEntry{.ambiguous, 0xf0000, 65534},
+ EAWEntry{.ambiguous, 0x100000, 65534},
+ ]
+)
diff --git a/v_windows/v/vlib/encoding/utf8/east_asian/east_asian_width_test.v b/v_windows/v/vlib/encoding/utf8/east_asian/east_asian_width_test.v
new file mode 100644
index 0000000..a44a9f8
--- /dev/null
+++ b/v_windows/v/vlib/encoding/utf8/east_asian/east_asian_width_test.v
@@ -0,0 +1,23 @@
+module east_asian
+
+// test_width checks the East Asian Width property lookup and the display
+// width computation for narrow, fullwidth, halfwidth, wide, ambiguous and
+// neutral characters.
+//
+// NOTE: several literals below are compatibility (fullwidth / halfwidth)
+// forms. They must not be normalised to their plain counterparts, otherwise
+// the same literal ends up asserted with two different properties.
+fn test_width() {
+    // U+0041 LATIN CAPITAL LETTER A vs. U+FF21 FULLWIDTH LATIN CAPITAL LETTER A
+    assert east_asian_width_property_at('A', 0) == .narrow
+    assert east_asian_width_property_at('Ａ', 0) == .full
+    // U+FF71 HALFWIDTH KATAKANA LETTER A vs. U+30A2 KATAKANA LETTER A
+    assert east_asian_width_property_at('ｱ', 0) == .half
+    assert east_asian_width_property_at('ア', 0) == .wide
+    assert east_asian_width_property_at('☆', 0) == .ambiguous
+    assert east_asian_width_property_at('ج', 0) == .neutral
+    assert display_width('abc', 1) == 3
+    assert display_width('ひらがな', 1) == 8
+    // regular (wide) katakana: 2 columns each
+    assert display_width('カタカナ', 1) == 8
+    // halfwidth katakana U+FF76 U+FF80 U+FF76 U+FF85: 1 column each
+    assert display_width('ｶﾀｶﾅ', 1) == 4
+    assert display_width('한글', 1) == 4
+    assert display_width('한자', 1) == 4
+    assert display_width('漢字', 1) == 4
+    assert display_width('简体字', 1) == 6
+    assert display_width('繁體字', 1) == 6
+    assert display_width('अरबी लिपि', 1) == 9
+    // the second argument presumably selects the width used for ambiguous
+    // characters (the same star measures 1 or 2) - confirm against display_width
+    assert display_width('☆', 1) == 1
+    assert display_width('☆', 2) == 2
+    assert display_width('🐈👽📛', 1) == 6
+}
diff --git a/v_windows/v/vlib/encoding/utf8/encoding_utf8_test.v b/v_windows/v/vlib/encoding/utf8/encoding_utf8_test.v
new file mode 100644
index 0000000..ebea87c
--- /dev/null
+++ b/v_windows/v/vlib/encoding/utf8/encoding_utf8_test.v
@@ -0,0 +1,9 @@
+import encoding.utf8
+
+// test_validate_str feeds well-formed and malformed byte sequences to
+// utf8.validate_str and checks the verdict for each of them.
+fn test_validate_str() {
+    // accepted: the same text once as literal characters, once as raw bytes
+    well_formed := ['añçá', '\x61\xC3\xB1\xC3\xA7\xC3\xA1']
+    for sample in well_formed {
+        assert utf8.validate_str(sample)
+    }
+    // rejected: bytes that can never start a sequence, and a lead byte that
+    // is not followed by a continuation byte
+    malformed := ['\xC0\xC1', '\xF5\xFF', '\xE0\xEF']
+    for sample in malformed {
+        assert !utf8.validate_str(sample)
+    }
+}
diff --git a/v_windows/v/vlib/encoding/utf8/utf8.v b/v_windows/v/vlib/encoding/utf8/utf8.v
new file mode 100644
index 0000000..88c598f
--- /dev/null
+++ b/v_windows/v/vlib/encoding/utf8/utf8.v
@@ -0,0 +1,88 @@
+module utf8
+
+struct Utf8State { // progress of the byte-by-byte UTF-8 validator (see validate / next_state)
+mut:
+ index int // 0 = at a character boundary; 1..8 = which lead-byte class next_state is trying or matching
+ subindex int // bytes of the current multi-byte sequence accepted so far; 0 when none is in progress
+ failed bool // set when a byte cannot be part of a well-formed sequence
+}
+
+// validate_str reports whether `str` holds well-formed UTF-8.
+// It forwards the string's byte pointer and byte length to `validate`.
+pub fn validate_str(str string) bool {
+    byte_count := str.len
+    return validate(str.str, byte_count)
+}
+
+// validate reports whether the bytes at `data` form well-formed UTF-8.
+// At most `len` bytes are inspected, and scanning stops early at the first
+// zero byte. The result is false as soon as the state machine flags a byte,
+// and also when the input ends in the middle of a multi-byte sequence.
+pub fn validate(data &byte, len int) bool {
+    mut st := Utf8State{}
+    mut pos := 0
+    for pos < len {
+        cur := unsafe { data[pos] }
+        // a NUL byte terminates the scan
+        if cur == 0 {
+            break
+        }
+        st.next_state(cur)
+        if st.failed {
+            return false
+        }
+        pos++
+    }
+    // subindex > 0 means a sequence was still waiting for more bytes
+    return st.subindex <= 0 && !st.failed
+}
+
+// seq tries to consume one byte for the sequence class selected by `s.index`.
+//
+// r0 says whether the byte is a valid lead byte for this class, r1 whether it
+// is a valid second byte for this class, and is_tail whether it is a generic
+// continuation byte (0x80..0xBF).
+//
+// It returns true when the byte has been dealt with (accepted, or rejected
+// with `s.failed` set) and the caller must stop. It returns false when this
+// class does not apply to the byte as a lead byte; `s.index` has then been
+// advanced so that the caller can try the next class.
+fn (mut s Utf8State) seq(r0 bool, r1 bool, is_tail bool) bool {
+    // positions that are NOT the last byte of the sequence:
+    // byte 0 of every class, byte 1 of 3/4-byte classes, byte 2 of 4-byte classes
+    if s.subindex == 0 || (s.index > 1 && s.subindex == 1) || (s.index >= 6 && s.subindex == 2) {
+        if (s.subindex == 0 && r0) || (s.subindex == 1 && r1) || (s.subindex == 2 && is_tail) {
+            s.subindex++
+            return true
+        }
+        if s.subindex > 0 {
+            // A lead byte was already accepted, so this byte had to be a
+            // continuation byte. Retrying it as the lead byte of another class
+            // would silently drop the truncated sequence and accept input such
+            // as E0 E1 80 80, so flag the error instead.
+            s.failed = true
+            return true
+        }
+    } else {
+        // last byte of the sequence: it must be a continuation byte, and a
+        // valid one puts the state back at a character boundary
+        s.failed = true
+        if is_tail {
+            s.index = 0
+            s.subindex = 0
+            s.failed = false
+        }
+        return true
+    }
+    // not a lead byte of this class: move on to the next class
+    s.index++
+    s.subindex = 0
+    return false
+}
+
+// next_state advances the validator by one input byte `c`.
+//
+// At a character boundary (`index == 0`) bytes up to 0x7F are complete
+// characters. Any other byte is offered to the multi-byte classes 1..8 in
+// order; a class whose `seq` call returns false has advanced `index`, so the
+// following check picks up the same byte. If no class takes the byte, the
+// state is marked as failed.
+fn (mut s Utf8State) next_state(c byte) {
+    if s.index == 0 {
+        // single-byte characters need no state
+        if c <= 0x7F {
+            return
+        }
+        s.index = 1
+        s.subindex = 0
+    }
+    is_tail := c >= 0x80 && c <= 0xBF
+    // 2-byte sequences: lead C2..DF
+    if s.index == 1 {
+        if s.seq(c >= 0xC2 && c <= 0xDF, false, is_tail) {
+            return
+        }
+    }
+    // 3-byte sequences: lead E0, second byte A0..BF
+    if s.index == 2 {
+        if s.seq(c == 0xE0, c >= 0xA0 && c <= 0xBF, is_tail) {
+            return
+        }
+    }
+    // 3-byte sequences: lead E1..EC, second byte 80..BF
+    if s.index == 3 {
+        if s.seq(c >= 0xE1 && c <= 0xEC, c >= 0x80 && c <= 0xBF, is_tail) {
+            return
+        }
+    }
+    // 3-byte sequences: lead ED, second byte 80..9F
+    if s.index == 4 {
+        if s.seq(c == 0xED, c >= 0x80 && c <= 0x9F, is_tail) {
+            return
+        }
+    }
+    // 3-byte sequences: lead EE..EF, second byte 80..BF
+    if s.index == 5 {
+        if s.seq(c >= 0xEE && c <= 0xEF, c >= 0x80 && c <= 0xBF, is_tail) {
+            return
+        }
+    }
+    // 4-byte sequences: lead F0, second byte 90..BF
+    if s.index == 6 {
+        if s.seq(c == 0xF0, c >= 0x90 && c <= 0xBF, is_tail) {
+            return
+        }
+    }
+    // 4-byte sequences: lead F1..F3, second byte 80..BF
+    if s.index == 7 {
+        if s.seq(c >= 0xF1 && c <= 0xF3, c >= 0x80 && c <= 0xBF, is_tail) {
+            return
+        }
+    }
+    // 4-byte sequences: lead F4, second byte 80..8F
+    if s.index == 8 {
+        if s.seq(c == 0xF4, c >= 0x80 && c <= 0x8F, is_tail) {
+            return
+        }
+    }
+    // no class accepted the byte
+    s.failed = true
+}
diff --git a/v_windows/v/vlib/encoding/utf8/utf8_util.v b/v_windows/v/vlib/encoding/utf8/utf8_util.v
new file mode 100644
index 0000000..2e3da0d
--- /dev/null
+++ b/v_windows/v/vlib/encoding/utf8/utf8_util.v
@@ -0,0 +1,1161 @@
+/*
+utf-8 util
+
+Copyright (c) 2019-2021 Dario Deledda. All rights reserved.
+Use of this source code is governed by an MIT license
+that can be found in the LICENSE file.
+
+This file contains utilities for utf8 strings
+*/
+module utf8
+
+/*
+Utility functions
+*/
+
+// len returns the number of unicode characters in `s`, as opposed to `s.len`,
+// which is the number of bytes.
+pub fn len(s string) int {
+	mut chars := 0
+	mut pos := 0
+	// hop from lead byte to lead byte, counting one character per hop
+	for pos < s.len {
+		pos += utf8_char_len(s[pos])
+		chars++
+	}
+	return chars
+}
+
+// get_uchar decodes the UTF-8 encoded character that starts at byte `index` of `s`
+// and returns its unicode code point as an int.
+// It returns 0 when `index` is outside of the string (which includes the empty
+// string) or when the sequence starting at `index` is cut off by the end of `s`.
+pub fn get_uchar(s string, index int) int {
+	if index < 0 || index >= s.len {
+		return 0
+	}
+	ch_len := utf8_char_len(s[index])
+	if ch_len == 1 {
+		return int(s[index])
+	}
+	// a sequence truncated by the end of the string cannot be decoded
+	if index + ch_len > s.len {
+		return 0
+	}
+
+	// pack the raw bytes of the sequence into one word, first byte in the highest position
+	mut lword := 0
+	for i := 0; i < ch_len; i++ {
+		lword = (lword << 8) | int(s[index + i])
+	}
+
+	// 2 byte utf-8
+	// byte format: 110xxxxx 10xxxxxx
+	if ch_len == 2 {
+		return ((lword & 0x1f00) >> 2) | (lword & 0x3f)
+	}
+	// 3 byte utf-8
+	// byte format: 1110xxxx 10xxxxxx 10xxxxxx
+	if ch_len == 3 {
+		return ((lword & 0x0f0000) >> 4) | ((lword & 0x3f00) >> 2) | (lword & 0x3f)
+	}
+	// 4 byte utf-8
+	// byte format: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+	if ch_len == 4 {
+		return ((lword & 0x07000000) >> 6) | ((lword & 0x003f0000) >> 4) | ((lword & 0x00003F00) >> 2) | (lword & 0x0000003f)
+	}
+	return 0
+}
+
+// raw_index - get the raw character from the string by the given index value.
+// The index counts unicode characters, not bytes.
+// example: utf8.raw_index('我是V Lang', 1) => '是'
+pub fn raw_index(s string, index int) string {
+	mut runes := []rune{}
+	mut pos := 0
+
+	for pos < s.len {
+		// stop decoding as soon as the requested character has been collected
+		if runes.len - 1 == index {
+			break
+		}
+
+		first := s[pos]
+		// number of bytes that follow the first byte of this character
+		extra := ((0xe5000000 >> ((first >> 3) & 0x1e)) & 3)
+
+		if extra > 0 {
+			runes << rune(get_uchar(s, pos))
+		} else {
+			runes << rune(first)
+		}
+		pos += extra + 1
+	}
+
+	return runes[index].str()
+}
+
+// reverse - returns a reversed string.
+// The string is reversed character by character, not byte by byte.
+// example: utf8.reverse('你好世界hello world') => 'dlrow olleh界世好你'.
+pub fn reverse(s string) string {
+	len_s := len(s)
+	if len_s == 0 || len_s == 1 {
+		return s.clone()
+	}
+	// Decode every character exactly once while walking the string; looking each
+	// character up by its index would re-scan the string from the start every time.
+	mut str_array := []string{cap: len_s}
+	mut pos := 0
+	for pos < s.len {
+		ch := rune(get_uchar(s, pos))
+		str_array << ch.str()
+		pos += utf8_char_len(s[pos])
+	}
+	str_array = str_array.reverse()
+	return str_array.join('')
+}
+
+/*
+Conversion functions
+*/
+
+// to_upper returns a copy of `s` converted to upper case.
+pub fn to_upper(s string) string {
+	make_upper := true
+	return up_low(s, make_upper)
+}
+
+// to_lower returns a copy of `s` converted to lower case.
+pub fn to_lower(s string) string {
+	make_upper := false
+	return up_low(s, make_upper)
+}
+
+/*
+Punctuation functions
+
+The "western" functions search a small table, which is quicker than
+searching the global unicode table. **Use only for western chars**.
+*/
+
+//
+// Western
+//
+
+// is_punct returns true if the byte at string[index] starts a unicode character
+// that is a western punctuation mark.
+pub fn is_punct(s string, index int) bool {
+	uchar := get_uchar(s, index)
+	return is_uchar_punct(uchar)
+}
+
+// is_uchar_punct returns true if the input unicode code point is a western
+// unicode punctuation mark.
+pub fn is_uchar_punct(uchar int) bool {
+	// find_punct_in_table returns 0 both for "not found" and for a match in the
+	// first slot, so the result is confirmed by comparing the entry itself.
+	// Testing the returned index against 0 would never report the first entry.
+	idx := find_punct_in_table(uchar, utf8.unicode_punct_western)
+	return utf8.unicode_punct_western[idx] == uchar
+}
+
+//
+// Global
+//
+
+// is_global_punct returns true if the byte at string[index] starts a unicode
+// character that is a punctuation mark of the global table.
+pub fn is_global_punct(s string, index int) bool {
+	uchar := get_uchar(s, index)
+	return is_uchar_global_punct(uchar)
+}
+
+// is_uchar_global_punct returns true if the input unicode code point is a
+// punctuation mark of the global table.
+pub fn is_uchar_global_punct(uchar int) bool {
+	// find_punct_in_table returns 0 both for "not found" and for a match in the
+	// first slot, so the result is confirmed by comparing the entry itself.
+	// Testing the returned index against 0 would never report the first entry.
+	idx := find_punct_in_table(uchar, utf8.unicode_punct)
+	return utf8.unicode_punct[idx] == uchar
+}
+
+/*
+Private functions
+*/
+
+// utf8_to_lower maps the unicode code point `in_cp` to its lower case counterpart.
+// Code points that are not covered by the ranges and the table below are
+// returned unchanged.
+fn utf8_to_lower(in_cp int) int {
+	mut cp := in_cp
+	if ((0x0041 <= cp) && (0x005a >= cp)) || ((0x00c0 <= cp) && (0x00d6 >= cp))
+		|| ((0x00d8 <= cp) && (0x00de >= cp)) || ((0x0391 <= cp) && (0x03a1 >= cp))
+		|| ((0x03a3 <= cp) && (0x03ab >= cp)) || ((0x0410 <= cp) && (0x042f >= cp)) {
+		// lower case letter is 32 positions after the upper case one
+		cp += 32
+	} else if (0x0400 <= cp) && (0x040f >= cp) {
+		cp += 80
+	} else if ((0x0100 <= cp) && (0x012f >= cp)) || ((0x0132 <= cp) && (0x0137 >= cp))
+		|| ((0x014a <= cp) && (0x0177 >= cp)) || ((0x0182 <= cp) && (0x0185 >= cp))
+		|| ((0x01a0 <= cp) && (0x01a5 >= cp)) || ((0x01de <= cp) && (0x01ef >= cp))
+		|| ((0x01f8 <= cp) && (0x021f >= cp)) || ((0x0222 <= cp) && (0x0233 >= cp))
+		|| ((0x0246 <= cp) && (0x024f >= cp)) || ((0x03d8 <= cp) && (0x03ef >= cp))
+		|| ((0x0460 <= cp) && (0x0481 >= cp)) || ((0x048a <= cp) && (0x04ff >= cp)) {
+		// pairs stored as (even = upper, odd = lower)
+		cp |= 0x1
+	} else if ((0x0139 <= cp) && (0x0148 >= cp)) || ((0x0179 <= cp) && (0x017e >= cp))
+		|| ((0x01af <= cp) && (0x01b0 >= cp)) || ((0x01b3 <= cp) && (0x01b6 >= cp))
+		|| ((0x01cd <= cp) && (0x01dc >= cp)) {
+		// pairs stored as (odd = upper, even = lower)
+		cp += 1
+		cp &= ~0x1
+	} else if ((0x0531 <= cp) && (0x0556 >= cp)) || ((0x10A0 <= cp) && (0x10C5 >= cp)) {
+		// ARMENIAN or GEORGIAN
+		cp += 0x30
+	} else if (((0x1E00 <= cp) && (0x1E94 >= cp)) || ((0x1EA0 <= cp) && (0x1EF8 >= cp)))
+		&& (cp & 1 == 0) {
+		// LATIN CAPITAL LETTER
+		cp += 1
+	} else if (0x24B6 <= cp) && (0x24CF >= cp) {
+		// CIRCLED LATIN
+		cp += 0x1a
+	} else if (0xFF21 <= cp) && (0xFF3A >= cp) {
+		// FULLWIDTH LATIN CAPITAL
+		// U+FF21..U+FF3A (A..Z) map to U+FF41..U+FF5A (a..z): the distance is 0x20
+		cp += 0x20
+	} else if ((0x1F08 <= cp) && (0x1F0F >= cp)) || ((0x1F18 <= cp) && (0x1F1D >= cp))
+		|| ((0x1F28 <= cp) && (0x1F2F >= cp)) || ((0x1F38 <= cp) && (0x1F3F >= cp))
+		|| ((0x1F48 <= cp) && (0x1F4D >= cp)) || ((0x1F68 <= cp) && (0x1F6F >= cp))
+		|| ((0x1F88 <= cp) && (0x1F8F >= cp)) || ((0x1F98 <= cp) && (0x1F9F >= cp))
+		|| ((0x1FA8 <= cp) && (0x1FAF >= cp)) {
+		// GREEK
+		cp -= 8
+	} else {
+		// irregular pairs, listed one by one
+		match cp {
+			0x0178 { cp = 0x00ff }
+			0x0243 { cp = 0x0180 }
+			0x018e { cp = 0x01dd }
+			0x023d { cp = 0x019a }
+			0x0220 { cp = 0x019e }
+			0x01b7 { cp = 0x0292 }
+			0x01c4 { cp = 0x01c6 }
+			0x01c7 { cp = 0x01c9 }
+			0x01ca { cp = 0x01cc }
+			0x01f1 { cp = 0x01f3 }
+			0x01f7 { cp = 0x01bf }
+			0x0187 { cp = 0x0188 }
+			0x018b { cp = 0x018c }
+			0x0191 { cp = 0x0192 }
+			0x0198 { cp = 0x0199 }
+			0x01a7 { cp = 0x01a8 }
+			0x01ac { cp = 0x01ad }
+			0x01af { cp = 0x01b0 }
+			0x01b8 { cp = 0x01b9 }
+			0x01bc { cp = 0x01bd }
+			0x01f4 { cp = 0x01f5 }
+			0x023b { cp = 0x023c }
+			0x0241 { cp = 0x0242 }
+			0x03fd { cp = 0x037b }
+			0x03fe { cp = 0x037c }
+			0x03ff { cp = 0x037d }
+			0x037f { cp = 0x03f3 }
+			0x0386 { cp = 0x03ac }
+			0x0388 { cp = 0x03ad }
+			0x0389 { cp = 0x03ae }
+			0x038a { cp = 0x03af }
+			0x038c { cp = 0x03cc }
+			0x038e { cp = 0x03cd }
+			0x038f { cp = 0x03ce }
+			0x0370 { cp = 0x0371 }
+			0x0372 { cp = 0x0373 }
+			0x0376 { cp = 0x0377 }
+			0x03f4 { cp = 0x03b8 }
+			0x03cf { cp = 0x03d7 }
+			0x03f9 { cp = 0x03f2 }
+			0x03f7 { cp = 0x03f8 }
+			0x03fa { cp = 0x03fb }
+			// GREEK
+			0x1F59 { cp = 0x1F51 }
+			0x1F5B { cp = 0x1F53 }
+			0x1F5D { cp = 0x1F55 }
+			0x1F5F { cp = 0x1F57 }
+			0x1FB8 { cp = 0x1FB0 }
+			0x1FB9 { cp = 0x1FB1 }
+			0x1FD8 { cp = 0x1FD0 }
+			0x1FD9 { cp = 0x1FD1 }
+			0x1FE8 { cp = 0x1FE0 }
+			0x1FE9 { cp = 0x1FE1 }
+			else {}
+		}
+	}
+
+	return cp
+}
+
+// utf8_to_upper maps the unicode code point `in_cp` to its upper case counterpart.
+// Code points that are not covered by the ranges and the table below are
+// returned unchanged.
+fn utf8_to_upper(in_cp int) int {
+	mut cp := in_cp
+	if ((0x0061 <= cp) && (0x007a >= cp)) || ((0x00e0 <= cp) && (0x00f6 >= cp))
+		|| ((0x00f8 <= cp) && (0x00fe >= cp)) || ((0x03b1 <= cp) && (0x03c1 >= cp))
+		|| ((0x03c3 <= cp) && (0x03cb >= cp)) || ((0x0430 <= cp) && (0x044f >= cp)) {
+		// upper case letter is 32 positions before the lower case one
+		cp -= 32
+	} else if (0x0450 <= cp) && (0x045f >= cp) {
+		cp -= 80
+	} else if ((0x0100 <= cp) && (0x012f >= cp)) || ((0x0132 <= cp) && (0x0137 >= cp))
+		|| ((0x014a <= cp) && (0x0177 >= cp)) || ((0x0182 <= cp) && (0x0185 >= cp))
+		|| ((0x01a0 <= cp) && (0x01a5 >= cp)) || ((0x01de <= cp) && (0x01ef >= cp))
+		|| ((0x01f8 <= cp) && (0x021f >= cp)) || ((0x0222 <= cp) && (0x0233 >= cp))
+		|| ((0x0246 <= cp) && (0x024f >= cp)) || ((0x03d8 <= cp) && (0x03ef >= cp))
+		|| ((0x0460 <= cp) && (0x0481 >= cp)) || ((0x048a <= cp) && (0x04ff >= cp)) {
+		// pairs stored as (even = upper, odd = lower)
+		cp &= ~0x1
+	} else if ((0x0139 <= cp) && (0x0148 >= cp)) || ((0x0179 <= cp) && (0x017e >= cp))
+		|| ((0x01af <= cp) && (0x01b0 >= cp)) || ((0x01b3 <= cp) && (0x01b6 >= cp))
+		|| ((0x01cd <= cp) && (0x01dc >= cp)) {
+		// pairs stored as (odd = upper, even = lower)
+		cp -= 1
+		cp |= 0x1
+	} else if ((0x0561 <= cp) && (0x0586 >= cp)) || ((0x10D0 <= cp) && (0x10F5 >= cp)) {
+		// ARMENIAN or GEORGIAN
+		cp -= 0x30
+	} else if (((0x1E01 <= cp) && (0x1E95 >= cp)) || ((0x1EA1 <= cp) && (0x1EF9 >= cp)))
+		&& (cp & 1 == 1) {
+		// LATIN SMALL LETTER
+		cp -= 1
+	} else if (0x24D0 <= cp) && (0x24E9 >= cp) {
+		// CIRCLED LATIN
+		cp -= 0x1a
+	} else if (0xFF41 <= cp) && (0xFF5A >= cp) {
+		// FULLWIDTH LATIN SMALL
+		// U+FF41..U+FF5A (a..z) map to U+FF21..U+FF3A (A..Z): the distance is 0x20
+		cp -= 0x20
+	} else if ((0x1F00 <= cp) && (0x1F07 >= cp)) || ((0x1F10 <= cp) && (0x1F15 >= cp))
+		|| ((0x1F20 <= cp) && (0x1F27 >= cp)) || ((0x1F30 <= cp) && (0x1F37 >= cp))
+		|| ((0x1F40 <= cp) && (0x1F45 >= cp)) || ((0x1F60 <= cp) && (0x1F67 >= cp))
+		|| ((0x1F80 <= cp) && (0x1F87 >= cp)) || ((0x1F90 <= cp) && (0x1F97 >= cp))
+		|| ((0x1FA0 <= cp) && (0x1FA7 >= cp)) {
+		// GREEK
+		cp += 8
+	} else {
+		// irregular pairs, listed one by one
+		match cp {
+			0x00ff { cp = 0x0178 }
+			0x0180 { cp = 0x0243 }
+			0x01dd { cp = 0x018e }
+			0x019a { cp = 0x023d }
+			0x019e { cp = 0x0220 }
+			0x0292 { cp = 0x01b7 }
+			0x01c6 { cp = 0x01c4 }
+			0x01c9 { cp = 0x01c7 }
+			0x01cc { cp = 0x01ca }
+			0x01f3 { cp = 0x01f1 }
+			0x01bf { cp = 0x01f7 }
+			0x0188 { cp = 0x0187 }
+			0x018c { cp = 0x018b }
+			0x0192 { cp = 0x0191 }
+			0x0199 { cp = 0x0198 }
+			0x01a8 { cp = 0x01a7 }
+			0x01ad { cp = 0x01ac }
+			0x01b0 { cp = 0x01af }
+			0x01b9 { cp = 0x01b8 }
+			0x01bd { cp = 0x01bc }
+			0x01f5 { cp = 0x01f4 }
+			0x023c { cp = 0x023b }
+			0x0242 { cp = 0x0241 }
+			0x037b { cp = 0x03fd }
+			0x037c { cp = 0x03fe }
+			0x037d { cp = 0x03ff }
+			0x03f3 { cp = 0x037f }
+			0x03ac { cp = 0x0386 }
+			0x03ad { cp = 0x0388 }
+			0x03ae { cp = 0x0389 }
+			0x03af { cp = 0x038a }
+			0x03cc { cp = 0x038c }
+			0x03cd { cp = 0x038e }
+			0x03ce { cp = 0x038f }
+			0x0371 { cp = 0x0370 }
+			0x0373 { cp = 0x0372 }
+			0x0377 { cp = 0x0376 }
+			0x03d1 { cp = 0x0398 }
+			0x03d7 { cp = 0x03cf }
+			0x03f2 { cp = 0x03f9 }
+			0x03f8 { cp = 0x03f7 }
+			0x03fb { cp = 0x03fa }
+			// GREEK
+			0x1F51 { cp = 0x1F59 }
+			0x1F53 { cp = 0x1F5B }
+			0x1F55 { cp = 0x1F5D }
+			0x1F57 { cp = 0x1F5F }
+			0x1FB0 { cp = 0x1FB8 }
+			0x1FB1 { cp = 0x1FB9 }
+			0x1FD0 { cp = 0x1FD8 }
+			0x1FD1 { cp = 0x1FD9 }
+			0x1FE0 { cp = 0x1FE8 }
+			0x1FE1 { cp = 0x1FE9 }
+			else {}
+		}
+	}
+
+	return cp
+}
+
+// up_low does the actual work for to_upper and to_lower.
+//
+// if upper_flag == true then make low ==> upper conversion
+// if upper_flag == false then make upper ==> low conversion
+//
+// Single byte chars are converted with the C library, 2 and 3 byte chars with
+// utf8_to_upper/utf8_to_lower, 4 byte chars are copied unchanged.
+// An empty input returns an empty string, and a multi-byte sequence that is cut
+// off by the end of the string is copied unchanged.
+fn up_low(s string, upper_flag bool) string {
+	// an empty string has no first byte to inspect
+	if s.len == 0 {
+		return ''
+	}
+
+	mut index := 0
+	mut str_res := unsafe { malloc_noscan(s.len + 1) }
+
+	for index < s.len {
+		ch_len := utf8_char_len(s[index])
+
+		// truncated trailing sequence: there is nothing to decode, copy the rest as is
+		if ch_len > s.len - index {
+			for i in index .. s.len {
+				unsafe {
+					str_res[i] = s[i]
+				}
+			}
+			index = s.len
+			break
+		}
+
+		if ch_len == 1 {
+			if upper_flag {
+				unsafe {
+					str_res[index] = byte(C.toupper(s.str[index]))
+				}
+			} else {
+				unsafe {
+					str_res[index] = byte(C.tolower(s.str[index]))
+				}
+			}
+		} else if ch_len == 2 || ch_len == 3 {
+			// decode, convert the code point, then encode again with the same length
+			res := get_uchar(s, index)
+			tab_char := if upper_flag { utf8_to_upper(res) } else { utf8_to_lower(res) }
+
+			if ch_len == 2 {
+				ch0 := byte((tab_char >> 6) & 0x1f) | 0xc0 // 110x xxxx
+				ch1 := byte((tab_char >> 0) & 0x3f) | 0x80 // 10xx xxxx
+
+				unsafe {
+					str_res[index + 0] = ch0
+					str_res[index + 1] = ch1
+				}
+			} else {
+				ch0 := byte((tab_char >> 12) & 0x0f) | 0xe0 // 1110 xxxx
+				ch1 := byte((tab_char >> 6) & 0x3f) | 0x80 // 10xx xxxx
+				ch2 := byte((tab_char >> 0) & 0x3f) | 0x80 // 10xx xxxx
+
+				unsafe {
+					str_res[index + 0] = ch0
+					str_res[index + 1] = ch1
+					str_res[index + 2] = ch2
+				}
+			}
+		} else {
+			// TODO: write if needed
+			// 4 byte chars are not converted, at the present time simply copy the utf8 char
+			for i in 0 .. ch_len {
+				unsafe {
+					str_res[index + i] = s[index + i]
+				}
+			}
+		}
+
+		index += ch_len
+	}
+
+	// for c compatibility set the ending 0
+	unsafe {
+		str_res[index] = 0
+		return tos(str_res, s.len)
+	}
+}
+
+// find_punct_in_table looks up `in_code` in the lookup table `in_table` with a
+// binary search. `in_table` must be sorted in ascending order.
+// It returns the index of the matching entry, or 0 when there is no match.
+// NOTE: 0 is therefore returned both for "not found" and for a match in the
+// first slot; a caller that has to tell the two apart must compare
+// `in_table[0]` with `in_code` itself.
+fn find_punct_in_table(in_code int, in_table []int) int {
+	// search window is [low, high); an empty table never enters the loop
+	mut low := 0
+	mut high := in_table.len
+
+	for low < high {
+		mid := (low + high) >> 1
+		x := in_table[mid]
+
+		if x == in_code {
+			return mid
+		}
+		if x > in_code {
+			high = mid
+		} else {
+			low = mid + 1
+		}
+	}
+	// not found
+	return 0
+}
+
+/*
+Unicode punctuation chars
+
+source: http://www.unicode.org/faq/punctuation_symbols.html
+*/
+const (
+ // Western punctuation mark
+ // Character Name Browser Image
+ unicode_punct_western = [
+ 0x0021 /* EXCLAMATION MARK ! */,
+ 0x0022 /* QUOTATION MARK " */,
+ 0x0027 /* APOSTROPHE ' */,
+ 0x002A /* ASTERISK * */,
+ 0x002C /* COMMA , */,
+ 0x002E /* FULL STOP . */,
+ 0x002F /* SOLIDUS / */,
+ 0x003A /* COLON : */,
+ 0x003B /* SEMICOLON ; */,
+ 0x003F /* QUESTION MARK ? */,
+ 0x00A1 /* INVERTED EXCLAMATION MARK ¡ */,
+ 0x00A7 /* SECTION SIGN § */,
+ 0x00B6 /* PILCROW SIGN ¶ */,
+ 0x00B7 /* MIDDLE DOT · */,
+ 0x00BF /* INVERTED QUESTION MARK ¿ */,
+ 0x037E /* GREEK QUESTION MARK ; */,
+ 0x0387 /* GREEK ANO TELEIA · */,
+ 0x055A /* ARMENIAN APOSTROPHE ՚ */,
+ 0x055B /* ARMENIAN EMPHASIS MARK ՛ */,
+ 0x055C /* ARMENIAN EXCLAMATION MARK ՜ */,
+ 0x055D /* ARMENIAN COMMA ՝ */,
+ 0x055E /* ARMENIAN QUESTION MARK ՞ */,
+ 0x055F /* ARMENIAN ABBREVIATION MARK ՟ */,
+ 0x0589 /* ARMENIAN FULL STOP ։ */,
+ 0x05C0 /* HEBREW PUNCTUATION PASEQ ׀ */,
+ 0x05C3 /* HEBREW PUNCTUATION SOF PASUQ ׃ */,
+ 0x05C6 /* HEBREW PUNCTUATION NUN HAFUKHA ׆ */,
+ 0x05F3 /* HEBREW PUNCTUATION GERESH ׳ */,
+ 0x05F4 /* HEBREW PUNCTUATION GERSHAYIM ״ */,
+ ]
+
+ // Unicode Characters in the 'Punctuation, Other' Category
+ // Character Name Browser Image
+ unicode_punct = [
+ 0x0021 /* EXCLAMATION MARK ! */,
+ 0x0022 /* QUOTATION MARK " */,
+ 0x0023 /* NUMBER SIGN # */,
+ 0x0025 /* PERCENT SIGN % */,
+ 0x0026 /* AMPERSAND & */,
+ 0x0027 /* APOSTROPHE ' */,
+ 0x002A /* ASTERISK * */,
+ 0x002C /* COMMA , */,
+ 0x002E /* FULL STOP . */,
+ 0x002F /* SOLIDUS / */,
+ 0x003A /* COLON : */,
+ 0x003B /* SEMICOLON ; */,
+ 0x003F /* QUESTION MARK ? */,
+ 0x0040 /* COMMERCIAL AT @ */,
+ 0x005C /* REVERSE SOLIDUS \ */,
+ 0x00A1 /* INVERTED EXCLAMATION MARK ¡ */,
+ 0x00A7 /* SECTION SIGN § */,
+ 0x00B6 /* PILCROW SIGN ¶ */,
+ 0x00B7 /* MIDDLE DOT · */,
+ 0x00BF /* INVERTED QUESTION MARK ¿ */,
+ 0x037E /* GREEK QUESTION MARK ; */,
+ 0x0387 /* GREEK ANO TELEIA · */,
+ 0x055A /* ARMENIAN APOSTROPHE ՚ */,
+ 0x055B /* ARMENIAN EMPHASIS MARK ՛ */,
+ 0x055C /* ARMENIAN EXCLAMATION MARK ՜ */,
+ 0x055D /* ARMENIAN COMMA ՝ */,
+ 0x055E /* ARMENIAN QUESTION MARK ՞ */,
+ 0x055F /* ARMENIAN ABBREVIATION MARK ՟ */,
+ 0x0589 /* ARMENIAN FULL STOP ։ */,
+ 0x05C0 /* HEBREW PUNCTUATION PASEQ ׀ */,
+ 0x05C3 /* HEBREW PUNCTUATION SOF PASUQ ׃ */,
+ 0x05C6 /* HEBREW PUNCTUATION NUN HAFUKHA ׆ */,
+ 0x05F3 /* HEBREW PUNCTUATION GERESH ׳ */,
+ 0x05F4 /* HEBREW PUNCTUATION GERSHAYIM ״ */,
+ 0x0609 /* ARABIC-INDIC PER MILLE SIGN ؉ */,
+ 0x060A /* ARABIC-INDIC PER TEN THOUSAND SIGN ؊ */,
+ 0x060C /* ARABIC COMMA ، */,
+ 0x060D /* ARABIC DATE SEPARATOR ؍ */,
+ 0x061B /* ARABIC SEMICOLON ؛ */,
+ 0x061E /* ARABIC TRIPLE DOT PUNCTUATION MARK ؞ */,
+ 0x061F /* ARABIC QUESTION MARK ؟ */,
+ 0x066A /* ARABIC PERCENT SIGN ٪ */,
+ 0x066B /* ARABIC DECIMAL SEPARATOR ٫ */,
+ 0x066C /* ARABIC THOUSANDS SEPARATOR ٬ */,
+ 0x066D /* ARABIC FIVE POINTED STAR ٭ */,
+ 0x06D4 /* ARABIC FULL STOP ۔ */,
+ 0x0700 /* SYRIAC END OF PARAGRAPH ܀ */,
+ 0x0701 /* SYRIAC SUPRALINEAR FULL STOP ܁ */,
+ 0x0702 /* SYRIAC SUBLINEAR FULL STOP ܂ */,
+ 0x0703 /* SYRIAC SUPRALINEAR COLON ܃ */,
+ 0x0704 /* SYRIAC SUBLINEAR COLON ܄ */,
+ 0x0705 /* SYRIAC HORIZONTAL COLON ܅ */,
+ 0x0706 /* SYRIAC COLON SKEWED LEFT ܆ */,
+ 0x0707 /* SYRIAC COLON SKEWED RIGHT ܇ */,
+ 0x0708 /* SYRIAC SUPRALINEAR COLON SKEWED LEFT ܈ */,
+ 0x0709 /* SYRIAC SUBLINEAR COLON SKEWED RIGHT ܉ */,
+ 0x070A /* SYRIAC CONTRACTION ܊ */,
+ 0x070B /* SYRIAC HARKLEAN OBELUS ܋ */,
+ 0x070C /* SYRIAC HARKLEAN METOBELUS ܌ */,
+ 0x070D /* SYRIAC HARKLEAN ASTERISCUS ܍ */,
+ 0x07F7 /* NKO SYMBOL GBAKURUNEN ߷ */,
+ 0x07F8 /* NKO COMMA ߸ */,
+ 0x07F9 /* NKO EXCLAMATION MARK ߹ */,
+ 0x0830 /* SAMARITAN PUNCTUATION NEQUDAA ࠰ */,
+ 0x0831 /* SAMARITAN PUNCTUATION AFSAAQ ࠱ */,
+ 0x0832 /* SAMARITAN PUNCTUATION ANGED ࠲ */,
+ 0x0833 /* SAMARITAN PUNCTUATION BAU ࠳ */,
+ 0x0834 /* SAMARITAN PUNCTUATION ATMAAU ࠴ */,
+ 0x0835 /* SAMARITAN PUNCTUATION SHIYYAALAA ࠵ */,
+ 0x0836 /* SAMARITAN ABBREVIATION MARK ࠶ */,
+ 0x0837 /* SAMARITAN PUNCTUATION MELODIC QITSA ࠷ */,
+ 0x0838 /* SAMARITAN PUNCTUATION ZIQAA ࠸ */,
+ 0x0839 /* SAMARITAN PUNCTUATION QITSA ࠹ */,
+ 0x083A /* SAMARITAN PUNCTUATION ZAEF ࠺ */,
+ 0x083B /* SAMARITAN PUNCTUATION TURU ࠻ */,
+ 0x083C /* SAMARITAN PUNCTUATION ARKAANU ࠼ */,
+ 0x083D /* SAMARITAN PUNCTUATION SOF MASHFAAT ࠽ */,
+ 0x083E /* SAMARITAN PUNCTUATION ANNAAU ࠾ */,
+ 0x085E /* MANDAIC PUNCTUATION ࡞ */,
+ 0x0964 /* DEVANAGARI DANDA । */,
+ 0x0965 /* DEVANAGARI DOUBLE DANDA ॥ */,
+ 0x0970 /* DEVANAGARI ABBREVIATION SIGN ॰ */,
+ 0x09FD /* BENGALI ABBREVIATION SIGN ৽ */,
+ 0x0A76 /* GURMUKHI ABBREVIATION SIGN ੶ */,
+ 0x0AF0 /* GUJARATI ABBREVIATION SIGN ૰ */,
+ 0x0C77 /* TELUGU SIGN SIDDHAM ౷ */,
+ 0x0C84 /* KANNADA SIGN SIDDHAM ಄ */,
+ 0x0DF4 /* SINHALA PUNCTUATION KUNDDALIYA ෴ */,
+ 0x0E4F /* THAI CHARACTER FONGMAN ๏ */,
+ 0x0E5A /* THAI CHARACTER ANGKHANKHU ๚ */,
+ 0x0E5B /* THAI CHARACTER KHOMUT ๛ */,
+ 0x0F04 /* TIBETAN MARK INITIAL YIG MGO MDUN MA ༄ */,
+ 0x0F05 /* TIBETAN MARK CLOSING YIG MGO SGAB MA ༅ */,
+ 0x0F06 /* TIBETAN MARK CARET YIG MGO PHUR SHAD MA ༆ */,
+ 0x0F07 /* TIBETAN MARK YIG MGO TSHEG SHAD MA ༇ */,
+ 0x0F08 /* TIBETAN MARK SBRUL SHAD ༈ */,
+ 0x0F09 /* TIBETAN MARK BSKUR YIG MGO ༉ */,
+ 0x0F0A /* TIBETAN MARK BKA- SHOG YIG MGO ༊ */,
+ 0x0F0B /* TIBETAN MARK INTERSYLLABIC TSHEG ་ */,
+ 0x0F0C /* TIBETAN MARK DELIMITER TSHEG BSTAR ༌ */,
+ 0x0F0D /* TIBETAN MARK SHAD ། */,
+ 0x0F0E /* TIBETAN MARK NYIS SHAD ༎ */,
+ 0x0F0F /* TIBETAN MARK TSHEG SHAD ༏ */,
+ 0x0F10 /* TIBETAN MARK NYIS TSHEG SHAD ༐ */,
+ 0x0F11 /* TIBETAN MARK RIN CHEN SPUNGS SHAD ༑ */,
+ 0x0F12 /* TIBETAN MARK RGYA GRAM SHAD ༒ */,
+ 0x0F14 /* TIBETAN MARK GTER TSHEG ༔ */,
+ 0x0F85 /* TIBETAN MARK PALUTA ྅ */,
+ 0x0FD0 /* TIBETAN MARK BSKA- SHOG GI MGO RGYAN ࿐ */,
+ 0x0FD1 /* TIBETAN MARK MNYAM YIG GI MGO RGYAN ࿑ */,
+ 0x0FD2 /* TIBETAN MARK NYIS TSHEG ࿒ */,
+ 0x0FD3 /* TIBETAN MARK INITIAL BRDA RNYING YIG MGO MDUN MA ࿓ */,
+ 0x0FD4 /* TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA ࿔ */,
+ 0x0FD9 /* TIBETAN MARK LEADING MCHAN RTAGS ࿙ */,
+ 0x0FDA /* TIBETAN MARK TRAILING MCHAN RTAGS ࿚ */,
+ 0x104A /* MYANMAR SIGN LITTLE SECTION ၊ */,
+ 0x104B /* MYANMAR SIGN SECTION ။ */,
+ 0x104C /* MYANMAR SYMBOL LOCATIVE ၌ */,
+ 0x104D /* MYANMAR SYMBOL COMPLETED ၍ */,
+ 0x104E /* MYANMAR SYMBOL AFOREMENTIONED ၎ */,
+ 0x104F /* MYANMAR SYMBOL GENITIVE ၏ */,
+ 0x10FB /* GEORGIAN PARAGRAPH SEPARATOR ჻ */,
+ 0x1360 /* ETHIOPIC SECTION MARK ፠ */,
+ 0x1361 /* ETHIOPIC WORDSPACE ፡ */,
+ 0x1362 /* ETHIOPIC FULL STOP ። */,
+ 0x1363 /* ETHIOPIC COMMA ፣ */,
+ 0x1364 /* ETHIOPIC SEMICOLON ፤ */,
+ 0x1365 /* ETHIOPIC COLON ፥ */,
+ 0x1366 /* ETHIOPIC PREFACE COLON ፦ */,
+ 0x1367 /* ETHIOPIC QUESTION MARK ፧ */,
+ 0x1368 /* ETHIOPIC PARAGRAPH SEPARATOR ፨ */,
+ 0x166E /* CANADIAN SYLLABICS FULL STOP ᙮ */,
+ 0x16EB /* RUNIC SINGLE PUNCTUATION ᛫ */,
+ 0x16EC /* RUNIC MULTIPLE PUNCTUATION ᛬ */,
+ 0x16ED /* RUNIC CROSS PUNCTUATION ᛭ */,
+ 0x1735 /* PHILIPPINE SINGLE PUNCTUATION ᜵ */,
+ 0x1736 /* PHILIPPINE DOUBLE PUNCTUATION ᜶ */,
+ 0x17D4 /* KHMER SIGN KHAN ។ */,
+ 0x17D5 /* KHMER SIGN BARIYOOSAN ៕ */,
+ 0x17D6 /* KHMER SIGN CAMNUC PII KUUH ៖ */,
+ 0x17D8 /* KHMER SIGN BEYYAL ៘ */,
+ 0x17D9 /* KHMER SIGN PHNAEK MUAN ៙ */,
+ 0x17DA /* KHMER SIGN KOOMUUT ៚ */,
+ 0x1800 /* MONGOLIAN BIRGA ᠀ */,
+ 0x1801 /* MONGOLIAN ELLIPSIS ᠁ */,
+ 0x1802 /* MONGOLIAN COMMA ᠂ */,
+ 0x1803 /* MONGOLIAN FULL STOP ᠃ */,
+ 0x1804 /* MONGOLIAN COLON ᠄ */,
+ 0x1805 /* MONGOLIAN FOUR DOTS ᠅ */,
+ 0x1807 /* MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER ᠇ */,
+ 0x1808 /* MONGOLIAN MANCHU COMMA ᠈ */,
+ 0x1809 /* MONGOLIAN MANCHU FULL STOP ᠉ */,
+ 0x180A /* MONGOLIAN NIRUGU ᠊ */,
+ 0x1944 /* LIMBU EXCLAMATION MARK ᥄ */,
+ 0x1945 /* LIMBU QUESTION MARK ᥅ */,
+ 0x1A1E /* BUGINESE PALLAWA ᨞ */,
+ 0x1A1F /* BUGINESE END OF SECTION ᨟ */,
+ 0x1AA0 /* TAI THAM SIGN WIANG ᪠ */,
+ 0x1AA1 /* TAI THAM SIGN WIANGWAAK ᪡ */,
+ 0x1AA2 /* TAI THAM SIGN SAWAN ᪢ */,
+ 0x1AA3 /* TAI THAM SIGN KEOW ᪣ */,
+ 0x1AA4 /* TAI THAM SIGN HOY ᪤ */,
+ 0x1AA5 /* TAI THAM SIGN DOKMAI ᪥ */,
+ 0x1AA6 /* TAI THAM SIGN REVERSED ROTATED RANA ᪦ */,
+ 0x1AA8 /* TAI THAM SIGN KAAN ᪨ */,
+ 0x1AA9 /* TAI THAM SIGN KAANKUU ᪩ */,
+ 0x1AAA /* TAI THAM SIGN SATKAAN ᪪ */,
+ 0x1AAB /* TAI THAM SIGN SATKAANKUU ᪫ */,
+ 0x1AAC /* TAI THAM SIGN HANG ᪬ */,
+ 0x1AAD /* TAI THAM SIGN CAANG ᪭ */,
+ 0x1B5A /* BALINESE PANTI ᭚ */,
+ 0x1B5B /* BALINESE PAMADA ᭛ */,
+ 0x1B5C /* BALINESE WINDU ᭜ */,
+ 0x1B5D /* BALINESE CARIK PAMUNGKAH ᭝ */,
+ 0x1B5E /* BALINESE CARIK SIKI ᭞ */,
+ 0x1B5F /* BALINESE CARIK PAREREN ᭟ */,
+ 0x1B60 /* BALINESE PAMENENG ᭠ */,
+ 0x1BFC /* BATAK SYMBOL BINDU NA METEK ᯼ */,
+ 0x1BFD /* BATAK SYMBOL BINDU PINARBORAS ᯽ */,
+ 0x1BFE /* BATAK SYMBOL BINDU JUDUL ᯾ */,
+ 0x1BFF /* BATAK SYMBOL BINDU PANGOLAT ᯿ */,
+ 0x1C3B /* LEPCHA PUNCTUATION TA-ROL ᰻ */,
+ 0x1C3C /* LEPCHA PUNCTUATION NYET THYOOM TA-ROL ᰼ */,
+ 0x1C3D /* LEPCHA PUNCTUATION CER-WA ᰽ */,
+ 0x1C3E /* LEPCHA PUNCTUATION TSHOOK CER-WA ᰾ */,
+ 0x1C3F /* LEPCHA PUNCTUATION TSHOOK ᰿ */,
+ 0x1C7E /* OL CHIKI PUNCTUATION MUCAAD ᱾ */,
+ 0x1C7F /* OL CHIKI PUNCTUATION DOUBLE MUCAAD ᱿ */,
+ 0x1CC0 /* SUNDANESE PUNCTUATION BINDU SURYA ᳀ */,
+ 0x1CC1 /* SUNDANESE PUNCTUATION BINDU PANGLONG ᳁ */,
+ 0x1CC2 /* SUNDANESE PUNCTUATION BINDU PURNAMA ᳂ */,
+ 0x1CC3 /* SUNDANESE PUNCTUATION BINDU CAKRA ᳃ */,
+ 0x1CC4 /* SUNDANESE PUNCTUATION BINDU LEU SATANGA ᳄ */,
+ 0x1CC5 /* SUNDANESE PUNCTUATION BINDU KA SATANGA ᳅ */,
+ 0x1CC6 /* SUNDANESE PUNCTUATION BINDU DA SATANGA ᳆ */,
+ 0x1CC7 /* SUNDANESE PUNCTUATION BINDU BA SATANGA ᳇ */,
+ 0x1CD3 /* VEDIC SIGN NIHSHVASA ᳓ */,
+ 0x2016 /* DOUBLE VERTICAL LINE ‖ */,
+ 0x2017 /* DOUBLE LOW LINE ‗ */,
+ 0x2020 /* DAGGER † */,
+ 0x2021 /* DOUBLE DAGGER ‡ */,
+ 0x2022 /* BULLET • */,
+ 0x2023 /* TRIANGULAR BULLET ‣ */,
+ 0x2024 /* ONE DOT LEADER ․ */,
+ 0x2025 /* TWO DOT LEADER ‥ */,
+ 0x2026 /* HORIZONTAL ELLIPSIS … */,
+ 0x2027 /* HYPHENATION POINT ‧ */,
+ 0x2030 /* PER MILLE SIGN ‰ */,
+ 0x2031 /* PER TEN THOUSAND SIGN ‱ */,
+ 0x2032 /* PRIME ′ */,
+ 0x2033 /* DOUBLE PRIME ″ */,
+ 0x2034 /* TRIPLE PRIME ‴ */,
+ 0x2035 /* REVERSED PRIME ‵ */,
+ 0x2036 /* REVERSED DOUBLE PRIME ‶ */,
+ 0x2037 /* REVERSED TRIPLE PRIME ‷ */,
+ 0x2038 /* CARET ‸ */,
+ 0x203B /* REFERENCE MARK ※ */,
+ 0x203C /* DOUBLE EXCLAMATION MARK ‼ */,
+ 0x203D /* INTERROBANG ‽ */,
+ 0x203E /* OVERLINE ‾ */,
+ 0x2041 /* CARET INSERTION POINT ⁁ */,
+ 0x2042 /* ASTERISM ⁂ */,
+ 0x2043 /* HYPHEN BULLET ⁃ */,
+ 0x2047 /* DOUBLE QUESTION MARK ⁇ */,
+ 0x2048 /* QUESTION EXCLAMATION MARK ⁈ */,
+ 0x2049 /* EXCLAMATION QUESTION MARK ⁉ */,
+ 0x204A /* TIRONIAN SIGN ET ⁊ */,
+ 0x204B /* REVERSED PILCROW SIGN ⁋ */,
+ 0x204C /* BLACK LEFTWARDS BULLET ⁌ */,
+ 0x204D /* BLACK RIGHTWARDS BULLET ⁍ */,
+ 0x204E /* LOW ASTERISK ⁎ */,
+ 0x204F /* REVERSED SEMICOLON ⁏ */,
+ 0x2050 /* CLOSE UP ⁐ */,
+ 0x2051 /* TWO ASTERISKS ALIGNED VERTICALLY ⁑ */,
+ 0x2053 /* SWUNG DASH ⁓ */,
+ 0x2055 /* FLOWER PUNCTUATION MARK ⁕ */,
+ 0x2056 /* THREE DOT PUNCTUATION ⁖ */,
+ 0x2057 /* QUADRUPLE PRIME ⁗ */,
+ 0x2058 /* FOUR DOT PUNCTUATION ⁘ */,
+ 0x2059 /* FIVE DOT PUNCTUATION ⁙ */,
+ 0x205A /* TWO DOT PUNCTUATION ⁚ */,
+ 0x205B /* FOUR DOT MARK ⁛ */,
+ 0x205C /* DOTTED CROSS ⁜ */,
+ 0x205D /* TRICOLON ⁝ */,
+ 0x205E /* VERTICAL FOUR DOTS ⁞ */,
+ 0x2CF9 /* COPTIC OLD NUBIAN FULL STOP ⳹ */,
+ 0x2CFA /* COPTIC OLD NUBIAN DIRECT QUESTION MARK ⳺ */,
+ 0x2CFB /* COPTIC OLD NUBIAN INDIRECT QUESTION MARK ⳻ */,
+ 0x2CFC /* COPTIC OLD NUBIAN VERSE DIVIDER ⳼ */,
+ 0x2CFE /* COPTIC FULL STOP ⳾ */,
+ 0x2CFF /* COPTIC MORPHOLOGICAL DIVIDER ⳿ */,
+ 0x2D70 /* TIFINAGH SEPARATOR MARK ⵰ */,
+ 0x2E00 /* RIGHT ANGLE SUBSTITUTION MARKER ⸀ */,
+ 0x2E01 /* RIGHT ANGLE DOTTED SUBSTITUTION MARKER ⸁ */,
+ 0x2E06 /* RAISED INTERPOLATION MARKER ⸆ */,
+ 0x2E07 /* RAISED DOTTED INTERPOLATION MARKER ⸇ */,
+ 0x2E08 /* DOTTED TRANSPOSITION MARKER ⸈ */,
+ 0x2E0B /* RAISED SQUARE ⸋ */,
+ 0x2E0E /* EDITORIAL CORONIS ⸎ */,
+ 0x2E0F /* PARAGRAPHOS ⸏ */,
+ 0x2E10 /* FORKED PARAGRAPHOS ⸐ */,
+ 0x2E11 /* REVERSED FORKED PARAGRAPHOS ⸑ */,
+ 0x2E12 /* HYPODIASTOLE ⸒ */,
+ 0x2E13 /* DOTTED OBELOS ⸓ */,
+ 0x2E14 /* DOWNWARDS ANCORA ⸔ */,
+ 0x2E15 /* UPWARDS ANCORA ⸕ */,
+ 0x2E16 /* DOTTED RIGHT-POINTING ANGLE ⸖ */,
+ 0x2E18 /* INVERTED INTERROBANG ⸘ */,
+ 0x2E19 /* PALM BRANCH ⸙ */,
+ 0x2E1B /* TILDE WITH RING ABOVE ⸛ */,
+ 0x2E1E /* TILDE WITH DOT ABOVE ⸞ */,
+ 0x2E1F /* TILDE WITH DOT BELOW ⸟ */,
+ 0x2E2A /* TWO DOTS OVER ONE DOT PUNCTUATION ⸪ */,
+ 0x2E2B /* ONE DOT OVER TWO DOTS PUNCTUATION ⸫ */,
+ 0x2E2C /* SQUARED FOUR DOT PUNCTUATION ⸬ */,
+ 0x2E2D /* FIVE DOT MARK ⸭ */,
+ 0x2E2E /* REVERSED QUESTION MARK ⸮ */,
+ 0x2E30 /* RING POINT ⸰ */,
+ 0x2E31 /* WORD SEPARATOR MIDDLE DOT ⸱ */,
+ 0x2E32 /* TURNED COMMA ⸲ */,
+ 0x2E33 /* RAISED DOT ⸳ */,
+ 0x2E34 /* RAISED COMMA ⸴ */,
+ 0x2E35 /* TURNED SEMICOLON ⸵ */,
+ 0x2E36 /* DAGGER WITH LEFT GUARD ⸶ */,
+ 0x2E37 /* DAGGER WITH RIGHT GUARD ⸷ */,
+ 0x2E38 /* TURNED DAGGER ⸸ */,
+ 0x2E39 /* TOP HALF SECTION SIGN ⸹ */,
+ 0x2E3C /* STENOGRAPHIC FULL STOP ⸼ */,
+ 0x2E3D /* VERTICAL SIX DOTS ⸽ */,
+ 0x2E3E /* WIGGLY VERTICAL LINE ⸾ */,
+ 0x2E3F /* CAPITULUM ⸿ */,
+ 0x2E41 /* REVERSED COMMA ⹁ */,
+ 0x2E43 /* DASH WITH LEFT UPTURN ⹃ */,
+ 0x2E44 /* DOUBLE SUSPENSION MARK ⹄ */,
+ 0x2E45 /* INVERTED LOW KAVYKA ⹅ */,
+ 0x2E46 /* INVERTED LOW KAVYKA WITH KAVYKA ABOVE ⹆ */,
+ 0x2E47 /* LOW KAVYKA ⹇ */,
+ 0x2E48 /* LOW KAVYKA WITH DOT ⹈ */,
+ 0x2E49 /* DOUBLE STACKED COMMA ⹉ */,
+ 0x2E4A /* DOTTED SOLIDUS ⹊ */,
+ 0x2E4B /* TRIPLE DAGGER ⹋ */,
+ 0x2E4C /* MEDIEVAL COMMA ⹌ */,
+ 0x2E4D /* PARAGRAPHUS MARK ⹍ */,
+ 0x2E4E /* PUNCTUS ELEVATUS MARK ⹎ */,
+ 0x2E4F /* CORNISH VERSE DIVIDER ⹏ */,
+ 0x3001 /* IDEOGRAPHIC COMMA 、 */,
+ 0x3002 /* IDEOGRAPHIC FULL STOP 。 */,
+ 0x3003 /* DITTO MARK 〃 */,
+ 0x303D /* PART ALTERNATION MARK 〽 */,
+ 0x30FB /* KATAKANA MIDDLE DOT ・ */,
+ 0xA4FE /* LISU PUNCTUATION COMMA ꓾ */,
+ 0xA4FF /* LISU PUNCTUATION FULL STOP ꓿ */,
+ 0xA60D /* VAI COMMA ꘍ */,
+ 0xA60E /* VAI FULL STOP ꘎ */,
+ 0xA60F /* VAI QUESTION MARK ꘏ */,
+ 0xA673 /* SLAVONIC ASTERISK ꙳ */,
+ 0xA67E /* CYRILLIC KAVYKA ꙾ */,
+ 0xA6F2 /* BAMUM NJAEMLI ꛲ */,
+ 0xA6F3 /* BAMUM FULL STOP ꛳ */,
+ 0xA6F4 /* BAMUM COLON ꛴ */,
+ 0xA6F5 /* BAMUM COMMA ꛵ */,
+ 0xA6F6 /* BAMUM SEMICOLON ꛶ */,
+ 0xA6F7 /* BAMUM QUESTION MARK ꛷ */,
+ 0xA874 /* PHAGS-PA SINGLE HEAD MARK ꡴ */,
+ 0xA875 /* PHAGS-PA DOUBLE HEAD MARK ꡵ */,
+ 0xA876 /* PHAGS-PA MARK SHAD ꡶ */,
+ 0xA877 /* PHAGS-PA MARK DOUBLE SHAD ꡷ */,
+ 0xA8CE /* SAURASHTRA DANDA ꣎ */,
+ 0xA8CF /* SAURASHTRA DOUBLE DANDA ꣏ */,
+ 0xA8F8 /* DEVANAGARI SIGN PUSHPIKA ꣸ */,
+ 0xA8F9 /* DEVANAGARI GAP FILLER ꣹ */,
+ 0xA8FA /* DEVANAGARI CARET ꣺ */,
+ 0xA8FC /* DEVANAGARI SIGN SIDDHAM ꣼ */,
+ 0xA92E /* KAYAH LI SIGN CWI ꤮ */,
+ 0xA92F /* KAYAH LI SIGN SHYA ꤯ */,
+ 0xA95F /* REJANG SECTION MARK ꥟ */,
+ 0xA9C1 /* JAVANESE LEFT RERENGGAN ꧁ */,
+ 0xA9C2 /* JAVANESE RIGHT RERENGGAN ꧂ */,
+ 0xA9C3 /* JAVANESE PADA ANDAP ꧃ */,
+ 0xA9C4 /* JAVANESE PADA MADYA ꧄ */,
+ 0xA9C5 /* JAVANESE PADA LUHUR ꧅ */,
+ 0xA9C6 /* JAVANESE PADA WINDU ꧆ */,
+ 0xA9C7 /* JAVANESE PADA PANGKAT ꧇ */,
+ 0xA9C8 /* JAVANESE PADA LINGSA ꧈ */,
+ 0xA9C9 /* JAVANESE PADA LUNGSI ꧉ */,
+ 0xA9CA /* JAVANESE PADA ADEG ꧊ */,
+ 0xA9CB /* JAVANESE PADA ADEG ADEG ꧋ */,
+ 0xA9CC /* JAVANESE PADA PISELEH ꧌ */,
+ 0xA9CD /* JAVANESE TURNED PADA PISELEH ꧍ */,
+ 0xA9DE /* JAVANESE PADA TIRTA TUMETES ꧞ */,
+ 0xA9DF /* JAVANESE PADA ISEN-ISEN ꧟ */,
+ 0xAA5C /* CHAM PUNCTUATION SPIRAL ꩜ */,
+ 0xAA5D /* CHAM PUNCTUATION DANDA ꩝ */,
+ 0xAA5E /* CHAM PUNCTUATION DOUBLE DANDA ꩞ */,
+ 0xAA5F /* CHAM PUNCTUATION TRIPLE DANDA ꩟ */,
+ 0xAADE /* TAI VIET SYMBOL HO HOI ꫞ */,
+ 0xAADF /* TAI VIET SYMBOL KOI KOI ꫟ */,
+ 0xAAF0 /* MEETEI MAYEK CHEIKHAN ꫰ */,
+ 0xAAF1 /* MEETEI MAYEK AHANG KHUDAM ꫱ */,
+ 0xABEB /* MEETEI MAYEK CHEIKHEI ꯫ */,
+ 0xFE10 /* PRESENTATION FORM FOR VERTICAL COMMA ︐ */,
+ 0xFE11 /* PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC COMMA ︑ */,
+ 0xFE12 /* PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC FULL STOP ︒ */,
+ 0xFE13 /* PRESENTATION FORM FOR VERTICAL COLON ︓ */,
+ 0xFE14 /* PRESENTATION FORM FOR VERTICAL SEMICOLON ︔ */,
+ 0xFE15 /* PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK ︕ */,
+ 0xFE16 /* PRESENTATION FORM FOR VERTICAL QUESTION MARK ︖ */,
+ 0xFE19 /* PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS ︙ */,
+ 0xFE30 /* PRESENTATION FORM FOR VERTICAL TWO DOT LEADER ︰ */,
+ 0xFE45 /* SESAME DOT ﹅ */,
+ 0xFE46 /* WHITE SESAME DOT ﹆ */,
+ 0xFE49 /* DASHED OVERLINE ﹉ */,
+ 0xFE4A /* CENTRELINE OVERLINE ﹊ */,
+ 0xFE4B /* WAVY OVERLINE ﹋ */,
+ 0xFE4C /* DOUBLE WAVY OVERLINE ﹌ */,
+ 0xFE50 /* SMALL COMMA ﹐ */,
+ 0xFE51 /* SMALL IDEOGRAPHIC COMMA ﹑ */,
+ 0xFE52 /* SMALL FULL STOP ﹒ */,
+ 0xFE54 /* SMALL SEMICOLON ﹔ */,
+ 0xFE55 /* SMALL COLON ﹕ */,
+ 0xFE56 /* SMALL QUESTION MARK ﹖ */,
+ 0xFE57 /* SMALL EXCLAMATION MARK ﹗ */,
+ 0xFE5F /* SMALL NUMBER SIGN ﹟ */,
+ 0xFE60 /* SMALL AMPERSAND ﹠ */,
+ 0xFE61 /* SMALL ASTERISK ﹡ */,
+ 0xFE68 /* SMALL REVERSE SOLIDUS ﹨ */,
+ 0xFE6A /* SMALL PERCENT SIGN ﹪ */,
+ 0xFE6B /* SMALL COMMERCIAL AT ﹫ */,
+ 0xFF01 /* FULLWIDTH EXCLAMATION MARK ! */,
+ 0xFF02 /* FULLWIDTH QUOTATION MARK " */,
+ 0xFF03 /* FULLWIDTH NUMBER SIGN # */,
+ 0xFF05 /* FULLWIDTH PERCENT SIGN % */,
+ 0xFF06 /* FULLWIDTH AMPERSAND & */,
+ 0xFF07 /* FULLWIDTH APOSTROPHE ' */,
+ 0xFF0A /* FULLWIDTH ASTERISK * */,
+ 0xFF0C /* FULLWIDTH COMMA , */,
+ 0xFF0E /* FULLWIDTH FULL STOP . */,
+ 0xFF0F /* FULLWIDTH SOLIDUS / */,
+ 0xFF1A /* FULLWIDTH COLON : */,
+ 0xFF1B /* FULLWIDTH SEMICOLON ; */,
+ 0xFF1F /* FULLWIDTH QUESTION MARK ? */,
+ 0xFF20 /* FULLWIDTH COMMERCIAL AT @ */,
+ 0xFF3C /* FULLWIDTH REVERSE SOLIDUS \ */,
+ 0xFF61 /* HALFWIDTH IDEOGRAPHIC FULL STOP 。 */,
+ 0xFF64 /* HALFWIDTH IDEOGRAPHIC COMMA 、 */,
+ 0xFF65 /* HALFWIDTH KATAKANA MIDDLE DOT ・ */,
+ 0x10100 /* AEGEAN WORD SEPARATOR LINE 𐄀 */,
+ 0x10101 /* AEGEAN WORD SEPARATOR DOT 𐄁 */,
+ 0x10102 /* AEGEAN CHECK MARK 𐄂 */,
+ 0x1039F /* UGARITIC WORD DIVIDER 𐎟 */,
+ 0x103D0 /* OLD PERSIAN WORD DIVIDER 𐏐 */,
+ 0x1056F /* CAUCASIAN ALBANIAN CITATION MARK 𐕯 */,
+ 0x10857 /* IMPERIAL ARAMAIC SECTION SIGN 𐡗 */,
+ 0x1091F /* PHOENICIAN WORD SEPARATOR 𐤟 */,
+ 0x1093F /* LYDIAN TRIANGULAR MARK 𐤿 */,
+ 0x10A50 /* KHAROSHTHI PUNCTUATION DOT 𐩐 */,
+ 0x10A51 /* KHAROSHTHI PUNCTUATION SMALL CIRCLE 𐩑 */,
+ 0x10A52 /* KHAROSHTHI PUNCTUATION CIRCLE 𐩒 */,
+ 0x10A53 /* KHAROSHTHI PUNCTUATION CRESCENT BAR 𐩓 */,
+ 0x10A54 /* KHAROSHTHI PUNCTUATION MANGALAM 𐩔 */,
+ 0x10A55 /* KHAROSHTHI PUNCTUATION LOTUS 𐩕 */,
+ 0x10A56 /* KHAROSHTHI PUNCTUATION DANDA 𐩖 */,
+ 0x10A57 /* KHAROSHTHI PUNCTUATION DOUBLE DANDA 𐩗 */,
+ 0x10A58 /* KHAROSHTHI PUNCTUATION LINES 𐩘 */,
+ 0x10A7F /* OLD SOUTH ARABIAN NUMERIC INDICATOR 𐩿 */,
+ 0x10AF0 /* MANICHAEAN PUNCTUATION STAR 𐫰 */,
+ 0x10AF1 /* MANICHAEAN PUNCTUATION FLEURON 𐫱 */,
+ 0x10AF2 /* MANICHAEAN PUNCTUATION DOUBLE DOT WITHIN DOT 𐫲 */,
+ 0x10AF3 /* MANICHAEAN PUNCTUATION DOT WITHIN DOT 𐫳 */,
+ 0x10AF4 /* MANICHAEAN PUNCTUATION DOT 𐫴 */,
+ 0x10AF5 /* MANICHAEAN PUNCTUATION TWO DOTS 𐫵 */,
+ 0x10AF6 /* MANICHAEAN PUNCTUATION LINE FILLER 𐫶 */,
+ 0x10B39 /* AVESTAN ABBREVIATION MARK 𐬹 */,
+ 0x10B3A /* TINY TWO DOTS OVER ONE DOT PUNCTUATION 𐬺 */,
+ 0x10B3B /* SMALL TWO DOTS OVER ONE DOT PUNCTUATION 𐬻 */,
+ 0x10B3C /* LARGE TWO DOTS OVER ONE DOT PUNCTUATION 𐬼 */,
+ 0x10B3D /* LARGE ONE DOT OVER TWO DOTS PUNCTUATION 𐬽 */,
+ 0x10B3E /* LARGE TWO RINGS OVER ONE RING PUNCTUATION 𐬾 */,
+ 0x10B3F /* LARGE ONE RING OVER TWO RINGS PUNCTUATION 𐬿 */,
+ 0x10B99 /* PSALTER PAHLAVI SECTION MARK 𐮙 */,
+ 0x10B9A /* PSALTER PAHLAVI TURNED SECTION MARK 𐮚 */,
+ 0x10B9B /* PSALTER PAHLAVI FOUR DOTS WITH CROSS 𐮛 */,
+ 0x10B9C /* PSALTER PAHLAVI FOUR DOTS WITH DOT 𐮜 */,
+ 0x10F55 /* SOGDIAN PUNCTUATION TWO VERTICAL BARS 𐽕 */,
+ 0x10F56 /* SOGDIAN PUNCTUATION TWO VERTICAL BARS WITH DOTS 𐽖 */,
+ 0x10F57 /* SOGDIAN PUNCTUATION CIRCLE WITH DOT 𐽗 */,
+ 0x10F58 /* SOGDIAN PUNCTUATION TWO CIRCLES WITH DOTS 𐽘 */,
+ 0x10F59 /* SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT 𐽙 */,
+ 0x11047 /* BRAHMI DANDA 𑁇 */,
+ 0x11048 /* BRAHMI DOUBLE DANDA 𑁈 */,
+ 0x11049 /* BRAHMI PUNCTUATION DOT 𑁉 */,
+ 0x1104A /* BRAHMI PUNCTUATION DOUBLE DOT 𑁊 */,
+ 0x1104B /* BRAHMI PUNCTUATION LINE 𑁋 */,
+ 0x1104C /* BRAHMI PUNCTUATION CRESCENT BAR 𑁌 */,
+ 0x1104D /* BRAHMI PUNCTUATION LOTUS 𑁍 */,
+ 0x110BB /* KAITHI ABBREVIATION SIGN 𑂻 */,
+ 0x110BC /* KAITHI ENUMERATION SIGN 𑂼 */,
+ 0x110BE /* KAITHI SECTION MARK 𑂾 */,
+ 0x110BF /* KAITHI DOUBLE SECTION MARK 𑂿 */,
+ 0x110C0 /* KAITHI DANDA 𑃀 */,
+ 0x110C1 /* KAITHI DOUBLE DANDA 𑃁 */,
+ 0x11140 /* CHAKMA SECTION MARK 𑅀 */,
+ 0x11141 /* CHAKMA DANDA 𑅁 */,
+ 0x11142 /* CHAKMA DOUBLE DANDA 𑅂 */,
+ 0x11143 /* CHAKMA QUESTION MARK 𑅃 */,
+ 0x11174 /* MAHAJANI ABBREVIATION SIGN 𑅴 */,
+ 0x11175 /* MAHAJANI SECTION MARK 𑅵 */,
+ 0x111C5 /* SHARADA DANDA 𑇅 */,
+ 0x111C6 /* SHARADA DOUBLE DANDA 𑇆 */,
+ 0x111C7 /* SHARADA ABBREVIATION SIGN 𑇇 */,
+ 0x111C8 /* SHARADA SEPARATOR 𑇈 */,
+ 0x111CD /* SHARADA SUTRA MARK 𑇍 */,
+ 0x111DB /* SHARADA SIGN SIDDHAM 𑇛 */,
+ 0x111DD /* SHARADA CONTINUATION SIGN 𑇝 */,
+ 0x111DE /* SHARADA SECTION MARK-1 𑇞 */,
+ 0x111DF /* SHARADA SECTION MARK-2 𑇟 */,
+ 0x11238 /* KHOJKI DANDA 𑈸 */,
+ 0x11239 /* KHOJKI DOUBLE DANDA 𑈹 */,
+ 0x1123A /* KHOJKI WORD SEPARATOR 𑈺 */,
+ 0x1123B /* KHOJKI SECTION MARK 𑈻 */,
+ 0x1123C /* KHOJKI DOUBLE SECTION MARK 𑈼 */,
+ 0x1123D /* KHOJKI ABBREVIATION SIGN 𑈽 */,
+ 0x112A9 /* MULTANI SECTION MARK 𑊩 */,
+ 0x1144B /* NEWA DANDA 𑑋 */,
+ 0x1144C /* NEWA DOUBLE DANDA 𑑌 */,
+ 0x1144D /* NEWA COMMA 𑑍 */,
+ 0x1144E /* NEWA GAP FILLER 𑑎 */,
+ 0x1144F /* NEWA ABBREVIATION SIGN 𑑏 */,
+ 0x1145B /* NEWA PLACEHOLDER MARK 𑑛 */,
+ 0x1145D /* NEWA INSERTION SIGN 𑑝 */,
+ 0x114C6 /* TIRHUTA ABBREVIATION SIGN 𑓆 */,
+ 0x115C1 /* SIDDHAM SIGN SIDDHAM 𑗁 */,
+ 0x115C2 /* SIDDHAM DANDA 𑗂 */,
+ 0x115C3 /* SIDDHAM DOUBLE DANDA 𑗃 */,
+ 0x115C4 /* SIDDHAM SEPARATOR DOT 𑗄 */,
+ 0x115C5 /* SIDDHAM SEPARATOR BAR 𑗅 */,
+ 0x115C6 /* SIDDHAM REPETITION MARK-1 𑗆 */,
+ 0x115C7 /* SIDDHAM REPETITION MARK-2 𑗇 */,
+ 0x115C8 /* SIDDHAM REPETITION MARK-3 𑗈 */,
+ 0x115C9 /* SIDDHAM END OF TEXT MARK 𑗉 */,
+ 0x115CA /* SIDDHAM SECTION MARK WITH TRIDENT AND U-SHAPED ORNAMENTS 𑗊 */,
+ 0x115CB /* SIDDHAM SECTION MARK WITH TRIDENT AND DOTTED CRESCENTS 𑗋 */,
+ 0x115CC /* SIDDHAM SECTION MARK WITH RAYS AND DOTTED CRESCENTS 𑗌 */,
+ 0x115CD /* SIDDHAM SECTION MARK WITH RAYS AND DOTTED DOUBLE CRESCENTS 𑗍 */,
+ 0x115CE /* SIDDHAM SECTION MARK WITH RAYS AND DOTTED TRIPLE CRESCENTS 𑗎 */,
+ 0x115CF /* SIDDHAM SECTION MARK DOUBLE RING 𑗏 */,
+ 0x115D0 /* SIDDHAM SECTION MARK DOUBLE RING WITH RAYS 𑗐 */,
+ 0x115D1 /* SIDDHAM SECTION MARK WITH DOUBLE CRESCENTS 𑗑 */,
+ 0x115D2 /* SIDDHAM SECTION MARK WITH TRIPLE CRESCENTS 𑗒 */,
+ 0x115D3 /* SIDDHAM SECTION MARK WITH QUADRUPLE CRESCENTS 𑗓 */,
+ 0x115D4 /* SIDDHAM SECTION MARK WITH SEPTUPLE CRESCENTS 𑗔 */,
+ 0x115D5 /* SIDDHAM SECTION MARK WITH CIRCLES AND RAYS 𑗕 */,
+ 0x115D6 /* SIDDHAM SECTION MARK WITH CIRCLES AND TWO ENCLOSURES 𑗖 */,
+ 0x115D7 /* SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES 𑗗 */,
+ 0x11641 /* MODI DANDA 𑙁 */,
+ 0x11642 /* MODI DOUBLE DANDA 𑙂 */,
+ 0x11643 /* MODI ABBREVIATION SIGN 𑙃 */,
+ 0x11660 /* MONGOLIAN BIRGA WITH ORNAMENT 𑙠 */,
+ 0x11661 /* MONGOLIAN ROTATED BIRGA 𑙡 */,
+ 0x11662 /* MONGOLIAN DOUBLE BIRGA WITH ORNAMENT 𑙢 */,
+ 0x11663 /* MONGOLIAN TRIPLE BIRGA WITH ORNAMENT 𑙣 */,
+ 0x11664 /* MONGOLIAN BIRGA WITH DOUBLE ORNAMENT 𑙤 */,
+ 0x11665 /* MONGOLIAN ROTATED BIRGA WITH ORNAMENT 𑙥 */,
+ 0x11666 /* MONGOLIAN ROTATED BIRGA WITH DOUBLE ORNAMENT 𑙦 */,
+ 0x11667 /* MONGOLIAN INVERTED BIRGA 𑙧 */,
+ 0x11668 /* MONGOLIAN INVERTED BIRGA WITH DOUBLE ORNAMENT 𑙨 */,
+ 0x11669 /* MONGOLIAN SWIRL BIRGA 𑙩 */,
+ 0x1166A /* MONGOLIAN SWIRL BIRGA WITH ORNAMENT 𑙪 */,
+ 0x1166B /* MONGOLIAN SWIRL BIRGA WITH DOUBLE ORNAMENT 𑙫 */,
+ 0x1166C /* MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT 𑙬 */,
+ 0x1173C /* AHOM SIGN SMALL SECTION 𑜼 */,
+ 0x1173D /* AHOM SIGN SECTION 𑜽 */,
+ 0x1173E /* AHOM SIGN RULAI 𑜾 */,
+ 0x1183B /* DOGRA ABBREVIATION SIGN 𑠻 */,
+ 0x119E2 /* NANDINAGARI SIGN SIDDHAM 𑧢 */,
+ 0x11A3F /* ZANABAZAR SQUARE INITIAL HEAD MARK 𑨿 */,
+ 0x11A40 /* ZANABAZAR SQUARE CLOSING HEAD MARK 𑩀 */,
+ 0x11A41 /* ZANABAZAR SQUARE MARK TSHEG 𑩁 */,
+ 0x11A42 /* ZANABAZAR SQUARE MARK SHAD 𑩂 */,
+ 0x11A43 /* ZANABAZAR SQUARE MARK DOUBLE SHAD 𑩃 */,
+ 0x11A44 /* ZANABAZAR SQUARE MARK LONG TSHEG 𑩄 */,
+ 0x11A45 /* ZANABAZAR SQUARE INITIAL DOUBLE-LINED HEAD MARK 𑩅 */,
+ 0x11A46 /* ZANABAZAR SQUARE CLOSING DOUBLE-LINED HEAD MARK 𑩆 */,
+ 0x11A9A /* SOYOMBO MARK TSHEG 𑪚 */,
+ 0x11A9B /* SOYOMBO MARK SHAD 𑪛 */,
+ 0x11A9C /* SOYOMBO MARK DOUBLE SHAD 𑪜 */,
+ 0x11A9E /* SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME 𑪞 */,
+ 0x11A9F /* SOYOMBO HEAD MARK WITH MOON AND SUN AND FLAME 𑪟 */,
+ 0x11AA0 /* SOYOMBO HEAD MARK WITH MOON AND SUN 𑪠 */,
+ 0x11AA1 /* SOYOMBO TERMINAL MARK-1 𑪡 */,
+ 0x11AA2 /* SOYOMBO TERMINAL MARK-2 𑪢 */,
+ 0x11C41 /* BHAIKSUKI DANDA 𑱁 */,
+ 0x11C42 /* BHAIKSUKI DOUBLE DANDA 𑱂 */,
+ 0x11C43 /* BHAIKSUKI WORD SEPARATOR 𑱃 */,
+ 0x11C44 /* BHAIKSUKI GAP FILLER-1 𑱄 */,
+ 0x11C45 /* BHAIKSUKI GAP FILLER-2 𑱅 */,
+ 0x11C70 /* MARCHEN HEAD MARK 𑱰 */,
+ 0x11C71 /* MARCHEN MARK SHAD 𑱱 */,
+ 0x11EF7 /* MAKASAR PASSIMBANG 𑻷 */,
+ 0x11EF8 /* MAKASAR END OF SECTION 𑻸 */,
+ 0x11FFF /* TAMIL PUNCTUATION END OF TEXT 𑿿 */,
+ 0x12470 /* CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER 𒑰 */,
+ 0x12471 /* CUNEIFORM PUNCTUATION SIGN VERTICAL COLON 𒑱 */,
+ 0x12472 /* CUNEIFORM PUNCTUATION SIGN DIAGONAL COLON 𒑲 */,
+ 0x12473 /* CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON 𒑳 */,
+ 0x12474 /* CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON 𒑴 */,
+ 0x16A6E /* MRO DANDA 𖩮 */,
+ 0x16A6F /* MRO DOUBLE DANDA 𖩯 */,
+ 0x16AF5 /* BASSA VAH FULL STOP 𖫵 */,
+ 0x16B37 /* PAHAWH HMONG SIGN VOS THOM 𖬷 */,
+ 0x16B38 /* PAHAWH HMONG SIGN VOS TSHAB CEEB 𖬸 */,
+ 0x16B39 /* PAHAWH HMONG SIGN CIM CHEEM 𖬹 */,
+ 0x16B3A /* PAHAWH HMONG SIGN VOS THIAB 𖬺 */,
+ 0x16B3B /* PAHAWH HMONG SIGN VOS FEEM 𖬻 */,
+ 0x16B44 /* PAHAWH HMONG SIGN XAUS 𖭄 */,
+ 0x16E97 /* MEDEFAIDRIN COMMA 𖺗 */,
+ 0x16E98 /* MEDEFAIDRIN FULL STOP 𖺘 */,
+ 0x16E99 /* MEDEFAIDRIN SYMBOL AIVA 𖺙 */,
+ 0x16E9A /* MEDEFAIDRIN EXCLAMATION OH 𖺚 */,
+ 0x16FE2 /* OLD CHINESE HOOK MARK 𖿢 */,
+ 0x1BC9F /* DUPLOYAN PUNCTUATION CHINOOK FULL STOP 𛲟 */,
+ 0x1DA87 /* SIGNWRITING COMMA 𝪇 */,
+ 0x1DA88 /* SIGNWRITING FULL STOP 𝪈 */,
+ 0x1DA89 /* SIGNWRITING SEMICOLON 𝪉 */,
+ 0x1DA8A /* SIGNWRITING COLON 𝪊 */,
+ 0x1DA8B /* SIGNWRITING PARENTHESIS 𝪋 */,
+ 0x1E95E /* ADLAM INITIAL EXCLAMATION MARK 𞥞 */,
+ 0x1E95F /* ADLAM INITIAL QUESTION MARK */,
+ ]
+)
diff --git a/v_windows/v/vlib/encoding/utf8/utf8_util_test.v b/v_windows/v/vlib/encoding/utf8/utf8_util_test.v
new file mode 100644
index 0000000..f09cb76
--- /dev/null
+++ b/v_windows/v/vlib/encoding/utf8/utf8_util_test.v
@@ -0,0 +1,66 @@
+import encoding.utf8
+
+// test_utf8_util checks utf8 case conversion, rune counting and the
+// western/global punctuation helpers against fixed sample strings.
+fn test_utf8_util() {
+ // string test
+ // The trailing `æ€”` (3 runes, 8 bytes) is intentional: the expected upper
+ // case form ends in `Æ€”`, and the length checks below require 15 runes
+ // and 24 bytes. Do not "repair" it into a single CJK rune.
+ src := 'ăĂ ôÔ testo æ€”' //_\u1E5A\u1E5B<=>\u1F49\u1F41<=>\u0128\u012a\u012c" // len 29 runes, raw 49 bytes
+ src_upper := 'ĂĂ ÔÔ TESTO Æ€”' //_\u1E5A\u1E5A<=>\u1F49\u1F49<=>\u0128\u012A\u012C"
+ src_lower := 'ăă ôô testo æ€”' //_\u1E5B\u1E5B<=>\u1F41\u1F41<=>\u0129\u012B\u012D"
+ upper := utf8.to_upper(src)
+ lower := utf8.to_lower(src)
+ assert upper == src_upper
+ assert lower == src_lower
+
+ // cyrillic letters change case, ascii digits and `{` stay untouched
+ assert utf8.to_upper('абвёabc12{') == 'АБВЁABC12{'
+ assert utf8.to_lower('АБВЁABC12{') == 'абвёabc12{'
+
+ // test len function: utf8.len counts runes, string.len counts bytes
+ assert utf8.len('') == 0
+ assert utf8.len('pippo') == 5
+ assert utf8.len(src) == 15 // 29
+ assert src.len == 24 // 49
+
+ // western punctuation
+ // The indices are byte offsets into `a`: ò, à and è take two bytes each,
+ // so è starts at byte 12 and the final `.` sits at byte 14.
+ a := '.abc?abcòàè.'
+ assert utf8.is_punct(a, 0) == true
+ assert utf8.is_punct('b', 0) == false
+ assert utf8.is_uchar_punct(0x002E) == true
+ assert utf8.is_punct(a, 4) == true // ?
+ assert utf8.is_punct(a, 14) == true // last .
+ assert utf8.is_punct(a, 12) == false // è
+ println('OK western')
+
+ // global punctuation
+ // Byte layout of `b`: `.` at 0, the two Ă at 1-2 and 3-4, `a` at 5, `.` at 6.
+ b := '.ĂĂa. ÔÔ TESTO Æ€'
+ assert utf8.is_global_punct(b, 0) == true
+ assert utf8.is_global_punct('.', 0) == true
+ assert utf8.is_uchar_punct(0x002E) == true // NOTE(review): repeats the western check above; presumably the global variant was intended - confirm
+ assert utf8.is_global_punct(b, 6) == true // .
+ assert utf8.is_global_punct(b, 1) == false // Ă (byte 1 is the first byte of the first Ă)
+
+ // test utility functions
+ assert utf8.get_uchar(b, 0) == 0x002E
+}
+
+// test_raw_indexing verifies that utf8.raw_index returns the expected
+// character for every index of a string that mixes multi-byte (CJK) and
+// ASCII characters.
+fn test_raw_indexing() {
+ text := '我是V Lang!'
+
+ // Expected result per index: the first two entries are the non ascii
+ // characters, the remaining seven are the ascii ones.
+ expected_chars := ['我', '是', 'V', ' ', 'L', 'a', 'n', 'g', '!']
+
+ for pos, want in expected_chars {
+  assert utf8.raw_index(text, pos) == want
+ }
+}
+
+// test_reversed verifies that utf8.reverse reverses a string character by
+// character, keeping each multi-byte character intact.
+fn test_reversed() {
+ // CJK prefix followed by ascii
+ assert utf8.reverse('我是V Lang!') == '!gnaL V是我'
+ // longer CJK prefix followed by ascii words
+ assert utf8.reverse('你好世界hello world') == 'dlrow olleh界世好你'
+}