diff options
Diffstat (limited to 'v_windows/v/vlib/encoding/utf8')
| -rw-r--r-- | v_windows/v/vlib/encoding/utf8/east_asian/east_asian_width.v | 1204 | ||||
| -rw-r--r-- | v_windows/v/vlib/encoding/utf8/east_asian/east_asian_width_test.v | 23 | ||||
| -rw-r--r-- | v_windows/v/vlib/encoding/utf8/encoding_utf8_test.v | 9 | ||||
| -rw-r--r-- | v_windows/v/vlib/encoding/utf8/utf8.v | 88 | ||||
| -rw-r--r-- | v_windows/v/vlib/encoding/utf8/utf8_util.v | 1161 | ||||
| -rw-r--r-- | v_windows/v/vlib/encoding/utf8/utf8_util_test.v | 66 | 
6 files changed, 2551 insertions, 0 deletions
diff --git a/v_windows/v/vlib/encoding/utf8/east_asian/east_asian_width.v b/v_windows/v/vlib/encoding/utf8/east_asian/east_asian_width.v
new file mode 100644
index 0000000..d1ac547
--- /dev/null
+++ b/v_windows/v/vlib/encoding/utf8/east_asian/east_asian_width.v
@@ -0,0 +1,1204 @@
+// Copyright (c) 2021 Takahiro Yaota, a.k.a. zakuro. All rights reserved.
+// Use of this source code is governed by an MIT license
+// that can be found in the LICENSE file.
+
+module east_asian
+
+import encoding.utf8
+
+// EastAsianWidthProperty represents the East_Asian_Width informative property of a code point.
+pub enum EastAsianWidthProperty {
+	full
+	half
+	wide
+	narrow
+	ambiguous
+	neutral
+}
+
+// display_width returns the display width of `s`: .full and .wide code points count as 2, .ambiguous ones as `ambiguous_width`, all others as 1.
+pub fn display_width(s string, ambiguous_width int) int {
+	mut i, mut n := 0, 0
+	for i < s.len {
+		c_len := utf8_char_len(s[i]) // advance by the byte length derived from the lead byte
+		n += match east_asian_width_property_at(s, i) {
+			.ambiguous { ambiguous_width }
+			.full, .wide { int(2) }
+			else { int(1) }
+		}
+		i += c_len
+	}
+	return n
+}
+
+// east_asian_width_property_at returns the East Asian Width property of the code point that utf8.get_uchar reads at s[index], or .neutral when no table entry covers it.
+pub fn east_asian_width_property_at(s string, index int) EastAsianWidthProperty {
+	codepoint := utf8.get_uchar(s, index)
+	mut left, mut right := 0, east_asian.east_asian_width_data.len - 1 // binary search over the sorted ranges
+	for left <= right {
+		middle := left + ((right - left) / 2)
+		entry := east_asian.east_asian_width_data[middle]
+		if codepoint < entry.point {
+			right = middle - 1
+			continue
+		}
+
+		last := entry.point + entry.len - 1 // inclusive end: `len` is a count, so point + len already belongs to the next range or a gap
+		if codepoint > last {
+			left = middle + 1
+			continue
+		}
+
+		return entry.property
+	}
+	return .neutral
+}
+
+struct EAWEntry {
+	property EastAsianWidthProperty // property shared by every code point in the range
+	point    int // first code point of the range
+	len      int // number of consecutive code points in the range
+}
+
+// Data from EastAsianWidth-13.0.0.txt as (property, first code point, count) ranges in ascending order.
+const (
+	east_asian_width_data = [
+		EAWEntry{.neutral, 0x0000, 32},
+		EAWEntry{.narrow, 0x0020, 95},
+		EAWEntry{.neutral, 0x007f, 34},
+		EAWEntry{.ambiguous, 0x00a1, 1},
+		EAWEntry{.narrow, 0x00a2,
2}, +		EAWEntry{.ambiguous, 0x00a4, 1}, +		EAWEntry{.narrow, 0x00a5, 2}, +		EAWEntry{.ambiguous, 0x00a7, 2}, +		EAWEntry{.neutral, 0x00a9, 1}, +		EAWEntry{.ambiguous, 0x00aa, 1}, +		EAWEntry{.neutral, 0x00ab, 1}, +		EAWEntry{.narrow, 0x00ac, 1}, +		EAWEntry{.ambiguous, 0x00ad, 2}, +		EAWEntry{.narrow, 0x00af, 1}, +		EAWEntry{.ambiguous, 0x00b0, 5}, +		EAWEntry{.neutral, 0x00b5, 1}, +		EAWEntry{.ambiguous, 0x00b6, 5}, +		EAWEntry{.neutral, 0x00bb, 1}, +		EAWEntry{.ambiguous, 0x00bc, 4}, +		EAWEntry{.neutral, 0x00c0, 6}, +		EAWEntry{.ambiguous, 0x00c6, 1}, +		EAWEntry{.neutral, 0x00c7, 9}, +		EAWEntry{.ambiguous, 0x00d0, 1}, +		EAWEntry{.neutral, 0x00d1, 6}, +		EAWEntry{.ambiguous, 0x00d7, 2}, +		EAWEntry{.neutral, 0x00d9, 5}, +		EAWEntry{.ambiguous, 0x00de, 4}, +		EAWEntry{.neutral, 0x00e2, 4}, +		EAWEntry{.ambiguous, 0x00e6, 1}, +		EAWEntry{.neutral, 0x00e7, 1}, +		EAWEntry{.ambiguous, 0x00e8, 3}, +		EAWEntry{.neutral, 0x00eb, 1}, +		EAWEntry{.ambiguous, 0x00ec, 2}, +		EAWEntry{.neutral, 0x00ee, 2}, +		EAWEntry{.ambiguous, 0x00f0, 1}, +		EAWEntry{.neutral, 0x00f1, 1}, +		EAWEntry{.ambiguous, 0x00f2, 2}, +		EAWEntry{.neutral, 0x00f4, 3}, +		EAWEntry{.ambiguous, 0x00f7, 4}, +		EAWEntry{.neutral, 0x00fb, 1}, +		EAWEntry{.ambiguous, 0x00fc, 1}, +		EAWEntry{.neutral, 0x00fd, 1}, +		EAWEntry{.ambiguous, 0x00fe, 1}, +		EAWEntry{.neutral, 0x00ff, 2}, +		EAWEntry{.ambiguous, 0x0101, 1}, +		EAWEntry{.neutral, 0x0102, 15}, +		EAWEntry{.ambiguous, 0x0111, 1}, +		EAWEntry{.neutral, 0x0112, 1}, +		EAWEntry{.ambiguous, 0x0113, 1}, +		EAWEntry{.neutral, 0x0114, 7}, +		EAWEntry{.ambiguous, 0x011b, 1}, +		EAWEntry{.neutral, 0x011c, 10}, +		EAWEntry{.ambiguous, 0x0126, 2}, +		EAWEntry{.neutral, 0x0128, 3}, +		EAWEntry{.ambiguous, 0x012b, 1}, +		EAWEntry{.neutral, 0x012c, 5}, +		EAWEntry{.ambiguous, 0x0131, 3}, +		EAWEntry{.neutral, 0x0134, 4}, +		EAWEntry{.ambiguous, 0x0138, 1}, +		EAWEntry{.neutral, 0x0139, 6}, +		EAWEntry{.ambiguous, 0x013f, 4}, +		EAWEntry{.neutral, 0x0143, 1}, +		
EAWEntry{.ambiguous, 0x0144, 1}, +		EAWEntry{.neutral, 0x0145, 3}, +		EAWEntry{.ambiguous, 0x0148, 4}, +		EAWEntry{.neutral, 0x014c, 1}, +		EAWEntry{.ambiguous, 0x014d, 1}, +		EAWEntry{.neutral, 0x014e, 4}, +		EAWEntry{.ambiguous, 0x0152, 2}, +		EAWEntry{.neutral, 0x0154, 18}, +		EAWEntry{.ambiguous, 0x0166, 2}, +		EAWEntry{.neutral, 0x0168, 3}, +		EAWEntry{.ambiguous, 0x016b, 1}, +		EAWEntry{.neutral, 0x016c, 98}, +		EAWEntry{.ambiguous, 0x01ce, 1}, +		EAWEntry{.neutral, 0x01cf, 1}, +		EAWEntry{.ambiguous, 0x01d0, 1}, +		EAWEntry{.neutral, 0x01d1, 1}, +		EAWEntry{.ambiguous, 0x01d2, 1}, +		EAWEntry{.neutral, 0x01d3, 1}, +		EAWEntry{.ambiguous, 0x01d4, 1}, +		EAWEntry{.neutral, 0x01d5, 1}, +		EAWEntry{.ambiguous, 0x01d6, 1}, +		EAWEntry{.neutral, 0x01d7, 1}, +		EAWEntry{.ambiguous, 0x01d8, 1}, +		EAWEntry{.neutral, 0x01d9, 1}, +		EAWEntry{.ambiguous, 0x01da, 1}, +		EAWEntry{.neutral, 0x01db, 1}, +		EAWEntry{.ambiguous, 0x01dc, 1}, +		EAWEntry{.neutral, 0x01dd, 116}, +		EAWEntry{.ambiguous, 0x0251, 1}, +		EAWEntry{.neutral, 0x0252, 15}, +		EAWEntry{.ambiguous, 0x0261, 1}, +		EAWEntry{.neutral, 0x0262, 98}, +		EAWEntry{.ambiguous, 0x02c4, 1}, +		EAWEntry{.neutral, 0x02c5, 2}, +		EAWEntry{.ambiguous, 0x02c7, 1}, +		EAWEntry{.neutral, 0x02c8, 1}, +		EAWEntry{.ambiguous, 0x02c9, 3}, +		EAWEntry{.neutral, 0x02cc, 1}, +		EAWEntry{.ambiguous, 0x02cd, 1}, +		EAWEntry{.neutral, 0x02ce, 2}, +		EAWEntry{.ambiguous, 0x02d0, 1}, +		EAWEntry{.neutral, 0x02d1, 7}, +		EAWEntry{.ambiguous, 0x02d8, 4}, +		EAWEntry{.neutral, 0x02dc, 1}, +		EAWEntry{.ambiguous, 0x02dd, 1}, +		EAWEntry{.neutral, 0x02de, 1}, +		EAWEntry{.ambiguous, 0x02df, 1}, +		EAWEntry{.neutral, 0x02e0, 32}, +		EAWEntry{.ambiguous, 0x0300, 112}, +		EAWEntry{.neutral, 0x0370, 8}, +		EAWEntry{.neutral, 0x037a, 6}, +		EAWEntry{.neutral, 0x0384, 7}, +		EAWEntry{.neutral, 0x038c, 1}, +		EAWEntry{.neutral, 0x038e, 3}, +		EAWEntry{.ambiguous, 0x0391, 17}, +		EAWEntry{.ambiguous, 0x03a3, 7}, +		EAWEntry{.neutral, 0x03aa, 7}, 
+		EAWEntry{.ambiguous, 0x03b1, 17}, +		EAWEntry{.neutral, 0x03c2, 1}, +		EAWEntry{.ambiguous, 0x03c3, 7}, +		EAWEntry{.neutral, 0x03ca, 55}, +		EAWEntry{.ambiguous, 0x0401, 1}, +		EAWEntry{.neutral, 0x0402, 14}, +		EAWEntry{.ambiguous, 0x0410, 64}, +		EAWEntry{.neutral, 0x0450, 1}, +		EAWEntry{.ambiguous, 0x0451, 1}, +		EAWEntry{.neutral, 0x0452, 222}, +		EAWEntry{.neutral, 0x0531, 38}, +		EAWEntry{.neutral, 0x0559, 50}, +		EAWEntry{.neutral, 0x058d, 3}, +		EAWEntry{.neutral, 0x0591, 55}, +		EAWEntry{.neutral, 0x05d0, 27}, +		EAWEntry{.neutral, 0x05ef, 6}, +		EAWEntry{.neutral, 0x0600, 29}, +		EAWEntry{.neutral, 0x061e, 240}, +		EAWEntry{.neutral, 0x070f, 60}, +		EAWEntry{.neutral, 0x074d, 101}, +		EAWEntry{.neutral, 0x07c0, 59}, +		EAWEntry{.neutral, 0x07fd, 49}, +		EAWEntry{.neutral, 0x0830, 15}, +		EAWEntry{.neutral, 0x0840, 28}, +		EAWEntry{.neutral, 0x085e, 1}, +		EAWEntry{.neutral, 0x0860, 11}, +		EAWEntry{.neutral, 0x08a0, 21}, +		EAWEntry{.neutral, 0x08b6, 18}, +		EAWEntry{.neutral, 0x08d3, 177}, +		EAWEntry{.neutral, 0x0985, 8}, +		EAWEntry{.neutral, 0x098f, 2}, +		EAWEntry{.neutral, 0x0993, 22}, +		EAWEntry{.neutral, 0x09aa, 7}, +		EAWEntry{.neutral, 0x09b2, 1}, +		EAWEntry{.neutral, 0x09b6, 4}, +		EAWEntry{.neutral, 0x09bc, 9}, +		EAWEntry{.neutral, 0x09c7, 2}, +		EAWEntry{.neutral, 0x09cb, 4}, +		EAWEntry{.neutral, 0x09d7, 1}, +		EAWEntry{.neutral, 0x09dc, 2}, +		EAWEntry{.neutral, 0x09df, 5}, +		EAWEntry{.neutral, 0x09e6, 25}, +		EAWEntry{.neutral, 0x0a01, 3}, +		EAWEntry{.neutral, 0x0a05, 6}, +		EAWEntry{.neutral, 0x0a0f, 2}, +		EAWEntry{.neutral, 0x0a13, 22}, +		EAWEntry{.neutral, 0x0a2a, 7}, +		EAWEntry{.neutral, 0x0a32, 2}, +		EAWEntry{.neutral, 0x0a35, 2}, +		EAWEntry{.neutral, 0x0a38, 2}, +		EAWEntry{.neutral, 0x0a3c, 1}, +		EAWEntry{.neutral, 0x0a3e, 5}, +		EAWEntry{.neutral, 0x0a47, 2}, +		EAWEntry{.neutral, 0x0a4b, 3}, +		EAWEntry{.neutral, 0x0a51, 1}, +		EAWEntry{.neutral, 0x0a59, 4}, +		EAWEntry{.neutral, 0x0a5e, 1}, +		EAWEntry{.neutral, 
0x0a66, 17}, +		EAWEntry{.neutral, 0x0a81, 3}, +		EAWEntry{.neutral, 0x0a85, 9}, +		EAWEntry{.neutral, 0x0a8f, 3}, +		EAWEntry{.neutral, 0x0a93, 22}, +		EAWEntry{.neutral, 0x0aaa, 7}, +		EAWEntry{.neutral, 0x0ab2, 2}, +		EAWEntry{.neutral, 0x0ab5, 5}, +		EAWEntry{.neutral, 0x0abc, 10}, +		EAWEntry{.neutral, 0x0ac7, 3}, +		EAWEntry{.neutral, 0x0acb, 3}, +		EAWEntry{.neutral, 0x0ad0, 1}, +		EAWEntry{.neutral, 0x0ae0, 4}, +		EAWEntry{.neutral, 0x0ae6, 12}, +		EAWEntry{.neutral, 0x0af9, 7}, +		EAWEntry{.neutral, 0x0b01, 3}, +		EAWEntry{.neutral, 0x0b05, 8}, +		EAWEntry{.neutral, 0x0b0f, 2}, +		EAWEntry{.neutral, 0x0b13, 22}, +		EAWEntry{.neutral, 0x0b2a, 7}, +		EAWEntry{.neutral, 0x0b32, 2}, +		EAWEntry{.neutral, 0x0b35, 5}, +		EAWEntry{.neutral, 0x0b3c, 9}, +		EAWEntry{.neutral, 0x0b47, 2}, +		EAWEntry{.neutral, 0x0b4b, 3}, +		EAWEntry{.neutral, 0x0b55, 3}, +		EAWEntry{.neutral, 0x0b5c, 2}, +		EAWEntry{.neutral, 0x0b5f, 5}, +		EAWEntry{.neutral, 0x0b66, 18}, +		EAWEntry{.neutral, 0x0b82, 2}, +		EAWEntry{.neutral, 0x0b85, 6}, +		EAWEntry{.neutral, 0x0b8e, 3}, +		EAWEntry{.neutral, 0x0b92, 4}, +		EAWEntry{.neutral, 0x0b99, 2}, +		EAWEntry{.neutral, 0x0b9c, 1}, +		EAWEntry{.neutral, 0x0b9e, 2}, +		EAWEntry{.neutral, 0x0ba3, 2}, +		EAWEntry{.neutral, 0x0ba8, 3}, +		EAWEntry{.neutral, 0x0bae, 12}, +		EAWEntry{.neutral, 0x0bbe, 5}, +		EAWEntry{.neutral, 0x0bc6, 3}, +		EAWEntry{.neutral, 0x0bca, 4}, +		EAWEntry{.neutral, 0x0bd0, 1}, +		EAWEntry{.neutral, 0x0bd7, 1}, +		EAWEntry{.neutral, 0x0be6, 21}, +		EAWEntry{.neutral, 0x0c00, 13}, +		EAWEntry{.neutral, 0x0c0e, 3}, +		EAWEntry{.neutral, 0x0c12, 23}, +		EAWEntry{.neutral, 0x0c2a, 16}, +		EAWEntry{.neutral, 0x0c3d, 8}, +		EAWEntry{.neutral, 0x0c46, 3}, +		EAWEntry{.neutral, 0x0c4a, 4}, +		EAWEntry{.neutral, 0x0c55, 2}, +		EAWEntry{.neutral, 0x0c58, 3}, +		EAWEntry{.neutral, 0x0c60, 4}, +		EAWEntry{.neutral, 0x0c66, 10}, +		EAWEntry{.neutral, 0x0c77, 22}, +		EAWEntry{.neutral, 0x0c8e, 3}, +		EAWEntry{.neutral, 0x0c92, 23}, +	
	EAWEntry{.neutral, 0x0caa, 10}, +		EAWEntry{.neutral, 0x0cb5, 5}, +		EAWEntry{.neutral, 0x0cbc, 9}, +		EAWEntry{.neutral, 0x0cc6, 3}, +		EAWEntry{.neutral, 0x0cca, 4}, +		EAWEntry{.neutral, 0x0cd5, 2}, +		EAWEntry{.neutral, 0x0cde, 1}, +		EAWEntry{.neutral, 0x0ce0, 4}, +		EAWEntry{.neutral, 0x0ce6, 10}, +		EAWEntry{.neutral, 0x0cf1, 2}, +		EAWEntry{.neutral, 0x0d00, 13}, +		EAWEntry{.neutral, 0x0d0e, 3}, +		EAWEntry{.neutral, 0x0d12, 51}, +		EAWEntry{.neutral, 0x0d46, 3}, +		EAWEntry{.neutral, 0x0d4a, 6}, +		EAWEntry{.neutral, 0x0d54, 16}, +		EAWEntry{.neutral, 0x0d66, 26}, +		EAWEntry{.neutral, 0x0d81, 3}, +		EAWEntry{.neutral, 0x0d85, 18}, +		EAWEntry{.neutral, 0x0d9a, 24}, +		EAWEntry{.neutral, 0x0db3, 9}, +		EAWEntry{.neutral, 0x0dbd, 1}, +		EAWEntry{.neutral, 0x0dc0, 7}, +		EAWEntry{.neutral, 0x0dca, 1}, +		EAWEntry{.neutral, 0x0dcf, 6}, +		EAWEntry{.neutral, 0x0dd6, 1}, +		EAWEntry{.neutral, 0x0dd8, 8}, +		EAWEntry{.neutral, 0x0de6, 10}, +		EAWEntry{.neutral, 0x0df2, 3}, +		EAWEntry{.neutral, 0x0e01, 58}, +		EAWEntry{.neutral, 0x0e3f, 29}, +		EAWEntry{.neutral, 0x0e81, 2}, +		EAWEntry{.neutral, 0x0e84, 1}, +		EAWEntry{.neutral, 0x0e86, 5}, +		EAWEntry{.neutral, 0x0e8c, 24}, +		EAWEntry{.neutral, 0x0ea5, 1}, +		EAWEntry{.neutral, 0x0ea7, 23}, +		EAWEntry{.neutral, 0x0ec0, 5}, +		EAWEntry{.neutral, 0x0ec6, 1}, +		EAWEntry{.neutral, 0x0ec8, 6}, +		EAWEntry{.neutral, 0x0ed0, 10}, +		EAWEntry{.neutral, 0x0edc, 4}, +		EAWEntry{.neutral, 0x0f00, 72}, +		EAWEntry{.neutral, 0x0f49, 36}, +		EAWEntry{.neutral, 0x0f71, 39}, +		EAWEntry{.neutral, 0x0f99, 36}, +		EAWEntry{.neutral, 0x0fbe, 15}, +		EAWEntry{.neutral, 0x0fce, 13}, +		EAWEntry{.neutral, 0x1000, 198}, +		EAWEntry{.neutral, 0x10c7, 1}, +		EAWEntry{.neutral, 0x10cd, 1}, +		EAWEntry{.neutral, 0x10d0, 48}, +		EAWEntry{.wide, 0x1100, 96}, +		EAWEntry{.neutral, 0x1160, 233}, +		EAWEntry{.neutral, 0x124a, 4}, +		EAWEntry{.neutral, 0x1250, 7}, +		EAWEntry{.neutral, 0x1258, 1}, +		EAWEntry{.neutral, 0x125a, 4}, +		
EAWEntry{.neutral, 0x1260, 41}, +		EAWEntry{.neutral, 0x128a, 4}, +		EAWEntry{.neutral, 0x1290, 33}, +		EAWEntry{.neutral, 0x12b2, 4}, +		EAWEntry{.neutral, 0x12b8, 7}, +		EAWEntry{.neutral, 0x12c0, 1}, +		EAWEntry{.neutral, 0x12c2, 4}, +		EAWEntry{.neutral, 0x12c8, 15}, +		EAWEntry{.neutral, 0x12d8, 57}, +		EAWEntry{.neutral, 0x1312, 4}, +		EAWEntry{.neutral, 0x1318, 67}, +		EAWEntry{.neutral, 0x135d, 32}, +		EAWEntry{.neutral, 0x1380, 26}, +		EAWEntry{.neutral, 0x13a0, 86}, +		EAWEntry{.neutral, 0x13f8, 6}, +		EAWEntry{.neutral, 0x1400, 669}, +		EAWEntry{.neutral, 0x16a0, 89}, +		EAWEntry{.neutral, 0x1700, 13}, +		EAWEntry{.neutral, 0x170e, 7}, +		EAWEntry{.neutral, 0x1720, 23}, +		EAWEntry{.neutral, 0x1740, 20}, +		EAWEntry{.neutral, 0x1760, 13}, +		EAWEntry{.neutral, 0x176e, 3}, +		EAWEntry{.neutral, 0x1772, 2}, +		EAWEntry{.neutral, 0x1780, 94}, +		EAWEntry{.neutral, 0x17e0, 10}, +		EAWEntry{.neutral, 0x17f0, 10}, +		EAWEntry{.neutral, 0x1800, 15}, +		EAWEntry{.neutral, 0x1810, 10}, +		EAWEntry{.neutral, 0x1820, 89}, +		EAWEntry{.neutral, 0x1880, 43}, +		EAWEntry{.neutral, 0x18b0, 70}, +		EAWEntry{.neutral, 0x1900, 31}, +		EAWEntry{.neutral, 0x1920, 12}, +		EAWEntry{.neutral, 0x1930, 12}, +		EAWEntry{.neutral, 0x1940, 1}, +		EAWEntry{.neutral, 0x1944, 42}, +		EAWEntry{.neutral, 0x1970, 5}, +		EAWEntry{.neutral, 0x1980, 44}, +		EAWEntry{.neutral, 0x19b0, 26}, +		EAWEntry{.neutral, 0x19d0, 11}, +		EAWEntry{.neutral, 0x19de, 62}, +		EAWEntry{.neutral, 0x1a1e, 65}, +		EAWEntry{.neutral, 0x1a60, 29}, +		EAWEntry{.neutral, 0x1a7f, 11}, +		EAWEntry{.neutral, 0x1a90, 10}, +		EAWEntry{.neutral, 0x1aa0, 14}, +		EAWEntry{.neutral, 0x1ab0, 17}, +		EAWEntry{.neutral, 0x1b00, 76}, +		EAWEntry{.neutral, 0x1b50, 45}, +		EAWEntry{.neutral, 0x1b80, 116}, +		EAWEntry{.neutral, 0x1bfc, 60}, +		EAWEntry{.neutral, 0x1c3b, 15}, +		EAWEntry{.neutral, 0x1c4d, 60}, +		EAWEntry{.neutral, 0x1c90, 43}, +		EAWEntry{.neutral, 0x1cbd, 11}, +		EAWEntry{.neutral, 0x1cd0, 43}, +		
EAWEntry{.neutral, 0x1d00, 250}, +		EAWEntry{.neutral, 0x1dfb, 283}, +		EAWEntry{.neutral, 0x1f18, 6}, +		EAWEntry{.neutral, 0x1f20, 38}, +		EAWEntry{.neutral, 0x1f48, 6}, +		EAWEntry{.neutral, 0x1f50, 8}, +		EAWEntry{.neutral, 0x1f59, 1}, +		EAWEntry{.neutral, 0x1f5b, 1}, +		EAWEntry{.neutral, 0x1f5d, 1}, +		EAWEntry{.neutral, 0x1f5f, 31}, +		EAWEntry{.neutral, 0x1f80, 53}, +		EAWEntry{.neutral, 0x1fb6, 15}, +		EAWEntry{.neutral, 0x1fc6, 14}, +		EAWEntry{.neutral, 0x1fd6, 6}, +		EAWEntry{.neutral, 0x1fdd, 19}, +		EAWEntry{.neutral, 0x1ff2, 3}, +		EAWEntry{.neutral, 0x1ff6, 9}, +		EAWEntry{.neutral, 0x2000, 16}, +		EAWEntry{.ambiguous, 0x2010, 1}, +		EAWEntry{.neutral, 0x2011, 2}, +		EAWEntry{.ambiguous, 0x2013, 4}, +		EAWEntry{.neutral, 0x2017, 1}, +		EAWEntry{.ambiguous, 0x2018, 2}, +		EAWEntry{.neutral, 0x201a, 2}, +		EAWEntry{.ambiguous, 0x201c, 2}, +		EAWEntry{.neutral, 0x201e, 2}, +		EAWEntry{.ambiguous, 0x2020, 3}, +		EAWEntry{.neutral, 0x2023, 1}, +		EAWEntry{.ambiguous, 0x2024, 4}, +		EAWEntry{.neutral, 0x2028, 8}, +		EAWEntry{.ambiguous, 0x2030, 1}, +		EAWEntry{.neutral, 0x2031, 1}, +		EAWEntry{.ambiguous, 0x2032, 2}, +		EAWEntry{.neutral, 0x2034, 1}, +		EAWEntry{.ambiguous, 0x2035, 1}, +		EAWEntry{.neutral, 0x2036, 5}, +		EAWEntry{.ambiguous, 0x203b, 1}, +		EAWEntry{.neutral, 0x203c, 2}, +		EAWEntry{.ambiguous, 0x203e, 1}, +		EAWEntry{.neutral, 0x203f, 38}, +		EAWEntry{.neutral, 0x2066, 12}, +		EAWEntry{.ambiguous, 0x2074, 1}, +		EAWEntry{.neutral, 0x2075, 10}, +		EAWEntry{.ambiguous, 0x207f, 1}, +		EAWEntry{.neutral, 0x2080, 1}, +		EAWEntry{.ambiguous, 0x2081, 4}, +		EAWEntry{.neutral, 0x2085, 10}, +		EAWEntry{.neutral, 0x2090, 13}, +		EAWEntry{.neutral, 0x20a0, 9}, +		EAWEntry{.half, 0x20a9, 1}, +		EAWEntry{.neutral, 0x20aa, 2}, +		EAWEntry{.ambiguous, 0x20ac, 1}, +		EAWEntry{.neutral, 0x20ad, 19}, +		EAWEntry{.neutral, 0x20d0, 33}, +		EAWEntry{.neutral, 0x2100, 3}, +		EAWEntry{.ambiguous, 0x2103, 1}, +		EAWEntry{.neutral, 0x2104, 1}, +		
EAWEntry{.ambiguous, 0x2105, 1}, +		EAWEntry{.neutral, 0x2106, 3}, +		EAWEntry{.ambiguous, 0x2109, 1}, +		EAWEntry{.neutral, 0x210a, 9}, +		EAWEntry{.ambiguous, 0x2113, 1}, +		EAWEntry{.neutral, 0x2114, 2}, +		EAWEntry{.ambiguous, 0x2116, 1}, +		EAWEntry{.neutral, 0x2117, 10}, +		EAWEntry{.ambiguous, 0x2121, 2}, +		EAWEntry{.neutral, 0x2123, 3}, +		EAWEntry{.ambiguous, 0x2126, 1}, +		EAWEntry{.neutral, 0x2127, 4}, +		EAWEntry{.ambiguous, 0x212b, 1}, +		EAWEntry{.neutral, 0x212c, 39}, +		EAWEntry{.ambiguous, 0x2153, 2}, +		EAWEntry{.neutral, 0x2155, 6}, +		EAWEntry{.ambiguous, 0x215b, 4}, +		EAWEntry{.neutral, 0x215f, 1}, +		EAWEntry{.ambiguous, 0x2160, 12}, +		EAWEntry{.neutral, 0x216c, 4}, +		EAWEntry{.ambiguous, 0x2170, 10}, +		EAWEntry{.neutral, 0x217a, 15}, +		EAWEntry{.ambiguous, 0x2189, 1}, +		EAWEntry{.neutral, 0x218a, 2}, +		EAWEntry{.ambiguous, 0x2190, 10}, +		EAWEntry{.neutral, 0x219a, 30}, +		EAWEntry{.ambiguous, 0x21b8, 2}, +		EAWEntry{.neutral, 0x21ba, 24}, +		EAWEntry{.ambiguous, 0x21d2, 1}, +		EAWEntry{.neutral, 0x21d3, 1}, +		EAWEntry{.ambiguous, 0x21d4, 1}, +		EAWEntry{.neutral, 0x21d5, 18}, +		EAWEntry{.ambiguous, 0x21e7, 1}, +		EAWEntry{.neutral, 0x21e8, 24}, +		EAWEntry{.ambiguous, 0x2200, 1}, +		EAWEntry{.neutral, 0x2201, 1}, +		EAWEntry{.ambiguous, 0x2202, 2}, +		EAWEntry{.neutral, 0x2204, 3}, +		EAWEntry{.ambiguous, 0x2207, 2}, +		EAWEntry{.neutral, 0x2209, 2}, +		EAWEntry{.ambiguous, 0x220b, 1}, +		EAWEntry{.neutral, 0x220c, 3}, +		EAWEntry{.ambiguous, 0x220f, 1}, +		EAWEntry{.neutral, 0x2210, 1}, +		EAWEntry{.ambiguous, 0x2211, 1}, +		EAWEntry{.neutral, 0x2212, 3}, +		EAWEntry{.ambiguous, 0x2215, 1}, +		EAWEntry{.neutral, 0x2216, 4}, +		EAWEntry{.ambiguous, 0x221a, 1}, +		EAWEntry{.neutral, 0x221b, 2}, +		EAWEntry{.ambiguous, 0x221d, 4}, +		EAWEntry{.neutral, 0x2221, 2}, +		EAWEntry{.ambiguous, 0x2223, 1}, +		EAWEntry{.neutral, 0x2224, 1}, +		EAWEntry{.ambiguous, 0x2225, 1}, +		EAWEntry{.neutral, 0x2226, 1}, +		EAWEntry{.ambiguous, 0x2227, 
6}, +		EAWEntry{.neutral, 0x222d, 1}, +		EAWEntry{.ambiguous, 0x222e, 1}, +		EAWEntry{.neutral, 0x222f, 5}, +		EAWEntry{.ambiguous, 0x2234, 4}, +		EAWEntry{.neutral, 0x2238, 4}, +		EAWEntry{.ambiguous, 0x223c, 2}, +		EAWEntry{.neutral, 0x223e, 10}, +		EAWEntry{.ambiguous, 0x2248, 1}, +		EAWEntry{.neutral, 0x2249, 3}, +		EAWEntry{.ambiguous, 0x224c, 1}, +		EAWEntry{.neutral, 0x224d, 5}, +		EAWEntry{.ambiguous, 0x2252, 1}, +		EAWEntry{.neutral, 0x2253, 13}, +		EAWEntry{.ambiguous, 0x2260, 2}, +		EAWEntry{.neutral, 0x2262, 2}, +		EAWEntry{.ambiguous, 0x2264, 4}, +		EAWEntry{.neutral, 0x2268, 2}, +		EAWEntry{.ambiguous, 0x226a, 2}, +		EAWEntry{.neutral, 0x226c, 2}, +		EAWEntry{.ambiguous, 0x226e, 2}, +		EAWEntry{.neutral, 0x2270, 18}, +		EAWEntry{.ambiguous, 0x2282, 2}, +		EAWEntry{.neutral, 0x2284, 2}, +		EAWEntry{.ambiguous, 0x2286, 2}, +		EAWEntry{.neutral, 0x2288, 13}, +		EAWEntry{.ambiguous, 0x2295, 1}, +		EAWEntry{.neutral, 0x2296, 3}, +		EAWEntry{.ambiguous, 0x2299, 1}, +		EAWEntry{.neutral, 0x229a, 11}, +		EAWEntry{.ambiguous, 0x22a5, 1}, +		EAWEntry{.neutral, 0x22a6, 25}, +		EAWEntry{.ambiguous, 0x22bf, 1}, +		EAWEntry{.neutral, 0x22c0, 82}, +		EAWEntry{.ambiguous, 0x2312, 1}, +		EAWEntry{.neutral, 0x2313, 7}, +		EAWEntry{.wide, 0x231a, 2}, +		EAWEntry{.neutral, 0x231c, 13}, +		EAWEntry{.wide, 0x2329, 2}, +		EAWEntry{.neutral, 0x232b, 190}, +		EAWEntry{.wide, 0x23e9, 4}, +		EAWEntry{.neutral, 0x23ed, 3}, +		EAWEntry{.wide, 0x23f0, 1}, +		EAWEntry{.neutral, 0x23f1, 2}, +		EAWEntry{.wide, 0x23f3, 1}, +		EAWEntry{.neutral, 0x23f4, 51}, +		EAWEntry{.neutral, 0x2440, 11}, +		EAWEntry{.ambiguous, 0x2460, 138}, +		EAWEntry{.neutral, 0x24ea, 1}, +		EAWEntry{.ambiguous, 0x24eb, 97}, +		EAWEntry{.neutral, 0x254c, 4}, +		EAWEntry{.ambiguous, 0x2550, 36}, +		EAWEntry{.neutral, 0x2574, 12}, +		EAWEntry{.ambiguous, 0x2580, 16}, +		EAWEntry{.neutral, 0x2590, 2}, +		EAWEntry{.ambiguous, 0x2592, 4}, +		EAWEntry{.neutral, 0x2596, 10}, +		EAWEntry{.ambiguous, 0x25a0, 2}, +		
EAWEntry{.neutral, 0x25a2, 1}, +		EAWEntry{.ambiguous, 0x25a3, 7}, +		EAWEntry{.neutral, 0x25aa, 8}, +		EAWEntry{.ambiguous, 0x25b2, 2}, +		EAWEntry{.neutral, 0x25b4, 2}, +		EAWEntry{.ambiguous, 0x25b6, 2}, +		EAWEntry{.neutral, 0x25b8, 4}, +		EAWEntry{.ambiguous, 0x25bc, 2}, +		EAWEntry{.neutral, 0x25be, 2}, +		EAWEntry{.ambiguous, 0x25c0, 2}, +		EAWEntry{.neutral, 0x25c2, 4}, +		EAWEntry{.ambiguous, 0x25c6, 3}, +		EAWEntry{.neutral, 0x25c9, 2}, +		EAWEntry{.ambiguous, 0x25cb, 1}, +		EAWEntry{.neutral, 0x25cc, 2}, +		EAWEntry{.ambiguous, 0x25ce, 4}, +		EAWEntry{.neutral, 0x25d2, 16}, +		EAWEntry{.ambiguous, 0x25e2, 4}, +		EAWEntry{.neutral, 0x25e6, 9}, +		EAWEntry{.ambiguous, 0x25ef, 1}, +		EAWEntry{.neutral, 0x25f0, 13}, +		EAWEntry{.wide, 0x25fd, 2}, +		EAWEntry{.neutral, 0x25ff, 6}, +		EAWEntry{.ambiguous, 0x2605, 2}, +		EAWEntry{.neutral, 0x2607, 2}, +		EAWEntry{.ambiguous, 0x2609, 1}, +		EAWEntry{.neutral, 0x260a, 4}, +		EAWEntry{.ambiguous, 0x260e, 2}, +		EAWEntry{.neutral, 0x2610, 4}, +		EAWEntry{.wide, 0x2614, 2}, +		EAWEntry{.neutral, 0x2616, 6}, +		EAWEntry{.ambiguous, 0x261c, 1}, +		EAWEntry{.neutral, 0x261d, 1}, +		EAWEntry{.ambiguous, 0x261e, 1}, +		EAWEntry{.neutral, 0x261f, 33}, +		EAWEntry{.ambiguous, 0x2640, 1}, +		EAWEntry{.neutral, 0x2641, 1}, +		EAWEntry{.ambiguous, 0x2642, 1}, +		EAWEntry{.neutral, 0x2643, 5}, +		EAWEntry{.wide, 0x2648, 12}, +		EAWEntry{.neutral, 0x2654, 12}, +		EAWEntry{.ambiguous, 0x2660, 2}, +		EAWEntry{.neutral, 0x2662, 1}, +		EAWEntry{.ambiguous, 0x2663, 3}, +		EAWEntry{.neutral, 0x2666, 1}, +		EAWEntry{.ambiguous, 0x2667, 4}, +		EAWEntry{.neutral, 0x266b, 1}, +		EAWEntry{.ambiguous, 0x266c, 2}, +		EAWEntry{.neutral, 0x266e, 1}, +		EAWEntry{.ambiguous, 0x266f, 1}, +		EAWEntry{.neutral, 0x2670, 15}, +		EAWEntry{.wide, 0x267f, 1}, +		EAWEntry{.neutral, 0x2680, 19}, +		EAWEntry{.wide, 0x2693, 1}, +		EAWEntry{.neutral, 0x2694, 10}, +		EAWEntry{.ambiguous, 0x269e, 2}, +		EAWEntry{.neutral, 0x26a0, 1}, +		EAWEntry{.wide, 
0x26a1, 1}, +		EAWEntry{.neutral, 0x26a2, 8}, +		EAWEntry{.wide, 0x26aa, 2}, +		EAWEntry{.neutral, 0x26ac, 17}, +		EAWEntry{.wide, 0x26bd, 2}, +		EAWEntry{.ambiguous, 0x26bf, 1}, +		EAWEntry{.neutral, 0x26c0, 4}, +		EAWEntry{.wide, 0x26c4, 2}, +		EAWEntry{.ambiguous, 0x26c6, 8}, +		EAWEntry{.wide, 0x26ce, 1}, +		EAWEntry{.ambiguous, 0x26cf, 5}, +		EAWEntry{.wide, 0x26d4, 1}, +		EAWEntry{.ambiguous, 0x26d5, 13}, +		EAWEntry{.neutral, 0x26e2, 1}, +		EAWEntry{.ambiguous, 0x26e3, 1}, +		EAWEntry{.neutral, 0x26e4, 4}, +		EAWEntry{.ambiguous, 0x26e8, 2}, +		EAWEntry{.wide, 0x26ea, 1}, +		EAWEntry{.ambiguous, 0x26eb, 7}, +		EAWEntry{.wide, 0x26f2, 2}, +		EAWEntry{.ambiguous, 0x26f4, 1}, +		EAWEntry{.wide, 0x26f5, 1}, +		EAWEntry{.ambiguous, 0x26f6, 4}, +		EAWEntry{.wide, 0x26fa, 1}, +		EAWEntry{.ambiguous, 0x26fb, 2}, +		EAWEntry{.wide, 0x26fd, 1}, +		EAWEntry{.ambiguous, 0x26fe, 2}, +		EAWEntry{.neutral, 0x2700, 5}, +		EAWEntry{.wide, 0x2705, 1}, +		EAWEntry{.neutral, 0x2706, 4}, +		EAWEntry{.wide, 0x270a, 2}, +		EAWEntry{.neutral, 0x270c, 28}, +		EAWEntry{.wide, 0x2728, 1}, +		EAWEntry{.neutral, 0x2729, 20}, +		EAWEntry{.ambiguous, 0x273d, 1}, +		EAWEntry{.neutral, 0x273e, 14}, +		EAWEntry{.wide, 0x274c, 1}, +		EAWEntry{.neutral, 0x274d, 1}, +		EAWEntry{.wide, 0x274e, 1}, +		EAWEntry{.neutral, 0x274f, 4}, +		EAWEntry{.wide, 0x2753, 3}, +		EAWEntry{.neutral, 0x2756, 1}, +		EAWEntry{.wide, 0x2757, 1}, +		EAWEntry{.neutral, 0x2758, 30}, +		EAWEntry{.ambiguous, 0x2776, 10}, +		EAWEntry{.neutral, 0x2780, 21}, +		EAWEntry{.wide, 0x2795, 3}, +		EAWEntry{.neutral, 0x2798, 24}, +		EAWEntry{.wide, 0x27b0, 1}, +		EAWEntry{.neutral, 0x27b1, 14}, +		EAWEntry{.wide, 0x27bf, 1}, +		EAWEntry{.neutral, 0x27c0, 38}, +		EAWEntry{.narrow, 0x27e6, 8}, +		EAWEntry{.neutral, 0x27ee, 407}, +		EAWEntry{.narrow, 0x2985, 2}, +		EAWEntry{.neutral, 0x2987, 404}, +		EAWEntry{.wide, 0x2b1b, 2}, +		EAWEntry{.neutral, 0x2b1d, 51}, +		EAWEntry{.wide, 0x2b50, 1}, +		EAWEntry{.neutral, 0x2b51, 4}, +		
EAWEntry{.wide, 0x2b55, 1}, +		EAWEntry{.ambiguous, 0x2b56, 4}, +		EAWEntry{.neutral, 0x2b5a, 26}, +		EAWEntry{.neutral, 0x2b76, 32}, +		EAWEntry{.neutral, 0x2b97, 152}, +		EAWEntry{.neutral, 0x2c30, 47}, +		EAWEntry{.neutral, 0x2c60, 148}, +		EAWEntry{.neutral, 0x2cf9, 45}, +		EAWEntry{.neutral, 0x2d27, 1}, +		EAWEntry{.neutral, 0x2d2d, 1}, +		EAWEntry{.neutral, 0x2d30, 56}, +		EAWEntry{.neutral, 0x2d6f, 2}, +		EAWEntry{.neutral, 0x2d7f, 24}, +		EAWEntry{.neutral, 0x2da0, 7}, +		EAWEntry{.neutral, 0x2da8, 7}, +		EAWEntry{.neutral, 0x2db0, 7}, +		EAWEntry{.neutral, 0x2db8, 7}, +		EAWEntry{.neutral, 0x2dc0, 7}, +		EAWEntry{.neutral, 0x2dc8, 7}, +		EAWEntry{.neutral, 0x2dd0, 7}, +		EAWEntry{.neutral, 0x2dd8, 7}, +		EAWEntry{.neutral, 0x2de0, 115}, +		EAWEntry{.wide, 0x2e80, 26}, +		EAWEntry{.wide, 0x2e9b, 89}, +		EAWEntry{.wide, 0x2f00, 214}, +		EAWEntry{.wide, 0x2ff0, 12}, +		EAWEntry{.full, 0x3000, 1}, +		EAWEntry{.wide, 0x3001, 62}, +		EAWEntry{.neutral, 0x303f, 1}, +		EAWEntry{.wide, 0x3041, 86}, +		EAWEntry{.wide, 0x3099, 103}, +		EAWEntry{.wide, 0x3105, 43}, +		EAWEntry{.wide, 0x3131, 94}, +		EAWEntry{.wide, 0x3190, 84}, +		EAWEntry{.wide, 0x31f0, 47}, +		EAWEntry{.wide, 0x3220, 40}, +		EAWEntry{.ambiguous, 0x3248, 8}, +		EAWEntry{.wide, 0x3250, 7024}, +		EAWEntry{.neutral, 0x4dc0, 64}, +		EAWEntry{.wide, 0x4e00, 22157}, +		EAWEntry{.wide, 0xa490, 55}, +		EAWEntry{.neutral, 0xa4d0, 348}, +		EAWEntry{.neutral, 0xa640, 184}, +		EAWEntry{.neutral, 0xa700, 192}, +		EAWEntry{.neutral, 0xa7c2, 9}, +		EAWEntry{.neutral, 0xa7f5, 56}, +		EAWEntry{.neutral, 0xa830, 10}, +		EAWEntry{.neutral, 0xa840, 56}, +		EAWEntry{.neutral, 0xa880, 70}, +		EAWEntry{.neutral, 0xa8ce, 12}, +		EAWEntry{.neutral, 0xa8e0, 116}, +		EAWEntry{.neutral, 0xa95f, 1}, +		EAWEntry{.wide, 0xa960, 29}, +		EAWEntry{.neutral, 0xa980, 78}, +		EAWEntry{.neutral, 0xa9cf, 11}, +		EAWEntry{.neutral, 0xa9de, 33}, +		EAWEntry{.neutral, 0xaa00, 55}, +		EAWEntry{.neutral, 0xaa40, 14}, +		EAWEntry{.neutral, 
0xaa50, 10}, +		EAWEntry{.neutral, 0xaa5c, 103}, +		EAWEntry{.neutral, 0xaadb, 28}, +		EAWEntry{.neutral, 0xab01, 6}, +		EAWEntry{.neutral, 0xab09, 6}, +		EAWEntry{.neutral, 0xab11, 6}, +		EAWEntry{.neutral, 0xab20, 7}, +		EAWEntry{.neutral, 0xab28, 7}, +		EAWEntry{.neutral, 0xab30, 60}, +		EAWEntry{.neutral, 0xab70, 126}, +		EAWEntry{.neutral, 0xabf0, 10}, +		EAWEntry{.wide, 0xac00, 11172}, +		EAWEntry{.neutral, 0xd7b0, 23}, +		EAWEntry{.neutral, 0xd7cb, 49}, +		EAWEntry{.neutral, 0xd800, 2048}, +		EAWEntry{.ambiguous, 0xe000, 6400}, +		EAWEntry{.wide, 0xf900, 512}, +		EAWEntry{.neutral, 0xfb00, 7}, +		EAWEntry{.neutral, 0xfb13, 5}, +		EAWEntry{.neutral, 0xfb1d, 26}, +		EAWEntry{.neutral, 0xfb38, 5}, +		EAWEntry{.neutral, 0xfb3e, 1}, +		EAWEntry{.neutral, 0xfb40, 2}, +		EAWEntry{.neutral, 0xfb43, 2}, +		EAWEntry{.neutral, 0xfb46, 124}, +		EAWEntry{.neutral, 0xfbd3, 365}, +		EAWEntry{.neutral, 0xfd50, 64}, +		EAWEntry{.neutral, 0xfd92, 54}, +		EAWEntry{.neutral, 0xfdf0, 14}, +		EAWEntry{.ambiguous, 0xfe00, 16}, +		EAWEntry{.wide, 0xfe10, 10}, +		EAWEntry{.neutral, 0xfe20, 16}, +		EAWEntry{.wide, 0xfe30, 35}, +		EAWEntry{.wide, 0xfe54, 19}, +		EAWEntry{.wide, 0xfe68, 4}, +		EAWEntry{.neutral, 0xfe70, 5}, +		EAWEntry{.neutral, 0xfe76, 135}, +		EAWEntry{.neutral, 0xfeff, 1}, +		EAWEntry{.full, 0xff01, 96}, +		EAWEntry{.half, 0xff61, 94}, +		EAWEntry{.half, 0xffc2, 6}, +		EAWEntry{.half, 0xffca, 6}, +		EAWEntry{.half, 0xffd2, 6}, +		EAWEntry{.half, 0xffda, 3}, +		EAWEntry{.full, 0xffe0, 7}, +		EAWEntry{.half, 0xffe8, 7}, +		EAWEntry{.neutral, 0xfff9, 4}, +		EAWEntry{.ambiguous, 0xfffd, 1}, +		EAWEntry{.neutral, 0x10000, 12}, +		EAWEntry{.neutral, 0x1000d, 26}, +		EAWEntry{.neutral, 0x10028, 19}, +		EAWEntry{.neutral, 0x1003c, 2}, +		EAWEntry{.neutral, 0x1003f, 15}, +		EAWEntry{.neutral, 0x10050, 14}, +		EAWEntry{.neutral, 0x10080, 123}, +		EAWEntry{.neutral, 0x10100, 3}, +		EAWEntry{.neutral, 0x10107, 45}, +		EAWEntry{.neutral, 0x10137, 88}, +		EAWEntry{.neutral, 
0x10190, 13}, +		EAWEntry{.neutral, 0x101a0, 1}, +		EAWEntry{.neutral, 0x101d0, 46}, +		EAWEntry{.neutral, 0x10280, 29}, +		EAWEntry{.neutral, 0x102a0, 49}, +		EAWEntry{.neutral, 0x102e0, 28}, +		EAWEntry{.neutral, 0x10300, 36}, +		EAWEntry{.neutral, 0x1032d, 30}, +		EAWEntry{.neutral, 0x10350, 43}, +		EAWEntry{.neutral, 0x10380, 30}, +		EAWEntry{.neutral, 0x1039f, 37}, +		EAWEntry{.neutral, 0x103c8, 14}, +		EAWEntry{.neutral, 0x10400, 158}, +		EAWEntry{.neutral, 0x104a0, 10}, +		EAWEntry{.neutral, 0x104b0, 36}, +		EAWEntry{.neutral, 0x104d8, 36}, +		EAWEntry{.neutral, 0x10500, 40}, +		EAWEntry{.neutral, 0x10530, 52}, +		EAWEntry{.neutral, 0x1056f, 1}, +		EAWEntry{.neutral, 0x10600, 311}, +		EAWEntry{.neutral, 0x10740, 22}, +		EAWEntry{.neutral, 0x10760, 8}, +		EAWEntry{.neutral, 0x10800, 6}, +		EAWEntry{.neutral, 0x10808, 1}, +		EAWEntry{.neutral, 0x1080a, 44}, +		EAWEntry{.neutral, 0x10837, 2}, +		EAWEntry{.neutral, 0x1083c, 1}, +		EAWEntry{.neutral, 0x1083f, 23}, +		EAWEntry{.neutral, 0x10857, 72}, +		EAWEntry{.neutral, 0x108a7, 9}, +		EAWEntry{.neutral, 0x108e0, 19}, +		EAWEntry{.neutral, 0x108f4, 2}, +		EAWEntry{.neutral, 0x108fb, 33}, +		EAWEntry{.neutral, 0x1091f, 27}, +		EAWEntry{.neutral, 0x1093f, 1}, +		EAWEntry{.neutral, 0x10980, 56}, +		EAWEntry{.neutral, 0x109bc, 20}, +		EAWEntry{.neutral, 0x109d2, 50}, +		EAWEntry{.neutral, 0x10a05, 2}, +		EAWEntry{.neutral, 0x10a0c, 8}, +		EAWEntry{.neutral, 0x10a15, 3}, +		EAWEntry{.neutral, 0x10a19, 29}, +		EAWEntry{.neutral, 0x10a38, 3}, +		EAWEntry{.neutral, 0x10a3f, 10}, +		EAWEntry{.neutral, 0x10a50, 9}, +		EAWEntry{.neutral, 0x10a60, 64}, +		EAWEntry{.neutral, 0x10ac0, 39}, +		EAWEntry{.neutral, 0x10aeb, 12}, +		EAWEntry{.neutral, 0x10b00, 54}, +		EAWEntry{.neutral, 0x10b39, 29}, +		EAWEntry{.neutral, 0x10b58, 27}, +		EAWEntry{.neutral, 0x10b78, 26}, +		EAWEntry{.neutral, 0x10b99, 4}, +		EAWEntry{.neutral, 0x10ba9, 7}, +		EAWEntry{.neutral, 0x10c00, 73}, +		EAWEntry{.neutral, 0x10c80, 51}, +		
EAWEntry{.neutral, 0x10cc0, 51}, +		EAWEntry{.neutral, 0x10cfa, 46}, +		EAWEntry{.neutral, 0x10d30, 10}, +		EAWEntry{.neutral, 0x10e60, 31}, +		EAWEntry{.neutral, 0x10e80, 42}, +		EAWEntry{.neutral, 0x10eab, 3}, +		EAWEntry{.neutral, 0x10eb0, 2}, +		EAWEntry{.neutral, 0x10f00, 40}, +		EAWEntry{.neutral, 0x10f30, 42}, +		EAWEntry{.neutral, 0x10fb0, 28}, +		EAWEntry{.neutral, 0x10fe0, 23}, +		EAWEntry{.neutral, 0x11000, 78}, +		EAWEntry{.neutral, 0x11052, 30}, +		EAWEntry{.neutral, 0x1107f, 67}, +		EAWEntry{.neutral, 0x110cd, 1}, +		EAWEntry{.neutral, 0x110d0, 25}, +		EAWEntry{.neutral, 0x110f0, 10}, +		EAWEntry{.neutral, 0x11100, 53}, +		EAWEntry{.neutral, 0x11136, 18}, +		EAWEntry{.neutral, 0x11150, 39}, +		EAWEntry{.neutral, 0x11180, 96}, +		EAWEntry{.neutral, 0x111e1, 20}, +		EAWEntry{.neutral, 0x11200, 18}, +		EAWEntry{.neutral, 0x11213, 44}, +		EAWEntry{.neutral, 0x11280, 7}, +		EAWEntry{.neutral, 0x11288, 1}, +		EAWEntry{.neutral, 0x1128a, 4}, +		EAWEntry{.neutral, 0x1128f, 15}, +		EAWEntry{.neutral, 0x1129f, 11}, +		EAWEntry{.neutral, 0x112b0, 59}, +		EAWEntry{.neutral, 0x112f0, 10}, +		EAWEntry{.neutral, 0x11300, 4}, +		EAWEntry{.neutral, 0x11305, 8}, +		EAWEntry{.neutral, 0x1130f, 2}, +		EAWEntry{.neutral, 0x11313, 22}, +		EAWEntry{.neutral, 0x1132a, 7}, +		EAWEntry{.neutral, 0x11332, 2}, +		EAWEntry{.neutral, 0x11335, 5}, +		EAWEntry{.neutral, 0x1133b, 10}, +		EAWEntry{.neutral, 0x11347, 2}, +		EAWEntry{.neutral, 0x1134b, 3}, +		EAWEntry{.neutral, 0x11350, 1}, +		EAWEntry{.neutral, 0x11357, 1}, +		EAWEntry{.neutral, 0x1135d, 7}, +		EAWEntry{.neutral, 0x11366, 7}, +		EAWEntry{.neutral, 0x11370, 5}, +		EAWEntry{.neutral, 0x11400, 92}, +		EAWEntry{.neutral, 0x1145d, 5}, +		EAWEntry{.neutral, 0x11480, 72}, +		EAWEntry{.neutral, 0x114d0, 10}, +		EAWEntry{.neutral, 0x11580, 54}, +		EAWEntry{.neutral, 0x115b8, 38}, +		EAWEntry{.neutral, 0x11600, 69}, +		EAWEntry{.neutral, 0x11650, 10}, +		EAWEntry{.neutral, 0x11660, 13}, +		EAWEntry{.neutral, 0x11680, 57}, +		
EAWEntry{.neutral, 0x116c0, 10}, +		EAWEntry{.neutral, 0x11700, 27}, +		EAWEntry{.neutral, 0x1171d, 15}, +		EAWEntry{.neutral, 0x11730, 16}, +		EAWEntry{.neutral, 0x11800, 60}, +		EAWEntry{.neutral, 0x118a0, 83}, +		EAWEntry{.neutral, 0x118ff, 8}, +		EAWEntry{.neutral, 0x11909, 1}, +		EAWEntry{.neutral, 0x1190c, 8}, +		EAWEntry{.neutral, 0x11915, 2}, +		EAWEntry{.neutral, 0x11918, 30}, +		EAWEntry{.neutral, 0x11937, 2}, +		EAWEntry{.neutral, 0x1193b, 12}, +		EAWEntry{.neutral, 0x11950, 10}, +		EAWEntry{.neutral, 0x119a0, 8}, +		EAWEntry{.neutral, 0x119aa, 46}, +		EAWEntry{.neutral, 0x119da, 11}, +		EAWEntry{.neutral, 0x11a00, 72}, +		EAWEntry{.neutral, 0x11a50, 83}, +		EAWEntry{.neutral, 0x11ac0, 57}, +		EAWEntry{.neutral, 0x11c00, 9}, +		EAWEntry{.neutral, 0x11c0a, 45}, +		EAWEntry{.neutral, 0x11c38, 14}, +		EAWEntry{.neutral, 0x11c50, 29}, +		EAWEntry{.neutral, 0x11c70, 32}, +		EAWEntry{.neutral, 0x11c92, 22}, +		EAWEntry{.neutral, 0x11ca9, 14}, +		EAWEntry{.neutral, 0x11d00, 7}, +		EAWEntry{.neutral, 0x11d08, 2}, +		EAWEntry{.neutral, 0x11d0b, 44}, +		EAWEntry{.neutral, 0x11d3a, 1}, +		EAWEntry{.neutral, 0x11d3c, 2}, +		EAWEntry{.neutral, 0x11d3f, 9}, +		EAWEntry{.neutral, 0x11d50, 10}, +		EAWEntry{.neutral, 0x11d60, 6}, +		EAWEntry{.neutral, 0x11d67, 2}, +		EAWEntry{.neutral, 0x11d6a, 37}, +		EAWEntry{.neutral, 0x11d90, 2}, +		EAWEntry{.neutral, 0x11d93, 6}, +		EAWEntry{.neutral, 0x11da0, 10}, +		EAWEntry{.neutral, 0x11ee0, 25}, +		EAWEntry{.neutral, 0x11fb0, 1}, +		EAWEntry{.neutral, 0x11fc0, 50}, +		EAWEntry{.neutral, 0x11fff, 923}, +		EAWEntry{.neutral, 0x12400, 111}, +		EAWEntry{.neutral, 0x12470, 5}, +		EAWEntry{.neutral, 0x12480, 196}, +		EAWEntry{.neutral, 0x13000, 1071}, +		EAWEntry{.neutral, 0x13430, 9}, +		EAWEntry{.neutral, 0x14400, 583}, +		EAWEntry{.neutral, 0x16800, 569}, +		EAWEntry{.neutral, 0x16a40, 31}, +		EAWEntry{.neutral, 0x16a60, 10}, +		EAWEntry{.neutral, 0x16a6e, 2}, +		EAWEntry{.neutral, 0x16ad0, 30}, +		EAWEntry{.neutral, 0x16af0, 6}, 
+		EAWEntry{.neutral, 0x16b00, 70}, +		EAWEntry{.neutral, 0x16b50, 10}, +		EAWEntry{.neutral, 0x16b5b, 7}, +		EAWEntry{.neutral, 0x16b63, 21}, +		EAWEntry{.neutral, 0x16b7d, 19}, +		EAWEntry{.neutral, 0x16e40, 91}, +		EAWEntry{.neutral, 0x16f00, 75}, +		EAWEntry{.neutral, 0x16f4f, 57}, +		EAWEntry{.neutral, 0x16f8f, 17}, +		EAWEntry{.wide, 0x16fe0, 5}, +		EAWEntry{.wide, 0x16ff0, 2}, +		EAWEntry{.wide, 0x17000, 6136}, +		EAWEntry{.wide, 0x18800, 1238}, +		EAWEntry{.wide, 0x18d00, 9}, +		EAWEntry{.wide, 0x1b000, 287}, +		EAWEntry{.wide, 0x1b150, 3}, +		EAWEntry{.wide, 0x1b164, 4}, +		EAWEntry{.wide, 0x1b170, 396}, +		EAWEntry{.neutral, 0x1bc00, 107}, +		EAWEntry{.neutral, 0x1bc70, 13}, +		EAWEntry{.neutral, 0x1bc80, 9}, +		EAWEntry{.neutral, 0x1bc90, 10}, +		EAWEntry{.neutral, 0x1bc9c, 8}, +		EAWEntry{.neutral, 0x1d000, 246}, +		EAWEntry{.neutral, 0x1d100, 39}, +		EAWEntry{.neutral, 0x1d129, 192}, +		EAWEntry{.neutral, 0x1d200, 70}, +		EAWEntry{.neutral, 0x1d2e0, 20}, +		EAWEntry{.neutral, 0x1d300, 87}, +		EAWEntry{.neutral, 0x1d360, 25}, +		EAWEntry{.neutral, 0x1d400, 85}, +		EAWEntry{.neutral, 0x1d456, 71}, +		EAWEntry{.neutral, 0x1d49e, 2}, +		EAWEntry{.neutral, 0x1d4a2, 1}, +		EAWEntry{.neutral, 0x1d4a5, 2}, +		EAWEntry{.neutral, 0x1d4a9, 4}, +		EAWEntry{.neutral, 0x1d4ae, 12}, +		EAWEntry{.neutral, 0x1d4bb, 1}, +		EAWEntry{.neutral, 0x1d4bd, 7}, +		EAWEntry{.neutral, 0x1d4c5, 65}, +		EAWEntry{.neutral, 0x1d507, 4}, +		EAWEntry{.neutral, 0x1d50d, 8}, +		EAWEntry{.neutral, 0x1d516, 7}, +		EAWEntry{.neutral, 0x1d51e, 28}, +		EAWEntry{.neutral, 0x1d53b, 4}, +		EAWEntry{.neutral, 0x1d540, 5}, +		EAWEntry{.neutral, 0x1d546, 1}, +		EAWEntry{.neutral, 0x1d54a, 7}, +		EAWEntry{.neutral, 0x1d552, 340}, +		EAWEntry{.neutral, 0x1d6a8, 292}, +		EAWEntry{.neutral, 0x1d7ce, 702}, +		EAWEntry{.neutral, 0x1da9b, 5}, +		EAWEntry{.neutral, 0x1daa1, 15}, +		EAWEntry{.neutral, 0x1e000, 7}, +		EAWEntry{.neutral, 0x1e008, 17}, +		EAWEntry{.neutral, 0x1e01b, 7}, +		EAWEntry{.neutral, 
0x1e023, 2}, +		EAWEntry{.neutral, 0x1e026, 5}, +		EAWEntry{.neutral, 0x1e100, 45}, +		EAWEntry{.neutral, 0x1e130, 14}, +		EAWEntry{.neutral, 0x1e140, 10}, +		EAWEntry{.neutral, 0x1e14e, 2}, +		EAWEntry{.neutral, 0x1e2c0, 58}, +		EAWEntry{.neutral, 0x1e2ff, 1}, +		EAWEntry{.neutral, 0x1e800, 197}, +		EAWEntry{.neutral, 0x1e8c7, 16}, +		EAWEntry{.neutral, 0x1e900, 76}, +		EAWEntry{.neutral, 0x1e950, 10}, +		EAWEntry{.neutral, 0x1e95e, 2}, +		EAWEntry{.neutral, 0x1ec71, 68}, +		EAWEntry{.neutral, 0x1ed01, 61}, +		EAWEntry{.neutral, 0x1ee00, 4}, +		EAWEntry{.neutral, 0x1ee05, 27}, +		EAWEntry{.neutral, 0x1ee21, 2}, +		EAWEntry{.neutral, 0x1ee24, 1}, +		EAWEntry{.neutral, 0x1ee27, 1}, +		EAWEntry{.neutral, 0x1ee29, 10}, +		EAWEntry{.neutral, 0x1ee34, 4}, +		EAWEntry{.neutral, 0x1ee39, 1}, +		EAWEntry{.neutral, 0x1ee3b, 1}, +		EAWEntry{.neutral, 0x1ee42, 1}, +		EAWEntry{.neutral, 0x1ee47, 1}, +		EAWEntry{.neutral, 0x1ee49, 1}, +		EAWEntry{.neutral, 0x1ee4b, 1}, +		EAWEntry{.neutral, 0x1ee4d, 3}, +		EAWEntry{.neutral, 0x1ee51, 2}, +		EAWEntry{.neutral, 0x1ee54, 1}, +		EAWEntry{.neutral, 0x1ee57, 1}, +		EAWEntry{.neutral, 0x1ee59, 1}, +		EAWEntry{.neutral, 0x1ee5b, 1}, +		EAWEntry{.neutral, 0x1ee5d, 1}, +		EAWEntry{.neutral, 0x1ee5f, 1}, +		EAWEntry{.neutral, 0x1ee61, 2}, +		EAWEntry{.neutral, 0x1ee64, 1}, +		EAWEntry{.neutral, 0x1ee67, 4}, +		EAWEntry{.neutral, 0x1ee6c, 7}, +		EAWEntry{.neutral, 0x1ee74, 4}, +		EAWEntry{.neutral, 0x1ee79, 4}, +		EAWEntry{.neutral, 0x1ee7e, 1}, +		EAWEntry{.neutral, 0x1ee80, 10}, +		EAWEntry{.neutral, 0x1ee8b, 17}, +		EAWEntry{.neutral, 0x1eea1, 3}, +		EAWEntry{.neutral, 0x1eea5, 5}, +		EAWEntry{.neutral, 0x1eeab, 17}, +		EAWEntry{.neutral, 0x1eef0, 2}, +		EAWEntry{.neutral, 0x1f000, 4}, +		EAWEntry{.wide, 0x1f004, 1}, +		EAWEntry{.neutral, 0x1f005, 39}, +		EAWEntry{.neutral, 0x1f030, 100}, +		EAWEntry{.neutral, 0x1f0a0, 15}, +		EAWEntry{.neutral, 0x1f0b1, 15}, +		EAWEntry{.neutral, 0x1f0c1, 14}, +		EAWEntry{.wide, 0x1f0cf, 1}, +		
EAWEntry{.neutral, 0x1f0d1, 37}, +		EAWEntry{.ambiguous, 0x1f100, 11}, +		EAWEntry{.neutral, 0x1f10b, 5}, +		EAWEntry{.ambiguous, 0x1f110, 30}, +		EAWEntry{.neutral, 0x1f12e, 2}, +		EAWEntry{.ambiguous, 0x1f130, 58}, +		EAWEntry{.neutral, 0x1f16a, 6}, +		EAWEntry{.ambiguous, 0x1f170, 30}, +		EAWEntry{.wide, 0x1f18e, 1}, +		EAWEntry{.ambiguous, 0x1f18f, 2}, +		EAWEntry{.wide, 0x1f191, 10}, +		EAWEntry{.ambiguous, 0x1f19b, 18}, +		EAWEntry{.neutral, 0x1f1ad, 1}, +		EAWEntry{.neutral, 0x1f1e6, 26}, +		EAWEntry{.wide, 0x1f200, 3}, +		EAWEntry{.wide, 0x1f210, 44}, +		EAWEntry{.wide, 0x1f240, 9}, +		EAWEntry{.wide, 0x1f250, 2}, +		EAWEntry{.wide, 0x1f260, 6}, +		EAWEntry{.wide, 0x1f300, 33}, +		EAWEntry{.neutral, 0x1f321, 12}, +		EAWEntry{.wide, 0x1f32d, 9}, +		EAWEntry{.neutral, 0x1f336, 1}, +		EAWEntry{.wide, 0x1f337, 70}, +		EAWEntry{.neutral, 0x1f37d, 1}, +		EAWEntry{.wide, 0x1f37e, 22}, +		EAWEntry{.neutral, 0x1f394, 12}, +		EAWEntry{.wide, 0x1f3a0, 43}, +		EAWEntry{.neutral, 0x1f3cb, 4}, +		EAWEntry{.wide, 0x1f3cf, 5}, +		EAWEntry{.neutral, 0x1f3d4, 12}, +		EAWEntry{.wide, 0x1f3e0, 17}, +		EAWEntry{.neutral, 0x1f3f1, 3}, +		EAWEntry{.wide, 0x1f3f4, 1}, +		EAWEntry{.neutral, 0x1f3f5, 3}, +		EAWEntry{.wide, 0x1f3f8, 71}, +		EAWEntry{.neutral, 0x1f43f, 1}, +		EAWEntry{.wide, 0x1f440, 1}, +		EAWEntry{.neutral, 0x1f441, 1}, +		EAWEntry{.wide, 0x1f442, 187}, +		EAWEntry{.neutral, 0x1f4fd, 2}, +		EAWEntry{.wide, 0x1f4ff, 63}, +		EAWEntry{.neutral, 0x1f53e, 13}, +		EAWEntry{.wide, 0x1f54b, 4}, +		EAWEntry{.neutral, 0x1f54f, 1}, +		EAWEntry{.wide, 0x1f550, 24}, +		EAWEntry{.neutral, 0x1f568, 18}, +		EAWEntry{.wide, 0x1f57a, 1}, +		EAWEntry{.neutral, 0x1f57b, 26}, +		EAWEntry{.wide, 0x1f595, 2}, +		EAWEntry{.neutral, 0x1f597, 13}, +		EAWEntry{.wide, 0x1f5a4, 1}, +		EAWEntry{.neutral, 0x1f5a5, 86}, +		EAWEntry{.wide, 0x1f5fb, 85}, +		EAWEntry{.neutral, 0x1f650, 48}, +		EAWEntry{.wide, 0x1f680, 70}, +		EAWEntry{.neutral, 0x1f6c6, 6}, +		EAWEntry{.wide, 0x1f6cc, 1}, +		
EAWEntry{.neutral, 0x1f6cd, 3},
		EAWEntry{.wide, 0x1f6d0, 3},
		EAWEntry{.neutral, 0x1f6d3, 2},
		EAWEntry{.wide, 0x1f6d5, 3},
		EAWEntry{.neutral, 0x1f6e0, 11},
		EAWEntry{.wide, 0x1f6eb, 2},
		EAWEntry{.neutral, 0x1f6f0, 4},
		EAWEntry{.wide, 0x1f6f4, 9},
		EAWEntry{.neutral, 0x1f700, 116},
		EAWEntry{.neutral, 0x1f780, 89},
		EAWEntry{.wide, 0x1f7e0, 12},
		EAWEntry{.neutral, 0x1f800, 12},
		EAWEntry{.neutral, 0x1f810, 56},
		EAWEntry{.neutral, 0x1f850, 10},
		EAWEntry{.neutral, 0x1f860, 40},
		EAWEntry{.neutral, 0x1f890, 30},
		EAWEntry{.neutral, 0x1f8b0, 2},
		EAWEntry{.neutral, 0x1f900, 12},
		EAWEntry{.wide, 0x1f90c, 47},
		EAWEntry{.neutral, 0x1f93b, 1},
		EAWEntry{.wide, 0x1f93c, 10},
		EAWEntry{.neutral, 0x1f946, 1},
		EAWEntry{.wide, 0x1f947, 50},
		EAWEntry{.wide, 0x1f97a, 82},
		EAWEntry{.wide, 0x1f9cd, 51},
		EAWEntry{.neutral, 0x1fa00, 84},
		EAWEntry{.neutral, 0x1fa60, 14},
		EAWEntry{.wide, 0x1fa70, 5},
		EAWEntry{.wide, 0x1fa78, 3},
		EAWEntry{.wide, 0x1fa80, 7},
		EAWEntry{.wide, 0x1fa90, 25},
		EAWEntry{.wide, 0x1fab0, 7},
		EAWEntry{.wide, 0x1fac0, 3},
		EAWEntry{.wide, 0x1fad0, 7},
		EAWEntry{.neutral, 0x1fb00, 147},
		EAWEntry{.neutral, 0x1fb94, 55},
		EAWEntry{.neutral, 0x1fbf0, 10},
		EAWEntry{.wide, 0x20000, 65534},
		EAWEntry{.wide, 0x30000, 65534},
		EAWEntry{.neutral, 0xe0001, 1},
		EAWEntry{.neutral, 0xe0020, 96},
		EAWEntry{.ambiguous, 0xe0100, 240},
		EAWEntry{.ambiguous, 0xf0000, 65534},
		EAWEntry{.ambiguous, 0x100000, 65534},
	]
)
diff --git a/v_windows/v/vlib/encoding/utf8/east_asian/east_asian_width_test.v b/v_windows/v/vlib/encoding/utf8/east_asian/east_asian_width_test.v new file mode 100644 index 0000000..a44a9f8 --- /dev/null +++ b/v_windows/v/vlib/encoding/utf8/east_asian/east_asian_width_test.v @@ -0,0 +1,23 @@
module east_asian

// test_width checks the width property lookup and the derived display width.
// The fullwidth / halfwidth inputs must be the real compatibility code points:
// if they are folded to their canonical forms the same literal ends up being
// asserted against two different properties and the test can never pass.
fn test_width() {
	assert east_asian_width_property_at('A', 0) == .narrow
	// U+FF21 FULLWIDTH LATIN CAPITAL LETTER A
	assert east_asian_width_property_at('Ａ', 0) == .full
	// U+FF71 HALFWIDTH KATAKANA LETTER A
	assert east_asian_width_property_at('ｱ', 0) == .half
	assert east_asian_width_property_at('ア', 0) == .wide
	assert east_asian_width_property_at('☆', 0) == .ambiguous
	assert east_asian_width_property_at('ج', 0) == .neutral
	assert display_width('abc', 1) == 3
	assert display_width('ひらがな', 1) == 8
	assert display_width('カタカナ', 1) == 8
	// halfwidth katakana (U+FF76 U+FF80 U+FF76 U+FF85): one column each
	assert display_width('ｶﾀｶﾅ', 1) == 4
	assert display_width('한글', 1) == 4
	assert display_width('한자', 1) == 4
	assert display_width('漢字', 1) == 4
	assert display_width('简体字', 1) == 6
	assert display_width('繁體字', 1) == 6
	assert display_width('अरबी लिपि', 1) == 9
	assert display_width('☆', 1) == 1
	assert display_width('☆', 2) == 2
	assert display_width('🐈👽📛', 1) == 6
}
diff --git a/v_windows/v/vlib/encoding/utf8/encoding_utf8_test.v b/v_windows/v/vlib/encoding/utf8/encoding_utf8_test.v new file mode 100644 index 0000000..ebea87c --- /dev/null +++ b/v_windows/v/vlib/encoding/utf8/encoding_utf8_test.v @@ -0,0 +1,9 @@
import encoding.utf8

// test_validate_str covers well-formed input plus invalid lead bytes
// (0xC0, 0xC1, 0xF5, 0xFF) and an invalid second byte after 0xE0.
fn test_validate_str() {
	assert utf8.validate_str('añçá') == true
	assert utf8.validate_str('\x61\xC3\xB1\xC3\xA7\xC3\xA1') == true
	assert utf8.validate_str('\xC0\xC1') == false
	assert utf8.validate_str('\xF5\xFF') == false
	assert utf8.validate_str('\xE0\xEF') == false
}
diff --git a/v_windows/v/vlib/encoding/utf8/utf8.v b/v_windows/v/vlib/encoding/utf8/utf8.v new file mode 100644 index 0000000..88c598f --- /dev/null +++ b/v_windows/v/vlib/encoding/utf8/utf8.v @@ -0,0 +1,88 @@
module utf8

// Utf8State is the state of the byte-by-byte UTF-8 validator.
struct Utf8State {
mut:
	// index selects the sequence class currently being matched:
	// 0 = no sequence in progress, 1 = 2-byte, 2..5 = 3-byte, 6..8 = 4-byte.
	index int
	// subindex is the position of the next expected byte inside that sequence.
	subindex int
	// failed is set as soon as a byte cannot belong to any sequence class.
	failed bool
}

// validate_str returns true when `str` is well-formed UTF-8.
// See `validate` for the handling of NUL bytes.
pub fn validate_str(str string) bool {
	return validate(str.str, str.len)
}

// validate returns true when the first `len` bytes at `data` are well-formed
// UTF-8. Scanning stops at the first NUL byte, so anything after a NUL is not
// inspected. A sequence that is still open at the end of the scan is rejected.
pub fn validate(data &byte, len int) bool {
	mut state := Utf8State{}
	for i := 0; i < len; i++ {
		s := unsafe { data[i] }
		if s == 0 {
			break
		}
		state.next_state(s)
		if state.failed {
			return false
		}
	}
	return !state.failed && state.subindex <= 0
}

// seq feeds one byte to the sequence class selected by `s.index`.
// `r0` tells whether the byte is a valid lead byte for this class, `r1` whether
// it is a valid second byte for this class, and `is_tail` whether it is a
// generic continuation byte (0x80..0xBF).
// It returns true when the byte was consumed (successfully or with `s.failed`
// set), and false when this class does not match; in that case `s.index` has
// been advanced so the caller can try the next class with the same byte.
fn (mut s Utf8State) seq(r0 bool, r1 bool, is_tail bool) bool {
	if s.subindex == 0 || (s.index > 1 && s.subindex == 1) || (s.index >= 6 && s.subindex == 2) {
		// not yet at the last byte of the sequence
		if (s.subindex == 0 && r0) || (s.subindex == 1 && r1) || (s.subindex == 2 && is_tail) {
			s.subindex++
			return true
		}
	} else {
		// last byte of the sequence: it must be a continuation byte,
		// in which case the state machine goes back to idle
		s.failed = true
		if is_tail {
			s.index = 0
			s.subindex = 0
			s.failed = false
		}
		return true
	}
	// no match: move on to the next sequence class
	s.index++
	s.subindex = 0
	return false
}

// next_state advances the validator with the byte `c`.
// The classes are tried in order; the second-byte ranges passed to `seq`
// differ per lead byte (0xE0, 0xED, 0xF0 and 0xF4 get narrower ranges).
fn (mut s Utf8State) next_state(c byte) {
	// sequence 1: a single-byte (ASCII) character needs no further state
	if s.index == 0 {
		if c <= 0x7F {
			return
		}
		s.index++
		s.subindex = 0
	}
	is_tail := c >= 0x80 && c <= 0xBF
	// sequence 2
	if s.index == 1 && s.seq(c >= 0xC2 && c <= 0xDF, false, is_tail) {
		return
	}
	// sequence 3
	if s.index == 2 && s.seq(c == 0xE0, c >= 0xA0 && c <= 0xBF, is_tail) {
		return
	}
	if s.index == 3 && s.seq(c >= 0xE1 && c <= 0xEC, c >= 0x80 && c <= 0xBF, is_tail) {
		return
	}
	if s.index == 4 && s.seq(c == 0xED, c >= 0x80 && c <= 0x9F, is_tail) {
		return
	}
	if s.index == 5 && s.seq(c >= 0xEE && c <= 0xEF, c >= 0x80 && c <= 0xBF, is_tail) {
		return
	}
	// sequence 4
	if s.index == 6 && s.seq(c == 0xF0, c >= 0x90 && c <= 0xBF, is_tail) {
		return
	}
	if s.index == 7 && s.seq(c >= 0xF1 && c <= 0xF3, c >= 0x80 && c <= 0xBF, is_tail) {
		return
	}
	if s.index == 8 && s.seq(c == 0xF4, c >= 0x80 && c <= 0x8F, is_tail) {
		return
	}
	// no sequence class accepted the byte
	s.failed = true
}
diff --git a/v_windows/v/vlib/encoding/utf8/utf8_util.v b/v_windows/v/vlib/encoding/utf8/utf8_util.v new file mode 100644 index 0000000..2e3da0d --- /dev/null +++ b/v_windows/v/vlib/encoding/utf8/utf8_util.v @@ -0,0 +1,1161 @@
/*
utf-8 util

Copyright (c) 2019-2021 Dario Deledda. All rights reserved.
Use of this source code is governed by an MIT license
that can be found in the LICENSE file.

This file contains utilities for utf8 strings
*/
module utf8

/*
Utility functions
*/

// len returns the number of unicode chars in `s`.
// Characters are counted by stepping over the byte length announced by each
// lead byte, so the input is expected to be valid UTF-8.
pub fn len(s string) int {
	mut count := 0
	mut index := 0
	for index < s.len {
		index += utf8_char_len(s[index])
		count++
	}
	return count
}

// get_uchar decodes the unicode glyph that starts at byte `index` of `s` and
// returns its code point.
// It returns 0 when `s` is empty, when `index` is outside of the string, or
// when the sequence announced by the lead byte is cut off by the end of `s`
// (previously the last two cases indexed past the end of the string).
pub fn get_uchar(s string, index int) int {
	if index < 0 || index >= s.len {
		return 0
	}
	ch_len := utf8_char_len(s[index])
	if ch_len == 1 {
		return int(s[index])
	}
	if index + ch_len > s.len {
		// truncated multi-byte sequence
		return 0
	}

	// pack the raw bytes of the sequence into one word, first byte highest
	mut lword := 0
	for i in 0 .. ch_len {
		lword = (lword << 8) | int(s[index + i])
	}

	return match ch_len {
		// 2 byte utf-8, byte format: 110xxxxx 10xxxxxx
		2 {
			((lword & 0x1f00) >> 2) | (lword & 0x3f)
		}
		// 3 byte utf-8, byte format: 1110xxxx 10xxxxxx 10xxxxxx
		3 {
			((lword & 0x0f0000) >> 4) | ((lword & 0x3f00) >> 2) | (lword & 0x3f)
		}
		// 4 byte utf-8, byte format: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
		4 {
			((lword & 0x07000000) >> 6) | ((lword & 0x003f0000) >> 4) | ((lword & 0x00003F00) >> 2) | (lword & 0x0000003f)
		}
		else {
			0
		}
	}
}

// raw_index - get the raw character from the string by the given index value.
// The index counts unicode chars, not bytes.
// example: utf8.raw_index('我是V Lang', 1) => '是'
pub fn raw_index(s string, index int) string {
	mut r := []rune{}
	mut i := 0

	// decode chars from the start until the requested one has been collected
	for i < s.len && r.len <= index {
		r << rune(get_uchar(s, i))
		i += utf8_char_len(s[i])
	}

	// an index past the last char is still an out of range array access here
	return r[index].str()
}

// reverse - returns a reversed string.
// The string is split into unicode chars in a single pass (the byte sequence
// of every char is copied unchanged) and the chars are joined in reverse order.
// example: utf8.reverse('你好世界hello world') => 'dlrow olleh界世好你'.
pub fn reverse(s string) string {
	if s.len < 2 {
		return s.clone()
	}
	mut chars := []string{}
	mut i := 0
	for i < s.len {
		mut end := i + utf8_char_len(s[i])
		if end > s.len {
			// truncated last char: keep whatever bytes are left
			end = s.len
		}
		chars << s[i..end]
		i = end
	}
	return chars.reverse().join('')
}

/*
Conversion functions
*/

// to_upper return an uppercase string from a string
pub fn to_upper(s string) string {
	return up_low(s, true)
}

// to_lower return a lowercase string from a string
pub fn to_lower(s string) string {
	return up_low(s, false)
}

/*
Punctuation functions

The "western" function search on a small table, that is quicker than
the global unicode table search. **Use only for western chars**.
*/

//
// Western
//

// is_punct return true if the string[index] byte is the start of a unicode western punctuation
pub fn is_punct(s string, index int) bool {
	return is_uchar_punct(get_uchar(s, index))
}

// is_uchar_punct return true if the input unicode is a western unicode punctuation
pub fn is_uchar_punct(uchar int) bool {
	return find_punct_in_table(uchar, utf8.unicode_punct_western) >= 0
}

//
// Global
//

// is_global_punct return true if the string[index] byte is the start of a global unicode punctuation
pub fn is_global_punct(s string, index int) bool {
	return is_uchar_global_punct(get_uchar(s, index))
}

// is_uchar_global_punct return true if the input unicode is a global unicode punctuation
pub fn is_uchar_global_punct(uchar int) bool {
	return find_punct_in_table(uchar, utf8.unicode_punct) >= 0
}

/*
Private functions
*/

// utf8_to_lower returns the lowercase code point for `in_cp`, or `in_cp`
// itself when the code point is not covered by the ranges / table below.
fn utf8_to_lower(in_cp int) int {
	mut cp := in_cp
	if ((0x0041 <= cp) && (0x005a >= cp)) || ((0x00c0 <= cp) && (0x00d6 >= cp))
		|| ((0x00d8 <= cp) && (0x00de >= cp)) || ((0x0391 <= cp) && (0x03a1 >= cp))
		|| ((0x03a3 <= cp) && (0x03ab >= cp)) || ((0x0410 <= cp) && (0x042f >= cp)) {
		cp += 32
	} else if (0x0400 <= cp) && (0x040f >= cp) {
		cp += 80
	} else if ((0x0100 <= cp) && (0x012f >= cp)) || ((0x0132 <= cp) && (0x0137 >= cp))
		|| ((0x014a <= cp) && (0x0177 >= cp)) || ((0x0182 <= cp) && (0x0185 >= cp))
		|| ((0x01a0 <= cp) && (0x01a5 >= cp)) || ((0x01de <= cp) && (0x01ef >= cp))
		|| ((0x01f8 <= cp) && (0x021f >= cp)) || ((0x0222 <= cp) && (0x0233 >= cp))
		|| ((0x0246 <= cp) && (0x024f >= cp)) || ((0x03d8 <= cp) && (0x03ef >= cp))
		|| ((0x0460 <= cp) && (0x0481 >= cp)) || ((0x048a <= cp) && (0x04ff >= cp)) {
		// pairs laid out as (even = capital, odd = small)
		cp |= 0x1
	} else if ((0x0139 <= cp) && (0x0148 >= cp)) || ((0x0179 <= cp) && (0x017e >= cp))
		|| ((0x01af <= cp) && (0x01b0 >= cp)) || ((0x01b3 <= cp) && (0x01b6 >= cp))
		|| ((0x01cd <= cp) && (0x01dc >= cp)) {
		// pairs laid out as (odd = capital, even = small)
		cp += 1
		cp &= ~0x1
	} else if ((0x0531 <= cp) && (0x0556 >= cp)) || ((0x10A0 <= cp) && (0x10C5 >= cp)) {
		// ARMENIAN or GEORGIAN
		// NOTE(review): for Georgian this maps U+10A0.. onto U+10D0..; confirm
		// this is the intended pairing.
		cp += 0x30
	} else if (((0x1E00 <= cp) && (0x1E94 >= cp)) || ((0x1EA0 <= cp) && (0x1EF8 >= cp)))
		&& (cp & 1 == 0) {
		// LATIN CAPITAL LETTER
		cp += 1
	} else if (0x24B6 <= cp) && (0x24CF >= cp) {
		// CIRCLED LATIN
		cp += 0x1a
	} else if (0xFF21 <= cp) && (0xFF3A >= cp) {
		// FULLWIDTH LATIN CAPITAL: the small letters start at U+FF41, i.e.
		// 0x20 above the capitals (0x19 would map 'A' onto fullwidth 'Z')
		cp += 0x20
	} else if ((0x1F08 <= cp) && (0x1F0F >= cp)) || ((0x1F18 <= cp) && (0x1F1D >= cp))
		|| ((0x1F28 <= cp) && (0x1F2F >= cp)) || ((0x1F38 <= cp) && (0x1F3F >= cp))
		|| ((0x1F48 <= cp) && (0x1F4D >= cp)) || ((0x1F68 <= cp) && (0x1F6F >= cp))
		|| ((0x1F88 <= cp) && (0x1F8F >= cp)) || ((0x1F98 <= cp) && (0x1F9F >= cp))
		|| ((0x1FA8 <= cp) && (0x1FAF >= cp)) {
		// GREEK
		cp -= 8
	} else {
		match cp {
			0x0178 { cp = 0x00ff }
			0x0243 { cp = 0x0180 }
			0x018e { cp = 0x01dd }
			0x023d { cp = 0x019a }
			0x0220 { cp = 0x019e }
			0x01b7 { cp = 0x0292 }
			0x01c4 { cp = 0x01c6 }
			0x01c7 { cp = 0x01c9 }
			0x01ca { cp = 0x01cc }
			0x01f1 { cp = 0x01f3 }
			0x01f7 { cp = 0x01bf }
			0x0187 { cp = 0x0188 }
			0x018b { cp = 0x018c }
			0x0191 { cp = 0x0192 }
			0x0198 { cp = 0x0199 }
			0x01a7 { cp = 0x01a8 }
			0x01ac { cp = 0x01ad }
			0x01af { cp = 0x01b0 }
			0x01b8 { cp = 0x01b9 }
			0x01bc { cp = 0x01bd }
			0x01f4 { cp = 0x01f5 }
			0x023b { cp = 0x023c }
			0x0241 { cp = 0x0242 }
			0x03fd { cp = 0x037b }
			0x03fe { cp = 0x037c }
			0x03ff { cp = 0x037d }
			0x037f { cp = 0x03f3 }
			0x0386 { cp = 0x03ac }
			0x0388 { cp = 0x03ad }
			0x0389 { cp = 0x03ae }
			0x038a { cp = 0x03af }
			0x038c { cp = 0x03cc }
			0x038e { cp = 0x03cd }
			0x038f { cp = 0x03ce }
			0x0370 { cp = 0x0371 }
			0x0372 { cp = 0x0373 }
			0x0376 { cp = 0x0377 }
			0x03f4 { cp = 0x03b8 }
			0x03cf { cp = 0x03d7 }
			0x03f9 { cp = 0x03f2 }
			0x03f7 { cp = 0x03f8 }
			0x03fa { cp = 0x03fb }
			// GREEK
			0x1F59 { cp = 0x1F51 }
			0x1F5B { cp = 0x1F53 }
			0x1F5D { cp = 0x1F55 }
			0x1F5F { cp = 0x1F57 }
			0x1FB8 { cp = 0x1FB0 }
			0x1FB9 { cp = 0x1FB1 }
			0x1FD8 { cp = 0x1FD0 }
			0x1FD9 { cp = 0x1FD1 }
			0x1FE8 { cp = 0x1FE0 }
			0x1FE9 { cp = 0x1FE1 }
			else {}
		}
	}

	return cp
}

// utf8_to_upper returns the uppercase code point for `in_cp`, or `in_cp`
// itself when the code point is not covered by the ranges / table below.
fn utf8_to_upper(in_cp int) int {
	mut cp := in_cp
	if ((0x0061 <= cp) && (0x007a >= cp)) || ((0x00e0 <= cp) && (0x00f6 >= cp))
		|| ((0x00f8 <= cp) && (0x00fe >= cp)) || ((0x03b1 <= cp) && (0x03c1 >= cp))
		|| ((0x03c3 <= cp) && (0x03cb >= cp)) || ((0x0430 <= cp) && (0x044f >= cp)) {
		cp -= 32
	} else if (0x0450 <= cp) && (0x045f >= cp) {
		cp -= 80
	} else if ((0x0100 <= cp) && (0x012f >= cp)) || ((0x0132 <= cp) && (0x0137 >= cp))
		|| ((0x014a <= cp) && (0x0177 >= cp)) || ((0x0182 <= cp) && (0x0185 >= cp))
		|| ((0x01a0 <= cp) && (0x01a5 >= cp)) || ((0x01de <= cp) && (0x01ef >= cp))
		|| ((0x01f8 <= cp) && (0x021f >= cp)) || ((0x0222 <= cp) && (0x0233 >= cp))
		|| ((0x0246 <= cp) && (0x024f >= cp)) || ((0x03d8 <= cp) && (0x03ef >= cp))
		|| ((0x0460 <= cp) && (0x0481 >= cp)) || ((0x048a <= cp) && (0x04ff >= cp)) {
		// pairs laid out as (even = capital, odd = small)
		cp &= ~0x1
	} else if ((0x0139 <= cp) && (0x0148 >= cp)) || ((0x0179 <= cp) && (0x017e >= cp))
		|| ((0x01af <= cp) && (0x01b0 >= cp)) || ((0x01b3 <= cp) && (0x01b6 >= cp))
		|| ((0x01cd <= cp) && (0x01dc >= cp)) {
		// pairs laid out as (odd = capital, even = small)
		cp -= 1
		cp |= 0x1
	} else if ((0x0561 <= cp) && (0x0586 >= cp)) || ((0x10D0 <= cp) && (0x10F5 >= cp)) {
		// ARMENIAN or GEORGIAN
		// NOTE(review): for Georgian this maps U+10D0.. onto U+10A0..; confirm
		// this is the intended pairing.
		cp -= 0x30
	} else if (((0x1E01 <= cp) && (0x1E95 >= cp)) || ((0x1EA1 <= cp) && (0x1EF9 >= cp)))
		&& (cp & 1 == 1) {
		// LATIN SMALL LETTER
		cp -= 1
	} else if (0x24D0 <= cp) && (0x24E9 >= cp) {
		// CIRCLED LATIN
		cp -= 0x1a
	} else if (0xFF41 <= cp) && (0xFF5A >= cp) {
		// FULLWIDTH LATIN SMALL: the capitals start at U+FF21, i.e. 0x20 below
		// the small letters
		cp -= 0x20
	} else if ((0x1F00 <= cp) && (0x1F07 >= cp)) || ((0x1F10 <= cp) && (0x1F15 >= cp))
		|| ((0x1F20 <= cp) && (0x1F27 >= cp)) || ((0x1F30 <= cp) && (0x1F37 >= cp))
		|| ((0x1F40 <= cp) && (0x1F45 >= cp)) || ((0x1F60 <= cp) && (0x1F67 >= cp))
		|| ((0x1F80 <= cp) && (0x1F87 >= cp)) || ((0x1F90 <= cp) && (0x1F97 >= cp))
		|| ((0x1FA0 <= cp) && (0x1FA7 >= cp)) {
		// GREEK
		cp += 8
	} else {
		match cp {
			0x00ff { cp = 0x0178 }
			0x0180 { cp = 0x0243 }
			0x01dd { cp = 0x018e }
			0x019a { cp = 0x023d }
			0x019e { cp = 0x0220 }
			0x0292 { cp = 0x01b7 }
			0x01c6 { cp = 0x01c4 }
			0x01c9 { cp = 0x01c7 }
			0x01cc { cp = 0x01ca }
			0x01f3 { cp = 0x01f1 }
			0x01bf { cp = 0x01f7 }
			0x0188 { cp = 0x0187 }
			0x018c { cp = 0x018b }
			0x0192 { cp = 0x0191 }
			0x0199 { cp = 0x0198 }
			0x01a8 { cp = 0x01a7 }
			0x01ad { cp = 0x01ac }
			0x01b0 { cp = 0x01af }
			0x01b9 { cp = 0x01b8 }
			0x01bd { cp = 0x01bc }
			0x01f5 { cp = 0x01f4 }
			0x023c { cp = 0x023b }
			0x0242 { cp = 0x0241 }
			0x037b { cp = 0x03fd }
			0x037c { cp = 0x03fe }
			0x037d { cp = 0x03ff }
			0x03f3 { cp = 0x037f }
			0x03ac { cp = 0x0386 }
			0x03ad { cp = 0x0388 }
			0x03ae { cp = 0x0389 }
			0x03af { cp = 0x038a }
			0x03cc { cp = 0x038c }
			0x03cd { cp = 0x038e }
			0x03ce { cp = 0x038f }
			0x0371 { cp = 0x0370 }
			0x0373 { cp = 0x0372 }
			0x0377 { cp = 0x0376 }
			0x03d1 { cp = 0x0398 }
			0x03d7 { cp = 0x03cf }
			0x03f2 { cp = 0x03f9 }
			0x03f8 { cp = 0x03f7 }
			0x03fb { cp = 0x03fa }
			// GREEK
			0x1F51 { cp = 0x1F59 }
			0x1F53 { cp = 0x1F5B }
			0x1F55 { cp = 0x1F5D }
			0x1F57 { cp = 0x1F5F }
			0x1FB0 { cp = 0x1FB8 }
			0x1FB1 { cp = 0x1FB9 }
			0x1FD0 { cp = 0x1FD8 }
			0x1FD1 { cp = 0x1FD9 }
			0x1FE0 { cp = 0x1FE8 }
			0x1FE1 { cp = 0x1FE9 }
			else {}
		}
	}

	return cp
}

//
// if upper_flag == true  then make low ==> upper conversion
// if upper_flag == false then make upper ==> low conversion
//
// up_low does the actual work for to_upper / to_lower.
// The result always has the same byte length as `s`: 1 byte chars go through
// C.toupper / C.tolower, 2 and 3 byte chars are decoded, mapped and re-encoded
// with the same length, 4 byte chars are copied unchanged.
// An empty input returns an empty string, and a multi-byte char cut off by the
// end of `s` is copied as-is (both used to index past the end of the string).
fn up_low(s string, upper_flag bool) string {
	if s.len == 0 {
		return ''
	}

	mut index := 0
	mut str_res := unsafe { malloc_noscan(s.len + 1) }

	for index < s.len {
		ch_len := utf8_char_len(s[index])

		if index + ch_len > s.len {
			// truncated sequence at the end of the input: copy what is left
			for index < s.len {
				unsafe {
					str_res[index] = s[index]
				}
				index++
			}
			break
		}

		if ch_len == 1 {
			if upper_flag == true {
				unsafe {
					str_res[index] = byte(C.toupper(s.str[index]))
				}
			} else {
				unsafe {
					str_res[index] = byte(C.tolower(s.str[index]))
				}
			}
		} else if ch_len == 2 || ch_len == 3 {
			res := get_uchar(s, index)
			tab_char := if upper_flag { utf8_to_upper(res) } else { utf8_to_lower(res) }

			if ch_len == 2 {
				ch0 := byte((tab_char >> 6) & 0x1f) | 0xc0 // 110x xxxx
				ch1 := byte((tab_char >> 0) & 0x3f) | 0x80 // 10xx xxxx

				unsafe {
					str_res[index + 0] = ch0
					str_res[index + 1] = ch1
				}
			} else {
				ch0 := byte((tab_char >> 12) & 0x0f) | 0xe0 // 1110 xxxx
				ch1 := byte((tab_char >> 6) & 0x3f) | 0x80 // 10xx xxxx
				ch2 := byte((tab_char >> 0) & 0x3f) | 0x80 // 10xx xxxx

				unsafe {
					str_res[index + 0] = ch0
					str_res[index + 1] = ch1
					str_res[index + 2] = ch2
				}
			}
		} else {
			// TODO: write if needed
			// 4 byte chars have no case mapping here: simply copy the utf8 char
			for i in 0 .. ch_len {
				unsafe {
					str_res[index + i] = s[index + i]
				}
			}
		}

		index += ch_len
	}

	// for c compatibility set the ending 0
	unsafe {
		str_res[index] = 0
		return tos(str_res, s.len)
	}
}

// find_punct_in_table looks up `in_code` in the sorted table `in_table` with a
// binary search. It returns the index of the entry, or -1 when the code is not
// in the table (0 cannot be used for "not found": it is the index of the first
// entry, which made '!' undetectable).
fn find_punct_in_table(in_code int, in_table []int) int {
	mut low := 0
	mut high := in_table.len - 1

	for low <= high {
		mid := low + (high - low) / 2
		x := in_table[mid]

		if x == in_code {
			return mid
		}
		if x < in_code {
			low = mid + 1
		} else {
			high = mid - 1
		}
	}
	return -1
}

/*
Unicode punctuation chars

source: http://www.unicode.org/faq/punctuation_symbols.html
*/
const (
	// Western punctuation mark
	// Character	Name	Browser	Image
	unicode_punct_western = [
		0x0021 /* EXCLAMATION MARK	! */,
		0x0022 /* QUOTATION MARK	" */,
		0x0027 /* APOSTROPHE	' */,
		0x002A /* ASTERISK	* */,
		0x002C /* COMMA	, */,
		0x002E /* FULL STOP	. */,
		0x002F /* SOLIDUS	/ */,
		0x003A /* COLON	: */,
		0x003B /* SEMICOLON	; */,
		0x003F /* QUESTION MARK	? */,
		0x00A1 /* INVERTED EXCLAMATION MARK	¡ */,
		0x00A7 /* SECTION SIGN	§ */,
		0x00B6 /* PILCROW SIGN	¶ */,
		0x00B7 /* MIDDLE DOT	· */,
		0x00BF /* INVERTED QUESTION MARK	¿ */,
		0x037E /* GREEK QUESTION MARK	; */,
		0x0387 /* GREEK ANO TELEIA	· */,
		0x055A /* ARMENIAN APOSTROPHE	՚ */,
		0x055B /* ARMENIAN EMPHASIS MARK	՛ */,
		0x055C /* ARMENIAN EXCLAMATION MARK	՜ */,
		0x055D /* ARMENIAN COMMA	՝ */,
		0x055E /* ARMENIAN QUESTION MARK	՞ */,
		0x055F /* ARMENIAN ABBREVIATION MARK	՟ */,
		0x0589 /* ARMENIAN FULL STOP	։ */,
		0x05C0 /* HEBREW PUNCTUATION PASEQ	׀ */,
		0x05C3 /* HEBREW PUNCTUATION SOF PASUQ	׃ */,
		0x05C6 /* HEBREW PUNCTUATION NUN HAFUKHA	׆ */,
		0x05F3 /* HEBREW PUNCTUATION GERESH	׳ */,
		0x05F4 /* HEBREW PUNCTUATION GERSHAYIM	״ */,
	]

	// Unicode Characters in the 'Punctuation, Other' Category
	// Character	Name	Browser	Image
	unicode_punct         = [
		0x0021 /* EXCLAMATION MARK	! */,
		0x0022 /* QUOTATION MARK	" */,
		0x0023 /* NUMBER SIGN	# */,
		0x0025 /* PERCENT SIGN	% */,
		0x0026 /* AMPERSAND	& */,
		0x0027 /* APOSTROPHE	' */,
		0x002A /* ASTERISK	* */,
		0x002C /* COMMA	, */,
		0x002E /* FULL STOP	. */,
		0x002F /* SOLIDUS	/ */,
		0x003A /* COLON	: */,
		0x003B /* SEMICOLON	; */,
		0x003F /* QUESTION MARK	? 
*/, +		0x0040 /* COMMERCIAL AT	@ */, +		0x005C /* REVERSE SOLIDUS	\ */, +		0x00A1 /* INVERTED EXCLAMATION MARK	¡ */, +		0x00A7 /* SECTION SIGN	§ */, +		0x00B6 /* PILCROW SIGN	¶ */, +		0x00B7 /* MIDDLE DOT	· */, +		0x00BF /* INVERTED QUESTION MARK	¿ */, +		0x037E /* GREEK QUESTION MARK	; */, +		0x0387 /* GREEK ANO TELEIA	· */, +		0x055A /* ARMENIAN APOSTROPHE	՚ */, +		0x055B /* ARMENIAN EMPHASIS MARK	՛ */, +		0x055C /* ARMENIAN EXCLAMATION MARK	՜ */, +		0x055D /* ARMENIAN COMMA	՝ */, +		0x055E /* ARMENIAN QUESTION MARK	՞ */, +		0x055F /* ARMENIAN ABBREVIATION MARK	՟ */, +		0x0589 /* ARMENIAN FULL STOP	։ */, +		0x05C0 /* HEBREW PUNCTUATION PASEQ	׀ */, +		0x05C3 /* HEBREW PUNCTUATION SOF PASUQ	׃ */, +		0x05C6 /* HEBREW PUNCTUATION NUN HAFUKHA	׆ */, +		0x05F3 /* HEBREW PUNCTUATION GERESH	׳ */, +		0x05F4 /* HEBREW PUNCTUATION GERSHAYIM	״ */, +		0x0609 /* ARABIC-INDIC PER MILLE SIGN	؉ */, +		0x060A /* ARABIC-INDIC PER TEN THOUSAND SIGN	؊ */, +		0x060C /* ARABIC COMMA	، */, +		0x060D /* ARABIC DATE SEPARATOR	؍ */, +		0x061B /* ARABIC SEMICOLON	؛ */, +		0x061E /* ARABIC TRIPLE DOT PUNCTUATION MARK	؞ */, +		0x061F /* ARABIC QUESTION MARK	؟ */, +		0x066A /* ARABIC PERCENT SIGN	٪ */, +		0x066B /* ARABIC DECIMAL SEPARATOR	٫ */, +		0x066C /* ARABIC THOUSANDS SEPARATOR	٬ */, +		0x066D /* ARABIC FIVE POINTED STAR	٭ */, +		0x06D4 /* ARABIC FULL STOP	۔ */, +		0x0700 /* SYRIAC END OF PARAGRAPH	܀ */, +		0x0701 /* SYRIAC SUPRALINEAR FULL STOP	܁ */, +		0x0702 /* SYRIAC SUBLINEAR FULL STOP	܂ */, +		0x0703 /* SYRIAC SUPRALINEAR COLON	܃ */, +		0x0704 /* SYRIAC SUBLINEAR COLON	܄ */, +		0x0705 /* SYRIAC HORIZONTAL COLON	܅ */, +		0x0706 /* SYRIAC COLON SKEWED LEFT	܆ */, +		0x0707 /* SYRIAC COLON SKEWED RIGHT	܇ */, +		0x0708 /* SYRIAC SUPRALINEAR COLON SKEWED LEFT	܈ */, +		0x0709 /* SYRIAC SUBLINEAR COLON SKEWED RIGHT	܉ */, +		0x070A /* SYRIAC CONTRACTION	܊ */, +		0x070B /* SYRIAC HARKLEAN OBELUS	܋ */, +		0x070C /* SYRIAC HARKLEAN METOBELUS	܌ */, +		0x070D /* SYRIAC HARKLEAN ASTERISCUS	܍ */, 
+		0x07F7 /* NKO SYMBOL GBAKURUNEN	߷ */, +		0x07F8 /* NKO COMMA	߸ */, +		0x07F9 /* NKO EXCLAMATION MARK	߹ */, +		0x0830 /* SAMARITAN PUNCTUATION NEQUDAA	࠰ */, +		0x0831 /* SAMARITAN PUNCTUATION AFSAAQ	࠱ */, +		0x0832 /* SAMARITAN PUNCTUATION ANGED	࠲ */, +		0x0833 /* SAMARITAN PUNCTUATION BAU	࠳ */, +		0x0834 /* SAMARITAN PUNCTUATION ATMAAU	࠴ */, +		0x0835 /* SAMARITAN PUNCTUATION SHIYYAALAA	࠵ */, +		0x0836 /* SAMARITAN ABBREVIATION MARK	࠶ */, +		0x0837 /* SAMARITAN PUNCTUATION MELODIC QITSA	࠷ */, +		0x0838 /* SAMARITAN PUNCTUATION ZIQAA	࠸ */, +		0x0839 /* SAMARITAN PUNCTUATION QITSA	࠹ */, +		0x083A /* SAMARITAN PUNCTUATION ZAEF	࠺ */, +		0x083B /* SAMARITAN PUNCTUATION TURU	࠻ */, +		0x083C /* SAMARITAN PUNCTUATION ARKAANU	࠼ */, +		0x083D /* SAMARITAN PUNCTUATION SOF MASHFAAT	࠽ */, +		0x083E /* SAMARITAN PUNCTUATION ANNAAU	࠾ */, +		0x085E /* MANDAIC PUNCTUATION	࡞ */, +		0x0964 /* DEVANAGARI DANDA	। */, +		0x0965 /* DEVANAGARI DOUBLE DANDA	॥ */, +		0x0970 /* DEVANAGARI ABBREVIATION SIGN	॰ */, +		0x09FD /* BENGALI ABBREVIATION SIGN	৽ */, +		0x0A76 /* GURMUKHI ABBREVIATION SIGN	੶ */, +		0x0AF0 /* GUJARATI ABBREVIATION SIGN	૰ */, +		0x0C77 /* TELUGU SIGN SIDDHAM	౷ */, +		0x0C84 /* KANNADA SIGN SIDDHAM	಄ */, +		0x0DF4 /* SINHALA PUNCTUATION KUNDDALIYA	෴ */, +		0x0E4F /* THAI CHARACTER FONGMAN	๏ */, +		0x0E5A /* THAI CHARACTER ANGKHANKHU	๚ */, +		0x0E5B /* THAI CHARACTER KHOMUT	๛ */, +		0x0F04 /* TIBETAN MARK INITIAL YIG MGO MDUN MA	༄ */, +		0x0F05 /* TIBETAN MARK CLOSING YIG MGO SGAB MA	༅ */, +		0x0F06 /* TIBETAN MARK CARET YIG MGO PHUR SHAD MA	༆ */, +		0x0F07 /* TIBETAN MARK YIG MGO TSHEG SHAD MA	༇ */, +		0x0F08 /* TIBETAN MARK SBRUL SHAD	༈ */, +		0x0F09 /* TIBETAN MARK BSKUR YIG MGO	༉ */, +		0x0F0A /* TIBETAN MARK BKA- SHOG YIG MGO	༊ */, +		0x0F0B /* TIBETAN MARK INTERSYLLABIC TSHEG	་ */, +		0x0F0C /* TIBETAN MARK DELIMITER TSHEG BSTAR	༌ */, +		0x0F0D /* TIBETAN MARK SHAD	། */, +		0x0F0E /* TIBETAN MARK NYIS SHAD	༎ */, +		0x0F0F /* TIBETAN MARK TSHEG SHAD	༏ */, +		0x0F10 
/* TIBETAN MARK NYIS TSHEG SHAD	༐ */, +		0x0F11 /* TIBETAN MARK RIN CHEN SPUNGS SHAD	༑ */, +		0x0F12 /* TIBETAN MARK RGYA GRAM SHAD	༒ */, +		0x0F14 /* TIBETAN MARK GTER TSHEG	༔ */, +		0x0F85 /* TIBETAN MARK PALUTA	྅ */, +		0x0FD0 /* TIBETAN MARK BSKA- SHOG GI MGO RGYAN	࿐ */, +		0x0FD1 /* TIBETAN MARK MNYAM YIG GI MGO RGYAN	࿑ */, +		0x0FD2 /* TIBETAN MARK NYIS TSHEG	࿒ */, +		0x0FD3 /* TIBETAN MARK INITIAL BRDA RNYING YIG MGO MDUN MA	࿓ */, +		0x0FD4 /* TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA	࿔ */, +		0x0FD9 /* TIBETAN MARK LEADING MCHAN RTAGS	࿙ */, +		0x0FDA /* TIBETAN MARK TRAILING MCHAN RTAGS	࿚ */, +		0x104A /* MYANMAR SIGN LITTLE SECTION	၊ */, +		0x104B /* MYANMAR SIGN SECTION	။ */, +		0x104C /* MYANMAR SYMBOL LOCATIVE	၌ */, +		0x104D /* MYANMAR SYMBOL COMPLETED	၍ */, +		0x104E /* MYANMAR SYMBOL AFOREMENTIONED	၎ */, +		0x104F /* MYANMAR SYMBOL GENITIVE	၏ */, +		0x10FB /* GEORGIAN PARAGRAPH SEPARATOR	჻ */, +		0x1360 /* ETHIOPIC SECTION MARK	፠ */, +		0x1361 /* ETHIOPIC WORDSPACE	፡ */, +		0x1362 /* ETHIOPIC FULL STOP	። */, +		0x1363 /* ETHIOPIC COMMA	፣ */, +		0x1364 /* ETHIOPIC SEMICOLON	፤ */, +		0x1365 /* ETHIOPIC COLON	፥ */, +		0x1366 /* ETHIOPIC PREFACE COLON	፦ */, +		0x1367 /* ETHIOPIC QUESTION MARK	፧ */, +		0x1368 /* ETHIOPIC PARAGRAPH SEPARATOR	፨ */, +		0x166E /* CANADIAN SYLLABICS FULL STOP	᙮ */, +		0x16EB /* RUNIC SINGLE PUNCTUATION	᛫ */, +		0x16EC /* RUNIC MULTIPLE PUNCTUATION	᛬ */, +		0x16ED /* RUNIC CROSS PUNCTUATION	᛭ */, +		0x1735 /* PHILIPPINE SINGLE PUNCTUATION	᜵ */, +		0x1736 /* PHILIPPINE DOUBLE PUNCTUATION	᜶ */, +		0x17D4 /* KHMER SIGN KHAN	។ */, +		0x17D5 /* KHMER SIGN BARIYOOSAN	៕ */, +		0x17D6 /* KHMER SIGN CAMNUC PII KUUH	៖ */, +		0x17D8 /* KHMER SIGN BEYYAL	៘ */, +		0x17D9 /* KHMER SIGN PHNAEK MUAN	៙ */, +		0x17DA /* KHMER SIGN KOOMUUT	៚ */, +		0x1800 /* MONGOLIAN BIRGA	᠀ */, +		0x1801 /* MONGOLIAN ELLIPSIS	᠁ */, +		0x1802 /* MONGOLIAN COMMA	᠂ */, +		0x1803 /* MONGOLIAN FULL STOP	᠃ */, +		0x1804 /* MONGOLIAN COLON	᠄ */, +		0x1805 /* 
MONGOLIAN FOUR DOTS	᠅ */, +		0x1807 /* MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER	᠇ */, +		0x1808 /* MONGOLIAN MANCHU COMMA	᠈ */, +		0x1809 /* MONGOLIAN MANCHU FULL STOP	᠉ */, +		0x180A /* MONGOLIAN NIRUGU	᠊ */, +		0x1944 /* LIMBU EXCLAMATION MARK	᥄ */, +		0x1945 /* LIMBU QUESTION MARK	᥅ */, +		0x1A1E /* BUGINESE PALLAWA	᨞ */, +		0x1A1F /* BUGINESE END OF SECTION	᨟ */, +		0x1AA0 /* TAI THAM SIGN WIANG	᪠ */, +		0x1AA1 /* TAI THAM SIGN WIANGWAAK	᪡ */, +		0x1AA2 /* TAI THAM SIGN SAWAN	᪢ */, +		0x1AA3 /* TAI THAM SIGN KEOW	᪣ */, +		0x1AA4 /* TAI THAM SIGN HOY	᪤ */, +		0x1AA5 /* TAI THAM SIGN DOKMAI	᪥ */, +		0x1AA6 /* TAI THAM SIGN REVERSED ROTATED RANA	᪦ */, +		0x1AA8 /* TAI THAM SIGN KAAN	᪨ */, +		0x1AA9 /* TAI THAM SIGN KAANKUU	᪩ */, +		0x1AAA /* TAI THAM SIGN SATKAAN	᪪ */, +		0x1AAB /* TAI THAM SIGN SATKAANKUU	᪫ */, +		0x1AAC /* TAI THAM SIGN HANG	᪬ */, +		0x1AAD /* TAI THAM SIGN CAANG	᪭ */, +		0x1B5A /* BALINESE PANTI	᭚ */, +		0x1B5B /* BALINESE PAMADA	᭛ */, +		0x1B5C /* BALINESE WINDU	᭜ */, +		0x1B5D /* BALINESE CARIK PAMUNGKAH	᭝ */, +		0x1B5E /* BALINESE CARIK SIKI	᭞ */, +		0x1B5F /* BALINESE CARIK PAREREN	᭟ */, +		0x1B60 /* BALINESE PAMENENG	᭠ */, +		0x1BFC /* BATAK SYMBOL BINDU NA METEK	᯼ */, +		0x1BFD /* BATAK SYMBOL BINDU PINARBORAS	᯽ */, +		0x1BFE /* BATAK SYMBOL BINDU JUDUL	᯾ */, +		0x1BFF /* BATAK SYMBOL BINDU PANGOLAT	᯿ */, +		0x1C3B /* LEPCHA PUNCTUATION TA-ROL	᰻ */, +		0x1C3C /* LEPCHA PUNCTUATION NYET THYOOM TA-ROL	᰼ */, +		0x1C3D /* LEPCHA PUNCTUATION CER-WA	᰽ */, +		0x1C3E /* LEPCHA PUNCTUATION TSHOOK CER-WA	᰾ */, +		0x1C3F /* LEPCHA PUNCTUATION TSHOOK	᰿ */, +		0x1C7E /* OL CHIKI PUNCTUATION MUCAAD	᱾ */, +		0x1C7F /* OL CHIKI PUNCTUATION DOUBLE MUCAAD	᱿ */, +		0x1CC0 /* SUNDANESE PUNCTUATION BINDU SURYA	᳀ */, +		0x1CC1 /* SUNDANESE PUNCTUATION BINDU PANGLONG	᳁ */, +		0x1CC2 /* SUNDANESE PUNCTUATION BINDU PURNAMA	᳂ */, +		0x1CC3 /* SUNDANESE PUNCTUATION BINDU CAKRA	᳃ */, +		0x1CC4 /* SUNDANESE PUNCTUATION BINDU LEU SATANGA	᳄ */, +		0x1CC5 /* SUNDANESE 
PUNCTUATION BINDU KA SATANGA	᳅ */, +		0x1CC6 /* SUNDANESE PUNCTUATION BINDU DA SATANGA	᳆ */, +		0x1CC7 /* SUNDANESE PUNCTUATION BINDU BA SATANGA	᳇ */, +		0x1CD3 /* VEDIC SIGN NIHSHVASA	᳓ */, +		0x2016 /* DOUBLE VERTICAL LINE	‖ */, +		0x2017 /* DOUBLE LOW LINE	‗ */, +		0x2020 /* DAGGER	† */, +		0x2021 /* DOUBLE DAGGER	‡ */, +		0x2022 /* BULLET	• */, +		0x2023 /* TRIANGULAR BULLET	‣ */, +		0x2024 /* ONE DOT LEADER	․ */, +		0x2025 /* TWO DOT LEADER	‥ */, +		0x2026 /* HORIZONTAL ELLIPSIS	… */, +		0x2027 /* HYPHENATION POINT	‧ */, +		0x2030 /* PER MILLE SIGN	‰ */, +		0x2031 /* PER TEN THOUSAND SIGN	‱ */, +		0x2032 /* PRIME	′ */, +		0x2033 /* DOUBLE PRIME	″ */, +		0x2034 /* TRIPLE PRIME	‴ */, +		0x2035 /* REVERSED PRIME	‵ */, +		0x2036 /* REVERSED DOUBLE PRIME	‶ */, +		0x2037 /* REVERSED TRIPLE PRIME	‷ */, +		0x2038 /* CARET	‸ */, +		0x203B /* REFERENCE MARK	※ */, +		0x203C /* DOUBLE EXCLAMATION MARK	‼ */, +		0x203D /* INTERROBANG	‽ */, +		0x203E /* OVERLINE	‾ */, +		0x2041 /* CARET INSERTION POINT	⁁ */, +		0x2042 /* ASTERISM	⁂ */, +		0x2043 /* HYPHEN BULLET	⁃ */, +		0x2047 /* DOUBLE QUESTION MARK	⁇ */, +		0x2048 /* QUESTION EXCLAMATION MARK	⁈ */, +		0x2049 /* EXCLAMATION QUESTION MARK	⁉ */, +		0x204A /* TIRONIAN SIGN ET	⁊ */, +		0x204B /* REVERSED PILCROW SIGN	⁋ */, +		0x204C /* BLACK LEFTWARDS BULLET	⁌ */, +		0x204D /* BLACK RIGHTWARDS BULLET	⁍ */, +		0x204E /* LOW ASTERISK	⁎ */, +		0x204F /* REVERSED SEMICOLON	⁏ */, +		0x2050 /* CLOSE UP	⁐ */, +		0x2051 /* TWO ASTERISKS ALIGNED VERTICALLY	⁑ */, +		0x2053 /* SWUNG DASH	⁓ */, +		0x2055 /* FLOWER PUNCTUATION MARK	⁕ */, +		0x2056 /* THREE DOT PUNCTUATION	⁖ */, +		0x2057 /* QUADRUPLE PRIME	⁗ */, +		0x2058 /* FOUR DOT PUNCTUATION	⁘ */, +		0x2059 /* FIVE DOT PUNCTUATION	⁙ */, +		0x205A /* TWO DOT PUNCTUATION	⁚ */, +		0x205B /* FOUR DOT MARK	⁛ */, +		0x205C /* DOTTED CROSS	⁜ */, +		0x205D /* TRICOLON	⁝ */, +		0x205E /* VERTICAL FOUR DOTS	⁞ */, +		0x2CF9 /* COPTIC OLD NUBIAN FULL STOP	⳹ */, +		0x2CFA /* COPTIC OLD NUBIAN 
DIRECT QUESTION MARK	⳺ */, +		0x2CFB /* COPTIC OLD NUBIAN INDIRECT QUESTION MARK	⳻ */, +		0x2CFC /* COPTIC OLD NUBIAN VERSE DIVIDER	⳼ */, +		0x2CFE /* COPTIC FULL STOP	⳾ */, +		0x2CFF /* COPTIC MORPHOLOGICAL DIVIDER	⳿ */, +		0x2D70 /* TIFINAGH SEPARATOR MARK	⵰ */, +		0x2E00 /* RIGHT ANGLE SUBSTITUTION MARKER	⸀ */, +		0x2E01 /* RIGHT ANGLE DOTTED SUBSTITUTION MARKER	⸁ */, +		0x2E06 /* RAISED INTERPOLATION MARKER	⸆ */, +		0x2E07 /* RAISED DOTTED INTERPOLATION MARKER	⸇ */, +		0x2E08 /* DOTTED TRANSPOSITION MARKER	⸈ */, +		0x2E0B /* RAISED SQUARE	⸋ */, +		0x2E0E /* EDITORIAL CORONIS	⸎ */, +		0x2E0F /* PARAGRAPHOS	⸏ */, +		0x2E10 /* FORKED PARAGRAPHOS	⸐ */, +		0x2E11 /* REVERSED FORKED PARAGRAPHOS	⸑ */, +		0x2E12 /* HYPODIASTOLE	⸒ */, +		0x2E13 /* DOTTED OBELOS	⸓ */, +		0x2E14 /* DOWNWARDS ANCORA	⸔ */, +		0x2E15 /* UPWARDS ANCORA	⸕ */, +		0x2E16 /* DOTTED RIGHT-POINTING ANGLE	⸖ */, +		0x2E18 /* INVERTED INTERROBANG	⸘ */, +		0x2E19 /* PALM BRANCH	⸙ */, +		0x2E1B /* TILDE WITH RING ABOVE	⸛ */, +		0x2E1E /* TILDE WITH DOT ABOVE	⸞ */, +		0x2E1F /* TILDE WITH DOT BELOW	⸟ */, +		0x2E2A /* TWO DOTS OVER ONE DOT PUNCTUATION	⸪ */, +		0x2E2B /* ONE DOT OVER TWO DOTS PUNCTUATION	⸫ */, +		0x2E2C /* SQUARED FOUR DOT PUNCTUATION	⸬ */, +		0x2E2D /* FIVE DOT MARK	⸭ */, +		0x2E2E /* REVERSED QUESTION MARK	⸮ */, +		0x2E30 /* RING POINT	⸰ */, +		0x2E31 /* WORD SEPARATOR MIDDLE DOT	⸱ */, +		0x2E32 /* TURNED COMMA	⸲ */, +		0x2E33 /* RAISED DOT	⸳ */, +		0x2E34 /* RAISED COMMA	⸴ */, +		0x2E35 /* TURNED SEMICOLON	⸵ */, +		0x2E36 /* DAGGER WITH LEFT GUARD	⸶ */, +		0x2E37 /* DAGGER WITH RIGHT GUARD	⸷ */, +		0x2E38 /* TURNED DAGGER	⸸ */, +		0x2E39 /* TOP HALF SECTION SIGN	⸹ */, +		0x2E3C /* STENOGRAPHIC FULL STOP	⸼ */, +		0x2E3D /* VERTICAL SIX DOTS	⸽ */, +		0x2E3E /* WIGGLY VERTICAL LINE	⸾ */, +		0x2E3F /* CAPITULUM	⸿ */, +		0x2E41 /* REVERSED COMMA	⹁ */, +		0x2E43 /* DASH WITH LEFT UPTURN	⹃ */, +		0x2E44 /* DOUBLE SUSPENSION MARK	⹄ */, +		0x2E45 /* INVERTED LOW KAVYKA	⹅ */, +		0x2E46 /* 
INVERTED LOW KAVYKA WITH KAVYKA ABOVE	⹆ */, +		0x2E47 /* LOW KAVYKA	⹇ */, +		0x2E48 /* LOW KAVYKA WITH DOT	⹈ */, +		0x2E49 /* DOUBLE STACKED COMMA	⹉ */, +		0x2E4A /* DOTTED SOLIDUS	⹊ */, +		0x2E4B /* TRIPLE DAGGER	⹋ */, +		0x2E4C /* MEDIEVAL COMMA	⹌ */, +		0x2E4D /* PARAGRAPHUS MARK	⹍ */, +		0x2E4E /* PUNCTUS ELEVATUS MARK	⹎ */, +		0x2E4F /* CORNISH VERSE DIVIDER	⹏ */, +		0x3001 /* IDEOGRAPHIC COMMA	、 */, +		0x3002 /* IDEOGRAPHIC FULL STOP	。 */, +		0x3003 /* DITTO MARK	〃 */, +		0x303D /* PART ALTERNATION MARK	〽 */, +		0x30FB /* KATAKANA MIDDLE DOT	・ */, +		0xA4FE /* LISU PUNCTUATION COMMA	꓾ */, +		0xA4FF /* LISU PUNCTUATION FULL STOP	꓿ */, +		0xA60D /* VAI COMMA	꘍ */, +		0xA60E /* VAI FULL STOP	꘎ */, +		0xA60F /* VAI QUESTION MARK	꘏ */, +		0xA673 /* SLAVONIC ASTERISK	꙳ */, +		0xA67E /* CYRILLIC KAVYKA	꙾ */, +		0xA6F2 /* BAMUM NJAEMLI	꛲ */, +		0xA6F3 /* BAMUM FULL STOP	꛳ */, +		0xA6F4 /* BAMUM COLON	꛴ */, +		0xA6F5 /* BAMUM COMMA	꛵ */, +		0xA6F6 /* BAMUM SEMICOLON	꛶ */, +		0xA6F7 /* BAMUM QUESTION MARK	꛷ */, +		0xA874 /* PHAGS-PA SINGLE HEAD MARK	꡴ */, +		0xA875 /* PHAGS-PA DOUBLE HEAD MARK	꡵ */, +		0xA876 /* PHAGS-PA MARK SHAD	꡶ */, +		0xA877 /* PHAGS-PA MARK DOUBLE SHAD	꡷ */, +		0xA8CE /* SAURASHTRA DANDA	꣎ */, +		0xA8CF /* SAURASHTRA DOUBLE DANDA	꣏ */, +		0xA8F8 /* DEVANAGARI SIGN PUSHPIKA	꣸ */, +		0xA8F9 /* DEVANAGARI GAP FILLER	꣹ */, +		0xA8FA /* DEVANAGARI CARET	꣺ */, +		0xA8FC /* DEVANAGARI SIGN SIDDHAM	꣼ */, +		0xA92E /* KAYAH LI SIGN CWI	꤮ */, +		0xA92F /* KAYAH LI SIGN SHYA	꤯ */, +		0xA95F /* REJANG SECTION MARK	꥟ */, +		0xA9C1 /* JAVANESE LEFT RERENGGAN	꧁ */, +		0xA9C2 /* JAVANESE RIGHT RERENGGAN	꧂ */, +		0xA9C3 /* JAVANESE PADA ANDAP	꧃ */, +		0xA9C4 /* JAVANESE PADA MADYA	꧄ */, +		0xA9C5 /* JAVANESE PADA LUHUR	꧅ */, +		0xA9C6 /* JAVANESE PADA WINDU	꧆ */, +		0xA9C7 /* JAVANESE PADA PANGKAT	꧇ */, +		0xA9C8 /* JAVANESE PADA LINGSA	꧈ */, +		0xA9C9 /* JAVANESE PADA LUNGSI	꧉ */, +		0xA9CA /* JAVANESE PADA ADEG	꧊ */, +		0xA9CB /* JAVANESE PADA ADEG ADEG	꧋ */, +	
	0xA9CC /* JAVANESE PADA PISELEH	꧌ */, +		0xA9CD /* JAVANESE TURNED PADA PISELEH	꧍ */, +		0xA9DE /* JAVANESE PADA TIRTA TUMETES	꧞ */, +		0xA9DF /* JAVANESE PADA ISEN-ISEN	꧟ */, +		0xAA5C /* CHAM PUNCTUATION SPIRAL	꩜ */, +		0xAA5D /* CHAM PUNCTUATION DANDA	꩝ */, +		0xAA5E /* CHAM PUNCTUATION DOUBLE DANDA	꩞ */, +		0xAA5F /* CHAM PUNCTUATION TRIPLE DANDA	꩟ */, +		0xAADE /* TAI VIET SYMBOL HO HOI	꫞ */, +		0xAADF /* TAI VIET SYMBOL KOI KOI	꫟ */, +		0xAAF0 /* MEETEI MAYEK CHEIKHAN	꫰ */, +		0xAAF1 /* MEETEI MAYEK AHANG KHUDAM	꫱ */, +		0xABEB /* MEETEI MAYEK CHEIKHEI	꯫ */, +		0xFE10 /* PRESENTATION FORM FOR VERTICAL COMMA	︐ */, +		0xFE11 /* PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC COMMA	︑ */, +		0xFE12 /* PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC FULL STOP	︒ */, +		0xFE13 /* PRESENTATION FORM FOR VERTICAL COLON	︓ */, +		0xFE14 /* PRESENTATION FORM FOR VERTICAL SEMICOLON	︔ */, +		0xFE15 /* PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK	︕ */, +		0xFE16 /* PRESENTATION FORM FOR VERTICAL QUESTION MARK	︖ */, +		0xFE19 /* PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS	︙ */, +		0xFE30 /* PRESENTATION FORM FOR VERTICAL TWO DOT LEADER	︰ */, +		0xFE45 /* SESAME DOT	﹅ */, +		0xFE46 /* WHITE SESAME DOT	﹆ */, +		0xFE49 /* DASHED OVERLINE	﹉ */, +		0xFE4A /* CENTRELINE OVERLINE	﹊ */, +		0xFE4B /* WAVY OVERLINE	﹋ */, +		0xFE4C /* DOUBLE WAVY OVERLINE	﹌ */, +		0xFE50 /* SMALL COMMA	﹐ */, +		0xFE51 /* SMALL IDEOGRAPHIC COMMA	﹑ */, +		0xFE52 /* SMALL FULL STOP	﹒ */, +		0xFE54 /* SMALL SEMICOLON	﹔ */, +		0xFE55 /* SMALL COLON	﹕ */, +		0xFE56 /* SMALL QUESTION MARK	﹖ */, +		0xFE57 /* SMALL EXCLAMATION MARK	﹗ */, +		0xFE5F /* SMALL NUMBER SIGN	﹟ */, +		0xFE60 /* SMALL AMPERSAND	﹠ */, +		0xFE61 /* SMALL ASTERISK	﹡ */, +		0xFE68 /* SMALL REVERSE SOLIDUS	﹨ */, +		0xFE6A /* SMALL PERCENT SIGN	﹪ */, +		0xFE6B /* SMALL COMMERCIAL AT	﹫ */, +		0xFF01 /* FULLWIDTH EXCLAMATION MARK	! 
*/, +		0xFF02 /* FULLWIDTH QUOTATION MARK	" */, +		0xFF03 /* FULLWIDTH NUMBER SIGN	# */, +		0xFF05 /* FULLWIDTH PERCENT SIGN	% */, +		0xFF06 /* FULLWIDTH AMPERSAND	& */, +		0xFF07 /* FULLWIDTH APOSTROPHE	' */, +		0xFF0A /* FULLWIDTH ASTERISK	* */, +		0xFF0C /* FULLWIDTH COMMA	, */, +		0xFF0E /* FULLWIDTH FULL STOP	. */, +		0xFF0F /* FULLWIDTH SOLIDUS	/ */, +		0xFF1A /* FULLWIDTH COLON	: */, +		0xFF1B /* FULLWIDTH SEMICOLON	; */, +		0xFF1F /* FULLWIDTH QUESTION MARK	? */, +		0xFF20 /* FULLWIDTH COMMERCIAL AT	@ */, +		0xFF3C /* FULLWIDTH REVERSE SOLIDUS	\ */, +		0xFF61 /* HALFWIDTH IDEOGRAPHIC FULL STOP	。 */, +		0xFF64 /* HALFWIDTH IDEOGRAPHIC COMMA	、 */, +		0xFF65 /* HALFWIDTH KATAKANA MIDDLE DOT	・ */, +		0x10100 /* AEGEAN WORD SEPARATOR LINE	𐄀 */, +		0x10101 /* AEGEAN WORD SEPARATOR DOT	𐄁 */, +		0x10102 /* AEGEAN CHECK MARK	𐄂 */, +		0x1039F /* UGARITIC WORD DIVIDER	𐎟 */, +		0x103D0 /* OLD PERSIAN WORD DIVIDER	𐏐 */, +		0x1056F /* CAUCASIAN ALBANIAN CITATION MARK	𐕯 */, +		0x10857 /* IMPERIAL ARAMAIC SECTION SIGN	𐡗 */, +		0x1091F /* PHOENICIAN WORD SEPARATOR	𐤟 */, +		0x1093F /* LYDIAN TRIANGULAR MARK	𐤿 */, +		0x10A50 /* KHAROSHTHI PUNCTUATION DOT	𐩐 */, +		0x10A51 /* KHAROSHTHI PUNCTUATION SMALL CIRCLE	𐩑 */, +		0x10A52 /* KHAROSHTHI PUNCTUATION CIRCLE	𐩒 */, +		0x10A53 /* KHAROSHTHI PUNCTUATION CRESCENT BAR	𐩓 */, +		0x10A54 /* KHAROSHTHI PUNCTUATION MANGALAM	𐩔 */, +		0x10A55 /* KHAROSHTHI PUNCTUATION LOTUS	𐩕 */, +		0x10A56 /* KHAROSHTHI PUNCTUATION DANDA	𐩖 */, +		0x10A57 /* KHAROSHTHI PUNCTUATION DOUBLE DANDA	𐩗 */, +		0x10A58 /* KHAROSHTHI PUNCTUATION LINES	𐩘 */, +		0x10A7F /* OLD SOUTH ARABIAN NUMERIC INDICATOR	𐩿 */, +		0x10AF0 /* MANICHAEAN PUNCTUATION STAR	𐫰 */, +		0x10AF1 /* MANICHAEAN PUNCTUATION FLEURON	𐫱 */, +		0x10AF2 /* MANICHAEAN PUNCTUATION DOUBLE DOT WITHIN DOT	𐫲 */, +		0x10AF3 /* MANICHAEAN PUNCTUATION DOT WITHIN DOT	𐫳 */, +		0x10AF4 /* MANICHAEAN PUNCTUATION DOT	𐫴 */, +		0x10AF5 /* MANICHAEAN PUNCTUATION TWO DOTS	𐫵 */, +		0x10AF6 /* MANICHAEAN PUNCTUATION 
LINE FILLER	𐫶 */, +		0x10B39 /* AVESTAN ABBREVIATION MARK	𐬹 */, +		0x10B3A /* TINY TWO DOTS OVER ONE DOT PUNCTUATION	𐬺 */, +		0x10B3B /* SMALL TWO DOTS OVER ONE DOT PUNCTUATION	𐬻 */, +		0x10B3C /* LARGE TWO DOTS OVER ONE DOT PUNCTUATION	𐬼 */, +		0x10B3D /* LARGE ONE DOT OVER TWO DOTS PUNCTUATION	𐬽 */, +		0x10B3E /* LARGE TWO RINGS OVER ONE RING PUNCTUATION	𐬾 */, +		0x10B3F /* LARGE ONE RING OVER TWO RINGS PUNCTUATION	𐬿 */, +		0x10B99 /* PSALTER PAHLAVI SECTION MARK	𐮙 */, +		0x10B9A /* PSALTER PAHLAVI TURNED SECTION MARK	𐮚 */, +		0x10B9B /* PSALTER PAHLAVI FOUR DOTS WITH CROSS	𐮛 */, +		0x10B9C /* PSALTER PAHLAVI FOUR DOTS WITH DOT	𐮜 */, +		0x10F55 /* SOGDIAN PUNCTUATION TWO VERTICAL BARS	𐽕 */, +		0x10F56 /* SOGDIAN PUNCTUATION TWO VERTICAL BARS WITH DOTS	𐽖 */, +		0x10F57 /* SOGDIAN PUNCTUATION CIRCLE WITH DOT	𐽗 */, +		0x10F58 /* SOGDIAN PUNCTUATION TWO CIRCLES WITH DOTS	𐽘 */, +		0x10F59 /* SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT	𐽙 */, +		0x11047 /* BRAHMI DANDA	𑁇 */, +		0x11048 /* BRAHMI DOUBLE DANDA	𑁈 */, +		0x11049 /* BRAHMI PUNCTUATION DOT	𑁉 */, +		0x1104A /* BRAHMI PUNCTUATION DOUBLE DOT	𑁊 */, +		0x1104B /* BRAHMI PUNCTUATION LINE	𑁋 */, +		0x1104C /* BRAHMI PUNCTUATION CRESCENT BAR	𑁌 */, +		0x1104D /* BRAHMI PUNCTUATION LOTUS	𑁍 */, +		0x110BB /* KAITHI ABBREVIATION SIGN	𑂻 */, +		0x110BC /* KAITHI ENUMERATION SIGN	𑂼 */, +		0x110BE /* KAITHI SECTION MARK	𑂾 */, +		0x110BF /* KAITHI DOUBLE SECTION MARK	𑂿 */, +		0x110C0 /* KAITHI DANDA	𑃀 */, +		0x110C1 /* KAITHI DOUBLE DANDA	𑃁 */, +		0x11140 /* CHAKMA SECTION MARK	𑅀 */, +		0x11141 /* CHAKMA DANDA	𑅁 */, +		0x11142 /* CHAKMA DOUBLE DANDA	𑅂 */, +		0x11143 /* CHAKMA QUESTION MARK	𑅃 */, +		0x11174 /* MAHAJANI ABBREVIATION SIGN	𑅴 */, +		0x11175 /* MAHAJANI SECTION MARK	𑅵 */, +		0x111C5 /* SHARADA DANDA	𑇅 */, +		0x111C6 /* SHARADA DOUBLE DANDA	𑇆 */, +		0x111C7 /* SHARADA ABBREVIATION SIGN	𑇇 */, +		0x111C8 /* SHARADA SEPARATOR	𑇈 */, +		0x111CD /* SHARADA SUTRA MARK	𑇍 */, +		0x111DB /* SHARADA SIGN SIDDHAM	𑇛 */, +		0x111DD 
/* SHARADA CONTINUATION SIGN	𑇝 */, +		0x111DE /* SHARADA SECTION MARK-1	𑇞 */, +		0x111DF /* SHARADA SECTION MARK-2	𑇟 */, +		0x11238 /* KHOJKI DANDA	𑈸 */, +		0x11239 /* KHOJKI DOUBLE DANDA	𑈹 */, +		0x1123A /* KHOJKI WORD SEPARATOR	𑈺 */, +		0x1123B /* KHOJKI SECTION MARK	𑈻 */, +		0x1123C /* KHOJKI DOUBLE SECTION MARK	𑈼 */, +		0x1123D /* KHOJKI ABBREVIATION SIGN	𑈽 */, +		0x112A9 /* MULTANI SECTION MARK	𑊩 */, +		0x1144B /* NEWA DANDA	𑑋 */, +		0x1144C /* NEWA DOUBLE DANDA	𑑌 */, +		0x1144D /* NEWA COMMA	𑑍 */, +		0x1144E /* NEWA GAP FILLER	𑑎 */, +		0x1144F /* NEWA ABBREVIATION SIGN	𑑏 */, +		0x1145B /* NEWA PLACEHOLDER MARK	𑑛 */, +		0x1145D /* NEWA INSERTION SIGN	𑑝 */, +		0x114C6 /* TIRHUTA ABBREVIATION SIGN	𑓆 */, +		0x115C1 /* SIDDHAM SIGN SIDDHAM	𑗁 */, +		0x115C2 /* SIDDHAM DANDA	𑗂 */, +		0x115C3 /* SIDDHAM DOUBLE DANDA	𑗃 */, +		0x115C4 /* SIDDHAM SEPARATOR DOT	𑗄 */, +		0x115C5 /* SIDDHAM SEPARATOR BAR	𑗅 */, +		0x115C6 /* SIDDHAM REPETITION MARK-1	𑗆 */, +		0x115C7 /* SIDDHAM REPETITION MARK-2	𑗇 */, +		0x115C8 /* SIDDHAM REPETITION MARK-3	𑗈 */, +		0x115C9 /* SIDDHAM END OF TEXT MARK	𑗉 */, +		0x115CA /* SIDDHAM SECTION MARK WITH TRIDENT AND U-SHAPED ORNAMENTS	𑗊 */, +		0x115CB /* SIDDHAM SECTION MARK WITH TRIDENT AND DOTTED CRESCENTS	𑗋 */, +		0x115CC /* SIDDHAM SECTION MARK WITH RAYS AND DOTTED CRESCENTS	𑗌 */, +		0x115CD /* SIDDHAM SECTION MARK WITH RAYS AND DOTTED DOUBLE CRESCENTS	𑗍 */, +		0x115CE /* SIDDHAM SECTION MARK WITH RAYS AND DOTTED TRIPLE CRESCENTS	𑗎 */, +		0x115CF /* SIDDHAM SECTION MARK DOUBLE RING	𑗏 */, +		0x115D0 /* SIDDHAM SECTION MARK DOUBLE RING WITH RAYS	𑗐 */, +		0x115D1 /* SIDDHAM SECTION MARK WITH DOUBLE CRESCENTS	𑗑 */, +		0x115D2 /* SIDDHAM SECTION MARK WITH TRIPLE CRESCENTS	𑗒 */, +		0x115D3 /* SIDDHAM SECTION MARK WITH QUADRUPLE CRESCENTS	𑗓 */, +		0x115D4 /* SIDDHAM SECTION MARK WITH SEPTUPLE CRESCENTS	𑗔 */, +		0x115D5 /* SIDDHAM SECTION MARK WITH CIRCLES AND RAYS	𑗕 */, +		0x115D6 /* SIDDHAM SECTION MARK WITH CIRCLES AND TWO ENCLOSURES	𑗖 */, +		
0x115D7 /* SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES	𑗗 */, +		0x11641 /* MODI DANDA	𑙁 */, +		0x11642 /* MODI DOUBLE DANDA	𑙂 */, +		0x11643 /* MODI ABBREVIATION SIGN	𑙃 */, +		0x11660 /* MONGOLIAN BIRGA WITH ORNAMENT	𑙠 */, +		0x11661 /* MONGOLIAN ROTATED BIRGA	𑙡 */, +		0x11662 /* MONGOLIAN DOUBLE BIRGA WITH ORNAMENT	𑙢 */, +		0x11663 /* MONGOLIAN TRIPLE BIRGA WITH ORNAMENT	𑙣 */, +		0x11664 /* MONGOLIAN BIRGA WITH DOUBLE ORNAMENT	𑙤 */, +		0x11665 /* MONGOLIAN ROTATED BIRGA WITH ORNAMENT	𑙥 */, +		0x11666 /* MONGOLIAN ROTATED BIRGA WITH DOUBLE ORNAMENT	𑙦 */, +		0x11667 /* MONGOLIAN INVERTED BIRGA	𑙧 */, +		0x11668 /* MONGOLIAN INVERTED BIRGA WITH DOUBLE ORNAMENT	𑙨 */, +		0x11669 /* MONGOLIAN SWIRL BIRGA	𑙩 */, +		0x1166A /* MONGOLIAN SWIRL BIRGA WITH ORNAMENT	𑙪 */, +		0x1166B /* MONGOLIAN SWIRL BIRGA WITH DOUBLE ORNAMENT	𑙫 */, +		0x1166C /* MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT	𑙬 */, +		0x1173C /* AHOM SIGN SMALL SECTION	𑜼 */, +		0x1173D /* AHOM SIGN SECTION	𑜽 */, +		0x1173E /* AHOM SIGN RULAI	𑜾 */, +		0x1183B /* DOGRA ABBREVIATION SIGN	𑠻 */, +		0x119E2 /* NANDINAGARI SIGN SIDDHAM	𑧢 */, +		0x11A3F /* ZANABAZAR SQUARE INITIAL HEAD MARK	𑨿 */, +		0x11A40 /* ZANABAZAR SQUARE CLOSING HEAD MARK	𑩀 */, +		0x11A41 /* ZANABAZAR SQUARE MARK TSHEG	𑩁 */, +		0x11A42 /* ZANABAZAR SQUARE MARK SHAD	𑩂 */, +		0x11A43 /* ZANABAZAR SQUARE MARK DOUBLE SHAD	𑩃 */, +		0x11A44 /* ZANABAZAR SQUARE MARK LONG TSHEG	𑩄 */, +		0x11A45 /* ZANABAZAR SQUARE INITIAL DOUBLE-LINED HEAD MARK	𑩅 */, +		0x11A46 /* ZANABAZAR SQUARE CLOSING DOUBLE-LINED HEAD MARK	𑩆 */, +		0x11A9A /* SOYOMBO MARK TSHEG	𑪚 */, +		0x11A9B /* SOYOMBO MARK SHAD	𑪛 */, +		0x11A9C /* SOYOMBO MARK DOUBLE SHAD	𑪜 */, +		0x11A9E /* SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME	𑪞 */, +		0x11A9F /* SOYOMBO HEAD MARK WITH MOON AND SUN AND FLAME	𑪟 */, +		0x11AA0 /* SOYOMBO HEAD MARK WITH MOON AND SUN	𑪠 */, +		0x11AA1 /* SOYOMBO TERMINAL MARK-1	𑪡 */, +		0x11AA2 /* SOYOMBO TERMINAL MARK-2	𑪢 */, +		0x11C41 /* BHAIKSUKI 
DANDA	𑱁 */, +		0x11C42 /* BHAIKSUKI DOUBLE DANDA	𑱂 */, +		0x11C43 /* BHAIKSUKI WORD SEPARATOR	𑱃 */, +		0x11C44 /* BHAIKSUKI GAP FILLER-1	𑱄 */, +		0x11C45 /* BHAIKSUKI GAP FILLER-2	𑱅 */, +		0x11C70 /* MARCHEN HEAD MARK	𑱰 */, +		0x11C71 /* MARCHEN MARK SHAD	𑱱 */, +		0x11EF7 /* MAKASAR PASSIMBANG	𑻷 */, +		0x11EF8 /* MAKASAR END OF SECTION	𑻸 */, +		0x11FFF /* TAMIL PUNCTUATION END OF TEXT	𑿿 */, +		0x12470 /* CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER	𒑰 */, +		0x12471 /* CUNEIFORM PUNCTUATION SIGN VERTICAL COLON	𒑱 */, +		0x12472 /* CUNEIFORM PUNCTUATION SIGN DIAGONAL COLON	𒑲 */, +		0x12473 /* CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON	𒑳 */, +		0x12474 /* CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON	𒑴 */, +		0x16A6E /* MRO DANDA	𖩮 */, +		0x16A6F /* MRO DOUBLE DANDA	𖩯 */, +		0x16AF5 /* BASSA VAH FULL STOP	𖫵 */, +		0x16B37 /* PAHAWH HMONG SIGN VOS THOM	𖬷 */, +		0x16B38 /* PAHAWH HMONG SIGN VOS TSHAB CEEB	𖬸 */, +		0x16B39 /* PAHAWH HMONG SIGN CIM CHEEM	𖬹 */, +		0x16B3A /* PAHAWH HMONG SIGN VOS THIAB	𖬺 */, +		0x16B3B /* PAHAWH HMONG SIGN VOS FEEM	𖬻 */, +		0x16B44 /* PAHAWH HMONG SIGN XAUS	𖭄 */, +		0x16E97 /* MEDEFAIDRIN COMMA	𖺗 */, +		0x16E98 /* MEDEFAIDRIN FULL STOP	𖺘 */, +		0x16E99 /* MEDEFAIDRIN SYMBOL AIVA	𖺙 */, +		0x16E9A /* MEDEFAIDRIN EXCLAMATION OH	𖺚 */, +		0x16FE2 /* OLD CHINESE HOOK MARK	𖿢 */, +		0x1BC9F /* DUPLOYAN PUNCTUATION CHINOOK FULL STOP	𛲟 */, +		0x1DA87 /* SIGNWRITING COMMA	𝪇 */, +		0x1DA88 /* SIGNWRITING FULL STOP	𝪈 */, +		0x1DA89 /* SIGNWRITING SEMICOLON	𝪉 */, +		0x1DA8A /* SIGNWRITING COLON	𝪊 */, +		0x1DA8B /* SIGNWRITING PARENTHESIS	𝪋 */, +		0x1E95E /* ADLAM INITIAL EXCLAMATION MARK	𞥞 */, +		0x1E95F /* ADLAM INITIAL QUESTION MARK */, +	] +) diff --git a/v_windows/v/vlib/encoding/utf8/utf8_util_test.v b/v_windows/v/vlib/encoding/utf8/utf8_util_test.v new file mode 100644 index 0000000..f09cb76 --- /dev/null +++ b/v_windows/v/vlib/encoding/utf8/utf8_util_test.v @@ -0,0 +1,66 @@
import encoding.utf8

// test_utf8_util exercises case conversion, rune counting, western and global
// punctuation detection, and code point extraction from `encoding.utf8`.
fn test_utf8_util() {
	// string test
	// The active fixture is 15 runes encoded in 24 bytes (asserted below).
	// The commented-out tails belong to a longer, currently disabled fixture
	// (29 runes, 49 raw bytes).
	src := 'ăĂ ôÔ testo 怔' //_\u1E5A\u1E5B<=>\u1F49\u1F41<=>\u0128\u012a\u012c"
	src_upper := 'ĂĂ ÔÔ TESTO Æ€”' //_\u1E5A\u1E5A<=>\u1F49\u1F49<=>\u0128\u012A\u012C"
	src_lower := 'ăă ôô testo 怔' //_\u1E5B\u1E5B<=>\u1F41\u1F41<=>\u0129\u012B\u012D"
	assert utf8.to_upper(src) == src_upper
	assert utf8.to_lower(src) == src_lower

	// Cyrillic letters change case; ASCII digits and '{' pass through unchanged.
	assert utf8.to_upper('абвёabc12{') == 'АБВЁABC12{'
	assert utf8.to_lower('АБВЁABC12{') == 'абвёabc12{'

	// test len function: utf8.len counts runes, string.len counts bytes
	assert utf8.len('') == 0
	assert utf8.len('pippo') == 5
	assert utf8.len(src) == 15
	assert src.len == 24

	// western punctuation
	// Byte offsets in `a`: '.'=0, '?'=4, 'ò'=8..9, 'à'=10..11, 'è'=12..13, '.'=14.
	a := '.abc?abcòàè.'
	assert utf8.is_punct(a, 0)
	assert !utf8.is_punct('b', 0)
	assert utf8.is_uchar_punct(0x002E)
	assert utf8.is_punct(a, 4) // ?
	assert utf8.is_punct(a, 14) // last .
	assert !utf8.is_punct(a, 12) // è
	println('OK western')

	// global punctuation
	// Byte offsets in `b`: '.'=0, 'Ă'=1..2, 'Ă'=3..4, 'a'=5, '.'=6.
	b := '.ĂĂa. ÔÔ TESTO Æ€'
	assert utf8.is_global_punct(b, 0)
	assert utf8.is_global_punct('.', 0)
	// Check the code point form of the *global* predicate here; the western
	// one (is_uchar_punct) is already covered in the section above.
	assert utf8.is_uchar_global_punct(0x002E)
	assert utf8.is_global_punct(b, 6) // second .
	assert !utf8.is_global_punct(b, 1) // first byte of Ă
	assert !utf8.is_global_punct(b, 5) // a

	// test utility functions
	assert utf8.get_uchar(b, 0) == 0x002E
}

// test_raw_indexing checks that `utf8.raw_index` is expected to address the
// string by rune rather than by byte, for multi-byte and ASCII characters alike.
fn test_raw_indexing() {
	a := '我是V Lang!'

	// Expected rune at each index: two non-ASCII runes followed by ASCII ones.
	expected := ['我', '是', 'V', ' ', 'L', 'a', 'n', 'g', '!']
	for i, ch in expected {
		assert utf8.raw_index(a, i) == ch
	}
}

// test_reversed checks that `utf8.reverse` is expected to reverse rune by rune,
// leaving each multi-byte character intact.
fn test_reversed() {
	a := '我是V Lang!'
	b := '你好世界hello world'
	assert utf8.reverse(a) == '!gnaL V是我'
	assert utf8.reverse(b) == 'dlrow olleh界世好你'
}
