aboutsummaryrefslogtreecommitdiff
path: root/v_windows/v/vlib/encoding/utf8/utf8.v
diff options
context:
space:
mode:
Diffstat (limited to 'v_windows/v/vlib/encoding/utf8/utf8.v')
-rw-r--r--v_windows/v/vlib/encoding/utf8/utf8.v88
1 files changed, 88 insertions, 0 deletions
diff --git a/v_windows/v/vlib/encoding/utf8/utf8.v b/v_windows/v/vlib/encoding/utf8/utf8.v
new file mode 100644
index 0000000..88c598f
--- /dev/null
+++ b/v_windows/v/vlib/encoding/utf8/utf8.v
@@ -0,0 +1,88 @@
+module utf8
+
+struct Utf8State { // validator state carried from one input byte to the next
+mut:
+ index int // row currently being tried: 0 = between characters, 1..8 = the lead-byte rows tested in next_state
+ subindex int // number of bytes already accepted in the current multi-byte sequence
+ failed bool // set when a byte cannot be accepted; validate returns false as soon as it sees this
+}
+
+pub fn validate_str(str string) bool { // validate_str runs validate over the bytes of `str` and returns its result
+ return validate(str.str, str.len) // passes the string's byte pointer and byte length
+}
+
+pub fn validate(data &byte, len int) bool { // validate returns true when the scanned bytes of `data` are accepted by the Utf8State machine
+ mut state := Utf8State{}
+ for i := 0; i < len; i++ { // looks at no more than `len` bytes
+ s := unsafe { data[i] } // raw pointer indexing; NOTE(review): assumes `data` has at least `len` readable bytes - confirm with callers
+ if s == 0 {
+ break // a zero byte ends the scan early; bytes after it are never checked
+ }
+ state.next_state(s)
+ if state.failed {
+ return false // stop at the first byte the state machine rejects
+ }
+ }
+ return !state.failed && state.subindex <= 0 // a sequence still open when the scan ends (subindex > 0) is invalid
+}
+
+fn (mut s Utf8State) seq(r0 bool, r1 bool, is_tail bool) bool { // seq tests the current byte against row s.index; true = byte handled (accepted or failed), false = row skipped and s.index advanced
+ if s.subindex == 0 || (s.index > 1 && s.subindex == 1) || (s.index >= 6 && s.subindex == 2) { // positions that are not the last byte: lead byte, 2nd byte of rows 2+, 3rd byte of rows 6+
+ if (s.subindex == 0 && r0) || (s.subindex == 1 && r1) || (s.subindex == 2 && is_tail) { // r0 gates the lead byte, r1 the 2nd byte, is_tail the 3rd byte
+ s.subindex++ // byte accepted, the sequence continues
+ return true
+ }
+ } else { // last byte of the sequence for this row
+ s.failed = true // treated as a failure ...
+ if is_tail { // ... unless it is a tail byte, which completes the character and resets the state
+ s.index = 0
+ s.subindex = 0
+ s.failed = false
+ }
+ return true
+ }
+ s.index++ // this row rejected the byte: move to the next row
+ s.subindex = 0 // the next row starts again from its lead-byte check
+ return false
+}
+
+fn (mut s Utf8State) next_state(c byte) { // next_state feeds one byte `c` into the state machine, updating index, subindex and failed
+ // sequence 1: single bytes 0x00..0x7F, accepted only between characters (index == 0)
+ if s.index == 0 {
+ if (c >= 0x00 + 1 && c <= 0x7F) || c == 0x00 { // the two tests together cover every value 0x00..0x7F
+ return
+ }
+ s.index++ // not a single-byte character: start trying the multi-byte rows
+ s.subindex = 0
+ }
+ is_tail := c >= 0x80 && c <= 0xBF // tail-byte range passed to every row below
+ // sequence 2: lead byte 0xC2..0xDF followed by one tail byte
+ if s.index == 1 && s.seq(c >= 0xC2 && c <= 0xDF, false, is_tail) { // r1 is never consulted while index == 1, so false is passed
+ return
+ }
+ // sequence 3: four rows, each a lead byte, a range-restricted second byte, then one tail byte
+ if s.index == 2 && s.seq(c == 0xE0, c >= 0xA0 && c <= 0xBF, is_tail) { // when seq returns false it has advanced s.index, so the next `if` tries the following row on the same byte
+ return
+ }
+ if s.index == 3 && s.seq(c >= 0xE1 && c <= 0xEC, c >= 0x80 && c <= 0xBF, is_tail) {
+ return
+ }
+ if s.index == 4 && s.seq(c == 0xED, c >= 0x80 && c <= 0x9F, is_tail) {
+ return
+ }
+ if s.index == 5 && s.seq(c >= 0xEE && c <= 0xEF, c >= 0x80 && c <= 0xBF, is_tail) {
+ return
+ }
+ // sequence 4: three rows, each a lead byte, a range-restricted second byte, then two tail bytes
+ if s.index == 6 && s.seq(c == 0xF0, c >= 0x90 && c <= 0xBF, is_tail) {
+ return
+ }
+ if s.index == 7 && s.seq(c >= 0xF1 && c <= 0xF3, c >= 0x80 && c <= 0xBF, is_tail) {
+ return
+ }
+ if s.index == 8 && s.seq(c == 0xF4, c >= 0x80 && c <= 0x8F, is_tail) {
+ return
+ }
+ // reached when no row above handled `c` (for example, no lead-byte range contains it): the input is invalid
+ s.failed = true
+}