diff options
Diffstat (limited to 'v_windows/v/vlib/encoding/utf8/utf8.v')
-rw-r--r-- | v_windows/v/vlib/encoding/utf8/utf8.v | 88 |
1 files changed, 88 insertions, 0 deletions
// File: v_windows/v/vlib/encoding/utf8/utf8.v (added as a new file, mode 100644).
module utf8

// Utf8State is the running state of the byte-at-a-time UTF-8 validator.
struct Utf8State {
mut:
	// index selects the lead-byte class currently being matched:
	//   0    - between sequences (ASCII is accepted directly)
	//   1    - two-byte sequence,   lead 0xC2..0xDF
	//   2..5 - three-byte sequence, lead 0xE0 / 0xE1..0xEC / 0xED / 0xEE..0xEF
	//   6..8 - four-byte sequence,  lead 0xF0 / 0xF1..0xF3 / 0xF4
	index int
	// subindex is the number of bytes already accepted in the current sequence.
	subindex int
	// failed is set as soon as a byte cannot continue the input as UTF-8.
	failed bool
}

// validate_str reports whether `str` holds well-formed UTF-8.
// It forwards the string's data pointer and length to `validate`.
pub fn validate_str(str string) bool {
	return validate(str.str, str.len)
}

// validate reports whether the `len` bytes starting at `data` are
// well-formed UTF-8.
// NOTE: scanning stops at the first NUL byte, so any bytes that follow
// a NUL are not examined. A sequence that is still incomplete when the
// scan stops (at a NUL or at `len`) makes the result false.
pub fn validate(data &byte, len int) bool {
	mut state := Utf8State{}
	for i := 0; i < len; i++ {
		s := unsafe { data[i] }
		if s == 0 {
			break
		}
		state.next_state(s)
		if state.failed {
			return false
		}
	}
	// subindex > 0 here means the input ended in the middle of a sequence.
	return !state.failed && state.subindex <= 0
}

// seq feeds one byte to the sequence class selected by `s.index`.
//   r0      - the byte is a valid lead byte for this class
//   r1      - the byte is a valid second byte for this class
//   is_tail - the byte is a generic continuation byte (0x80..0xBF)
// It returns true when the byte has been consumed (either accepted or
// rejected via `s.failed`). It returns false only when the byte is not
// this class's lead byte; in that case `s.index` has been advanced so
// that the caller can try the next class.
fn (mut s Utf8State) seq(r0 bool, r1 bool, is_tail bool) bool {
	// Positions that have their own range check: the lead byte, the second
	// byte of three/four-byte sequences, and the third byte of four-byte
	// sequences. Every other position is the final byte of the sequence.
	if s.subindex == 0 || (s.index > 1 && s.subindex == 1) || (s.index >= 6 && s.subindex == 2) {
		if (s.subindex == 0 && r0) || (s.subindex == 1 && r1) || (s.subindex == 2 && is_tail) {
			s.subindex++
			return true
		}
		if s.subindex > 0 {
			// A byte inside an unfinished sequence failed its range check.
			// It must not be retried as the lead byte of a later class:
			// doing so would silently drop the unfinished sequence and
			// accept ill-formed input such as E0 E1 80 80.
			s.failed = true
			return true
		}
	} else {
		// Final byte of the sequence: it has to be a continuation byte.
		s.failed = !is_tail
		if is_tail {
			// Sequence complete, go back to the "between sequences" state.
			s.index = 0
			s.subindex = 0
		}
		return true
	}
	// Not this class's lead byte: let the caller try the next class.
	s.index++
	s.subindex = 0
	return false
}

// next_state advances the validator by one byte `c`.
// On an invalid byte `s.failed` is set; the caller is expected to stop
// feeding bytes once that happens.
fn (mut s Utf8State) next_state(c byte) {
	// sequence 1 (single byte, 0x00..0x7F)
	if s.index == 0 {
		if c <= 0x7F {
			return
		}
		s.index++
		s.subindex = 0
	}
	is_tail := c >= 0x80 && c <= 0xBF
	// sequence 2
	if s.index == 1 && s.seq(c >= 0xC2 && c <= 0xDF, false, is_tail) {
		return
	}
	// sequence 3
	if s.index == 2 && s.seq(c == 0xE0, c >= 0xA0 && c <= 0xBF, is_tail) {
		return
	}
	if s.index == 3 && s.seq(c >= 0xE1 && c <= 0xEC, c >= 0x80 && c <= 0xBF, is_tail) {
		return
	}
	if s.index == 4 && s.seq(c == 0xED, c >= 0x80 && c <= 0x9F, is_tail) {
		return
	}
	if s.index == 5 && s.seq(c >= 0xEE && c <= 0xEF, c >= 0x80 && c <= 0xBF, is_tail) {
		return
	}
	// sequence 4
	if s.index == 6 && s.seq(c == 0xF0, c >= 0x90 && c <= 0xBF, is_tail) {
		return
	}
	if s.index == 7 && s.seq(c >= 0xF1 && c <= 0xF3, c >= 0x80 && c <= 0xBF, is_tail) {
		return
	}
	if s.index == 8 && s.seq(c == 0xF4, c >= 0x80 && c <= 0x8F, is_tail) {
		return
	}
	// No class accepts `c` as a lead byte (a stray continuation byte,
	// 0xC0, 0xC1, or 0xF5..0xFF).
	s.failed = true
}