Diffstat (limited to 'v_windows/v/old/vlib/x/json2/scanner.v')
-rw-r--r--    v_windows/v/old/vlib/x/json2/scanner.v    306
1 file changed, 306 insertions, 0 deletions
diff --git a/v_windows/v/old/vlib/x/json2/scanner.v b/v_windows/v/old/vlib/x/json2/scanner.v
new file mode 100644
index 0000000..473a83b
--- /dev/null
+++ b/v_windows/v/old/vlib/x/json2/scanner.v
@@ -0,0 +1,306 @@
+// Copyright (c) 2019-2021 Alexander Medvednikov. All rights reserved.
+// Use of this source code is governed by an MIT license
+// that can be found in the LICENSE file.
+module json2
+
+import strconv
+
+struct Scanner {
+mut:
+ text []byte
+ pos int
+ line int
+ col int
+}
+
+enum TokenKind {
+ none_
+ error
+ str_
+ float
+ int_
+ null
+ bool_
+ eof
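+	// the punctuation kinds below reuse the ASCII value of their character,
+	// so scan can cast a byte directly into a TokenKind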
+ comma = 44
+ colon = 58
+ lsbr = 91
+ rsbr = 93
+ lcbr = 123
+ rcbr = 125
+}
+
+struct Token {
+ lit []byte
+ kind TokenKind
+ line int
+ col int
+}
+
+const (
+ // list of characters commonly used in JSON.
+ char_list = [`{`, `}`, `[`, `]`, `,`, `:`]
+	// list of newline characters (tab included) to check when moving to a new position.
+	newlines = [`\r`, `\n`, `\t`]
+	// list of escapable characters that need to be escaped inside a JSON string.
+	// double quotes and forward slashes are excluded intentionally since
+	// they have their own separate checks in order to pass the
+	// JSON test suite (https://github.com/nst/JSONTestSuite/).
+ important_escapable_chars = [`\b`, `\f`, `\n`, `\r`, `\t`]
+ // list of valid unicode escapes aside from \u{4-hex digits}
+ valid_unicode_escapes = [`b`, `f`, `n`, `r`, `t`, `\\`, `"`, `/`]
+	// used for transforming escapes into valid unicode (e.g. n => \n)
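+	// (the map keys are the ASCII codes of the escape letters, e.g. 110 = `n`)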
+ unicode_transform_escapes = map{
+ 98: `\b`
+ 102: `\f`
+ 110: `\n`
+ 114: `\r`
+ 116: `\t`
+ 92: `\\`
+ 34: `"`
+ 47: `/`
+ }
+ exp_signs = [byte(`-`), `+`]
+)
+
+// move proceeds to the next position, skipping spaces and newline runs.
+fn (mut s Scanner) move() {
+ s.move_pos(true, true)
+}
+
+// move_pos_with_newlines is the same as move, but it only checks for newlines (spaces are not skipped).
+fn (mut s Scanner) move_pos_with_newlines() {
+ s.move_pos(false, true)
+}
+
+// move_pos advances the scanner's position and updates the line/column counters,
+// optionally collapsing runs of spaces and/or newlines.
+fn (mut s Scanner) move_pos(include_space bool, include_newlines bool) {
+ s.pos++
+ if s.pos < s.text.len {
+ if include_newlines && s.text[s.pos] in json2.newlines {
+ s.line++
+ s.col = 0
+ if s.text[s.pos] == `\r` && s.pos + 1 < s.text.len && s.text[s.pos + 1] == `\n` {
+ s.pos++
+ }
+ for s.pos < s.text.len && s.text[s.pos] in json2.newlines {
+ s.move()
+ }
+ } else if include_space && s.text[s.pos] == ` ` {
+ s.pos++
+ s.col++
+ for s.pos < s.text.len && s.text[s.pos] == ` ` {
+ s.move()
+ }
+ }
+ } else {
+ s.col++
+ }
+}
+
+// error returns an error token.
+fn (s Scanner) error(description string) Token {
+ return s.tokenize(description.bytes(), .error)
+}
+
+// tokenize returns a token based on the given lit and kind.
+fn (s Scanner) tokenize(lit []byte, kind TokenKind) Token {
+ return Token{
+ lit: lit
+ kind: kind
+ col: s.col
+ line: s.line
+ }
+}
+
+// text_scan scans and returns a string token.
+[manualfree]
+fn (mut s Scanner) text_scan() Token {
+ mut has_closed := false
+ mut chrs := []byte{}
+ for {
+ s.pos++
+ s.col++
+ if s.pos >= s.text.len {
+ break
+ }
+ ch := s.text[s.pos]
+ if (s.pos - 1 >= 0 && s.text[s.pos - 1] != `\\`) && ch == `"` {
+ has_closed = true
+ break
+ } else if (s.pos - 1 >= 0 && s.text[s.pos - 1] != `\\`)
+ && ch in json2.important_escapable_chars {
+ return s.error('character must be escaped with a backslash')
+ } else if (s.pos == s.text.len - 1 && ch == `\\`) || ch == byte(0) {
+ return s.error('invalid backslash escape')
+ } else if s.pos + 1 < s.text.len && ch == `\\` {
+ peek := s.text[s.pos + 1]
+ if peek in json2.valid_unicode_escapes {
+ chrs << json2.unicode_transform_escapes[int(peek)]
+ s.pos++
+ s.col++
+ continue
+ } else if peek == `u` {
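+			// e.g. the escape `\u00e9` is decoded into the UTF-8 bytes for `é`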
+ if s.pos + 5 < s.text.len {
+ s.pos++
+ s.col++
+ mut codepoint := []byte{}
+ codepoint_start := s.pos
+ for s.pos < s.text.len && s.pos < codepoint_start + 4 {
+ s.pos++
+ s.col++
+ if s.text[s.pos] == `"` {
+ break
+ } else if !s.text[s.pos].is_hex_digit() {
+ x := s.text[s.pos].ascii_str()
+ return s.error('`$x` is not a hex digit')
+ }
+ codepoint << s.text[s.pos]
+ }
+ if codepoint.len != 4 {
+ return s.error('unicode escape must have 4 hex digits')
+ }
+ val := u32(strconv.parse_uint(codepoint.bytestr(), 16, 32) or { 0 })
+ converted := utf32_to_str(val)
+ converted_bytes := converted.bytes()
+ chrs << converted_bytes
+ unsafe {
+ converted.free()
+ converted_bytes.free()
+ codepoint.free()
+ }
+ continue
+ } else {
+ return s.error('incomplete unicode escape')
+ }
+ } else if peek == `U` {
+ return s.error('unicode endpoints must be in lowercase `u`')
+ } else if peek == byte(229) {
+ return s.error('unicode endpoint not allowed')
+ } else {
+ return s.error('invalid backslash escape')
+ }
+ }
+ chrs << ch
+ }
+ tok := s.tokenize(chrs, .str_)
+ s.move()
+ if !has_closed {
+ return s.error('missing double quotes in string closing')
+ }
+ return tok
+}
+
+// num_scan scans and returns an int/float token.
+fn (mut s Scanner) num_scan() Token {
+	// analyze the JSON number structure:
+ // -[digit][?[dot][digit]][?[E/e][?-/+][digit]]
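+	// e.g. -12, 3.14 and 6.02e+23 all match this shape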
+ mut is_fl := false
+ mut dot_index := -1
+ mut digits := []byte{}
+ if s.text[s.pos] == `-` {
+ digits << `-`
+ if !s.text[s.pos + 1].is_digit() {
+ return s.invalid_token()
+ }
+ s.move_pos_with_newlines()
+ }
+ if s.text[s.pos] == `0` && (s.pos + 1 < s.text.len && s.text[s.pos + 1].is_digit()) {
+ return s.error('leading zeroes in a number are not allowed')
+ }
+ for s.pos < s.text.len && (s.text[s.pos].is_digit() || (!is_fl && s.text[s.pos] == `.`)) {
+ digits << s.text[s.pos]
+ if s.text[s.pos] == `.` {
+ is_fl = true
+ dot_index = digits.len - 1
+ }
+ s.move_pos_with_newlines()
+ }
+ if dot_index + 1 < s.text.len && digits[dot_index + 1..].len == 0 {
+ return s.error('invalid float')
+ }
+ if s.pos < s.text.len && (s.text[s.pos] == `e` || s.text[s.pos] == `E`) {
+ digits << s.text[s.pos]
+ s.move_pos_with_newlines()
+ if s.pos < s.text.len && s.text[s.pos] in json2.exp_signs {
+ digits << s.text[s.pos]
+ s.move_pos_with_newlines()
+ }
+ mut exp_digits_count := 0
+ for s.pos < s.text.len && s.text[s.pos].is_digit() {
+ digits << s.text[s.pos]
+ exp_digits_count++
+ s.move_pos_with_newlines()
+ }
+ if exp_digits_count == 0 {
+ return s.error('invalid exponent')
+ }
+ }
+ kind := if is_fl { TokenKind.float } else { TokenKind.int_ }
+ return s.tokenize(digits, kind)
+}
+
+// invalid_token returns an error token with the invalid token message.
+fn (s Scanner) invalid_token() Token {
+ if s.text[s.pos] >= 32 && s.text[s.pos] <= 126 {
+ x := s.text[s.pos].ascii_str()
+ return s.error('invalid token `$x`')
+ } else {
+ x := s.text[s.pos].str_escaped()
+ return s.error('invalid token `$x`')
+ }
+}
+
+// scan returns a token based on the scanner's current position.
+[manualfree]
+fn (mut s Scanner) scan() Token {
+ if s.pos < s.text.len && (s.text[s.pos] == ` ` || s.text[s.pos] in json2.newlines) {
+ s.move()
+ }
+ if s.pos >= s.text.len {
+ return s.tokenize([]byte{}, .eof)
+ } else if s.pos + 3 < s.text.len && (s.text[s.pos] == `t` || s.text[s.pos] == `n`) {
+ ident := s.text[s.pos..s.pos + 4].bytestr()
+ if ident == 'true' || ident == 'null' {
+ mut kind := TokenKind.null
+ if ident == 'true' {
+ kind = .bool_
+ }
+ unsafe { ident.free() }
+ val := s.text[s.pos..s.pos + 4]
+ tok := s.tokenize(val, kind)
+ s.move() // n / t
+ s.move() // u / r
+ s.move() // l / u
+ s.move() // l / e
+ return tok
+ }
+ unsafe { ident.free() }
+ return s.invalid_token()
+ } else if s.pos + 4 < s.text.len && s.text[s.pos] == `f` {
+ ident := s.text[s.pos..s.pos + 5].bytestr()
+ if ident == 'false' {
+ unsafe { ident.free() }
+ val := s.text[s.pos..s.pos + 5]
+ tok := s.tokenize(val, .bool_)
+ s.move() // f
+ s.move() // a
+ s.move() // l
+ s.move() // s
+ s.move() // e
+ return tok
+ }
+ unsafe { ident.free() }
+ return s.invalid_token()
+ } else if s.text[s.pos] in json2.char_list {
+ chr := s.text[s.pos]
+ tok := s.tokenize([]byte{}, TokenKind(int(chr)))
+ s.move()
+ return tok
+ } else if s.text[s.pos] == `"` {
+ return s.text_scan()
+ } else if s.text[s.pos].is_digit() || s.text[s.pos] == `-` {
+ return s.num_scan()
+ } else {
+ return s.invalid_token()
+ }
+}
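+
+// A rough, module-internal usage sketch: callers typically drive the scanner
+// until an `.eof` (or `.error`) token is returned, roughly like this:
+//
+//	mut s := Scanner{ text: '[1, "two", null]'.bytes() }
+//	for {
+//		tok := s.scan()
+//		if tok.kind == .eof || tok.kind == .error {
+//			break
+//		}
+//		// ... hand tok over to the parser ...
+//	}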