diff options
Diffstat (limited to 'v_windows/v/old/vlib/strings')
-rw-r--r-- | v_windows/v/old/vlib/strings/builder.js.v | 51 | ||||
-rw-r--r-- | v_windows/v/old/vlib/strings/builder.v | 140 | ||||
-rw-r--r-- | v_windows/v/old/vlib/strings/builder_test.v | 96 | ||||
-rw-r--r-- | v_windows/v/old/vlib/strings/similarity.v | 69 | ||||
-rw-r--r-- | v_windows/v/old/vlib/strings/similarity_test.v | 13 | ||||
-rw-r--r-- | v_windows/v/old/vlib/strings/strings.c.v | 38 | ||||
-rw-r--r-- | v_windows/v/old/vlib/strings/strings.js.v | 17 | ||||
-rw-r--r-- | v_windows/v/old/vlib/strings/strings.v | 13 | ||||
-rw-r--r-- | v_windows/v/old/vlib/strings/strings_test.v | 14 | ||||
-rw-r--r-- | v_windows/v/old/vlib/strings/textscanner/textscanner.v | 154 | ||||
-rw-r--r-- | v_windows/v/old/vlib/strings/textscanner/textscanner_test.v | 159 |
11 files changed, 764 insertions, 0 deletions
diff --git a/v_windows/v/old/vlib/strings/builder.js.v b/v_windows/v/old/vlib/strings/builder.js.v new file mode 100644 index 0000000..e445804 --- /dev/null +++ b/v_windows/v/old/vlib/strings/builder.js.v @@ -0,0 +1,51 @@ +// Copyright (c) 2019-2021 Alexander Medvednikov. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. +module strings + +pub struct Builder { +mut: + buf []byte +pub mut: + len int + initial_size int = 1 +} + +pub fn new_builder(initial_size int) Builder { + return Builder{ + buf: make(0, initial_size, sizeof(byte)) + initial_size: initial_size + } +} + +pub fn (mut b Builder) write_b(data byte) { + b.buf << data + b.len++ +} + +pub fn (mut b Builder) write_string(s string) { + b.buf.push_many(s.str, s.len) + // b.buf << []byte(s) // TODO + b.len += s.len +} + +pub fn (mut b Builder) writeln(s string) { + b.buf.push_many(s.str, s.len) + // b.buf << []byte(s) // TODO + b.buf << `\n` + b.len += s.len + 1 +} + +pub fn (b Builder) str() string { + x := &byte(b.buf.data) + return unsafe { x.vstring_with_len(b.len) } +} + +pub fn (mut b Builder) cut(n int) { + b.len -= n +} + +pub fn (mut b Builder) free() { + b.buf = make(0, b.initial_size, 1) + b.len = 0 +} diff --git a/v_windows/v/old/vlib/strings/builder.v b/v_windows/v/old/vlib/strings/builder.v new file mode 100644 index 0000000..cec4f6c --- /dev/null +++ b/v_windows/v/old/vlib/strings/builder.v @@ -0,0 +1,140 @@ +// Copyright (c) 2019-2021 Alexander Medvednikov. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. +module strings + +// strings.Builder is used to efficiently append many strings to a large +// dynamically growing buffer, then use the resulting large string. Using +// a string builder is much better for performance/memory usage than doing +// constantly string concatenation. +pub type Builder = []byte + +// new_builder returns a new string builder, with an initial capacity of `initial_size` +pub fn new_builder(initial_size int) Builder { + return Builder([]byte{cap: initial_size}) +} + +// write_ptr writes `len` bytes provided byteptr to the accumulated buffer +[unsafe] +pub fn (mut b Builder) write_ptr(ptr &byte, len int) { + if len == 0 { + return + } + unsafe { b.push_many(ptr, len) } +} + +// write_b appends a single `data` byte to the accumulated buffer +pub fn (mut b Builder) write_b(data byte) { + b << data +} + +// write implements the Writer interface +pub fn (mut b Builder) write(data []byte) ?int { + if data.len == 0 { + return 0 + } + b << data + return data.len +} + +[inline] +pub fn (b &Builder) byte_at(n int) byte { + return unsafe { (&[]byte(b))[n] } +} + +// write appends the string `s` to the buffer +[inline] +pub fn (mut b Builder) write_string(s string) { + if s.len == 0 { + return + } + unsafe { b.push_many(s.str, s.len) } + // for c in s { + // b.buf << c + // } + // b.buf << []byte(s) // TODO +} + +// go_back discards the last `n` bytes from the buffer +pub fn (mut b Builder) go_back(n int) { + b.trim(b.len - n) +} + +// cut_last cuts the last `n` bytes from the buffer and returns them +pub fn (mut b Builder) cut_last(n int) string { + cut_pos := b.len - n + x := unsafe { (&[]byte(b))[cut_pos..] } + res := x.bytestr() + b.trim(cut_pos) + return res +} + +// cut_to cuts the string after `pos` and returns it. +// if `pos` is superior to builder length, returns an empty string +// and cancel further operations +pub fn (mut b Builder) cut_to(pos int) string { + if pos > b.len { + return '' + } + return b.cut_last(b.len - pos) +} + +// go_back_to resets the buffer to the given position `pos` +// NB: pos should be < than the existing buffer length. +pub fn (mut b Builder) go_back_to(pos int) { + b.trim(pos) +} + +// writeln appends the string `s`, and then a newline character. +[inline] +pub fn (mut b Builder) writeln(s string) { + // for c in s { + // b.buf << c + // } + if s.len > 0 { + unsafe { b.push_many(s.str, s.len) } + } + // b.buf << []byte(s) // TODO + b << byte(`\n`) +} + +// last_n(5) returns 'world' +// buf == 'hello world' +pub fn (b &Builder) last_n(n int) string { + if n > b.len { + return '' + } + x := unsafe { (&[]byte(b))[b.len - n..] } + return x.bytestr() +} + +// after(6) returns 'world' +// buf == 'hello world' +pub fn (b &Builder) after(n int) string { + if n >= b.len { + return '' + } + x := unsafe { (&[]byte(b))[n..] } + return x.bytestr() +} + +// str returns a copy of all of the accumulated buffer content. +// NB: after a call to b.str(), the builder b should not be +// used again, you need to call b.free() first, or just leave +// it to be freed by -autofree when it goes out of scope. +// The returned string *owns* its own separate copy of the +// accumulated data that was in the string builder, before the +// .str() call. +pub fn (mut b Builder) str() string { + b << byte(0) + bcopy := unsafe { &byte(memdup(b.data, b.len)) } + s := unsafe { bcopy.vstring_with_len(b.len - 1) } + b.trim(0) + return s +} + +// free is for manually freeing the contents of the buffer +[unsafe] +pub fn (mut b Builder) free() { + unsafe { free(b.data) } +} diff --git a/v_windows/v/old/vlib/strings/builder_test.v b/v_windows/v/old/vlib/strings/builder_test.v new file mode 100644 index 0000000..a714722 --- /dev/null +++ b/v_windows/v/old/vlib/strings/builder_test.v @@ -0,0 +1,96 @@ +import strings + +type MyInt = int + +fn test_sb() { + mut sb := strings.new_builder(100) + sb.write_string('hi') + sb.write_string('!') + sb.write_string('hello') + assert sb.len == 8 + sb_end := sb.str() + assert sb_end == 'hi!hello' + assert sb.len == 0 + /// + sb = strings.new_builder(10) + sb.write_string('a') + sb.write_string('b') + assert sb.len == 2 + assert sb.str() == 'ab' + // Test interpolation optimization + sb = strings.new_builder(10) + x := 10 + y := MyInt(20) + sb.writeln('x = $x y = $y') + res := sb.str() + assert res[res.len - 1] == `\n` + println('"$res"') + assert res.trim_space() == 'x = 10 y = 20' + // + sb = strings.new_builder(10) + sb.write_string('x = $x y = $y') + assert sb.str() == 'x = 10 y = 20' + //$if !windows { + sb = strings.new_builder(10) + sb.write_string('123456') + last_2 := sb.cut_last(2) + assert last_2 == '56' + final_sb := sb.str() + assert final_sb == '1234' + //} +} + +const ( + maxn = 100000 +) + +fn test_big_sb() { + mut sb := strings.new_builder(100) + mut sb2 := strings.new_builder(10000) + for i in 0 .. maxn { + sb.writeln(i.str()) + sb2.write_string('+') + } + s := sb.str() + lines := s.split_into_lines() + assert lines.len == maxn + assert lines[0] == '0' + assert lines[1] == '1' + assert lines[777] == '777' + assert lines[98765] == '98765' + println(sb2.len) + assert sb2.len == maxn +} + +fn test_byte_write() { + mut sb := strings.new_builder(100) + temp_str := 'byte testing' + mut count := 0 + for word in temp_str { + sb.write_b(word) + count++ + assert count == sb.len + } + sb_final := sb.str() + assert sb_final == temp_str +} + +fn test_strings_builder_reuse() { + mut sb := strings.new_builder(256) + sb.write_string('world') + assert sb.str() == 'world' + sb.write_string('hello') + assert sb.str() == 'hello' +} + +fn test_cut_to() { + mut sb := strings.new_builder(16) + sb.write_string('hello') + assert sb.cut_to(3) == 'lo' + assert sb.len == 3 + assert sb.cut_to(3) == '' + assert sb.len == 3 + assert sb.cut_to(0) == 'hel' + assert sb.cut_to(32) == '' + assert sb.len == 0 +} diff --git a/v_windows/v/old/vlib/strings/similarity.v b/v_windows/v/old/vlib/strings/similarity.v new file mode 100644 index 0000000..8d8de95 --- /dev/null +++ b/v_windows/v/old/vlib/strings/similarity.v @@ -0,0 +1,69 @@ +module strings + +// #-js +// use levenshtein distance algorithm to calculate +// the distance between between two strings (lower is closer) +pub fn levenshtein_distance(a string, b string) int { + mut f := [0].repeat(b.len + 1) + for j in 0 .. f.len { + f[j] = j + } + for ca in a { + mut j := 1 + mut fj1 := f[0] + f[0]++ + for cb in b { + mut mn := if f[j] + 1 <= f[j - 1] + 1 { f[j] + 1 } else { f[j - 1] + 1 } + if cb != ca { + mn = if mn <= fj1 + 1 { mn } else { fj1 + 1 } + } else { + mn = if mn <= fj1 { mn } else { fj1 } + } + fj1 = f[j] + f[j] = mn + j++ + } + } + return f[f.len - 1] +} + +// use levenshtein distance algorithm to calculate +// how similar two strings are as a percentage (higher is closer) +pub fn levenshtein_distance_percentage(a string, b string) f32 { + d := levenshtein_distance(a, b) + l := if a.len >= b.len { a.len } else { b.len } + return (1.00 - f32(d) / f32(l)) * 100.00 +} + +// implementation of Sørensen–Dice coefficient. +// find the similarity between two strings. +// returns coefficient between 0.0 (not similar) and 1.0 (exact match). +pub fn dice_coefficient(s1 string, s2 string) f32 { + if s1.len == 0 || s2.len == 0 { + return 0.0 + } + if s1 == s2 { + return 1.0 + } + if s1.len < 2 || s2.len < 2 { + return 0.0 + } + a := if s1.len > s2.len { s1 } else { s2 } + b := if a == s1 { s2 } else { s1 } + mut first_bigrams := map[string]int{} + for i in 0 .. a.len - 1 { + bigram := a[i..i + 2] + q := if bigram in first_bigrams { first_bigrams[bigram] + 1 } else { 1 } + first_bigrams[bigram] = q + } + mut intersection_size := 0 + for i in 0 .. b.len - 1 { + bigram := b[i..i + 2] + count := if bigram in first_bigrams { first_bigrams[bigram] } else { 0 } + if count > 0 { + first_bigrams[bigram] = count - 1 + intersection_size++ + } + } + return (2.0 * f32(intersection_size)) / (f32(a.len) + f32(b.len) - 2) +} diff --git a/v_windows/v/old/vlib/strings/similarity_test.v b/v_windows/v/old/vlib/strings/similarity_test.v new file mode 100644 index 0000000..965da45 --- /dev/null +++ b/v_windows/v/old/vlib/strings/similarity_test.v @@ -0,0 +1,13 @@ +import strings + +fn test_levenshtein_distance() { + assert strings.levenshtein_distance('', '') == 0 + assert strings.levenshtein_distance('one', 'one') == 0 + assert strings.levenshtein_distance('', 'two') == 3 + assert strings.levenshtein_distance('three', '') == 5 + assert strings.levenshtein_distance('bananna', '') == 7 + assert strings.levenshtein_distance('cats', 'hats') == 1 + assert strings.levenshtein_distance('hugs', 'shrugs') == 2 + assert strings.levenshtein_distance('broom', 'shroom') == 2 + assert strings.levenshtein_distance('flomax', 'volmax') == 3 +} diff --git a/v_windows/v/old/vlib/strings/strings.c.v b/v_windows/v/old/vlib/strings/strings.c.v new file mode 100644 index 0000000..c020f5b --- /dev/null +++ b/v_windows/v/old/vlib/strings/strings.c.v @@ -0,0 +1,38 @@ +module strings + +// strings.repeat - fill a string with `n` repetitions of the character `c` +pub fn repeat(c byte, n int) string { + if n <= 0 { + return '' + } + mut bytes := unsafe { malloc_noscan(n + 1) } + unsafe { + C.memset(bytes, c, n) + bytes[n] = `0` + } + return unsafe { bytes.vstring_with_len(n) } +} + +// strings.repeat_string - gives you `n` repetitions of the substring `s` +// NB: strings.repeat, that repeats a single byte, is between 2x +// and 24x faster than strings.repeat_string called for a 1 char string. +pub fn repeat_string(s string, n int) string { + if n <= 0 || s.len == 0 { + return '' + } + slen := s.len + blen := slen * n + mut bytes := unsafe { malloc_noscan(blen + 1) } + for bi in 0 .. n { + bislen := bi * slen + for si in 0 .. slen { + unsafe { + bytes[bislen + si] = s[si] + } + } + } + unsafe { + bytes[blen] = `0` + } + return unsafe { bytes.vstring_with_len(blen) } +} diff --git a/v_windows/v/old/vlib/strings/strings.js.v b/v_windows/v/old/vlib/strings/strings.js.v new file mode 100644 index 0000000..2681b94 --- /dev/null +++ b/v_windows/v/old/vlib/strings/strings.js.v @@ -0,0 +1,17 @@ +module strings + +pub fn repeat(c byte, n int) string { + if n <= 0 { + return '' + } + arr := [c].repeat(n) + return arr.bytestr() +} + +pub fn repeat_string(s string, n int) string { + /* + // TODO: uncomment this. It is commented for now, so that `v doc strings` works + res := # s.repeat(n) + return res + */ +} diff --git a/v_windows/v/old/vlib/strings/strings.v b/v_windows/v/old/vlib/strings/strings.v new file mode 100644 index 0000000..c01dd90 --- /dev/null +++ b/v_windows/v/old/vlib/strings/strings.v @@ -0,0 +1,13 @@ +module strings + +// import rand +// random returns a random string with `n` characters +/* +pub fn random(n int) string { + buf := vmalloc(n) + for i in 0..n { + buf[i] = rand.next() + } + return tos(buf) +} +*/ diff --git a/v_windows/v/old/vlib/strings/strings_test.v b/v_windows/v/old/vlib/strings/strings_test.v new file mode 100644 index 0000000..ff5ddf5 --- /dev/null +++ b/v_windows/v/old/vlib/strings/strings_test.v @@ -0,0 +1,14 @@ +import strings + +fn test_repeat() { + assert strings.repeat(`x`, 10) == 'xxxxxxxxxx' + assert strings.repeat(`a`, 1) == 'a' + assert strings.repeat(`a`, 0) == '' +} + +fn test_repeat_string() { + assert strings.repeat_string('abc', 3) == 'abcabcabc' + assert strings.repeat_string('abc', 1) == 'abc' + assert strings.repeat_string('abc', 0) == '' + assert strings.repeat_string('', 200) == '' +} diff --git a/v_windows/v/old/vlib/strings/textscanner/textscanner.v b/v_windows/v/old/vlib/strings/textscanner/textscanner.v new file mode 100644 index 0000000..5525137 --- /dev/null +++ b/v_windows/v/old/vlib/strings/textscanner/textscanner.v @@ -0,0 +1,154 @@ +module textscanner + +// TextScanner simplifies writing small scanners/parsers +// by providing safe methods to scan texts character by +// character, peek for the next characters, go back, etc. +pub struct TextScanner { +pub: + input string + ilen int +mut: + pos int // current position; pos is *always* kept in [0,ilen] +} + +// new returns a stack allocated instance of TextScanner. +pub fn new(input string) TextScanner { + return TextScanner{ + input: input + ilen: input.len + } +} + +// free frees all allocated resources. +[unsafe] +pub fn (mut ss TextScanner) free() { + unsafe { + ss.input.free() + } +} + +// remaining returns how many characters remain from current position. +[inline] +pub fn (ss &TextScanner) remaining() int { + return ss.ilen - ss.pos +} + +// next returns the next character code from the input text. +// next returns `-1` if it can't reach the next character. +// next advances the scanner position. +[direct_array_access; inline] +pub fn (mut ss TextScanner) next() int { + if ss.pos < ss.ilen { + opos := ss.pos + ss.pos++ + return ss.input[opos] + } + return -1 +} + +// skip skips one character ahead; `skip()` is slightly faster than `.next()`. +// `skip()` does not return a result. +[inline] +pub fn (mut ss TextScanner) skip() { + if ss.pos + 1 < ss.ilen { + ss.pos++ + } +} + +// skip_n skips ahead `n` characters, stopping at the end of the input. +[inline] +pub fn (mut ss TextScanner) skip_n(n int) { + ss.pos += n + if ss.pos > ss.ilen { + ss.pos = ss.ilen + } +} + +// peek returns the *next* character code from the input text. +// peek returns `-1` if it can't peek the next character. +// unlike `next()`, `peek()` does not change the state of the scanner. +[direct_array_access; inline] +pub fn (ss &TextScanner) peek() int { + if ss.pos < ss.ilen { + return ss.input[ss.pos] + } + return -1 +} + +// peek_n returns the character code from the input text at position + `n`. +// peek_n returns `-1` if it can't peek `n` characters ahead. +// ts.peek_n(0) == ts.current() . +// ts.peek_n(1) == ts.peek() . +[direct_array_access; inline] +pub fn (ss &TextScanner) peek_n(n int) int { + if ss.pos + n < ss.ilen { + return ss.input[ss.pos + n] + } + return -1 +} + +// back goes back one character from the current scanner position. +[inline] +pub fn (mut ss TextScanner) back() { + if ss.pos > 0 { + ss.pos-- + } +} + +// back_n goes back `n` characters from the current scanner position. +pub fn (mut ss TextScanner) back_n(n int) { + ss.pos -= n + if ss.pos < 0 { + ss.pos = 0 + } + if ss.pos > ss.ilen { + ss.pos = ss.ilen + } +} + +// peek_back returns the *previous* character code from the input text. +// peek_back returns `-1` if it can't peek the previous character. +// unlike `back()`, `peek_back()` does not change the state of the scanner. +[direct_array_access; inline] +pub fn (ss &TextScanner) peek_back() int { + return ss.peek_back_n(1) +} + +// peek_back_n returns the character code from the input text at position - `n`. +// peek_back_n returns `-1` if it can't peek `n` characters back. +// ts.peek_back_n(0) == ts.current() +// ts.peek_back_n(1) == ts.peek_back() +[direct_array_access; inline] +pub fn (ss &TextScanner) peek_back_n(n int) int { + offset := n + 1 + if ss.pos >= offset { + return ss.input[ss.pos - offset] + } + return -1 +} + +// current returns the current character code from the input text. +// current returns `-1` at the start of the input text. +// NB: after `c := ts.next()`, `ts.current()` will also return `c`. +[direct_array_access; inline] +pub fn (mut ss TextScanner) current() int { + if ss.pos > 0 { + return ss.input[ss.pos - 1] + } + return -1 +} + +// reset resets the internal state of the scanner +// After calling .reset(), .next() will start reading +// again from the start of the input text. +pub fn (mut ss TextScanner) reset() { + ss.pos = 0 +} + +// goto_end has the same effect as `for ts.next() != -1 {}` +// i.e. after calling .goto_end(), the scanner will be at +// the end of the input text. Further .next() calls will +// return -1, unless you go back. +pub fn (mut ss TextScanner) goto_end() { + ss.pos = ss.ilen +} diff --git a/v_windows/v/old/vlib/strings/textscanner/textscanner_test.v b/v_windows/v/old/vlib/strings/textscanner/textscanner_test.v new file mode 100644 index 0000000..e9d2487 --- /dev/null +++ b/v_windows/v/old/vlib/strings/textscanner/textscanner_test.v @@ -0,0 +1,159 @@ +import strings.textscanner + +fn test_remaining() { + mut s := textscanner.new('abc') + assert s.remaining() == 3 + s.next() + s.next() + assert s.remaining() == 1 + s.next() + assert s.remaining() == 0 + s.next() + s.next() + assert s.remaining() == 0 + s.reset() + assert s.remaining() == 3 +} + +fn test_next() { + mut s := textscanner.new('abc') + assert s.next() == `a` + assert s.next() == `b` + assert s.next() == `c` + assert s.next() == -1 + assert s.next() == -1 + assert s.next() == -1 +} + +fn test_skip() { + mut s := textscanner.new('abc') + assert s.next() == `a` + s.skip() + assert s.next() == `c` + assert s.next() == -1 +} + +fn test_skip_n() { + mut s := textscanner.new('abc') + s.skip_n(2) + assert s.next() == `c` + assert s.next() == -1 +} + +fn test_peek() { + mut s := textscanner.new('abc') + assert s.peek() == `a` + assert s.peek() == `a` + assert s.peek() == `a` + // + assert s.next() == `a` + assert s.next() == `b` + assert s.next() == `c` + assert s.next() == -1 +} + +fn test_peek_n() { + mut s := textscanner.new('abc') + assert s.peek_n(0) == `a` + assert s.peek_n(1) == `b` + assert s.peek_n(2) == `c` + assert s.peek_n(3) == -1 + assert s.peek_n(4) == -1 + // + assert s.next() == `a` + assert s.next() == `b` + assert s.next() == `c` + assert s.next() == -1 +} + +fn test_back() { + mut s := textscanner.new('abc') + assert s.next() == `a` + s.back() + assert s.next() == `a` + assert s.next() == `b` + s.back() + assert s.next() == `b` + assert s.next() == `c` + assert s.next() == -1 +} + +fn test_back_n() { + mut s := textscanner.new('abc') + assert s.next() == `a` + s.back_n(10) + assert s.next() == `a` + assert s.next() == `b` + assert s.next() == `c` + s.back_n(2) + assert s.next() == `b` +} + +fn test_peek_back() { + mut s := textscanner.new('abc') + assert s.next() == `a` + assert s.next() == `b` + // check that calling .peek_back() multiple times + // does not change the state: + assert s.peek_back() == `a` + assert s.peek_back() == `a` + assert s.peek_back() == `a` + // advance, then peek_back again: + assert s.next() == `c` + assert s.peek_back() == `b` + // peeking before the start: + s.reset() + assert s.peek_back() == -1 + // peeking right at the end: + s.goto_end() + assert s.peek_back() == `b` +} + +fn test_peek_back_n() { + mut s := textscanner.new('abc') + s.goto_end() + assert s.peek_back_n(0) == `c` + assert s.peek_back_n(1) == `b` + assert s.peek_back_n(2) == `a` + assert s.peek_back_n(3) == -1 + assert s.peek_back_n(4) == -1 +} + +fn test_reset() { + mut s := textscanner.new('abc') + assert s.next() == `a` + s.next() + s.next() + assert s.next() == -1 + s.reset() + assert s.next() == `a` +} + +fn test_current() { + mut s := textscanner.new('abc') + assert s.current() == -1 + assert s.next() == `a` + assert s.current() == `a` + assert s.current() == `a` + assert s.peek_back() == -1 + assert s.next() == `b` + assert s.current() == `b` + assert s.current() == `b` + assert s.peek_back() == `a` + assert s.next() == `c` + assert s.current() == `c` + assert s.next() == -1 + assert s.current() == `c` + assert s.next() == -1 + assert s.current() == `c` + s.reset() + assert s.current() == -1 + assert s.next() == `a` + assert s.current() == `a` +} + +fn test_goto_end() { + mut s := textscanner.new('abc') + s.goto_end() + assert s.current() == `c` +} |