aboutsummaryrefslogtreecommitdiff
path: root/v_windows/v/vlib/regex/regex_util.v
diff options
context:
space:
mode:
Diffstat (limited to 'v_windows/v/vlib/regex/regex_util.v')
-rw-r--r--v_windows/v/vlib/regex/regex_util.v436
1 files changed, 436 insertions, 0 deletions
diff --git a/v_windows/v/vlib/regex/regex_util.v b/v_windows/v/vlib/regex/regex_util.v
new file mode 100644
index 0000000..0bf1a81
--- /dev/null
+++ b/v_windows/v/vlib/regex/regex_util.v
@@ -0,0 +1,436 @@
+/*
+regex 1.0 alpha
+
+Copyright (c) 2019-2021 Dario Deledda. All rights reserved.
+Use of this source code is governed by an MIT license
+that can be found in the LICENSE file.
+*/
+module regex
+
+import strings
+
+/******************************************************************************
+*
+* Inits
+*
+******************************************************************************/
+// regex_base creates a regex object from the query string `pattern`.
+// It returns the RE object followed by the two values given back by
+// impl_compile: the error code `re_err` and the error position `err_pos`.
+pub fn regex_base(pattern string) (RE, int, int) {
+	// there can not be more groups than half of the pattern length
+	max_groups := pattern.len >> 1
+
+	mut re := RE{
+		prog: []Token{len: pattern.len + 1} // max program length, it can not be longer than the pattern
+		cc: []CharClass{len: pattern.len} // there can not be more char classes than the length of the pattern
+		group_csave_flag: false // continuous group saving flag
+		group_max_nested: 128 // at most 128 nested groups
+		group_max: max_groups
+		group_stack: []int{len: max_groups, init: -1}
+		group_data: []int{len: max_groups, init: -1}
+	}
+
+	re_err, err_pos := re.impl_compile(pattern)
+	return re, re_err, err_pos
+}
+
+/******************************************************************************
+*
+* Utilities
+*
+******************************************************************************/
+// get_group_bounds_by_name returns the start and end indexes of the group
+// registered with the name `group_name`, or -1, -1 if the name is unknown
+pub fn (re RE) get_group_bounds_by_name(group_name string) (int, int) {
+	if group_name !in re.group_map {
+		return -1, -1
+	}
+	// group_map stores the group id plus one, each group owns two slots in re.groups
+	slot := (re.group_map[group_name] - 1) * 2
+	first := re.groups[slot]
+	last := re.groups[slot + 1]
+	return first, last
+}
+
+// get_group_by_name returns the text of the group named `group_name` taken from in_txt,
+// or an empty string if the name is unknown or the stored group bounds are not valid
+pub fn (re RE) get_group_by_name(in_txt string, group_name string) string {
+	if group_name !in re.group_map {
+		return ''
+	}
+	// group_map stores the group id plus one, each group owns two slots in re.groups
+	slot := (re.group_map[group_name] - 1) * 2
+	first := re.groups[slot]
+	last := re.groups[slot + 1]
+	if first < 0 || last <= first {
+		return ''
+	}
+	return in_txt[first..last]
+}
+
+// get_group_by_id gets a group string by its id.
+// in_txt is the text the group indexes refer to, group_id is the index of the group.
+// It returns an empty string when group_id is out of range (negative or too big)
+// or when the stored group bounds are not valid.
+pub fn (re RE) get_group_by_id(in_txt string, group_id int) string {
+	// a negative id would index re.groups before its start, treat it as out of range
+	if group_id >= 0 && group_id < (re.groups.len >> 1) {
+		// each group owns two consecutive slots: start and end
+		index := group_id << 1
+		start := re.groups[index]
+		end := re.groups[index + 1]
+		if start >= 0 && end > start {
+			return in_txt[start..end]
+		}
+	}
+	return ''
+}
+
+// get_group_bounds_by_id gets the boundaries of a group by its id.
+// It returns the start and end values stored for the group,
+// or -1, -1 when group_id is negative or not lower than re.group_count.
+pub fn (re RE) get_group_bounds_by_id(group_id int) (int, int) {
+	// a negative id would index re.groups before its start, treat it as out of range
+	if group_id >= 0 && group_id < re.group_count {
+		// each group owns two consecutive slots: start and end
+		index := group_id << 1
+		return re.groups[index], re.groups[index + 1]
+	}
+	return -1, -1
+}
+
+// Re_group holds the boundaries of one group as a pair of indexes,
+// it is the element type of the list returned by get_group_list.
+// Both fields default to -1.
+pub struct Re_group {
+pub:
+ start int = -1 // start index of the group, copied from re.groups by get_group_list
+ end int = -1 // end index of the group, copied from re.groups by get_group_list
+}
+
+// get_group_list returns a list of Re_group, one item for each start/end pair stored in re.groups.
+// A pair with a valid range is copied in its item, a pair with a non negative start
+// but an empty range gives a default Re_group, a pair with a negative start leaves
+// the item as it was created.
+pub fn (re RE) get_group_list() []Re_group {
+	mut res := []Re_group{len: re.groups.len >> 1}
+
+	for gi := 0; gi < re.groups.len; gi += 2 {
+		txt_st := re.groups[gi]
+		if txt_st < 0 {
+			// nothing stored for this group, keep the item untouched
+			continue
+		}
+		txt_en := re.groups[gi + 1]
+		if txt_en > txt_st {
+			res[gi >> 1] = Re_group{
+				start: txt_st
+				end: txt_en
+			}
+		} else {
+			res[gi >> 1] = Re_group{}
+		}
+	}
+	return res
+}
+
+/******************************************************************************
+*
+* Matchers
+*
+******************************************************************************/
+// match_string matches the pattern with the in_txt string.
+// It returns the start and end indexes given by match_base, with the end limited
+// to in_txt.len, or no_match_found, 0 when the f_ms / f_me flags reject the match.
+[direct_array_access]
+pub fn (mut re RE) match_string(in_txt string) (int, int) {
+	start, mut end := re.match_base(in_txt.str, in_txt.len + 1)
+	// match_base receives one byte more than the text length, keep the end inside the text
+	if end > in_txt.len {
+		end = in_txt.len
+	}
+
+	// not a usable match: give back the values as they are
+	if start < 0 || end <= start {
+		return start, end
+	}
+
+	// f_ms: the match is accepted only if it starts at the beginning of the text
+	if (re.flag & f_ms) != 0 && start > 0 {
+		return no_match_found, 0
+	}
+
+	// f_me: a match that ends before the end of the text is accepted
+	// only if the next char is one of new_line_list
+	if (re.flag & f_me) != 0 && end < in_txt.len && in_txt[end] !in new_line_list {
+		return no_match_found, 0
+	}
+
+	return start, end
+}
+
+/******************************************************************************
+*
+* Finders
+*
+******************************************************************************/
+/*
+// find internal implementation HERE for reference do not remove!!
+[direct_array_access]
+fn (mut re RE) find_imp(in_txt string) (int,int) {
+ old_flag := re.flag
+ re.flag |= f_src // enable search mode
+
+ start, mut end := re.match_base(in_txt.str, in_txt.len + 1)
+ //print("Find [$start,$end] '${in_txt[start..end]}'")
+ if end > in_txt.len {
+ end = in_txt.len
+ }
+ re.flag = old_flag
+
+ if start >= 0 && end > start {
+ return start, end
+ }
+ return no_match_found, 0
+}
+*/
+
+// find tries to find the first match in the input string.
+// It returns the start and end indexes of the match inside in_txt,
+// or -1, -1 if there is no match.
+[direct_array_access]
+pub fn (mut re RE) find(in_txt string) (int, int) {
+	// try a match from each position of the text, the first success wins
+	for i := 0; i < in_txt.len; i++ {
+		mut s := -1
+		mut e := -1
+		unsafe {
+			// match on the tail of in_txt that starts at i
+			s, e = re.match_base(in_txt.str + i, in_txt.len - i + 1)
+		}
+		if s >= 0 && e > s {
+			// s and e are relative to the tail, move them back on in_txt
+			return i + s, i + e
+		}
+	}
+	return -1, -1
+}
+
+// find_from tries to find the first match in the input string starting from the `start` index.
+// It returns the start and end indexes of the match inside in_txt,
+// or -1, -1 if `start` is negative or there is no match.
+// The search mode flag f_src is enabled only for the duration of the call,
+// re.flag is restored before every return.
+[direct_array_access]
+pub fn (mut re RE) find_from(in_txt string, start int) (int, int) {
+	// reject an invalid start before touching re.flag,
+	// so that the flags are never left modified on this exit path
+	if start < 0 {
+		return -1, -1
+	}
+
+	old_flag := re.flag
+	re.flag |= f_src // enable search mode
+
+	mut i := start
+	for i < in_txt.len {
+		mut s := -1
+		mut e := -1
+
+		unsafe {
+			// string over the tail of in_txt that starts at i
+			tmp_str := tos(in_txt.str + i, in_txt.len - i)
+			s, e = re.match_string(tmp_str)
+		}
+
+		if s >= 0 && e > s {
+			re.flag = old_flag
+			// s and e are relative to the tail, move them back on in_txt
+			return i + s, i + e
+		}
+		i++
+	}
+	re.flag = old_flag
+	return -1, -1
+}
+
+// find_all finds all the non overlapping occurrences of the match pattern.
+// It returns a flat list of indexes on in_txt: start, end of the first match,
+// start, end of the second match and so on.
+[direct_array_access]
+pub fn (mut re RE) find_all(in_txt string) []int {
+	mut res := []int{}
+	mut i := 0
+
+	for i < in_txt.len {
+		mut s := -1
+		mut e := -1
+		unsafe {
+			// match on the tail of in_txt that starts at i
+			s, e = re.match_base(in_txt.str + i, in_txt.len + 1 - i)
+		}
+		if s >= 0 && e > s {
+			// s and e are relative to the tail, store them as indexes on in_txt
+			res << i + s
+			res << i + e
+			// go on after the end of this match
+			i += e
+		} else {
+			i++
+		}
+	}
+	return res
+}
+
+// find_all_str finds all the non overlapping occurrences of the match pattern
+// and returns the matched texts as a list of strings.
+// Each item is the text between the start and the end of one match, the same
+// ranges that find_all reports as indexes.
+[direct_array_access]
+pub fn (mut re RE) find_all_str(in_txt string) []string {
+	mut i := 0
+	mut res := []string{}
+
+	for i < in_txt.len {
+		mut s := -1
+		mut e := -1
+		unsafe {
+			// match on the tail of in_txt that starts at i, s and e are relative to i
+			s, e = re.match_base(in_txt.str + i, in_txt.len + 1 - i)
+		}
+		if s >= 0 && e > s {
+			// match_base receives one byte more than the text length,
+			// keep the end inside the text like match_string does
+			mut abs_end := i + e
+			if abs_end > in_txt.len {
+				abs_end = in_txt.len
+			}
+			// take the text from the start of the match, not from i
+			res << in_txt[i + s..abs_end]
+			// go on after the end of this match
+			i += e
+			continue
+		}
+		i++
+	}
+	return res
+}
+
+/******************************************************************************
+*
+* Replacers
+*
+******************************************************************************/
+// replace_simple returns a string where the matches are replaced with the `repl` string.
+// in_txt is the source text, repl is written as it is in place of each match.
+// When no match is found in_txt is returned unchanged.
+pub fn (mut re RE) replace_simple(in_txt string, repl string) string {
+	// pos holds the start, end index pairs of all the matches
+	pos := re.find_all(in_txt)
+	if pos.len == 0 {
+		return in_txt
+	}
+
+	// use a builder instead of a string concatenation for each match
+	mut res := strings.new_builder(in_txt.len)
+	mut s1 := 0 // start of the text not copied yet
+	mut i := 0
+	for i < pos.len {
+		// text between the previous match and this one, then the replace string
+		res.write_string(in_txt[s1..pos[i]])
+		res.write_string(repl)
+		s1 = pos[i + 1]
+		i += 2
+	}
+
+	// text after the last match
+	res.write_string(in_txt[s1..])
+	return res.str()
+}
+
+// FnReplace is the type of the callback function used by replace_by_fn for a custom replace
+// re the RE object that found the match
+// in_txt the source text
+// start index of the start of the match in in_txt
+// end index of the end of the match in in_txt
+// the match is in in_txt[start..end]
+// the returned string is written in the result in place of the match
+pub type FnReplace = fn (re RE, in_txt string, start int, end int) string
+
+// replace_by_fn returns a string where each match is replaced with the string
+// returned by the repl_fn callback function.
+// The callback receives the RE object, the source text and the bounds of the match.
+pub fn (mut re RE) replace_by_fn(in_txt string, repl_fn FnReplace) string {
+	mut out := strings.new_builder(in_txt.len)
+	// pos is both the start of the next search and the end of the text already copied
+	mut pos := 0
+
+	for pos < in_txt.len {
+		s, e := re.find_from(in_txt, pos)
+		if s < 0 || e <= s {
+			// no more matches
+			break
+		}
+
+		// text between the previous match and this one
+		if pos < s {
+			out.write_string(in_txt[pos..s])
+		}
+
+		// add the start of the search to the start and end slots of every group
+		for slot := 0; slot < (re.group_count << 1); slot++ {
+			re.groups[slot] += pos
+		}
+
+		out.write_string(repl_fn(re, in_txt, s, e))
+		pos = e
+	}
+
+	// text after the last match
+	if pos >= 0 && pos < in_txt.len {
+		out.write_string(in_txt[pos..])
+	}
+	return out.str()
+}
+
+// parsed_replace_string builds the replace text from `repl`:
+// a `\` followed by a digit is substituted with the text of the group
+// with that id taken from in_txt, any other `\` is kept as it is
+fn (re RE) parsed_replace_string(in_txt string, repl string) string {
+	parts := repl.split('\\')
+	// the first part comes before any backslash, it is copied as it is
+	mut out := parts[0]
+	for idx in 1 .. parts.len {
+		part := parts[idx]
+		if part.len == 0 || part[0] < `0` || part[0] > `9` {
+			// not a group reference, put back the backslash removed by the split
+			out += '\\' + part
+			continue
+		}
+		// only the first digit is used as group id
+		group_id := int(part[0] - `0`)
+		out += re.get_group_by_id(in_txt, group_id) + part[1..]
+	}
+	return out
+}
+
+// replace returns a string where each match is replaced with the repl_str string,
+// the groups of the match can be used in the replace string,
+// they are expanded by parsed_replace_string
+pub fn (mut re RE) replace(in_txt string, repl_str string) string {
+	mut out := strings.new_builder(in_txt.len)
+	// pos is both the start of the next search and the end of the text already copied
+	mut pos := 0
+
+	for pos < in_txt.len {
+		s, e := re.find_from(in_txt, pos)
+		if s < 0 || e <= s {
+			// no more matches
+			break
+		}
+
+		// text between the previous match and this one
+		if pos < s {
+			out.write_string(in_txt[pos..s])
+		}
+
+		// add the start of the search to the start and end slots of every group
+		for slot := 0; slot < (re.group_count << 1); slot++ {
+			re.groups[slot] += pos
+		}
+
+		out.write_string(re.parsed_replace_string(in_txt, repl_str))
+		pos = e
+	}
+
+	// text after the last match
+	if pos >= 0 && pos < in_txt.len {
+		out.write_string(in_txt[pos..])
+	}
+	return out.str()
+}