diff options
Diffstat (limited to 'v_windows/v/old/examples/regex')
-rw-r--r-- | v_windows/v/old/examples/regex/pcre.vv | 69 | ||||
-rw-r--r-- | v_windows/v/old/examples/regex/readme.md | 8 | ||||
-rw-r--r-- | v_windows/v/old/examples/regex/regex_example.v | 80 | ||||
-rw-r--r-- | v_windows/v/old/examples/regex/regex_with_memoization.v | 127 |
4 files changed, 284 insertions, 0 deletions
diff --git a/v_windows/v/old/examples/regex/pcre.vv b/v_windows/v/old/examples/regex/pcre.vv new file mode 100644 index 0000000..72beaf5 --- /dev/null +++ b/v_windows/v/old/examples/regex/pcre.vv @@ -0,0 +1,69 @@ +module main + +// NB: you need to `v install pcre` to be able to compile this example. + +import pcre + +fn example() { + r := pcre.new_regex('Match everything after this: (.+)', 0) or { + println('An error occured!') + return + } + + m := r.match_str('Match everything after this: "I ❤️ VLang!"', 0, 0) or { + println('No match!') + return + } + + // m.get(0) -> Match everything after this: "I ❤️ VLang!" + // m.get(1) -> "I ❤️ VLang!"' + // m.get(2) -> Error! + whole_match := m.get(0) or { + println('We matched nothing...') + return + } + + matched_str := m.get(1) or { + println('We matched nothing...') + return + } + + println(whole_match) // Match everything after this: "I ❤️ VLang!" + println(matched_str) // "I ❤️ VLang!" +} + +fn main() { + example() + + mut text := '[ an s. s! ]( wi4ki:something ) + [ an s. s! ]( wi4ki:something ) + [ an s. s! ](wiki:something) + [ an s. s! ](something)dd + d [ an s. s! ](something ) d + [ more text ]( something ) s [ something b ](something)dd + + ' + + // check the regex on https://regex101.com/r/HdYya8/1/ + + regex := r'(\[[a-z\.\! 
]*\]\( *\w*\:*\w* *\))*' + + r := pcre.new_regex(regex, 0) or { + println('An error occured!') + return + } + + m := r.match_str(text, 0, 0) or { + println('No match!') + return + } + + whole_match1 := m.get(0) or { + println('We matched nothing 0...') + return + } + + println(whole_match1) + + println(m.get_all()) +} diff --git a/v_windows/v/old/examples/regex/readme.md b/v_windows/v/old/examples/regex/readme.md new file mode 100644 index 0000000..3559564 --- /dev/null +++ b/v_windows/v/old/examples/regex/readme.md @@ -0,0 +1,8 @@ +# regex + +There are 2 ways to do regex: +a) using the native module called `regex` +b) using an external module called `pcre`, which wraps the C library pcre. +NB: you need to first do: `v install pcre`, for the `pcre` module to work. + +You can find examples of both in this directory. diff --git a/v_windows/v/old/examples/regex/regex_example.v b/v_windows/v/old/examples/regex/regex_example.v new file mode 100644 index 0000000..7469ef5 --- /dev/null +++ b/v_windows/v/old/examples/regex/regex_example.v @@ -0,0 +1,80 @@ +/********************************************************************** +* regex samples +* +* Copyright (c) 2019-2021 Dario Deledda. All rights reserved. +* Use of this source code is governed by an MIT license +* that can be found in the LICENSE file. 
+* +* This file contains a collection of regex samples +* +**********************************************************************/ +import regex + +/* +This simple function converts an HTML RGB value with 3 or 6 hex digits to a u32 value, +this function is not optimized and it is only for didactic purposes +example: #A0B0CC #A9F +*/ +fn convert_html_rgb(in_col string) u32 { + mut n_digit := if in_col.len == 4 { 1 } else { 2 } + mut col_mul := if in_col.len == 4 { 4 } else { 0 } + + // this is the regex query, it uses V string interpolation to customize the regex query + // NOTE: if you want to use escaped codes you must use the r"" (raw) strings, + // *** please remember that V interpolation doesn't work on raw strings. *** + + query := '#([a-fA-F0-9]{$n_digit})([a-fA-F0-9]{$n_digit})([a-fA-F0-9]{$n_digit})' + + mut re := regex.regex_opt(query) or { panic(err) } + start, end := re.match_string(in_col) + println('start: $start, end: $end') + mut res := u32(0) + if start >= 0 { + group_list := re.get_group_list() + r := ('0x' + in_col[group_list[0].start..group_list[0].end]).int() << col_mul + g := ('0x' + in_col[group_list[1].start..group_list[1].end]).int() << col_mul + b := ('0x' + in_col[group_list[2].start..group_list[2].end]).int() << col_mul + println('r: $r g: $g b: $b') + res = u32(r) << 16 | u32(g) << 8 | u32(b) + } + return res +} + +/* +This function demonstrates the use of the named groups +*/ +fn convert_html_rgb_n(in_col string) u32 { + mut n_digit := if in_col.len == 4 { 1 } else { 2 } + mut col_mul := if in_col.len == 4 { 4 } else { 0 } + + query := '#(?P<red>[a-fA-F0-9]{$n_digit})(?P<green>[a-fA-F0-9]{$n_digit})(?P<blue>[a-fA-F0-9]{$n_digit})' + + mut re := regex.regex_opt(query) or { panic(err) } + start, end := re.match_string(in_col) + println('start: $start, end: $end') + mut res := u32(0) + if start >= 0 { + red_s, red_e := re.get_group_bounds_by_name('red') + r := ('0x' + in_col[red_s..red_e]).int() << col_mul + + green_s, green_e := 
re.get_group_bounds_by_name('green') + g := ('0x' + in_col[green_s..green_e]).int() << col_mul + + blue_s, blue_e := re.get_group_bounds_by_name('blue') + b := ('0x' + in_col[blue_s..blue_e]).int() << col_mul + + println('r: $r g: $g b: $b') + res = u32(r) << 16 | u32(g) << 8 | u32(b) + } + return res +} + +fn main() { + // convert HTML rgb color using groups + println(convert_html_rgb('#A0b0Cc').hex()) + println(convert_html_rgb('#ABC').hex()) + + // convert HTML rgb color using named groups + println(convert_html_rgb_n('#A0B0CC').hex()) + println(convert_html_rgb_n('#ABC').hex()) +} diff --git a/v_windows/v/old/examples/regex/regex_with_memoization.v b/v_windows/v/old/examples/regex/regex_with_memoization.v new file mode 100644 index 0000000..28e346b --- /dev/null +++ b/v_windows/v/old/examples/regex/regex_with_memoization.v @@ -0,0 +1,127 @@ +import os + +fn regex_match(src string, pat string) bool { + src_size := src.len + 1 + pat_size := pat.len + 1 + mut memo := [][]int{len: src_size, init: []int{len: pat_size, init: -1}} + return regex_match_core(src, pat, 0, 0, mut memo) +} + +fn regex_match_core(src string, pat string, src_pos int, pat_pos int, mut memo [][]int) bool { + if memo[src_pos][pat_pos] != -1 { + return memo[src_pos][pat_pos] == 1 + } + mut spos := src_pos + mut ppos := pat_pos + if spos >= src.len && ppos >= pat.len { + memo[src_pos][pat_pos] = 1 + return true + } else if spos < src.len && ppos >= pat.len { + memo[src_pos][pat_pos] = 0 + return false + } else if spos >= src.len && ppos < pat.len { + if pat[ppos] == `\\` { + ppos++ + } + res := ppos + 1 < pat.len && pat[ppos + 1] in [`*`, `?`] + && regex_match_core(src, pat, spos, ppos + 2, mut memo) + memo[src_pos][pat_pos] = if res { 1 } else { 0 } + return res + } else { + first_is_bslash := pat[ppos] == `\\` + if first_is_bslash { + ppos++ + } + first_bslash_and_match := first_is_bslash && ppos < pat.len + && (((pat[ppos] == `d` && src[spos].is_digit()) + || (pat[ppos] == `D` && 
!src[spos].is_digit()) + || (pat[ppos] == `s` && src[spos].is_space()) + || (pat[ppos] == `S` && !src[spos].is_space()) + || (pat[ppos] == `w` && (src[spos].is_digit() || src[spos].is_letter() + || src[spos] == `_`)) || (pat[ppos] == `W` && !(src[spos].is_digit() + || src[spos].is_letter() || src[spos] == `_`))) + || (pat[ppos] in [`d`, `D`, `s`, `S`, `w`, `W`] && ppos + 1 < pat.len + && pat[ppos + 1] in [`*`, `?`, `+`]) + || (pat[ppos] !in [`d`, `D`, `s`, `S`, `w`, `W`] && src[spos] == pat[ppos])) + if ppos + 1 < pat.len { + match pat[ppos + 1] { + `*` { + if first_bslash_and_match { + res := regex_match_core(src, pat, spos + 1, ppos - 1, mut memo) + || regex_match_core(src, pat, spos, ppos + 2, mut memo) + memo[src_pos][pat_pos] = if res { 1 } else { 0 } + return res + } else if src[spos] == pat[ppos] || pat[ppos] == `.` { + res := regex_match_core(src, pat, spos + 1, ppos, mut memo) + || regex_match_core(src, pat, spos, ppos + 2, mut memo) + memo[src_pos][pat_pos] = if res { 1 } else { 0 } + return res + } else { + res := regex_match_core(src, pat, spos, ppos + 2, mut memo) + memo[src_pos][pat_pos] = if res { 1 } else { 0 } + return res + } + } + `+` { + if first_bslash_and_match { + res := regex_match_core(src, pat, spos + 1, ppos - 1, mut memo) + || regex_match_core(src, pat, spos + 1, ppos + 2, mut memo) + memo[src_pos][pat_pos] = if res { 1 } else { 0 } + return res + } else if src[spos] == pat[ppos] || pat[ppos] == `.` { + res := regex_match_core(src, pat, spos + 1, ppos, mut memo) + || regex_match_core(src, pat, spos + 1, ppos + 2, mut memo) + memo[src_pos][pat_pos] = if res { 1 } else { 0 } + return res + } else { + memo[src_pos][pat_pos] = 0 + return false + } + } + `?` { + if first_bslash_and_match || src[spos] == pat[ppos] || pat[ppos] == `.` { + res := regex_match_core(src, pat, spos + 1, ppos + 2, mut memo) + || regex_match_core(src, pat, spos, ppos + 2, mut memo) + memo[src_pos][pat_pos] = if res { 1 } else { 0 } + return res + } else { + res := 
regex_match_core(src, pat, spos, ppos + 2, mut memo) + memo[src_pos][pat_pos] = if res { 1 } else { 0 } + return res + } + } + else {} + } + } + if first_is_bslash { + res := first_bslash_and_match + && regex_match_core(src, pat, spos + 1, ppos + 1, mut memo) + memo[src_pos][pat_pos] = if res { 1 } else { 0 } + return res + } else { + res := (src[spos] == pat[ppos] || pat[ppos] == `.`) && pat[ppos] != `\\` + && regex_match_core(src, pat, spos + 1, ppos + 1, mut memo) + memo[src_pos][pat_pos] = if res { 1 } else { 0 } + return res + } + } +} + +fn main() { + mut cnt := 0 + println('currently supported patterns: . ? + * \\ \\d \\D \\s \\S \\w \\W') + println('example: source `address@domain.net` matches pattern `\\w+@domain\\.net`') + println('enter `exit` to quit\n') + for { + cnt++ + src := os.input('[$cnt] enter source string: ') + if src == 'exit' { + break + } + pat := os.input('[$cnt] enter pattern string: ') + if pat == 'exit' { + break + } + println('[$cnt] whether `$src` matches `$pat`: ${regex_match(src, pat)}') + } +} |