aboutsummaryrefslogtreecommitdiff
path: root/v_windows/v/vlib/net/html
diff options
context:
space:
mode:
authorIndrajith K L2022-12-03 17:00:20 +0530
committerIndrajith K L2022-12-03 17:00:20 +0530
commitf5c4671bfbad96bf346bd7e9a21fc4317b4959df (patch)
tree2764fc62da58f2ba8da7ed341643fc359873142f /v_windows/v/vlib/net/html
downloadcli-tools-windows-f5c4671bfbad96bf346bd7e9a21fc4317b4959df.tar.gz
cli-tools-windows-f5c4671bfbad96bf346bd7e9a21fc4317b4959df.tar.bz2
cli-tools-windows-f5c4671bfbad96bf346bd7e9a21fc4317b4959df.zip
Adds most of the toolsHEADmaster
Diffstat (limited to 'v_windows/v/vlib/net/html')
-rw-r--r--v_windows/v/vlib/net/html/README.md16
-rw-r--r--v_windows/v/vlib/net/html/data_structures.v91
-rw-r--r--v_windows/v/vlib/net/html/dom.v189
-rw-r--r--v_windows/v/vlib/net/html/dom_test.v56
-rw-r--r--v_windows/v/vlib/net/html/html.v18
-rw-r--r--v_windows/v/vlib/net/html/html_test.v15
-rw-r--r--v_windows/v/vlib/net/html/parser.v260
-rw-r--r--v_windows/v/vlib/net/html/parser_test.v41
-rw-r--r--v_windows/v/vlib/net/html/tag.v68
9 files changed, 754 insertions, 0 deletions
diff --git a/v_windows/v/vlib/net/html/README.md b/v_windows/v/vlib/net/html/README.md
new file mode 100644
index 0000000..a92a6e6
--- /dev/null
+++ b/v_windows/v/vlib/net/html/README.md
@@ -0,0 +1,16 @@
+net/html is an HTML parser written in pure V.
+
+## Usage
+```v oksyntax
+import net.html
+
+fn main() {
+ doc := html.parse('<html><body><h1 class="title">Hello world!</h1></body></html>')
+ tag := doc.get_tag('h1')[0] // <h1>Hello world!</h1>
+ println(tag.name) // h1
+ println(tag.content) // Hello world!
+ println(tag.attributes) // {'class':'title'}
+ println(tag.str()) // <h1 class="title">Hello world!</h1>
+}
+```
+More examples can be found in [`parser_test.v`](parser_test.v) and [`html_test.v`](html_test.v).
diff --git a/v_windows/v/vlib/net/html/data_structures.v b/v_windows/v/vlib/net/html/data_structures.v
new file mode 100644
index 0000000..688b756
--- /dev/null
+++ b/v_windows/v/vlib/net/html/data_structures.v
@@ -0,0 +1,91 @@
+module html
+
+const (
+ // null_element is the sentinel returned by `Stack.peek` and `Stack.pop` when the stack is empty.
+ null_element = int(0x80000000)
+)
+
+// Stack is a stack of ints backed by an array.
+// Only the first `size` entries of `elements` are live; slots past `size`
+// are leftovers from earlier pops and get overwritten by later pushes.
+struct Stack {
+mut:
+ elements []int
+ size int
+}
+
+// is_null reports whether `data` is the `null_element` sentinel.
+[inline]
+fn is_null(data int) bool {
+ return data == html.null_element
+}
+
+// is_empty reports whether the stack holds no live items.
+[inline]
+fn (stack Stack) is_empty() bool {
+ return stack.size <= 0
+}
+
+// peek returns the item on top of the stack without removing it,
+// or `null_element` when the stack is empty.
+fn (stack Stack) peek() int {
+	if stack.is_empty() {
+		return html.null_element
+	}
+	return stack.elements[stack.size - 1]
+}
+
+// pop removes and returns the item on top of the stack,
+// or returns `null_element` (leaving the stack untouched) when it is empty.
+fn (mut stack Stack) pop() int {
+	if stack.is_empty() {
+		return html.null_element
+	}
+	// The backing array keeps its length; only the live-size counter shrinks.
+	stack.size--
+	return stack.elements[stack.size]
+}
+
+// push places `item` on top of the stack, reusing a slot left behind by an
+// earlier pop when one is available and growing the backing array otherwise.
+fn (mut stack Stack) push(item int) {
+	has_spare_slot := stack.size < stack.elements.len
+	if has_spare_slot {
+		stack.elements[stack.size] = item
+	} else {
+		stack.elements << item
+	}
+	stack.size++
+}
+
+// BTree stores tags in a flat array and records the tree shape by index.
+// `node_pointer` is the index of the current node; `childrens[i]` lists the
+// indexes of the children of node `i` and `parents[i]` is the index of its parent.
+struct BTree {
+mut:
+ all_tags []Tag
+ node_pointer int
+ childrens [][]int
+ parents []int
+}
+
+// add_children stores `tag` and returns its index in `all_tags`.
+// The very first tag stored becomes the root and gets no parent/child links;
+// every later tag is recorded as a child of the node at `node_pointer`.
+fn (mut btree BTree) add_children(tag Tag) int {
+	btree.all_tags << tag
+	new_index := btree.all_tags.len - 1
+	if new_index > 0 {
+		// Make sure the current node has a children list to append to.
+		for btree.childrens.len <= btree.node_pointer {
+			btree.childrens << []int{}
+		}
+		btree.childrens[btree.node_pointer] << new_index
+		// Keep `parents` as long as `all_tags` so it can be indexed by tag index.
+		for btree.parents.len < btree.all_tags.len {
+			btree.parents << 0
+		}
+		btree.parents[new_index] = btree.node_pointer
+	}
+	return new_index
+}
+
+// get_children returns the child indexes of the node at `node_pointer`.
+// NOTE(review): indexes `childrens` without a bounds check; a node that never
+// received a child may have no entry yet - confirm callers guard against that.
+[inline]
+fn (btree BTree) get_children() []int {
+ return btree.childrens[btree.node_pointer]
+}
+
+// get_parent returns the parent index of the node at `node_pointer`.
+[inline]
+fn (btree BTree) get_parent() int {
+ return btree.parents[btree.node_pointer]
+}
+
+// get_stored returns the tag stored at `node_pointer`.
+[inline]
+fn (btree BTree) get_stored() Tag {
+ return btree.all_tags[btree.node_pointer]
+}
+
+// move_pointer makes the node at index `to` the current node.
+// Indexes outside `all_tags` (negative or past the end) are ignored, so the
+// pointer always refers to a stored tag.
+fn (mut btree BTree) move_pointer(to int) {
+	if to >= 0 && to < btree.all_tags.len {
+		btree.node_pointer = to
+	}
+}
diff --git a/v_windows/v/vlib/net/html/dom.v b/v_windows/v/vlib/net/html/dom.v
new file mode 100644
index 0000000..b145ddc
--- /dev/null
+++ b/v_windows/v/vlib/net/html/dom.v
@@ -0,0 +1,189 @@
+module html
+
+import os
+
+// The W3C Document Object Model (DOM) is a platform and language-neutral
+// interface that allows programs and scripts to dynamically access and
+// update the content, structure, and style of a document.
+//
+// https://www.w3.org/TR/WD-DOM/introduction.html
+pub struct DocumentObjectModel {
+mut:
+ // root is the first tag of the list handed to `construct`.
+ root &Tag
+ // constructed is set once `construct` has run.
+ constructed bool
+ btree BTree
+ // all_tags holds the root plus every named, non-closing tag, in document order.
+ all_tags []&Tag
+ // all_attributes maps an attribute name to the tags that carry it.
+ all_attributes map[string][]&Tag
+ close_tags map[string]bool // closing-tag names (with the leading `/`) seen by the parser. TODO: count how many times each tag is closed so nesting can be parsed correctly
+ // attributes maps an attribute name to the distinct values seen for it.
+ attributes map[string][]string
+ // tag_attributes maps an attribute name to lists of tags, one list per
+ // distinct value, indexed in parallel with `attributes`.
+ tag_attributes map[string][][]&Tag
+ // tag_type maps a tag name to the tags with that name.
+ tag_type map[string][]&Tag
+ // debug_file receives the output of `print_debug` in debug builds.
+ debug_file os.File
+}
+
+// print_debug writes a non-empty `data` line to `debug_file`.
+// It only does anything when compiled with the `debug` flag, and panics if the write fails.
+[if debug]
+fn (mut dom DocumentObjectModel) print_debug(data string) {
+ $if debug {
+ if data.len > 0 {
+ dom.debug_file.writeln(data) or { panic(err) }
+ }
+ }
+}
+
+// is_close_tag reports whether `tag` is a closing tag, i.e. its name starts with `/`.
+[inline]
+fn is_close_tag(tag &Tag) bool {
+ return tag.name.len > 0 && tag.name[0] == `/`
+}
+
+// where_is returns the index of the value `item_name` in the list of values
+// known for the attribute `attribute_name`.
+// A value (or attribute) that has not been seen before is registered first,
+// so the call always yields a valid index into `dom.attributes[attribute_name]`.
+fn (mut dom DocumentObjectModel) where_is(item_name string, attribute_name string) int {
+	if attribute_name !in dom.attributes {
+		dom.attributes[attribute_name] = []string{}
+	}
+	mut known_values := dom.attributes[attribute_name]
+	for position, known in known_values {
+		if known == item_name {
+			return position
+		}
+	}
+	// Not seen yet: append it and hand back the index of the new entry.
+	known_values << item_name
+	dom.attributes[attribute_name] = known_values
+	return known_values.len - 1
+}
+
+// add_tag_attribute indexes `tag` under every (attribute name, attribute value)
+// pair it carries, so it can be found by `get_tag_by_attribute_value`.
+// `tag_attributes[name]` holds one bucket of tags per distinct value, at the
+// index `where_is` assigns to that value.
+fn (mut dom DocumentObjectModel) add_tag_attribute(tag &Tag) {
+	for attribute_name, attribute_value in tag.attributes {
+		location := dom.where_is(attribute_value, attribute_name)
+		if attribute_name !in dom.tag_attributes {
+			dom.tag_attributes[attribute_name] = []
+		}
+		// Grow only as far as needed: the bucket list must reach `location`,
+		// not gain an empty bucket on every call.
+		for dom.tag_attributes[attribute_name].len <= location {
+			mut buckets := dom.tag_attributes[attribute_name]
+			buckets << []&Tag{}
+			dom.tag_attributes[attribute_name] = buckets
+		}
+		mut bucket := dom.tag_attributes[attribute_name][location]
+		bucket << tag
+		dom.tag_attributes[attribute_name][location] = bucket
+	}
+}
+
+// add_tag_by_type indexes `tag` under its tag name for `get_tag`.
+fn (mut dom DocumentObjectModel) add_tag_by_type(tag &Tag) {
+	tag_name := tag.name
+	if tag_name in dom.tag_type {
+		mut same_name_tags := dom.tag_type[tag_name]
+		same_name_tags << tag
+		dom.tag_type[tag_name] = same_name_tags
+		return
+	}
+	// First tag with this name: start a new list.
+	dom.tag_type[tag_name] = [tag]
+}
+
+// add_tag_by_attribute indexes `tag` under the name of every attribute it
+// carries, for `get_tag_by_attribute`.
+fn (mut dom DocumentObjectModel) add_tag_by_attribute(tag &Tag) {
+	for attribute_name, _ in tag.attributes {
+		if attribute_name in dom.all_attributes {
+			mut tags_with_attribute := dom.all_attributes[attribute_name]
+			tags_with_attribute << tag
+			dom.all_attributes[attribute_name] = tags_with_attribute
+			continue
+		}
+		// First tag seen with this attribute: start a new list.
+		dom.all_attributes[attribute_name] = [tag]
+	}
+}
+
+// construct builds the DOM from the flat list of tags produced by the parser.
+// The first tag becomes the root. Every later named, non-closing tag is added
+// to the lookup indexes and attached as a child of the tag on top of the
+// open-tag stack; closing tags pop that stack.
+// An empty `tag_list` (empty or tag-less input) leaves the DOM empty instead
+// of panicking on `tag_list[0]`.
+fn (mut dom DocumentObjectModel) construct(tag_list []&Tag) {
+	dom.constructed = true
+	if tag_list.len == 0 {
+		// Nothing was parsed: keep the root the DOM was created with.
+		return
+	}
+	// temp_map maps a tag's index in `tag_list` (as a string) to its index in the btree.
+	mut temp_map := map[string]int{}
+	mut temp_int := null_element
+	mut temp_string := ''
+	// stack holds the `tag_list` indexes of the tags that are currently open.
+	mut stack := Stack{}
+	dom.btree = BTree{}
+	dom.root = tag_list[0]
+	dom.all_tags = [tag_list[0]]
+	temp_map['0'] = dom.btree.add_children(tag_list[0])
+	stack.push(0)
+	root_index := 0
+	for index := 1; index < tag_list.len; index++ {
+		mut tag := tag_list[index]
+		dom.print_debug(tag.str())
+		if is_close_tag(tag) {
+			temp_int = stack.peek()
+			temp_string = tag.name[1..]
+			// Discard open tags until the one being closed (or an already closed one) is on top.
+			for !is_null(temp_int) && temp_string != tag_list[temp_int].name
+				&& !tag_list[temp_int].closed {
+				dom.print_debug(temp_string + ' >> ' + tag_list[temp_int].name + ' ' +
+					(temp_string == tag_list[temp_int].name).str())
+				stack.pop()
+				temp_int = stack.peek()
+			}
+			temp_int = stack.peek()
+			temp_int = if !is_null(temp_int) { stack.pop() } else { root_index }
+			if is_null(temp_int) {
+				stack.push(root_index)
+			}
+			dom.print_debug('Removed ' + temp_string + ' -- ' + tag_list[temp_int].name)
+		} else if tag.name.len > 0 {
+			dom.add_tag_attribute(tag) // error here
+			dom.add_tag_by_attribute(tag)
+			dom.add_tag_by_type(tag)
+			dom.all_tags << tag
+			temp_int = stack.peek()
+			if !is_null(temp_int) {
+				dom.btree.move_pointer(temp_map[temp_int.str()])
+				temp_map[index.str()] = dom.btree.add_children(tag)
+				mut temp_tag := tag_list[temp_int]
+				position_in_parent := temp_tag.add_child(tag) // tag_list[temp_int] = temp_tag
+				tag.add_parent(temp_tag, position_in_parent)
+				/*
+				dom.print_debug("Added ${tag.name} as child of '" + tag_list[temp_int].name +
+					"' which now has ${dom.btree.get_children().len} childrens")
+				*/
+				dom.print_debug("Added $tag.name as child of '" + temp_tag.name +
+					"' which now has $temp_tag.children.len childrens")
+			} else { // dom.new_root(tag)
+				stack.push(root_index)
+			}
+			temp_string = '/' + tag.name
+			// Only tags for which a closing tag was seen, and that are not self-closed, stay open.
+			if temp_string in dom.close_tags && !tag.closed { // if tag ends with />
+				dom.print_debug('Pushed ' + temp_string)
+				stack.push(index)
+			}
+		}
+	} // println(tag_list[root_index]) for debug purposes
+	dom.root = tag_list[0]
+}
+
+// get_tag_by_attribute_value returns all the tags in the document whose
+// attribute `name` has the value `value`, or an empty array when there are none.
+pub fn (mut dom DocumentObjectModel) get_tag_by_attribute_value(name string, value string) []&Tag {
+	location := dom.where_is(value, name)
+	if location < dom.tag_attributes[name].len {
+		return dom.tag_attributes[name][location]
+	}
+	return []&Tag{}
+}
+
+// get_tag returns all the tags in the document with the given tag name,
+// or an empty array when there are none.
+pub fn (dom DocumentObjectModel) get_tag(name string) []&Tag {
+	if name !in dom.tag_type {
+		return []&Tag{}
+	}
+	return dom.tag_type[name]
+}
+
+// get_tag_by_attribute returns all the tags in the document that carry the
+// given attribute name, or an empty array when there are none.
+pub fn (dom DocumentObjectModel) get_tag_by_attribute(name string) []&Tag {
+	if name !in dom.all_attributes {
+		return []&Tag{}
+	}
+	return dom.all_attributes[name]
+}
+
+// get_root returns the root of the document, i.e. the first tag that was parsed.
+pub fn (dom DocumentObjectModel) get_root() &Tag {
+ return dom.root
+}
+
+// get_tags returns all of the tags stored in the document, in document order.
+// Closing tags are not included.
+pub fn (dom DocumentObjectModel) get_tags() []&Tag {
+ return dom.all_tags
+}
diff --git a/v_windows/v/vlib/net/html/dom_test.v b/v_windows/v/vlib/net/html/dom_test.v
new file mode 100644
index 0000000..d4fd292
--- /dev/null
+++ b/v_windows/v/vlib/net/html/dom_test.v
@@ -0,0 +1,56 @@
+module html
+
+import strings
+
+// generate_temp_html builds a small HTML document whose body holds four
+// `<div>` tags with ids `name_0`..`name_3` and classes `several-0`..`several-3`.
+fn generate_temp_html() string {
+ mut temp_html := strings.new_builder(200)
+ temp_html.write_string('<!doctype html><html><head><title>Giant String</title></head><body>')
+ for counter := 0; counter < 4; counter++ {
+ temp_html.write_string("<div id='name_$counter' ")
+ temp_html.write_string("class='several-$counter'>Look at $counter</div>")
+ }
+ temp_html.write_string('</body></html>')
+ return temp_html.str()
+}
+
+// test_search_by_tag_type checks lookups by tag name through `get_tag`.
+fn test_search_by_tag_type() {
+ dom := parse(generate_temp_html())
+ assert dom.get_tag('div').len == 4
+ assert dom.get_tag('head').len == 1
+ assert dom.get_tag('body').len == 1
+}
+
+// test_search_by_attribute_value checks that exactly one tag has `id='name_0'`.
+fn test_search_by_attribute_value() {
+ mut dom := parse(generate_temp_html())
+ // println(temp_html)
+ print('Amount ')
+ println(dom.get_tag_by_attribute_value('id', 'name_0'))
+ assert dom.get_tag_by_attribute_value('id', 'name_0').len == 1
+}
+
+// test_access_parent checks that every `<div>` has a parent and that all of
+// them share the same one.
+fn test_access_parent() {
+ mut dom := parse(generate_temp_html())
+ div_tags := dom.get_tag('div')
+ parent := div_tags[0].parent
+ assert parent != 0
+ for div_tag in div_tags {
+ assert div_tag.parent == parent
+ }
+}
+
+// test_search_by_attributes checks that all four divs are found by their `id` attribute name.
+fn test_search_by_attributes() {
+ dom := parse(generate_temp_html())
+ assert dom.get_tag_by_attribute('id').len == 4
+}
+
+// test_tags_used checks the total number of stored tags:
+// `!doctype`, `html`, `head`, `title`, `body` and the four divs.
+fn test_tags_used() {
+ dom := parse(generate_temp_html())
+ assert dom.get_tags().len == 9
+}
+
+// test_access_tag_fields checks that the name and attributes of the tags
+// returned by a lookup are accessible.
+fn test_access_tag_fields() {
+ dom := parse(generate_temp_html())
+ id_tags := dom.get_tag_by_attribute('id')
+ assert id_tags[0].name == 'div'
+ assert id_tags[1].attributes['class'] == 'several-1'
+}
diff --git a/v_windows/v/vlib/net/html/html.v b/v_windows/v/vlib/net/html/html.v
new file mode 100644
index 0000000..293b643
--- /dev/null
+++ b/v_windows/v/vlib/net/html/html.v
@@ -0,0 +1,18 @@
+module html
+
+import os
+
+// parse parses and returns the DOM from the given text.
+// It runs a fresh `Parser` over `text` and returns the DOM that parser built.
+pub fn parse(text string) DocumentObjectModel {
+ mut parser := Parser{}
+ parser.parse_html(text)
+ return parser.get_dom()
+}
+
+// parse_file parses and returns the DOM from the contents of a file.
+// When the file cannot be read, the error is not reported: an empty DOM whose
+// root is an empty `Tag` is returned instead.
+pub fn parse_file(filename string) DocumentObjectModel {
+ content := os.read_file(filename) or { return DocumentObjectModel{
+ root: &Tag{}
+ } }
+ return parse(content)
+}
diff --git a/v_windows/v/vlib/net/html/html_test.v b/v_windows/v/vlib/net/html/html_test.v
new file mode 100644
index 0000000..51271cd
--- /dev/null
+++ b/v_windows/v/vlib/net/html/html_test.v
@@ -0,0 +1,15 @@
+module html
+
+// test_parse checks the name, content, attributes and string form of a parsed `<h1>` tag.
+// The expectations pin the current behavior, in which the parser also records an
+// empty-named attribute (see the TODO below).
+fn test_parse() {
+ doc := parse('<html><body><h1 class="title">Hello world!</h1></body></html>')
+ tags := doc.get_tag('h1')
+ assert tags.len == 1
+ h1_tag := tags[0] // <h1>Hello world!</h1>
+ assert h1_tag.name == 'h1'
+ assert h1_tag.content == 'Hello world!'
+ assert h1_tag.attributes.len == 2
+ // TODO: do not remove. Attributes must not have an empty attr.
+ // assert h1_tag.attributes.len == 1
+ assert h1_tag.str() == '<h1 class="title" >Hello world!</h1>'
+ // assert h1_tag.str() == '<h1 class="title">Hello world!</h1>'
+}
diff --git a/v_windows/v/vlib/net/html/parser.v b/v_windows/v/vlib/net/html/parser.v
new file mode 100644
index 0000000..5b9bbd1
--- /dev/null
+++ b/v_windows/v/vlib/net/html/parser.v
@@ -0,0 +1,260 @@
+module html
+
+import os
+import strings
+
+// LexicalAttributes is the lexer state that `Parser.split_parse` carries
+// from one byte (and one call) to the next.
+struct LexicalAttributes {
+mut:
+ // current_tag is the tag being assembled.
+ current_tag &Tag
+ // open_tag is true between a `<` and its matching `>`.
+ open_tag bool
+ // open_code is true while inside the content of a tag listed in `code_tags`.
+ open_code bool
+ // open_string is 0 outside a quoted string, 1 inside "..." and 2 inside '...'.
+ open_string int
+ // open_comment is true while inside a `<!-- ... -->` comment.
+ open_comment bool
+ is_attribute bool
+ // opened_code_type is the name of the code tag whose content is being read.
+ opened_code_type string
+ // line_count is incremented by `parse_html` for every line it feeds to the lexer.
+ line_count int
+ // lexeme_builder accumulates the bytes of the lexeme being read.
+ lexeme_builder strings.Builder = strings.new_builder(100)
+ // code_tags lists the tags whose content is kept verbatim instead of being parsed.
+ code_tags map[string]bool = {
+ 'script': true
+ 'style': true
+ }
+}
+
+// Parser is responsible for reading the HTML strings and converting them into a `DocumentObjectModel`.
+pub struct Parser {
+mut:
+ // dom is the document built from `tags` by `parse_html` or `get_dom`.
+ dom DocumentObjectModel
+ lexical_attributes LexicalAttributes = LexicalAttributes{
+ current_tag: &Tag{}
+ }
+ filename string = 'direct-parse'
+ // initialized is set by `init` so that it runs only once.
+ initialized bool
+ // tags is the flat list of tags (opening, closing and text) in the order they were read.
+ tags []&Tag
+ // debug_file receives the output of `print_debug` in debug builds.
+ debug_file os.File
+}
+
+// add_code_tag registers `name` as a tag whose content the parser must not interpret.
+// For example, after `add_code_tag('script')` the content of every `<script>` tag is
+// skipped over by the parser: the content is still kept, but a `>` or `<` inside it
+// does not confuse the parser. This is useful for HTML or XML with custom tags.
+// An empty `name` is ignored.
+pub fn (mut parser Parser) add_code_tag(name string) {
+ if name.len <= 0 {
+ return
+ }
+ parser.lexical_attributes.code_tags[name] = true
+}
+
+// builder_str returns the text accumulated so far in the lexeme builder,
+// without clearing the builder.
+[inline]
+fn (parser Parser) builder_str() string {
+ return parser.lexical_attributes.lexeme_builder.after(0)
+}
+
+// print_debug writes a non-empty `data` line to `debug_file`.
+// It only does anything when compiled with the `debug` flag, and panics if the write fails.
+[if debug]
+fn (mut parser Parser) print_debug(data string) {
+ $if debug {
+ if data.len > 0 {
+ parser.debug_file.writeln(data) or { panic(err) }
+ }
+ }
+}
+
+// verify_end_comment reports whether the current lexeme ends with `--`, i.e.
+// whether a following `>` would close an HTML comment.
+// When `remove` is true and the lexeme does end with `--`, those two bytes are
+// dropped from the lexeme builder.
+// A lexeme shorter than two bytes is never an end of comment; it is handled
+// without indexing past the start of the string.
+fn (mut parser Parser) verify_end_comment(remove bool) bool {
+	lexeme := parser.builder_str()
+	is_end_comment := lexeme.ends_with('--')
+	if is_end_comment && remove {
+		parser.lexical_attributes.lexeme_builder.go_back(2)
+	}
+	return is_end_comment
+}
+
+// blank_string reports whether `data` consists only of tabs and spaces.
+// An empty string counts as blank.
+fn blank_string(data string) bool {
+	for chr in data {
+		// 9 is a tab, 32 is a space; anything else makes the string non-blank.
+		if chr != 9 && chr != 32 {
+			return false
+		}
+	}
+	return true
+}
+
+// init initializes the parser.
+// It creates an empty DOM, clears the tag list and resets the tag being
+// assembled. Calls after the first one do nothing.
+fn (mut parser Parser) init() {
+ if parser.initialized {
+ return
+ }
+ parser.dom = DocumentObjectModel{
+ debug_file: parser.debug_file
+ root: &Tag{}
+ }
+ // NOTE(review): `add_code_tag` ignores empty names, so this call has no effect.
+ parser.add_code_tag('')
+ parser.tags = []&Tag{}
+ parser.dom.close_tags['/!document'] = true
+ parser.lexical_attributes.current_tag = &Tag{}
+ parser.initialized = true
+}
+
+// generate_tag finishes the tag being assembled: it is appended to
+// `parser.tags` when it has a name or content, and a fresh empty tag takes its place.
+// Nothing happens while a tag is still open (between `<` and `>`).
+fn (mut parser Parser) generate_tag() {
+ if parser.lexical_attributes.open_tag {
+ return
+ }
+ if parser.lexical_attributes.current_tag.name.len > 0
+ || parser.lexical_attributes.current_tag.content.len > 0 {
+ parser.tags << parser.lexical_attributes.current_tag
+ }
+ parser.lexical_attributes.current_tag = &Tag{}
+}
+
+// split_parse parses the HTML fragment `data`.
+// The fragment is lexed one byte at a time. All lexer state lives in
+// `parser.lexical_attributes`, so a document may be fed in several calls and a
+// fragment may end in the middle of a tag. Finished tags are appended to
+// `parser.tags`; call `finalize` (or `get_dom`) after the last fragment.
+pub fn (mut parser Parser) split_parse(data string) {
+ parser.init()
+ for chr in data {
+ // returns true if byte is a " or '
+ is_quote := chr == `"` || chr == `'`
+ string_code := match chr {
+ `"` { 1 } // "
+ `'` { 2 } // '
+ else { 0 }
+ }
+ // State 1: inside the content of a code tag; every byte is kept verbatim.
+ if parser.lexical_attributes.open_code { // here will verify all needed to know if open_code finishes and string in code
+ parser.lexical_attributes.lexeme_builder.write_b(chr)
+ if parser.lexical_attributes.open_string > 0
+ && parser.lexical_attributes.open_string == string_code {
+ parser.lexical_attributes.open_string = 0
+ } else if is_quote {
+ parser.lexical_attributes.open_string = string_code
+ } else if chr == `>` { // only execute verification if is a > // here will verify < to know if code tag is finished
+ name_close_tag := '</$parser.lexical_attributes.opened_code_type>'
+ if parser.builder_str().to_lower().ends_with(name_close_tag) {
+ parser.lexical_attributes.open_code = false
+ // need to modify lexeme_builder to add script text as a content in next loop (not gave error in dom)
+ parser.lexical_attributes.lexeme_builder.go_back(name_close_tag.len)
+ parser.lexical_attributes.current_tag.closed = true
+ parser.lexical_attributes.current_tag.close_type = .new_tag
+ }
+ }
+ // State 2: inside a comment; its text is discarded once `-->` is reached.
+ } else if parser.lexical_attributes.open_comment {
+ if chr == `>` && parser.verify_end_comment(false) { // close tag '>'
+ // parser.print_debug(parser.builder_str() + " >> " + parser.lexical_attributes.line_count.str())
+ parser.lexical_attributes.lexeme_builder.go_back_to(0)
+ parser.lexical_attributes.open_comment = false
+ parser.lexical_attributes.open_tag = false
+ } else {
+ parser.lexical_attributes.lexeme_builder.write_b(chr)
+ }
+ // State 3: inside a quoted string within a tag.
+ } else if parser.lexical_attributes.open_string > 0 {
+ if parser.lexical_attributes.open_string == string_code {
+ parser.lexical_attributes.open_string = 0
+ parser.lexical_attributes.lexeme_builder.write_b(chr)
+ temp_lexeme := parser.builder_str()
+ if parser.lexical_attributes.current_tag.last_attribute != '' {
+ lattr := parser.lexical_attributes.current_tag.last_attribute
+ // Strip the surrounding quotes before storing the value.
+ nval := temp_lexeme.substr(1, temp_lexeme.len - 1)
+ // parser.print_debug(lattr + " = " + temp_lexeme)
+ parser.lexical_attributes.current_tag.attributes[lattr] = nval
+ parser.lexical_attributes.current_tag.last_attribute = ''
+ } else {
+ parser.lexical_attributes.current_tag.attributes[temp_lexeme.to_lower()] = '' // parser.print_debug(temp_lexeme)
+ }
+ parser.lexical_attributes.lexeme_builder.go_back_to(0)
+ } else {
+ parser.lexical_attributes.lexeme_builder.write_b(chr)
+ }
+ // State 4: between `<` and `>`, reading the tag name and its attributes.
+ } else if parser.lexical_attributes.open_tag {
+ if parser.lexical_attributes.lexeme_builder.len == 0 && is_quote {
+ parser.lexical_attributes.open_string = string_code
+ parser.lexical_attributes.lexeme_builder.write_b(chr)
+ } else if chr == `>` { // close tag >
+ complete_lexeme := parser.builder_str().to_lower()
+ parser.lexical_attributes.current_tag.closed = (complete_lexeme.len > 0
+ && complete_lexeme[complete_lexeme.len - 1] == `/`) // if equals to /
+ if complete_lexeme.len > 0 && complete_lexeme[0] == `/` {
+ parser.dom.close_tags[complete_lexeme] = true
+ }
+ /*
+ else if complete_lexeme.len > 0 && complete_lexeme[complete_lexeme.len - 1] == 47 { // if end tag like "/>"
+ parser.lexical_attributes.current_tag.closed = true
+ }
+ */
+ if parser.lexical_attributes.current_tag.name == '' {
+ parser.lexical_attributes.current_tag.name = complete_lexeme
+ } else if complete_lexeme != '/' {
+ parser.lexical_attributes.current_tag.attributes[complete_lexeme] = ''
+ }
+ parser.lexical_attributes.open_tag = false
+ parser.lexical_attributes.lexeme_builder.go_back_to(0) // if tag name is code
+ if parser.lexical_attributes.current_tag.name in parser.lexical_attributes.code_tags {
+ parser.lexical_attributes.open_code = true
+ parser.lexical_attributes.opened_code_type = parser.lexical_attributes.current_tag.name
+ }
+ // parser.print_debug(parser.lexical_attributes.current_tag.name)
+ } else if chr !in [byte(9), ` `, `=`, `\n`] { // Tab, space, = and \n
+ parser.lexical_attributes.lexeme_builder.write_b(chr)
+ } else if chr != 10 {
+ // A tab, space or `=` ends the current lexeme: it is the tag name or an attribute name.
+ complete_lexeme := parser.builder_str().to_lower()
+ if parser.lexical_attributes.current_tag.name == '' {
+ parser.lexical_attributes.current_tag.name = complete_lexeme
+ } else {
+ parser.lexical_attributes.current_tag.attributes[complete_lexeme] = ''
+ parser.lexical_attributes.current_tag.last_attribute = ''
+ if chr == `=` { // if was a =
+ parser.lexical_attributes.current_tag.last_attribute = complete_lexeme
+ }
+ }
+ parser.lexical_attributes.lexeme_builder.go_back_to(0)
+ }
+ if parser.builder_str() == '!--' {
+ parser.lexical_attributes.open_comment = true
+ }
+ // State 5: a `<` outside any tag; the text read so far becomes content.
+ } else if chr == `<` { // open tag '<'
+ temp_string := parser.builder_str()
+ if parser.lexical_attributes.lexeme_builder.len >= 1 {
+ // Non-blank text that follows a closing tag (47 is `/`) becomes a separate `text` tag.
+ if parser.lexical_attributes.current_tag.name.len > 1
+ && parser.lexical_attributes.current_tag.name[0] == 47
+ && !blank_string(temp_string) {
+ parser.tags << &Tag{
+ name: 'text'
+ content: temp_string
+ }
+ } else {
+ parser.lexical_attributes.current_tag.content = temp_string // verify later who has this content
+ }
+ }
+ // parser.print_debug(parser.lexical_attributes.current_tag.str())
+ parser.lexical_attributes.lexeme_builder.go_back_to(0)
+ parser.generate_tag()
+ parser.lexical_attributes.open_tag = true
+ // State 6: plain text outside any tag.
+ } else {
+ parser.lexical_attributes.lexeme_builder.write_b(chr)
+ }
+ }
+}
+
+// parse_html parses the given HTML string and builds the DOM from it.
+// The input is split into lines and each line is fed to `split_parse`,
+// so the line breaks themselves are not passed to the lexer.
+pub fn (mut parser Parser) parse_html(data string) {
+ parser.init()
+ mut lines := data.split_into_lines()
+ for line in lines {
+ parser.lexical_attributes.line_count++
+ parser.split_parse(line)
+ }
+ parser.generate_tag()
+ parser.dom.debug_file = parser.debug_file
+ parser.dom.construct(parser.tags)
+}
+
+// finalize finishes the parsing stage by storing the tag that is still being assembled.
+// Call it after the last `split_parse`.
+[inline]
+pub fn (mut parser Parser) finalize() {
+ parser.generate_tag()
+}
+
+// get_dom returns the parser's current DOM representation.
+// If the DOM has not been constructed yet, the pending tag is stored and the
+// DOM is built from the tags read so far.
+pub fn (mut parser Parser) get_dom() DocumentObjectModel {
+ if !parser.dom.constructed {
+ parser.generate_tag()
+ parser.dom.construct(parser.tags)
+ }
+ return parser.dom
+}
diff --git a/v_windows/v/vlib/net/html/parser_test.v b/v_windows/v/vlib/net/html/parser_test.v
new file mode 100644
index 0000000..274a47c
--- /dev/null
+++ b/v_windows/v/vlib/net/html/parser_test.v
@@ -0,0 +1,41 @@
+module html
+
+import strings
+
+// test_split_parse feeds a document in fragments that break in the middle of
+// tags and text, and checks that the tags are still recognised.
+// The 11 tags are the opening and closing tags in the input; `tags[3]` is `<title>`.
+fn test_split_parse() {
+ mut parser := Parser{}
+ parser.init()
+ parser.split_parse('<!doctype htm')
+ parser.split_parse('l public')
+ parser.split_parse('><html><he')
+ parser.split_parse('ad><t')
+ parser.split_parse('itle> Hum... ')
+ parser.split_parse('A Tit')
+ parser.split_parse('\nle</ti\ntle>')
+ parser.split_parse('</\nhead><body>\t\t\t<h3>')
+ parser.split_parse('Nice Test!</h3>')
+ parser.split_parse('</bo\n\n\ndy></html>')
+ parser.finalize()
+ assert parser.tags.len == 11
+ assert parser.tags[3].content == ' Hum... A Tit\nle'
+}
+
+// test_giant_string parses a document with 2000 divs.
+// 4009 = 2000 opening and 2000 closing div tags, plus the 9 surrounding tags
+// (`!doctype`, `html`, `head`, `title`, `/title`, `/head`, `body`, `/body`, `/html`).
+fn test_giant_string() {
+ mut temp_html := strings.new_builder(200)
+ mut parser := Parser{}
+ temp_html.write_string('<!doctype html><html><head><title>Giant String</title></head><body>')
+ for counter := 0; counter < 2000; counter++ {
+ temp_html.write_string("<div id='name_$counter' class='several-$counter'>Look at $counter</div>")
+ }
+ temp_html.write_string('</body></html>')
+ parser.parse_html(temp_html.str())
+ assert parser.tags.len == 4009
+}
+
+// test_script_tag checks that the content of a `<script>` tag is kept as-is,
+// including the `>` it contains. `parse_html` works line by line, so the
+// expected content has its line breaks removed.
+fn test_script_tag() {
+ mut parser := Parser{}
+ script_content := "\nvar googletag = googletag || {};\ngoogletag.cmd = googletag.cmd || [];if(3 > 5) {console.log('Birl');}\n"
+ temp_html := '<html><body><script>$script_content</script></body></html>'
+ parser.parse_html(temp_html)
+ assert parser.tags[2].content.len == script_content.replace('\n', '').len
+}
diff --git a/v_windows/v/vlib/net/html/tag.v b/v_windows/v/vlib/net/html/tag.v
new file mode 100644
index 0000000..62260c0
--- /dev/null
+++ b/v_windows/v/vlib/net/html/tag.v
@@ -0,0 +1,68 @@
+module html
+
+import strings
+
+// CloseTagType tells `Tag.str` how a closed tag is written out:
+// `in_name` renders it self-closed (`/>`), `new_tag` renders a separate closing tag.
+enum CloseTagType {
+ in_name
+ new_tag
+}
+
+// Tag holds the information of an HTML tag.
+[heap]
+pub struct Tag {
+pub mut:
+ name string
+ // content is the text found directly after the opening tag.
+ content string
+ children []&Tag
+ attributes map[string]string // attributes will be like map[name]value
+ // last_attribute is the name of the attribute that is waiting for its value while parsing.
+ last_attribute string
+ // parent is 0 until `add_parent` is called.
+ parent &Tag = 0
+ // position_in_parent is the value `add_child` returned when the tag was attached to its parent.
+ position_in_parent int
+ closed bool
+ close_type CloseTagType = .in_name
+}
+
+// add_parent records `t` as the parent of the tag, together with the tag's
+// position among the children of `t`.
+fn (mut tag Tag) add_parent(t &Tag, position int) {
+ tag.position_in_parent = position
+ tag.parent = t
+}
+
+// add_child appends `t` to the children of the tag and returns the number of
+// children after the append, i.e. the 1-based position of `t`.
+fn (mut tag Tag) add_child(t &Tag) int {
+ tag.children << t
+ return tag.children.len
+}
+
+// text returns the text contents of the tag: its own content with the line
+// breaks removed, followed by the text of all of its children.
+// A tag whose name starts with `br` yields a single line break.
+pub fn (tag Tag) text() string {
+	if tag.name.starts_with('br') {
+		return '\n'
+	}
+	mut collected := strings.new_builder(200)
+	own_text := tag.content.replace('\n', '')
+	collected.write_string(own_text)
+	for child in tag.children {
+		child_text := child.text()
+		collected.write_string(child_text)
+	}
+	return collected.str()
+}
+
+// str returns the HTML representation of the tag: the opening tag with its
+// attributes, the content, the children and, unless the tag is self-closed,
+// the closing tag.
+pub fn (tag &Tag) str() string {
+	mut out := strings.new_builder(200)
+	out.write_string('<$tag.name')
+	for key, value in tag.attributes {
+		out.write_string(' $key')
+		// Attributes without a value are written as a bare name.
+		if value.len > 0 {
+			out.write_string('="$value"')
+		}
+	}
+	self_closed := tag.closed && tag.close_type == .in_name
+	if self_closed {
+		out.write_string('/>')
+	} else {
+		out.write_string('>')
+	}
+	out.write_string(tag.content)
+	for child in tag.children {
+		out.write_string(child.str())
+	}
+	if !tag.closed || tag.close_type == .new_tag {
+		out.write_string('</$tag.name>')
+	}
+	return out.str()
+}