From f5c4671bfbad96bf346bd7e9a21fc4317b4959df Mon Sep 17 00:00:00 2001
From: Indrajith K L
Date: Sat, 3 Dec 2022 17:00:20 +0530
Subject: Adds most of the tools
---
 v_windows/v/old/vlib/net/html/README.md         |  16 ++
 v_windows/v/old/vlib/net/html/data_structures.v |  91 +++++++++
 v_windows/v/old/vlib/net/html/dom.v             | 189 +++++++++++++++++
 v_windows/v/old/vlib/net/html/dom_test.v        |  56 +++++
 v_windows/v/old/vlib/net/html/html.v            |  18 ++
 v_windows/v/old/vlib/net/html/html_test.v       |  15 ++
 v_windows/v/old/vlib/net/html/parser.v          | 260 ++++++++++++++++++++++++
 v_windows/v/old/vlib/net/html/parser_test.v     |  41 ++++
 v_windows/v/old/vlib/net/html/tag.v             |  68 +++++++
 9 files changed, 754 insertions(+)
 create mode 100644 v_windows/v/old/vlib/net/html/README.md
 create mode 100644 v_windows/v/old/vlib/net/html/data_structures.v
 create mode 100644 v_windows/v/old/vlib/net/html/dom.v
 create mode 100644 v_windows/v/old/vlib/net/html/dom_test.v
 create mode 100644 v_windows/v/old/vlib/net/html/html.v
 create mode 100644 v_windows/v/old/vlib/net/html/html_test.v
 create mode 100644 v_windows/v/old/vlib/net/html/parser.v
 create mode 100644 v_windows/v/old/vlib/net/html/parser_test.v
 create mode 100644 v_windows/v/old/vlib/net/html/tag.v
(limited to 'v_windows/v/old/vlib/net/html')
diff --git a/v_windows/v/old/vlib/net/html/README.md b/v_windows/v/old/vlib/net/html/README.md
new file mode 100644
index 0000000..a92a6e6
--- /dev/null
+++ b/v_windows/v/old/vlib/net/html/README.md
@@ -0,0 +1,16 @@
+net/html is an HTML parser written in pure V.
+
+## Usage
+```v oksyntax
+import net.html
+
+fn main() {
+	doc := html.parse('<html><body><h1 class="title">Hello world!</h1></body></html>')
+	tag := doc.get_tag('h1')[0] // <h1 class="title">Hello world!</h1>
+	println(tag.name) // h1
+	println(tag.content) // Hello world!
+	println(tag.attributes) // {'class':'title'}
+	println(tag.str()) // <h1 class="title">Hello world!</h1>
+}
+```
+More examples can be found in [`parser_test.v`](parser_test.v) and [`html_test.v`](html_test.v).
diff --git a/v_windows/v/old/vlib/net/html/data_structures.v b/v_windows/v/old/vlib/net/html/data_structures.v
new file mode 100644
index 0000000..688b756
--- /dev/null
+++ b/v_windows/v/old/vlib/net/html/data_structures.v
@@ -0,0 +1,91 @@
+module html
+
+const (
+	null_element = int(0x80000000) // sentinel returned by Stack.peek/pop when the stack is empty
+)
+
+struct Stack {
+mut:
+	elements []int // backing storage; slots at index >= size are stale and get reused by push
+	size     int // number of live items (pop only lowers this counter)
+}
+
+[inline]
+fn is_null(data int) bool { // true when `data` is the "no element" sentinel
+	return html.null_element == data
+}
+
+[inline]
+fn (stack Stack) is_empty() bool { // true when no live item is left
+	return stack.size < 1
+}
+
+fn (stack Stack) peek() int { // top item without removing it, or `null_element`
+	return if stack.is_empty() { html.null_element } else { stack.elements[stack.size - 1] }
+}
+
+// pop removes and returns the top item, or `null_element` when the stack is empty.
+fn (mut stack Stack) pop() int {
+	if stack.is_empty() {
+		return html.null_element
+	}
+	stack.size--
+	return stack.elements[stack.size]
+}
+
+fn (mut stack Stack) push(item int) { // reuses a slot left behind by `pop` when one exists
+	if stack.size >= stack.elements.len {
+		stack.elements << item
+	} else {
+		stack.elements[stack.size] = item
+	}
+	stack.size++
+}
+
+struct BTree {
+mut:
+	all_tags     []Tag // every stored tag, in insertion order
+	node_pointer int // index (into all_tags) of the node new children are attached to
+	childrens    [][]int // childrens[i] lists the all_tags indexes of the children of node i
+	parents      []int // parents[i] is the all_tags index of the parent of node i
+}
+
+// add_children stores `tag`, links it under the current node, and returns its index.
+fn (mut btree BTree) add_children(tag Tag) int {
+	btree.all_tags << tag
+	new_index := btree.all_tags.len - 1
+	if new_index == 0 {
+		return new_index // the very first tag has no parent to link to
+	}
+	for btree.childrens.len <= btree.node_pointer {
+		btree.childrens << []int{}
+	}
+	btree.childrens[btree.node_pointer] << new_index
+	// pad `parents` so that `new_index` is addressable
+	for btree.parents.len <= new_index {
+		btree.parents << 0
+	}
+	btree.parents[new_index] = btree.node_pointer
+	return new_index
+}
+
+[inline]
+fn (btree BTree) get_children() []int { // child indexes of the node under `node_pointer`
+	return btree.childrens[btree.node_pointer]
+}
+
+[inline]
+fn (btree BTree) get_parent() int { // parent index of the node under `node_pointer`
+	return btree.parents[btree.node_pointer]
+}
+
+[inline]
+fn (btree BTree) get_stored() Tag { // the tag stored at `node_pointer`
+	return btree.all_tags[btree.node_pointer]
+}
+
+fn (mut btree BTree) move_pointer(to int) { // targets outside all_tags (negative too) are ignored
+	if to >= 0 && to < btree.all_tags.len {
+		btree.node_pointer = to
+	}
+}
diff --git a/v_windows/v/old/vlib/net/html/dom.v b/v_windows/v/old/vlib/net/html/dom.v
new file mode 100644
index 0000000..f56e9c2
--- /dev/null
+++ b/v_windows/v/old/vlib/net/html/dom.v
@@ -0,0 +1,189 @@
+module html
+
+import os
+
+// The W3C Document Object Model (DOM) is a platform and language-neutral
+// interface that allows programs and scripts to dynamically access and
+// update the content, structure, and style of a document.
+//
+// https://www.w3.org/TR/WD-DOM/introduction.html
+pub struct DocumentObjectModel {
+mut:
+	root           &Tag // set by construct to the first tag of its tag list
+	constructed    bool // set to true by construct
+	btree          BTree // index tree filled by construct through add_children
+	all_tags       []&Tag // every stored tag; returned by get_tags
+	all_attributes map[string][]&Tag // attribute name -> tags carrying that attribute
+	close_tags     map[string]bool // add a counter to see count how many times is closed and parse correctly
+	attributes     map[string][]string // attribute name -> distinct values, in where_is slot order
+	tag_attributes map[string][][]&Tag // attribute name -> one bucket of tags per where_is slot
+	tag_type       map[string][]&Tag // tag name -> tags with that name; read by get_tag
+	debug_file     os.File // target of print_debug
+}
+
+[if debug]
+fn (mut dom DocumentObjectModel) print_debug(data string) {
+	$if debug {
+		if data != '' { // nothing is written for an empty message
+			dom.debug_file.writeln(data) or { panic(err) }
+		}
+	}
+}
+
+[inline]
+fn is_close_tag(tag &Tag) bool { // closing tags are stored with a leading `/` in their name
+	return tag.name.starts_with('/')
+}
+
+// where_is returns the slot of `item_name` among the values recorded for
+// `attribute_name`, registering the value in a new slot when it is not known yet.
+fn (mut dom DocumentObjectModel) where_is(item_name string, attribute_name string) int {
+	if attribute_name !in dom.attributes {
+		dom.attributes[attribute_name] = []string{}
+	}
+	mut known_values := dom.attributes[attribute_name]
+	for slot, value in known_values {
+		if value == item_name {
+			return slot
+		}
+	}
+	known_values << item_name
+	dom.attributes[attribute_name] = known_values
+	return known_values.len - 1
+}
+
+// add_tag_attribute files `tag` under every (attribute name, attribute value) pair it carries.
+fn (mut dom DocumentObjectModel) add_tag_attribute(tag &Tag) {
+	for attribute_name, attribute_value in tag.attributes {
+		location := dom.where_is(attribute_value, attribute_name)
+		if attribute_name !in dom.tag_attributes {
+			dom.tag_attributes[attribute_name] = []
+		}
+		// Grow the bucket list only until `location` is a valid index. The previous
+		// loop always appended once and stopped on `location < len + 1`, which left
+		// `location == len` out of bounds and piled up unused empty buckets.
+		for dom.tag_attributes[attribute_name].len <= location {
+			mut temp_array := dom.tag_attributes[attribute_name]
+			temp_array << []&Tag{}
+			dom.tag_attributes[attribute_name] = temp_array
+		}
+		mut temp_array := dom.tag_attributes[attribute_name][location]
+		temp_array << tag
+		dom.tag_attributes[attribute_name][location] = temp_array
+	}
+}
+
+// add_tag_by_type files `tag` under its tag name, the index that `get_tag` reads.
+fn (mut dom DocumentObjectModel) add_tag_by_type(tag &Tag) {
+	if tag.name in dom.tag_type {
+		mut same_name := dom.tag_type[tag.name]
+		same_name << tag
+		dom.tag_type[tag.name] = same_name
+		return
+	}
+	dom.tag_type[tag.name] = [tag]
+}
+
+fn (mut dom DocumentObjectModel) add_tag_by_attribute(tag &Tag) { // feeds `get_tag_by_attribute`
+	for attribute_name, _ in tag.attributes {
+		if attribute_name in dom.all_attributes {
+			mut holders := dom.all_attributes[attribute_name]
+			holders << tag
+			dom.all_attributes[attribute_name] = holders
+			continue
+		}
+		dom.all_attributes[attribute_name] = [tag]
+	}
+}
+
+// construct rebuilds the DOM from the flat `tag_list`: `tag_list[0]` becomes the root, and each
+// later named open tag is indexed and, when a tag is open on the stack, linked as its child.
+fn (mut dom DocumentObjectModel) construct(tag_list []&Tag) {
+	dom.constructed = true
+	mut temp_map := map[string]int{} // tag_list index (as string) -> index inside dom.btree
+	mut temp_int := null_element
+	mut temp_string := ''
+	mut stack := Stack{} // tag_list indexes of the tags that are currently open
+	dom.btree = BTree{}
+	dom.root = tag_list[0] // NOTE(review): tag_list[0] is read unconditionally; an empty list is not handled here
+	dom.all_tags = [tag_list[0]]
+	temp_map['0'] = dom.btree.add_children(tag_list[0])
+	stack.push(0)
+	root_index := 0
+	for index := 1; index < tag_list.len; index++ {
+		mut tag := tag_list[index]
+		dom.print_debug(tag.str())
+		if is_close_tag(tag) {
+			temp_int = stack.peek()
+			temp_string = tag.name[1..] // tag name without the leading `/`
+			for !is_null(temp_int) && temp_string != tag_list[temp_int].name
+				&& !tag_list[temp_int].closed {
+				dom.print_debug(temp_string + ' >> ' + tag_list[temp_int].name + ' ' +
+					(temp_string == tag_list[temp_int].name).str())
+				stack.pop()
+				temp_int = stack.peek()
+			}
+			temp_int = stack.peek()
+			// NOTE(review): the next line never leaves `temp_int` null (it is either a popped
+			// index or `root_index`), so the `is_null` branch right after it looks unreachable.
+			temp_int = if !is_null(temp_int) { stack.pop() } else { root_index }
+			if is_null(temp_int) {
+				stack.push(root_index)
+			}
+			dom.print_debug('Removed ' + temp_string + ' -- ' + tag_list[temp_int].name)
+		} else if tag.name.len > 0 {
+			dom.add_tag_attribute(tag) // error here
+			dom.add_tag_by_attribute(tag)
+			dom.add_tag_by_type(tag)
+			dom.all_tags << tag
+			temp_int = stack.peek()
+			if !is_null(temp_int) {
+				dom.btree.move_pointer(temp_map[temp_int.str()])
+				temp_map[index.str()] = dom.btree.add_children(tag)
+				mut temp_tag := tag_list[temp_int]
+				position_in_parent := temp_tag.add_child(tag) // tag_list[temp_int] = temp_tag
+				tag.add_parent(temp_tag, position_in_parent)
+				dom.print_debug("Added $tag.name as child of '" + temp_tag.name +
+					"' which now has $temp_tag.children.len childrens")
+			} else { // dom.new_root(tag)
+				stack.push(root_index)
+			}
+			temp_string = '/' + tag.name
+			if temp_string in dom.close_tags && !tag.closed { // if tag ends with />
+				dom.print_debug('Pushed ' + temp_string)
+				stack.push(index)
+			}
+		}
+	} // println(tag_list[root_index]) for debug purposes
+	dom.root = tag_list[0]
+}
+
+// get_tag_by_attribute_value retrieves all the tags in the document that have the given attribute name and value.
+pub fn (mut dom DocumentObjectModel) get_tag_by_attribute_value(name string, value string) []&Tag {
+	for location, known in dom.attributes[name] { // read-only scan: a miss no longer registers `value`
+		if known == value && location < dom.tag_attributes[name].len {
+			return dom.tag_attributes[name][location]
+		}
+	}
+	return []&Tag{}
+}
+
+// get_tag retrieves all the tags in the document that have the given tag name.
+pub fn (dom DocumentObjectModel) get_tag(name string) []&Tag {
+	return if name !in dom.tag_type { []&Tag{} } else { dom.tag_type[name] }
+}
+
+// get_tag_by_attribute retrieves all the tags in the document that have the given attribute name.
+pub fn (dom DocumentObjectModel) get_tag_by_attribute(name string) []&Tag {
+	return if name !in dom.all_attributes { []&Tag{} } else { dom.all_attributes[name] }
+}
+
+// get_root returns the root tag of the document (`construct` sets it to the first tag of its list).
+pub fn (dom DocumentObjectModel) get_root() &Tag {
+	return dom.root
+}
+
+// get_tags returns all of the tags stored in the document, in the order they were added.
+pub fn (dom DocumentObjectModel) get_tags() []&Tag {
+	return dom.all_tags
+}
diff --git a/v_windows/v/old/vlib/net/html/dom_test.v b/v_windows/v/old/vlib/net/html/dom_test.v
new file mode 100644
index 0000000..d4fd292
--- /dev/null
+++ b/v_windows/v/old/vlib/net/html/dom_test.v
@@ -0,0 +1,56 @@
+module html
+
+import strings
+
+fn generate_temp_html() string { // shared fixture: a small page whose body holds four <div> tags
+	mut temp_html := strings.new_builder(200)
+	temp_html.write_string('<!doctype html><html><head><title>Giant String</title></head><body>')
+	for counter := 0; counter < 4; counter++ {
+		temp_html.write_string("<div id='name_$counter' ")
+		temp_html.write_string("class='several-$counter'>Look at $counter</div>")
+	}
+	temp_html.write_string('</body></html>')
+	return temp_html.str()
+}
+
+fn test_search_by_tag_type() { // `get_tag` looks parsed tags up by their tag name
+	dom := parse(generate_temp_html())
+	assert dom.get_tag('div').len == 4
+	assert dom.get_tag('head').len == 1
+	assert dom.get_tag('body').len == 1
+}
+
+fn test_search_by_attribute_value() {
+	mut dom := parse(generate_temp_html())
+	// `dom` is `mut` because get_tag_by_attribute_value takes a mutable receiver
+	print('Amount ')
+	println(dom.get_tag_by_attribute_value('id', 'name_0'))
+	assert dom.get_tag_by_attribute_value('id', 'name_0').len == 1
+}
+
+fn test_access_parent() {
+	mut dom := parse(generate_temp_html())
+	div_tags := dom.get_tag('div')
+	parent := div_tags[0].parent
+	assert parent != 0 // the first div must have a parent reference set
+	for div_tag in div_tags { // every div is expected to share that same parent
+		assert div_tag.parent == parent
+	}
+}
+
+fn test_search_by_attributes() { // get_tag_by_attribute keys on the attribute name, whatever its value
+	dom := parse(generate_temp_html())
+	assert dom.get_tag_by_attribute('id').len == 4
+}
+
+fn test_tags_used() { // get_tags returns every stored tag, whatever its name
+	dom := parse(generate_temp_html())
+	assert dom.get_tags().len == 9
+}
+
+fn test_access_tag_fields() { // results come back in the order the tags were added to the DOM
+	dom := parse(generate_temp_html())
+	id_tags := dom.get_tag_by_attribute('id')
+	assert id_tags[0].name == 'div'
+	assert id_tags[1].attributes['class'] == 'several-1'
+}
diff --git a/v_windows/v/old/vlib/net/html/html.v b/v_windows/v/old/vlib/net/html/html.v
new file mode 100644
index 0000000..293b643
--- /dev/null
+++ b/v_windows/v/old/vlib/net/html/html.v
@@ -0,0 +1,18 @@
+module html
+
+import os
+
+// parse runs a fresh Parser over `text` and returns the DOM that parser built.
+pub fn parse(text string) DocumentObjectModel {
+	mut parser := Parser{}
+	parser.parse_html(text)
+	return parser.get_dom()
+}
+
+// parse_file parses the contents of `filename`; if the file cannot be read it returns a DOM whose root is a blank Tag.
+pub fn parse_file(filename string) DocumentObjectModel {
+	content := os.read_file(filename) or { return DocumentObjectModel{
+		root: &Tag{}
+	} }
+	return parse(content)
+}
diff --git a/v_windows/v/old/vlib/net/html/html_test.v b/v_windows/v/old/vlib/net/html/html_test.v
new file mode 100644
index 0000000..51271cd
--- /dev/null
+++ b/v_windows/v/old/vlib/net/html/html_test.v
@@ -0,0 +1,15 @@
+module html
+
+fn test_parse() {
+	doc := parse('<html><body><h1 class="title" >Hello world!</h1></body></html>')
+	tags := doc.get_tag('h1')
+	assert tags.len == 1
+	h1_tag := tags[0] // <h1>Hello world!</h1>
+	assert h1_tag.name == 'h1'
+	assert h1_tag.content == 'Hello world!'
+	assert h1_tag.attributes.len == 2
+	// TODO: do not remove. Attributes must not have an empty attr.
+	// assert h1_tag.attributes.len == 1
+	assert h1_tag.str() == '<h1 class="title" >Hello world!</h1>'
+	// assert h1_tag.str() == '<h1 class="title">Hello world!</h1>'
+}
diff --git a/v_windows/v/old/vlib/net/html/parser.v b/v_windows/v/old/vlib/net/html/parser.v
new file mode 100644
index 0000000..b9ad2a1
--- /dev/null
+++ b/v_windows/v/old/vlib/net/html/parser.v
@@ -0,0 +1,260 @@
+module html
+
+import os
+import strings
+
+struct LexicalAttributes { // mutable lexer state; Parser keeps one in `lexical_attributes`
+mut:
+	current_tag      &Tag // reference field without a default; Parser initialises it with an empty Tag
+	open_tag         bool
+	open_code        bool
+	open_string      int // NOTE(review): an int, not a bool — presumably tracks the open quote; confirm in the lexer
+	open_comment     bool
+	is_attribute     bool
+	opened_code_type string
+	line_count       int
+	lexeme_builder   strings.Builder = strings.new_builder(100)
+	code_tags        map[string]bool = map{
+		'script': true
+		'style':  true
+	}
+}
+
+// Parser is responsible for reading the HTML strings and converting them into a `DocumentObjectModel`.
+pub struct Parser {
+mut:
+	dom                DocumentObjectModel
+	lexical_attributes LexicalAttributes = LexicalAttributes{
+		current_tag: &Tag{}
+	}
+	filename    string = 'direct-parse' // default value when no other name is assigned
+	initialized bool
+	tags        []&Tag // NOTE(review): presumably the flat tag list later passed to dom.construct — confirm
+	debug_file  os.File
+}
+
+// This function is used to add a tag for the parser ignore it's content.
+// For example, if you have an html or XML with a custom tag, like `