Adds most of the tools (HEAD, master)

author: Indrajith K L 2022-12-03 17:00:20 +0530
committer: Indrajith K L 2022-12-03 17:00:20 +0530
commit: f5c4671bfbad96bf346bd7e9a21fc4317b4959df (patch)
tree: 2764fc62da58f2ba8da7ed341643fc359873142f /v_windows/v/examples/web_crawler
download: cli-tools-windows-f5c4671bfbad96bf346bd7e9a21fc4317b4959df.tar.gz
cli-tools-windows-f5c4671bfbad96bf346bd7e9a21fc4317b4959df.tar.bz2
cli-tools-windows-f5c4671bfbad96bf346bd7e9a21fc4317b4959df.zip
2 files changed, 46 insertions, 0 deletions
diff --git a/v_windows/v/examples/web_crawler/README.md b/v_windows/v/examples/web_crawler/README.md
new file mode 100644
index 0000000..c8a741f
--- /dev/null
+++ b/v_windows/v/examples/web_crawler/README.md
@@ -0,0 +1,22 @@
+# web_crawler
+web_crawler is a very simple web crawler.  
+This web crawler fetches news from tuicool.com
+(a Chinese site similar to hacker-news.firebaseio.com).  
+
+# Compile and Run
+
+Use this to generate an executable, and then launch the web crawler:
+```bash
+v web_crawler.v
+./web_crawler
+```
+
+And this to compile and launch the web crawler directly:
+```bash
+v run web_crawler.v
+```
+
+This project shows how to use http.fetch() to get an http.Response,
+and then html.parse() to parse the returned HTML.
+
+It's easy, isn't it?  
diff --git a/v_windows/v/examples/web_crawler/web_crawler.v b/v_windows/v/examples/web_crawler/web_crawler.v
new file mode 100644
index 0000000..00d4dfa
--- /dev/null
+++ b/v_windows/v/examples/web_crawler/web_crawler.v
@@ -0,0 +1,24 @@
+import net.http
+import net.html
+
+fn main() {
+	// Request https://tuicool.com via http.fetch() with a browser-like User-Agent; on failure, print a message and exit.
+	config := http.FetchConfig{
+		user_agent: 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0'
+	}
+	resp := http.fetch(http.FetchConfig{ ...config, url: 'https://tuicool.com' }) or {
+		println('failed to fetch data from the server')
+		return
+	}
+	// html.parse() builds a DOM from the response text (resp.text).
+	mut doc := html.parse(resp.text)
+	// Collect the tags with class 'list_article_item'; each tag's first child is read for `href` and `title` (children[0] is not checked for existence, and a missing attribute panics).
+	tags := doc.get_tag_by_attribute_value('class', 'list_article_item')
+	for tag in tags {
+		href := tag.children[0].attributes['href'] or { panic('key not found') }
+		title := tag.children[0].attributes['title'] or { panic('key not found') }
+		println('href: $href')
+		println('title: $title')
+		println('')
+	}
+}
author	Indrajith K L	2022-12-03 17:00:20 +0530
committer	Indrajith K L	2022-12-03 17:00:20 +0530
commit	f5c4671bfbad96bf346bd7e9a21fc4317b4959df (patch)
tree	2764fc62da58f2ba8da7ed341643fc359873142f /v_windows/v/examples/web_crawler
download	cli-tools-windows-f5c4671bfbad96bf346bd7e9a21fc4317b4959df.tar.gz cli-tools-windows-f5c4671bfbad96bf346bd7e9a21fc4317b4959df.tar.bz2 cli-tools-windows-f5c4671bfbad96bf346bd7e9a21fc4317b4959df.zip