diff options
| author | Indrajith K L | 2022-12-03 17:00:20 +0530 | 
|---|---|---|
| committer | Indrajith K L | 2022-12-03 17:00:20 +0530 | 
| commit | f5c4671bfbad96bf346bd7e9a21fc4317b4959df (patch) | |
| tree | 2764fc62da58f2ba8da7ed341643fc359873142f /v_windows/v/old/examples/web_crawler | |
| download | cli-tools-windows-f5c4671bfbad96bf346bd7e9a21fc4317b4959df.tar.gz cli-tools-windows-f5c4671bfbad96bf346bd7e9a21fc4317b4959df.tar.bz2 cli-tools-windows-f5c4671bfbad96bf346bd7e9a21fc4317b4959df.zip | |
Diffstat (limited to 'v_windows/v/old/examples/web_crawler')
| -rw-r--r-- | v_windows/v/old/examples/web_crawler/README.md | 22 | ||||
| -rw-r--r-- | v_windows/v/old/examples/web_crawler/web_crawler.v | 24 | 
2 files changed, 46 insertions, 0 deletions
| diff --git a/v_windows/v/old/examples/web_crawler/README.md b/v_windows/v/old/examples/web_crawler/README.md new file mode 100644 index 0000000..c8a741f --- /dev/null +++ b/v_windows/v/old/examples/web_crawler/README.md @@ -0,0 +1,22 @@ +# web_crawler +web_crawler is a very simple web crawler.   +This web crawler fetches news from tuicool.com, +(a Chinese site similar to hacker-news.firebaseio.com).   + +# Compile and Run + +Use this to generate an executable, and then launch the web crawler: +```bash +v web_crawler.v +./web_crawler +``` + +And this to compile and launch the web crawler directly: +```bash +v run web_crawler.v +``` + +This project shows how to use http.fetch() to get http.Response,  +and then html.parse() to parse the returned html. + +It's easy, isn't it?   diff --git a/v_windows/v/old/examples/web_crawler/web_crawler.v b/v_windows/v/old/examples/web_crawler/web_crawler.v new file mode 100644 index 0000000..e32de54 --- /dev/null +++ b/v_windows/v/old/examples/web_crawler/web_crawler.v @@ -0,0 +1,24 @@ +import net.http +import net.html + +fn main() { +	// http.fetch() sends an HTTP request to the URL with the given method and configurations. +	config := http.FetchConfig{ +		user_agent: 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0' +	} +	resp := http.fetch('https://tuicool.com', config) or { +		println('failed to fetch data from the server') +		return +	} +	// html.parse() parses and returns the DOM from the given text. +	mut doc := html.parse(resp.text) +	// html.DocumentObjectModel.get_tag_by_attribute_value() retrieves all the tags in the document that have the given attribute name and value. +	tags := doc.get_tag_by_attribute_value('class', 'list_article_item') +	for tag in tags { +		href := tag.children[0].attributes['href'] or { panic('key not found') } +		title := tag.children[0].attributes['title'] or { panic('key not found') } +		println('href: $href') +		println('title: $title') +		println('') +	} +} | 
