diff options
| author | Indrajith K L | 2022-12-03 17:00:20 +0530 | 
|---|---|---|
| committer | Indrajith K L | 2022-12-03 17:00:20 +0530 | 
| commit | f5c4671bfbad96bf346bd7e9a21fc4317b4959df (patch) | |
| tree | 2764fc62da58f2ba8da7ed341643fc359873142f /v_windows/v/old/vlib/net/urllib | |
| download | cli-tools-windows-f5c4671bfbad96bf346bd7e9a21fc4317b4959df.tar.gz cli-tools-windows-f5c4671bfbad96bf346bd7e9a21fc4317b4959df.tar.bz2 cli-tools-windows-f5c4671bfbad96bf346bd7e9a21fc4317b4959df.zip | |
Diffstat (limited to 'v_windows/v/old/vlib/net/urllib')
| -rw-r--r-- | v_windows/v/old/vlib/net/urllib/urllib.v | 1095 | ||||
| -rw-r--r-- | v_windows/v/old/vlib/net/urllib/urllib_test.v | 51 | ||||
| -rw-r--r-- | v_windows/v/old/vlib/net/urllib/values.v | 87 | 
3 files changed, 1233 insertions, 0 deletions
| diff --git a/v_windows/v/old/vlib/net/urllib/urllib.v b/v_windows/v/old/vlib/net/urllib/urllib.v new file mode 100644 index 0000000..3b02ef6 --- /dev/null +++ b/v_windows/v/old/vlib/net/urllib/urllib.v @@ -0,0 +1,1095 @@ +// urllib parses URLs and implements query escaping. +// See RFC 3986. This module generally follows RFC 3986, except where +// it deviates for compatibility reasons. +// Based off:   https://github.com/golang/go/blob/master/src/net/url/url.go +// Last commit: https://github.com/golang/go/commit/fe2ed5054176935d4adcf13e891715ccf2ee3cce +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +module urllib + +import strings + +enum EncodingMode { +	encode_path +	encode_path_segment +	encode_host +	encode_zone +	encode_user_password +	encode_query_component +	encode_fragment +} + +const ( +	err_msg_escape = 'unescape: invalid URL escape' +	err_msg_parse  = 'parse: failed parsing url' +) + +fn error_msg(message string, val string) string { +	mut msg := 'net.urllib.$message' +	if val != '' { +		msg = '$msg ($val)' +	} +	return msg +} + +// Return true if the specified character should be escaped when +// appearing in a URL string, according to RFC 3986. +// +// Please be informed that for now should_escape does not check all +// reserved characters correctly. See golang.org/issue/5684. +fn should_escape(c byte, mode EncodingMode) bool { +	// §2.3 Unreserved characters (alphanum) +	if (`a` <= c && c <= `z`) || (`A` <= c && c <= `Z`) || (`0` <= c && c <= `9`) { +		return false +	} +	if mode == .encode_host || mode == .encode_zone { +		// §3.2.2 host allows +		// sub-delims = `!` / `$` / `&` / ``` / `(` / `)` / `*` / `+` / `,` / `;` / `=` +		// as part of reg-name. +		// We add : because we include :port as part of host. +		// We add [ ] because we include [ipv6]:port as part of host. 
+		// We add < > because they`re the only characters left that +		// we could possibly allow, and parse will reject them if we +		// escape them (because hosts can`t use %-encoding for +		// ASCII bytes). +		if c in [`!`, `$`, `&`, `\\`, `(`, `)`, `*`, `+`, `,`, `;`, `=`, `:`, `[`, `]`, `<`, `>`, +			`"`, +		] { +			return false +		} +	} +	match c { +		`-`, `_`, `.`, `~` { +			// §2.3 Unreserved characters (mark) +			return false +		} +		`$`, `&`, `+`, `,`, `/`, `:`, `;`, `=`, `?`, `@` { +			// §2.2 Reserved characters (reserved) +			// Different sections of the URL allow a few of +			// the reserved characters to appear unescaped. +			match mode { +				.encode_path { +					// §3.3 +					// The RFC allows : @ & = + $ but saves / ; , for assigning +					// meaning to individual path segments. This package +					// only manipulates the path as a whole, so we allow those +					// last three as well. That leaves only ? to escape. +					return c == `?` +				} +				.encode_path_segment { +					// §3.3 +					// The RFC allows : @ & = + $ but saves / ; , for assigning +					// meaning to individual path segments. +					return c == `/` || c == `;` || c == `,` || c == `?` +				} +				.encode_user_password { +					// §3.2.1 +					// The RFC allows `;`, `:`, `&`, `=`, `+`, `$`, and `,` in +					// userinfo, so we must escape only `@`, `/`, and `?`. +					// The parsing of userinfo treats `:` as special so we must escape +					// that too. +					return c == `@` || c == `/` || c == `?` || c == `:` +				} +				.encode_query_component { +					// §3.4 +					// The RFC reserves (so we must escape) everything. +					return true +				} +				.encode_fragment { +					// §4.1 +					// The RFC text is silent but the grammar allows +					// everything, so escape nothing. +					return false +				} +				else {} +			} +		} +		else {} +	} +	if mode == .encode_fragment { +		// RFC 3986 §2.2 allows not escaping sub-delims. A subset of sub-delims are +		// included in reserved from RFC 2396 §2.2. 
The remaining sub-delims do not +		// need to be escaped. To minimize potential breakage, we apply two restrictions: +		// (1) we always escape sub-delims outside of the fragment, and (2) we always +		// escape single quote to avoid breaking callers that had previously assumed that +		// single quotes would be escaped. See issue #19917. +		match c { +			`!`, `(`, `)`, `*` { return false } +			else {} +		} +	} +	// Everything else must be escaped. +	return true +} + +// query_unescape does the inverse transformation of query_escape, +// converting each 3-byte encoded substring of the form '%AB' into the +// hex-decoded byte 0xAB. +// It returns an error if any % is not followed by two hexadecimal +// digits. +pub fn query_unescape(s string) ?string { +	return unescape(s, .encode_query_component) +} + +// path_unescape does the inverse transformation of path_escape, +// converting each 3-byte encoded substring of the form '%AB' into the +// hex-decoded byte 0xAB. It returns an error if any % is not followed +// by two hexadecimal digits. +// +// path_unescape is identical to query_unescape except that it does not +// unescape '+' to ' ' (space). +pub fn path_unescape(s string) ?string { +	return unescape(s, .encode_path_segment) +} + +// unescape unescapes a string; the mode specifies +// which section of the URL string is being unescaped. +fn unescape(s_ string, mode EncodingMode) ?string { +	mut s := s_ +	// Count %, check that they're well-formed. +	mut n := 0 +	mut has_plus := false +	for i := 0; i < s.len; { +		x := s[i] +		match x { +			`%` { +				if s == '' { +					break +				} +				n++ +				if i + 2 >= s.len || !ishex(s[i + 1]) || !ishex(s[i + 2]) { +					if mode == .encode_query_component && i + 1 < s.len { +						s = s[..i] + '%25' + s[(i + 1)..] +						i += 4 // skip the %25 and the next character +						continue +					} +					s = s[i..] 
+					if s.len > 3 { +						s = s[..3] +					} +					return error(error_msg(urllib.err_msg_escape, s)) +				} +				// Per https://tools.ietf.org/html/rfc3986#page-21 +				// in the host component %-encoding can only be used +				// for non-ASCII bytes. +				// But https://tools.ietf.org/html/rfc6874#section-2 +				// introduces %25 being allowed to escape a percent sign +				// in IPv6 scoped-address literals. Yay. +				if i + 3 >= s.len && mode == .encode_host && unhex(s[i + 1]) < 8 +					&& s[i..i + 3] != '%25' { +					return error(error_msg(urllib.err_msg_escape, s[i..i + 3])) +				} +				if mode == .encode_zone { +					// RFC 6874 says basically 'anything goes' for zone identifiers +					// and that even non-ASCII can be redundantly escaped, +					// but it seems prudent to restrict %-escaped bytes here to those +					// that are valid host name bytes in their unescaped form. +					// That is, you can use escaping in the zone identifier but not +					// to introduce bytes you couldn't just write directly. +					// But Windows puts spaces here! Yay. 
+					if i + 3 >= s.len { +						return error(error_msg('unescape: invalid escape sequence', '')) +					} +					v := ((unhex(s[i + 1]) << byte(4)) | unhex(s[i + 2])) +					if s[i..i + 3] != '%25' && v != ` ` && should_escape(v, .encode_host) { +						error(error_msg(urllib.err_msg_escape, s[i..i + 3])) +					} +				} +				i += 3 +			} +			`+` { +				has_plus = mode == .encode_query_component +				i++ +			} +			else { +				if (mode == .encode_host || mode == .encode_zone) && s[i] < 0x80 +					&& should_escape(s[i], mode) { +					error(error_msg('unescape: invalid character in host name', s[i..i + 1])) +				} +				i++ +			} +		} +	} +	if n == 0 && !has_plus { +		return s +	} +	if s.len < 2 * n { +		return error(error_msg('unescape: invalid escape sequence', '')) +	} +	mut t := strings.new_builder(s.len - 2 * n) +	for i := 0; i < s.len; i++ { +		x := s[i] +		match x { +			`%` { +				if i + 2 >= s.len { +					return error(error_msg('unescape: invalid escape sequence', '')) +				} +				t.write_string(((unhex(s[i + 1]) << byte(4)) | unhex(s[i + 2])).ascii_str()) +				i += 2 +			} +			`+` { +				if mode == .encode_query_component { +					t.write_string(' ') +				} else { +					t.write_string('+') +				} +			} +			else { +				t.write_string(s[i].ascii_str()) +			} +		} +	} +	return t.str() +} + +// query_escape escapes the string so it can be safely placed +// inside a URL query. +pub fn query_escape(s string) string { +	return escape(s, .encode_query_component) +} + +// path_escape escapes the string so it can be safely placed inside a URL path segment, +// replacing special characters (including /) with %XX sequences as needed. +pub fn path_escape(s string) string { +	return escape(s, .encode_path_segment) +} + +fn escape(s string, mode EncodingMode) string { +	mut space_count := 0 +	mut hex_count := 0 +	mut c := byte(0) +	for i in 0 .. 
s.len { +		c = s[i] +		if should_escape(c, mode) { +			if c == ` ` && mode == .encode_query_component { +				space_count++ +			} else { +				hex_count++ +			} +		} +	} +	if space_count == 0 && hex_count == 0 { +		return s +	} +	buf := []byte{len: (64)} +	mut t := []byte{} +	required := s.len + 2 * hex_count +	if required <= buf.len { +		t = buf[..required] +	} else { +		t = []byte{len: required} +	} +	if hex_count == 0 { +		copy(t, s.bytes()) +		for i in 0 .. s.len { +			if s[i] == ` ` { +				t[i] = `+` +			} +		} +		return t.bytestr() +	} +	upperhex := '0123456789ABCDEF' +	mut j := 0 +	for i in 0 .. s.len { +		c1 := s[i] +		if c1 == ` ` && mode == .encode_query_component { +			t[j] = `+` +			j++ +		} else if should_escape(c1, mode) { +			t[j] = `%` +			t[j + 1] = upperhex[c1 >> 4] +			t[j + 2] = upperhex[c1 & 15] +			j += 3 +		} else { +			t[j] = s[i] +			j++ +		} +	} +	return t.bytestr() +} + +// A URL represents a parsed URL (technically, a URI reference). +// +// The general form represented is: +// +// [scheme:][//[userinfo@]host][/]path[?query][#fragment] +// +// URLs that do not start with a slash after the scheme are interpreted as: +// +// scheme:opaque[?query][#fragment] +// +// Note that the path field is stored in decoded form: /%47%6f%2f becomes /Go/. +// A consequence is that it is impossible to tell which slashes in the path were +// slashes in the raw URL and which were %2f. This distinction is rarely important, +// but when it is, the code should use raw_path, an optional field which only gets +// set if the default encoding is different from path. +// +// URL's String method uses the escaped_path method to obtain the path. See the +// escaped_path method for more details. 
+pub struct URL { +pub mut: +	scheme      string +	opaque      string    // encoded opaque data +	user        &Userinfo // username and password information +	host        string    // host or host:port +	path        string    // path (relative paths may omit leading slash) +	raw_path    string    // encoded path hint (see escaped_path method) +	force_query bool      // append a query ('?') even if raw_query is empty +	raw_query   string    // encoded query values, without '?' +	fragment    string    // fragment for references, without '#' +} + +// user returns a Userinfo containing the provided username +// and no password set. +pub fn user(username string) &Userinfo { +	return &Userinfo{ +		username: username +		password: '' +		password_set: false +	} +} + +// user_password returns a Userinfo containing the provided username +// and password. +// +// This functionality should only be used with legacy web sites. +// RFC 2396 warns that interpreting Userinfo this way +// ``is NOT RECOMMENDED, because the passing of authentication +// information in clear text (such as URI) has proven to be a +// security risk in almost every case where it has been used.'' +fn user_password(username string, password string) &Userinfo { +	return &Userinfo{username, password, true} +} + +// The Userinfo type is an immutable encapsulation of username and +// password details for a URL. An existing Userinfo value is guaranteed +// to have a username set (potentially empty, as allowed by RFC 2396), +// and optionally a password. +struct Userinfo { +pub: +	username     string +	password     string +	password_set bool +} + +fn (u &Userinfo) empty() bool { +	return isnil(u) || (u.username == '' && u.password == '') +} + +// string returns the encoded userinfo information in the standard form +// of 'username[:password]'. 
+fn (u &Userinfo) str() string { +	if u.empty() { +		return '' +	} +	mut s := escape(u.username, .encode_user_password) +	if u.password_set { +		s += ':' + escape(u.password, .encode_user_password) +	} +	return s +} + +// Maybe rawurl is of the form scheme:path. +// (scheme must be [a-zA-Z][a-zA-Z0-9+-.]*) +// If so, return [scheme, path]; else return ['', rawurl] +fn split_by_scheme(rawurl string) ?[]string { +	for i in 0 .. rawurl.len { +		c := rawurl[i] +		if (`a` <= c && c <= `z`) || (`A` <= c && c <= `Z`) { +			// do nothing +		} else if (`0` <= c && c <= `9`) || (c == `+` || c == `-` || c == `.`) { +			if i == 0 { +				return ['', rawurl] +			} +		} else if c == `:` { +			if i == 0 { +				return error(error_msg('split_by_scheme: missing protocol scheme', '')) +			} +			return [rawurl[..i], rawurl[i + 1..]] +		} else { +			// we have encountered an invalid character, +			// so there is no valid scheme +			return ['', rawurl] +		} +	} +	return ['', rawurl] +} + +fn get_scheme(rawurl string) ?string { +	split := split_by_scheme(rawurl) or { return err.msg } +	return split[0] +} + +// split slices s into two substrings separated by the first occurence of +// sep. If cutc is true then sep is included with the second substring. +// If sep does not occur in s then s and the empty string is returned. +fn split(s string, sep byte, cutc bool) (string, string) { +	i := s.index_byte(sep) +	if i < 0 { +		return s, '' +	} +	if cutc { +		return s[..i], s[i + 1..] +	} +	return s[..i], s[i..] +} + +// parse parses rawurl into a URL structure. +// +// The rawurl may be relative (a path, without a host) or absolute +// (starting with a scheme). Trying to parse a hostname and path +// without a scheme is invalid but may not necessarily return an +// error, due to parsing ambiguities. 
+pub fn parse(rawurl string) ?URL { +	// Cut off #frag +	u, frag := split(rawurl, `#`, true) +	mut url := parse_url(u, false) or { return error(error_msg(urllib.err_msg_parse, u)) } +	if frag == '' { +		return url +	} +	f := unescape(frag, .encode_fragment) or { return error(error_msg(urllib.err_msg_parse, +		u)) } +	url.fragment = f +	return url +} + +// parse_request_uri parses rawurl into a URL structure. It assumes that +// rawurl was received in an HTTP request, so the rawurl is interpreted +// only as an absolute URI or an absolute path. +// The string rawurl is assumed not to have a #fragment suffix. +// (Web browsers strip #fragment before sending the URL to a web server.) +fn parse_request_uri(rawurl string) ?URL { +	return parse_url(rawurl, true) +} + +// parse_url parses a URL from a string in one of two contexts. If +// via_request is true, the URL is assumed to have arrived via an HTTP request, +// in which case only absolute URLs or path-absolute relative URLs are allowed. +// If via_request is false, all forms of relative URLs are allowed. +[manualfree] +fn parse_url(rawurl string, via_request bool) ?URL { +	if string_contains_ctl_byte(rawurl) { +		return error(error_msg('parse_url: invalid control character in URL', rawurl)) +	} +	if rawurl == '' && via_request { +		return error(error_msg('parse_url: empty URL', rawurl)) +	} +	mut url := URL{ +		user: 0 +	} +	if rawurl == '*' { +		url.path = '*' +		return url +	} +	// Split off possible leading 'http:', 'mailto:', etc. +	// Cannot contain escaped characters. +	p := split_by_scheme(rawurl) ? 
+	url.scheme = p[0] +	mut rest := p[1] +	url.scheme = url.scheme.to_lower() +	// if rest.ends_with('?') && strings.count(rest, '?') == 1 { +	if rest.ends_with('?') && !rest[..1].contains('?') { +		url.force_query = true +		rest = rest[..rest.len - 1] +	} else { +		r, raw_query := split(rest, `?`, true) +		rest = r +		url.raw_query = raw_query +	} +	if !rest.starts_with('/') { +		if url.scheme != '' { +			// We consider rootless paths per RFC 3986 as opaque. +			url.opaque = rest +			return url +		} +		if via_request { +			return error(error_msg('parse_url: invalid URI for request', '')) +		} +		// Avoid confusion with malformed schemes, like cache_object:foo/bar. +		// See golang.org/issue/16822. +		// +		// RFC 3986, §3.3: +		// In addition, a URI reference (Section 4.1) may be a relative-path reference, +		// in which case the first path segment cannot contain a colon (':') character. +		colon := rest.index(':') or { return error('there should be a : in the URL') } +		slash := rest.index('/') or { return error('there should be a / in the URL') } +		if colon >= 0 && (slash < 0 || colon < slash) { +			// First path segment has colon. Not allowed in relative URL. +			return error(error_msg('parse_url: first path segment in URL cannot contain colon', +				'')) +		} +	} +	if ((url.scheme != '' || !via_request) && !rest.starts_with('///')) && rest.starts_with('//') { +		authority, r := split(rest[2..], `/`, false) +		rest = r +		a := parse_authority(authority) ? +		url.user = a.user +		url.host = a.host +	} +	// Set path and, optionally, raw_path. +	// raw_path is a hint of the encoding of path. We don't want to set it if +	// the default escaping of path is equivalent, to help make sure that people +	// don't rely on it in general. +	url.set_path(rest) ? 
+	return url +} + +struct ParseAuthorityRes { +	user &Userinfo +	host string +} + +fn parse_authority(authority string) ?ParseAuthorityRes { +	i := authority.last_index('@') or { -1 } +	mut host := '' +	mut zuser := user('') +	if i < 0 { +		h := parse_host(authority) ? +		host = h +	} else { +		h := parse_host(authority[i + 1..]) ? +		host = h +	} +	if i < 0 { +		return ParseAuthorityRes{ +			host: host +			user: zuser +		} +	} +	mut userinfo := authority[..i] +	if !valid_userinfo(userinfo) { +		return error(error_msg('parse_authority: invalid userinfo', '')) +	} +	if !userinfo.contains(':') { +		u := unescape(userinfo, .encode_user_password) ? +		userinfo = u +		zuser = user(userinfo) +	} else { +		mut username, mut password := split(userinfo, `:`, true) +		u := unescape(username, .encode_user_password) ? +		username = u +		p := unescape(password, .encode_user_password) ? +		password = p +		zuser = user_password(username, password) +	} +	return ParseAuthorityRes{ +		user: zuser +		host: host +	} +} + +// parse_host parses host as an authority without user +// information. That is, as host[:port]. +fn parse_host(host string) ?string { +	if host.starts_with('[') { +		// parse an IP-Literal in RFC 3986 and RFC 6874. +		// E.g., '[fe80::1]', '[fe80::1%25en0]', '[fe80::1]:80'. +		mut i := host.last_index(']') or { +			return error(error_msg("parse_host: missing ']' in host", '')) +		} +		mut colon_port := host[i + 1..] +		if !valid_optional_port(colon_port) { +			return error(error_msg('parse_host: invalid port $colon_port after host ', +				'')) +		} +		// RFC 6874 defines that %25 (%-encoded percent) introduces +		// the zone identifier, and the zone identifier can use basically +		// any %-encoding it likes. That's different from the host, which +		// can only %-encode non-ASCII bytes. +		// We do impose some restrictions on the zone, to avoid stupidity +		// like newlines. 
+		if zone := host[..i].index('%25') { +			host1 := unescape(host[..zone], .encode_host) or { return err.msg } +			host2 := unescape(host[zone..i], .encode_zone) or { return err.msg } +			host3 := unescape(host[i..], .encode_host) or { return err.msg } +			return host1 + host2 + host3 +		} +		if idx := host.last_index(':') { +			colon_port = host[idx..] +			if !valid_optional_port(colon_port) { +				return error(error_msg('parse_host: invalid port $colon_port after host ', +					'')) +			} +		} +	} +	h := unescape(host, .encode_host) or { return err.msg } +	return h +	// host = h +	// return host +} + +// set_path sets the path and raw_path fields of the URL based on the provided +// escaped path p. It maintains the invariant that raw_path is only specified +// when it differs from the default encoding of the path. +// For example: +// - set_path('/foo/bar')   will set path='/foo/bar' and raw_path='' +// - set_path('/foo%2fbar') will set path='/foo/bar' and raw_path='/foo%2fbar' +// set_path will return an error only if the provided path contains an invalid +// escaping. +pub fn (mut u URL) set_path(p string) ?bool { +	path := unescape(p, .encode_path) ? +	u.path = path +	escp := escape(path, .encode_path) +	if p == escp { +		// Default encoding is fine. +		u.raw_path = '' +	} else { +		u.raw_path = p +	} +	return true +} + +// escaped_path returns the escaped form of u.path. +// In general there are multiple possible escaped forms of any path. +// escaped_path returns u.raw_path when it is a valid escaping of u.path. +// Otherwise escaped_path ignores u.raw_path and computes an escaped +// form on its own. +// The String and request_uri methods use escaped_path to construct +// their results. +// In general, code should call escaped_path instead of +// reading u.raw_path directly. 
+pub fn (u &URL) escaped_path() string { +	if u.raw_path != '' && valid_encoded_path(u.raw_path) { +		unescape(u.raw_path, .encode_path) or { return '' } +		return u.raw_path +	} +	if u.path == '*' { +		return '*' // don't escape (Issue 11202) +	} +	return escape(u.path, .encode_path) +} + +// valid_encoded_path reports whether s is a valid encoded path. +// It must not contain any bytes that require escaping during path encoding. +fn valid_encoded_path(s string) bool { +	for i in 0 .. s.len { +		// RFC 3986, Appendix A. +		// pchar = unreserved / pct-encoded / sub-delims / ':' / '@'. +		// should_escape is not quite compliant with the RFC, +		// so we check the sub-delims ourselves and let +		// should_escape handle the others. +		x := s[i] +		match x { +			`!`, `$`, `&`, `\\`, `(`, `)`, `*`, `+`, `,`, `;`, `=`, `:`, `@` { +				// ok +			} +			`[`, `]` { +				// ok - not specified in RFC 3986 but left alone by modern browsers +			} +			`%` { +				// ok - percent encoded, will decode +			} +			else { +				if should_escape(s[i], .encode_path) { +					return false +				} +			} +		} +	} +	return true +} + +// valid_optional_port reports whether port is either an empty string +// or matches /^:\d*$/ +fn valid_optional_port(port string) bool { +	if port == '' { +		return true +	} +	if port[0] != `:` { +		return false +	} +	for b in port[1..] { +		if b < `0` || b > `9` { +			return false +		} +	} +	return true +} + +// str reassembles the URL into a valid URL string. +// The general form of the result is one of: +// +// scheme:opaque?query#fragment +// scheme://userinfo@host/path?query#fragment +// +// If u.opaque is non-empty, String uses the first form; +// otherwise it uses the second form. +// Any non-ASCII characters in host are escaped. +// To obtain the path, String uses u.escaped_path(). +// +// In the second form, the following rules apply: +// - if u.scheme is empty, scheme: is omitted. +// - if u.user is nil, userinfo@ is omitted. 
+// - if u.host is empty, host/ is omitted. +// - if u.scheme and u.host are empty and u.user is nil, +// the entire scheme://userinfo@host/ is omitted. +// - if u.host is non-empty and u.path begins with a /, +// the form host/path does not add its own /. +// - if u.raw_query is empty, ?query is omitted. +// - if u.fragment is empty, #fragment is omitted. +pub fn (u URL) str() string { +	mut buf := strings.new_builder(200) +	if u.scheme != '' { +		buf.write_string(u.scheme) +		buf.write_string(':') +	} +	if u.opaque != '' { +		buf.write_string(u.opaque) +	} else { +		if u.scheme != '' || u.host != '' || !u.user.empty() { +			if u.host != '' || u.path != '' || !u.user.empty() { +				buf.write_string('//') +			} +			if !u.user.empty() { +				buf.write_string(u.user.str()) +				buf.write_string('@') +			} +			if u.host != '' { +				buf.write_string(escape(u.host, .encode_host)) +			} +		} +		path := u.escaped_path() +		if path != '' && path[0] != `/` && u.host != '' { +			buf.write_string('/') +		} +		if buf.len == 0 { +			// RFC 3986 §4.2 +			// A path segment that contains a colon character (e.g., 'this:that') +			// cannot be used as the first segment of a relative-path reference, as +			// it would be mistaken for a scheme name. Such a segment must be +			// preceded by a dot-segment (e.g., './this:that') to make a relative- +			// path reference. +			i := path.index_byte(`:`) +			if i > -1 { +				// TODO remove this when autofree handles tmp +				// expressions like this +				if i > -1 && path[..i].index_byte(`/`) == -1 { +					buf.write_string('./') +				} +			} +		} +		buf.write_string(path) +	} +	if u.force_query || u.raw_query != '' { +		buf.write_string('?') +		buf.write_string(u.raw_query) +	} +	if u.fragment != '' { +		buf.write_string('#') +		buf.write_string(escape(u.fragment, .encode_fragment)) +	} +	return buf.str() +} + +// Values maps a string key to a list of values. +// It is typically used for query parameters and form values. 
+// Unlike in the http.Header map, the keys in a Values map +// are case-sensitive. +// parseQuery parses the URL-encoded query string and returns +// a map listing the values specified for each key. +// parseQuery always returns a non-nil map containing all the +// valid query parameters found; err describes the first decoding error +// encountered, if any. +// +// Query is expected to be a list of key=value settings separated by +// ampersands or semicolons. A setting without an equals sign is +// interpreted as a key set to an empty value. +pub fn parse_query(query string) ?Values { +	mut m := new_values() +	parse_query_values(mut m, query) ? +	return m +} + +// parse_query_silent is the same as parse_query +// but any errors will be silent +fn parse_query_silent(query string) Values { +	mut m := new_values() +	parse_query_values(mut m, query) or {} +	return m +} + +fn parse_query_values(mut m Values, query string) ?bool { +	mut had_error := false +	mut q := query +	for q != '' { +		mut key := q +		mut i := key.index_any('&;') +		if i >= 0 { +			q = key[i + 1..] +			key = key[..i] +		} else { +			q = '' +		} +		if key == '' { +			continue +		} +		mut value := '' +		if idx := key.index('=') { +			i = idx +			value = key[i + 1..] +			key = key[..i] +		} +		k := query_unescape(key) or { +			had_error = true +			continue +		} +		key = k +		v := query_unescape(value) or { +			had_error = true +			continue +		} +		value = v +		m.add(key, value) +	} +	if had_error { +		return error(error_msg('parse_query_values: failed parsing query string', '')) +	} +	return true +} + +// encode encodes the values into ``URL encoded'' form +// ('bar=baz&foo=quux') sorted by key. 
+pub fn (v Values) encode() string { +	if v.len == 0 { +		return '' +	} +	mut buf := strings.new_builder(200) +	mut keys := []string{} +	for k, _ in v.data { +		keys << k +	} +	keys.sort() +	for k in keys { +		vs := v.data[k] +		key_kscaped := query_escape(k) +		for _, val in vs.data { +			if buf.len > 0 { +				buf.write_string('&') +			} +			buf.write_string(key_kscaped) +			buf.write_string('=') +			buf.write_string(query_escape(val)) +		} +	} +	return buf.str() +} + +// resolve_path applies special path segments from refs and applies +// them to base, per RFC 3986. +fn resolve_path(base string, ref string) string { +	mut full := '' +	if ref == '' { +		full = base +	} else if ref[0] != `/` { +		i := base.last_index('/') or { -1 } +		full = base[..i + 1] + ref +	} else { +		full = ref +	} +	if full == '' { +		return '' +	} +	mut dst := []string{} +	src := full.split('/') +	for _, elem in src { +		match elem { +			'.' { +				// drop +			} +			'..' { +				if dst.len > 0 { +					dst = dst[..dst.len - 1] +				} +			} +			else { +				dst << elem +			} +		} +	} +	last := src[src.len - 1] +	if last == '.' || last == '..' { +		// Add final slash to the joined path. +		dst << '' +	} +	return '/' + dst.join('/').trim_left('/') +} + +// is_abs reports whether the URL is absolute. +// Absolute means that it has a non-empty scheme. +pub fn (u &URL) is_abs() bool { +	return u.scheme != '' +} + +// parse parses a URL in the context of the receiver. The provided URL +// may be relative or absolute. parse returns nil, err on parse +// failure, otherwise its return value is the same as resolve_reference. +pub fn (u &URL) parse(ref string) ?URL { +	refurl := parse(ref) ? +	return u.resolve_reference(refurl) +} + +// resolve_reference resolves a URI reference to an absolute URI from +// an absolute base URI u, per RFC 3986 Section 5.2. The URI reference +// may be relative or absolute. 
resolve_reference always returns a new +// URL instance, even if the returned URL is identical to either the +// base or reference. If ref is an absolute URL, then resolve_reference +// ignores base and returns a copy of ref. +pub fn (u &URL) resolve_reference(ref &URL) ?URL { +	mut url := *ref +	if ref.scheme == '' { +		url.scheme = u.scheme +	} +	if ref.scheme != '' || ref.host != '' || !ref.user.empty() { +		// The 'absoluteURI' or 'net_path' cases. +		// We can ignore the error from set_path since we know we provided a +		// validly-escaped path. +		url.set_path(resolve_path(ref.escaped_path(), '')) ? +		return url +	} +	if ref.opaque != '' { +		url.user = user('') +		url.host = '' +		url.path = '' +		return url +	} +	if ref.path == '' && ref.raw_query == '' { +		url.raw_query = u.raw_query +		if ref.fragment == '' { +			url.fragment = u.fragment +		} +	} +	// The 'abs_path' or 'rel_path' cases. +	url.host = u.host +	url.user = u.user +	url.set_path(resolve_path(u.escaped_path(), ref.escaped_path())) ? +	return url +} + +// query parses raw_query and returns the corresponding values. +// It silently discards malformed value pairs. +// To check errors use parseQuery. +pub fn (u &URL) query() Values { +	v := parse_query_silent(u.raw_query) +	return v +} + +// request_uri returns the encoded path?query or opaque?query +// string that would be used in an HTTP request for u. +pub fn (u &URL) request_uri() string { +	mut result := u.opaque +	if result == '' { +		result = u.escaped_path() +		if result == '' { +			result = '/' +		} +	} else { +		if result.starts_with('//') { +			result = u.scheme + ':' + result +		} +	} +	if u.force_query || u.raw_query != '' { +		result += '?' + u.raw_query +	} +	return result +} + +// hostname returns u.host, stripping any valid port number if present. +// +// If the result is enclosed in square brackets, as literal IPv6 addresses are, +// the square brackets are removed from the result. 
+pub fn (u &URL) hostname() string { +	host, _ := split_host_port(u.host) +	return host +} + +// port returns the port part of u.host, without the leading colon. +// If u.host doesn't contain a port, port returns an empty string. +pub fn (u &URL) port() string { +	_, port := split_host_port(u.host) +	return port +} + +// split_host_port separates host and port. If the port is not valid, it returns +// the entire input as host, and it doesn't check the validity of the host. +// Per RFC 3986, it requires ports to be numeric. +fn split_host_port(hostport string) (string, string) { +	mut host := hostport +	mut port := '' +	colon := host.last_index_byte(`:`) +	if colon != -1 { +		if valid_optional_port(host[colon..]) { +			port = host[colon + 1..] +			host = host[..colon] +		} +	} +	if host.starts_with('[') && host.ends_with(']') { +		host = host[1..host.len - 1] +	} +	return host, port +} + +// valid_userinfo reports whether s is a valid userinfo string per RFC 3986 +// Section 3.2.1: +// userinfo    = *( unreserved / pct-encoded / sub-delims / ':' ) +// unreserved  = ALPHA / DIGIT / '-' / '.' / '_' / '~' +// sub-delims  = '!' / '$' / '&' / ''' / '(' / ')' +// / '*' / '+' / ',' / ';' / '=' +// +// It doesn't validate pct-encoded. The caller does that via fn unescape. +pub fn valid_userinfo(s string) bool { +	for r in s { +		if `A` <= r && r <= `Z` { +			continue +		} +		if `a` <= r && r <= `z` { +			continue +		} +		if `0` <= r && r <= `9` { +			continue +		} +		match r { +			`-`, `.`, `_`, `:`, `~`, `!`, `$`, `&`, `\\`, `(`, `)`, `*`, `+`, `,`, `;`, `=`, `%`, +			`@` { +				continue +			} +			else { +				return false +			} +		} +	} +	return true +} + +// string_contains_ctl_byte reports whether s contains any ASCII control character. +fn string_contains_ctl_byte(s string) bool { +	for i in 0 .. 
s.len { +		b := s[i] +		if b < ` ` || b == 0x7f { +			return true +		} +	} +	return false +} + +// ishex reports whether c is an ASCII hexadecimal digit (0-9, a-f or A-F). +pub fn ishex(c byte) bool { +	if `0` <= c && c <= `9` { +		return true +	} else if `a` <= c && c <= `f` { +		return true +	} else if `A` <= c && c <= `F` { +		return true +	} +	return false +} + +// unhex returns the numeric value of the hexadecimal digit c. +// It returns 0 when c is not a hexadecimal digit. +fn unhex(c byte) byte { +	if `0` <= c && c <= `9` { +		return c - `0` +	} else if `a` <= c && c <= `f` { +		return c - `a` + 10 +	} else if `A` <= c && c <= `F` { +		return c - `A` + 10 +	} +	return 0 +} diff --git a/v_windows/v/old/vlib/net/urllib/urllib_test.v b/v_windows/v/old/vlib/net/urllib/urllib_test.v new file mode 100644 index 0000000..0870c81 --- /dev/null +++ b/v_windows/v/old/vlib/net/urllib/urllib_test.v @@ -0,0 +1,51 @@ +// Copyright (c) 2019-2021 Alexander Medvednikov. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. +import net.urllib + +fn test_net_urllib() { +	test_query := 'Hellö Wörld@vlang' +	assert urllib.query_escape(test_query) == 'Hell%C3%B6+W%C3%B6rld%40vlang' + +	test_url := 'https://joe:pass@www.mydomain.com:8080/som/url?param1=test1&param2=test2&foo=bar#testfragment' +	u := urllib.parse(test_url) or { +		assert false +		return +	} +	assert u.scheme == 'https' && u.hostname() == 'www.mydomain.com' && u.port() == '8080' +		&& u.path == '/som/url' && u.fragment == 'testfragment' && u.user.username == 'joe' +		&& u.user.password == 'pass' +} + +fn test_str() { +	url := urllib.parse('https://en.wikipedia.org/wiki/Brazil_(1985_film)') or { +		panic('unable to parse URL') +	} +	assert url.str() == 'https://en.wikipedia.org/wiki/Brazil_(1985_film)' +} + +fn test_escape_unescape() { +	original := 'те ст: т\\%' +	escaped := urllib.query_escape(original) +	assert escaped == '%D1%82%D0%B5+%D1%81%D1%82%3A+%D1%82%5C%25' +	unescaped := urllib.query_unescape(escaped) or { +		assert false +		return +	} +	assert unescaped == original +} + +fn test_parse_query() ? 
{ +	q1 := urllib.parse_query('format=%22%25l%3A+%25c+%25t%22') ? +	q2 := urllib.parse_query('format="%l:+%c+%t"') ? +	// dump(q1) +	// dump(q2) +	assert q1.data['format'].data == ['"%l: %c %t"'] +	assert q2.data['format'].data == ['"%l: %c %t"'] +} + +fn test_parse_missing_host() ? { +	// issue #10311 +	url := urllib.parse('http:///') ? +	assert url.str() == 'http://///' +} diff --git a/v_windows/v/old/vlib/net/urllib/values.v b/v_windows/v/old/vlib/net/urllib/values.v new file mode 100644 index 0000000..ee5c329 --- /dev/null +++ b/v_windows/v/old/vlib/net/urllib/values.v @@ -0,0 +1,87 @@ +// Copyright (c) 2019-2021 Alexander Medvednikov. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. +module urllib + +// Value holds the list of strings stored under a single key. +struct Value { +pub mut: +	data []string +} + +// Values maps each key to its Value. len is set to data.len by +// set, add and del. +struct Values { +pub mut: +	data map[string]Value +	len  int +} + +// new_values returns a new Values struct for creating +// urlencoded query string parameters. It can also be used to +// post form data with application/x-www-form-urlencoded. +// values.encode() will return the encoded data. +pub fn new_values() Values { +	return Values{ +		data: map[string]Value{} +	} +} + +// all returns the strings stored in v. +// Currently you will need to use all()[key].data; +// once map[string][]string is implemented +// this will be fixed. +pub fn (v &Value) all() []string { +	return v.data +} + +// get gets the first value associated with the given key. +// If there are no values associated with the key, get returns +// an empty string. +pub fn (v &Values) get(key string) string { +	if v.data.len == 0 { +		return '' +	} +	vs := v.data[key] +	if vs.data.len == 0 { +		return '' +	} +	return vs.data[0] +} + +// get_all gets all the values associated with the given key. +// If there are no values associated with the key, get_all returns +// an empty []string. 
+pub fn (v &Values) get_all(key string) []string { +	if v.data.len == 0 { +		return [] +	} +	vs := v.data[key] +	if vs.data.len == 0 { +		return [] +	} +	return vs.data +} + +// set sets the key to value. It replaces any existing +// values. +pub fn (mut v Values) set(key string, value string) { +	mut a := v.data[key] +	a.data = [value] +	v.data[key] = a +	// Refresh len to the number of keys now held in the map. +	v.len = v.data.len +} + +// add adds the value to key. It appends to any existing +// values associated with key. +pub fn (mut v Values) add(key string, value string) { +	mut a := v.data[key] +	// Start from an empty list when nothing is stored for key yet. +	if a.data.len == 0 { +		a.data = [] +	} +	a.data << value +	v.data[key] = a +	// Refresh len to the number of keys now held in the map. +	v.len = v.data.len +} + +// del deletes the values associated with key. +pub fn (mut v Values) del(key string) { +	v.data.delete(key) +	// Refresh len to the number of keys left in the map. +	v.len = v.data.len +} | 
