diff options
| author | Indrajith K L | 2022-12-03 17:00:20 +0530 | 
|---|---|---|
| committer | Indrajith K L | 2022-12-03 17:00:20 +0530 | 
| commit | f5c4671bfbad96bf346bd7e9a21fc4317b4959df (patch) | |
| tree | 2764fc62da58f2ba8da7ed341643fc359873142f /v_windows/v/old/vlib/encoding/csv | |
| download | cli-tools-windows-master.tar.gz cli-tools-windows-master.tar.bz2 cli-tools-windows-master.zip  | |
Diffstat (limited to 'v_windows/v/old/vlib/encoding/csv')
| -rw-r--r-- | v_windows/v/old/vlib/encoding/csv/README.md | 19 | ||||
| -rw-r--r-- | v_windows/v/old/vlib/encoding/csv/reader.v | 196 | ||||
| -rw-r--r-- | v_windows/v/old/vlib/encoding/csv/reader_test.v | 253 | ||||
| -rw-r--r-- | v_windows/v/old/vlib/encoding/csv/writer.v | 80 | ||||
| -rw-r--r-- | v_windows/v/old/vlib/encoding/csv/writer_test.v | 11 | 
5 files changed, 559 insertions, 0 deletions
diff --git a/v_windows/v/old/vlib/encoding/csv/README.md b/v_windows/v/old/vlib/encoding/csv/README.md new file mode 100644 index 0000000..01f3e4e --- /dev/null +++ b/v_windows/v/old/vlib/encoding/csv/README.md @@ -0,0 +1,19 @@ +## Reader example + +```v +import encoding.csv + +data := 'x,y\na,b,c\n' +mut parser := csv.new_reader(data) +// read each line +for { +	items := parser.read() or { break } +	println(items) +} +``` + +It prints: +``` +['x', 'y'] +['a', 'b', 'c'] +``` diff --git a/v_windows/v/old/vlib/encoding/csv/reader.v b/v_windows/v/old/vlib/encoding/csv/reader.v new file mode 100644 index 0000000..dafd022 --- /dev/null +++ b/v_windows/v/old/vlib/encoding/csv/reader.v @@ -0,0 +1,196 @@ +// Copyright (c) 2019-2021 Alexander Medvednikov. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. +module csv + +// Once interfaces are further along the idea would be to have something similar to +// go's io.reader & bufio.reader rather than reading the whole file into string, this +// would then satisfy that interface. I designed it this way to be easily adapted. 
+// ErrCommentIsDelimiter is returned by read_record when `comment` equals `delimiter`.
+struct ErrCommentIsDelimiter {
+	msg  string = 'encoding.csv: comment cannot be the same as delimiter'
+	code int
+}
+
+// ErrInvalidDelimiter is returned when the delimiter is rejected by valid_delim,
+// or when read_record cannot split a line into fields.
+struct ErrInvalidDelimiter {
+	msg  string = 'encoding.csv: invalid delimiter'
+	code int
+}
+
+// ErrEndOfFile is returned by read_line once all of `data` has been consumed.
+struct ErrEndOfFile {
+	msg  string = 'encoding.csv: end of file'
+	code int
+}
+
+// ErrInvalidLineEnding is returned by read_line when the data contains neither '\n' nor '\r'.
+struct ErrInvalidLineEnding {
+	msg  string = 'encoding.csv: could not find any valid line endings'
+	code int
+}
+
+// Reader parses CSV records out of an in-memory string.
+struct Reader {
+	// not used yet
+	// has_header        bool
+	// headings          []string
+	data string // the complete CSV text being parsed
+pub mut:
+	delimiter         byte // field separator; new_reader sets it to `,`
+	comment           byte // lines starting with this byte are skipped; new_reader sets it to `#`
+	is_mac_pre_osx_le bool // true once '\r'-only line endings have been detected
+	row_pos           int  // byte offset in `data` at which the next line starts
+}
+
+// new_reader initializes a Reader with string data to parse
+pub fn new_reader(data string) &Reader {
+	return &Reader{
+		delimiter: `,`
+		comment: `#`
+		data: data
+	}
+}
+
+// read reads a row from the CSV data.
+// If successful, the result holds an array of each column's data.
+pub fn (mut r Reader) read() ?[]string {
+	l := r.read_record() ?
+	return l
+}
+
+// Once we have multi dimensional array
+// pub fn (mut r Reader) read_all() ?[][]string {
+// 	mut records := []string{}
+// 	for {
+// 		record := r.read_record() or {
+// 			if err.error == err_eof.error {
+// 				return records
+// 			} else {
+// 				return err
+// 			}
+// 		}
+// 		records << record
+// 	}
+// 	return records
+// }
+
+// read_line returns the next line of `data`, starting at `row_pos`, without its line
+// terminator, and moves `row_pos` past that terminator.
+// Lines end with '\n' (a '\r' directly before it is stripped, so '\r\n' works as well).
+// If the first search finds no '\n' but does find '\r', the reader switches to '\r' endings.
+// A final line that has no terminator is returned in full.
+// Errors: ErrEndOfFile when all data has been consumed; ErrInvalidLineEnding when, at
+// row_pos 0, neither '\n' nor '\r' can be found.
+fn (mut r Reader) read_line() ?string {
+	// all data consumed; `>=` because row_pos ends up at data.len + 1 after an
+	// unterminated final line
+	if r.row_pos >= r.data.len {
+		return IError(&ErrEndOfFile{})
+	}
+	le := if r.is_mac_pre_osx_le { '\r' } else { '\n' }
+	mut i := r.data.index_after(le, r.row_pos)
+	if i == -1 {
+		if r.row_pos == 0 {
+			// check for pre osx mac line endings
+			i = r.data.index_after('\r', r.row_pos)
+			if i != -1 {
+				r.is_mac_pre_osx_le = true
+			} else {
+				// no valid line endings found
+				return IError(&ErrInvalidLineEnding{})
+			}
+		} else {
+			// no line ending on the last line: it runs to the end of the data
+			// (the slice below is end-exclusive, so data.len keeps the last byte)
+			i = r.data.len
+		}
+	}
+	mut line := r.data[r.row_pos..i]
+	r.row_pos = i + 1
+	// normalize win line endings (remove extra \r)
+	if !r.is_mac_pre_osx_le && (line.len >= 1 && line[line.len - 1] == `\r`) {
+		line = line[..line.len - 1]
+	}
+	return line
+}
+
+// read_record reads lines via read_line until one record is complete and returns its fields.
+// It fails with ErrCommentIsDelimiter or ErrInvalidDelimiter when the reader is misconfigured,
+// and propagates read_line errors (e.g. ErrEndOfFile).
+// Empty lines and lines starting with the comment byte are skipped; while a quoted field is
+// still open they are appended to it instead, so quoted fields may span several lines.
+// A doubled quote ("") inside a quoted field is unescaped to a single quote.
+fn (mut r Reader) read_record() ?[]string {
+	if r.delimiter == r.comment {
+		return IError(&ErrCommentIsDelimiter{})
+	}
+	if !valid_delim(r.delimiter) {
+		return IError(&ErrInvalidDelimiter{})
+	}
+	mut need_read := true
+	mut keep_raw := false
+	mut line := ''
+	mut fields := []string{}
+	mut i := -1
+	for {
+		if need_read {
+			l := r.read_line() ?
+			if l.len <= 0 {
+				if keep_raw {
+					line += '\n'
+				}
+				continue
+			} else if l[0] == r.comment {
+				if keep_raw {
+					line += '\n' + l
+				}
+				continue
+			} else {
+				if keep_raw {
+					line += '\n'
+				}
+				line += l
+			}
+			need_read = false
+			keep_raw = false
+		}
+		if line.len == 0 || line[0] != `"` { // not quoted
+			j := line.index(r.delimiter.ascii_str()) or {
+				// last
+				fields << line[..line.len]
+				break
+			}
+			i = j
+			fields << line[..i]
+			line = line[i + 1..]
+			continue
+		} else { // quoted
+			mut need_more := true
+			mut has_double_quotes := false
+			mut j := 0
+			mut n := 1
+			for n < line.len {
+				if line[n] == `"` {
+					if n == line.len - 1 || line[n + 1] != `"` {
+						need_more = false
+						j = n - 1
+						break
+					} else {
+						has_double_quotes = true
+						n++
+					}
+				}
+				n++
+			}
+			if need_more {
+				need_read = true
+				keep_raw = true
+				continue
+			}
+			line = line[1..]
+			if j + 1 == line.len {
+				// last record
+				fields << if has_double_quotes { line[..j].replace('""', '"') } else { line[..j] }
+				break
+			}
+			next := line[j + 1]
+			if next == r.delimiter {
+				fields << if has_double_quotes { line[..j].replace('""', '"') } else { line[..j] }
+				if j + 2 == line.len {
+					line = ''
+				} else {
+					line = line[j + 2..]
+				}
+				continue
+			}
+		}
+		// Only reached from the quoted branch when the closing quote is followed by
+		// something other than the delimiter: with no delimiter seen so far and no
+		// fields collected, the record cannot be split.
+		if i <= -1 && fields.len == 0 {
+			return IError(&ErrInvalidDelimiter{})
+		}
+	}
+	return fields
+}
+
+// valid_delim reports whether `b` may be used as a delimiter:
+// it must not be NUL, a double quote, '\r' or '\n'.
+fn valid_delim(b byte) bool {
+	return b != 0 && b != `"` && b != `\r` && b != `\n`
+}
diff --git a/v_windows/v/old/vlib/encoding/csv/reader_test.v b/v_windows/v/old/vlib/encoding/csv/reader_test.v
new file mode 100644
index 0000000..cd54827
--- /dev/null
+++ b/v_windows/v/old/vlib/encoding/csv/reader_test.v
@@ -0,0 +1,253 @@
+import encoding.csv
+
+// Reads a header row and two data rows; the trailing row starting with `#` is a
+// comment and must not be counted.
+fn test_encoding_csv_reader() {
+	data := 'name,email,phone,other\njoe,joe@blow.com,0400000000,test\nsam,sam@likesham.com,0433000000,"test quoted field"\n#chris,chris@nomail.com,94444444,"commented row"\n'
+	mut csv_reader := csv.new_reader(data)
+	mut row_count := 0
+	for {
+		row := csv_reader.read() or { break }
+		row_count++
+		if row_count == 1 {
+			assert row[0] == 'name'
+			assert row[1] == 'email'
+			assert row[2] == 'phone'
+			assert row[3] == 'other'
+		} else if row_count == 2 {
+			assert row[0] == 'joe'
+			assert row[1] == 'joe@blow.com'
+			assert row[2] == '0400000000'
+			assert row[3] == 'test'
+		} else if row_count == 3 {
+			assert row[0] == 'sam'
+			assert row[1] == 'sam@likesham.com'
+			assert row[2] == '0433000000'
+			// quoted field
+			assert row[3] == 'test quoted field'
+		}
+	}
+	assert row_count == 3
+}
+
+// Rows separated by '\n'.
+fn test_line_break_lf() {
+	lf_data := 'name,email\njoe,joe@blow.com\n'
+	mut csv_reader := csv.new_reader(lf_data)
+	mut row_count := 0
+	for {
+		row := csv_reader.read() or { break }
+		row_count++
+		if row_count == 1 {
+			assert row[0] == 'name'
+			assert row[1] == 'email'
+		} else if row_count == 2 {
+			assert row[0] == 'joe'
+			assert row[1] == 'joe@blow.com'
+		}
+	}
+	assert row_count == 2
+}
+
+// Rows separated by '\r' only.
+fn test_line_break_cr() {
+	cr_data := 'name,email\rjoe,joe@blow.com\r'
+	mut csv_reader := csv.new_reader(cr_data)
+	mut row_count := 0
+	for {
+		row := csv_reader.read() or { break }
+		row_count++
+		if row_count == 1 {
+			assert row[0] == 'name'
+			assert row[1] == 'email'
+		} else if row_count == 2 {
+			assert row[0] == 'joe'
+			assert row[1] == 'joe@blow.com'
+		}
+	}
+	assert row_count == 2
+}
+
+// Rows separated by '\r\n'; the '\r' must not end up in the last field.
+fn test_line_break_crlf() {
+	crlf_data := 'name,email\r\njoe,joe@blow.com\r\n'
+	mut csv_reader := csv.new_reader(crlf_data)
+	mut row_count := 0
+	for {
+		row := csv_reader.read() or { break }
+		row_count++
+		if row_count == 1 {
+			assert row[0] == 'name'
+			assert row[1] == 'email'
+		} else if row_count == 2 {
+			assert row[0] == 'joe'
+			assert row[1] == 'joe@blow.com'
+		}
+	}
+	assert row_count == 2
+}
+
+// The last row has no trailing line ending.
+// NOTE(review): only the number of rows is asserted here, not the field contents.
+fn test_no_line_ending() {
+	data := 'name,email,phone,other\njoe,joe@blow.com,0400000000,test'
+	mut csv_reader := csv.new_reader(data)
+	mut row_count := 0
+	for {
+		csv_reader.read() or { break }
+		row_count++
+	}
+	assert row_count == 2
+}
+
+// A delimiter at the end of a line yields an empty last field.
+fn test_last_field_empty() {
+	data := '"name","description","value"\n"one","first","1"\n"two","second",\n'
+	mut csv_reader := csv.new_reader(data)
+	mut row_count := 0
+	for {
+		row := csv_reader.read() or { break }
+		row_count++
+		if row_count == 1 {
+			assert row[0] == 'name'
+			assert row[1] == 'description'
+			assert row[2] == 'value'
+		} else if row_count == 2 {
+			assert row[0] == 'one'
+			assert row[1] == 'first'
+			assert row[2] == '1'
+		} else if row_count == 3 {
+			assert row[0] == 'two'
+			assert row[1] == 'second'
+			assert row[2] == ''
+		}
+	}
+	assert row_count == 3
+}
+
+// Unquoted empty fields in the first, middle and last position.
+fn test_empty_fields_no_quotes() {
+	data := '1,2,3,4\n,6,7,8\n9,,11,12\n13,14,,16\n17,18,19,\n'
+
+	mut csv_reader := csv.new_reader(data)
+	mut row_count := 0
+	for {
+		row := csv_reader.read() or { break }
+		row_count++
+		if row_count == 1 {
+			assert row[0] == '1'
+			assert row[1] == '2'
+			assert row[2] == '3'
+			assert row[3] == '4'
+		} else if row_count == 2 {
+			assert row[0] == ''
+			assert row[1] == '6'
+			assert row[2] == '7'
+			assert row[3] == '8'
+		} else if row_count == 3 {
+			assert row[0] == '9'
+			assert row[1] == ''
+			assert row[2] == '11'
+			assert row[3] == '12'
+		} else if row_count == 4 {
+			assert row[0] == '13'
+			assert row[1] == '14'
+			assert row[2] == ''
+			assert row[3] == '16'
+		} else if row_count == 5 {
+			assert row[0] == '17'
+			assert row[1] == '18'
+			assert row[2] == '19'
+			assert row[3] == ''
+		}
+	}
+	assert row_count == 5
+}
+
+// Empty lines between records are skipped and do not count as rows.
+fn test_empty_line() {
+	data := '"name","description","value"\n\n\n"one","first","1"\n\n"two","second",\n'
+	mut csv_reader := csv.new_reader(data)
+	mut row_count := 0
+	for {
+		row := csv_reader.read() or { break }
+		row_count++
+		if row_count == 1 {
+			assert row[0] == 'name'
+			assert row[1] == 'description'
+			assert row[2] == 'value'
+		} else if row_count == 2 {
+			assert row[0] == 'one'
+			assert row[1] == 'first'
+			assert row[2] == '1'
+		} else if row_count == 3 {
+			assert row[0] == 'two'
+			assert row[1] == 'second'
+		}
+	}
+	assert row_count == 3
+}
+
+// A quoted field may contain line breaks (here two literal newlines inside the string).
+fn test_field_multiple_line() {
+	data := '"name","multiple
+
+ line","value"\n"one","first","1"\n'
+	mut csv_reader := csv.new_reader(data)
+	mut row_count := 0
+	for {
+		row := csv_reader.read() or { break }
+		row_count++
+		if row_count == 1 {
+			assert row[0] == 'name'
+			assert row[1] == 'multiple\n\n line'
+			assert row[2] == 'value'
+		} else if row_count == 2 {
+			assert row[0] == 'one'
+			assert row[1] == 'first'
+			assert row[2] == '1'
+		}
+	}
+	assert row_count == 2
+}
+
+// Quoted and unquoted fields may be mixed within one record.
+fn test_field_quotes_for_parts() {
+	data := 'a1,"b1",c1\n"a2",b2,c2\na3,b3,"c3"\na4,b4,c4\n'
+	mut csv_reader := csv.new_reader(data)
+	mut row_count := 0
+	for {
+		row := csv_reader.read() or { break }
+		row_count++
+		if row_count == 1 {
+			assert row[0] == 'a1'
+			assert row[1] == 'b1'
+			assert row[2] == 'c1'
+		} else if row_count == 2 {
+			assert row[0] == 'a2'
+			assert row[1] == 'b2'
+			assert row[2] == 'c2'
+		} else if row_count == 3 {
+			assert row[0] == 'a3'
+			assert row[1] == 'b3'
+			assert row[2] == 'c3'
+		} else if row_count == 4 {
+			assert row[0] == 'a4'
+			assert row[1] == 'b4'
+			assert row[2] == 'c4'
+		}
+	}
+	assert row_count == 4
+}
+
+// Doubled quotes inside a quoted field are read back as single quotes,
+// also when the field spans a line break.
+fn test_field_double_quotes() {
+	row1 := '11,"12\n13"\n'
+	row2 := '21,"2""2""\n23"\n'
+	row3 := '"3""1""",32\n'
+	data := row1 + row2 + row3
+	mut csv_reader := csv.new_reader(data)
+	mut row_count := 0
+	for {
+		row := csv_reader.read() or { break }
+		row_count++
+		if row_count == 1 {
+			assert row[0] == '11'
+			assert row[1] == '12\n13'
+		} else if row_count == 2 {
+			assert row[0] == '21'
+			assert row[1] == '2"2"\n23'
+		} else if row_count == 3 {
+			assert row[0] == '3"1"'
+			assert row[1] == '32'
+		}
+	}
+	assert row_count == 3
+}
diff --git a/v_windows/v/old/vlib/encoding/csv/writer.v b/v_windows/v/old/vlib/encoding/csv/writer.v
new file mode 100644
index 0000000..735ca20
--- /dev/null
+++ b/v_windows/v/old/vlib/encoding/csv/writer.v
@@ -0,0 +1,80 @@
+// Copyright (c) 2019-2021 Alexander Medvednikov. All rights reserved.
+// Use of this source code is governed by an MIT license
+// that can be found in the LICENSE file.
+module csv
+
+import strings
+
+// Writer accumulates CSV records in a string builder.
+struct Writer {
+mut:
+	sb strings.Builder // receives everything written so far
+pub mut:
+	use_crlf  bool // when true, records end with '\r\n' instead of '\n'
+	delimiter byte // field separator; new_writer sets it to `,`
+}
+
+// new_writer returns a Writer that uses `,` as delimiter and a builder
+// created with strings.new_builder(200).
+pub fn new_writer() &Writer {
+	return &Writer{
+		delimiter: `,`
+		sb: strings.new_builder(200)
+	}
+}
+
+// write writes a single record
+// Fields are joined with the delimiter and the record is terminated with the line ending
+// selected by `use_crlf`. Fields for which field_needs_quotes is true are wrapped in double
+// quotes, with every `"` inside them doubled.
+// Returns true on success, or ErrInvalidDelimiter if valid_delim rejects the delimiter.
+pub fn (mut w Writer) write(record []string) ?bool {
+	if !valid_delim(w.delimiter) {
+		return IError(&ErrInvalidDelimiter{})
+	}
+	le := if w.use_crlf { '\r\n' } else { '\n' }
+	for n, field_ in record {
+		mut field := field_
+		if n > 0 {
+			w.sb.write_string(w.delimiter.ascii_str())
+		}
+		if !w.field_needs_quotes(field) {
+			w.sb.write_string(field)
+			continue
+		}
+		w.sb.write_string('"')
+		// copy the field in chunks, stopping at each character that needs special treatment
+		for field.len > 0 {
+			mut i := field.index_any('"\r\n')
+			if i < 0 {
+				i = field.len
+			}
+			w.sb.write_string(field[..i])
+			field = field[i..]
+			if field.len > 0 {
+				z := field[0]
+				match z {
+					`"` { w.sb.write_string('""') }
+					// NOTE(review): every single CR and LF is replaced by `le`, so an embedded
+					// '\r\n' is written as two line endings - confirm this is intended.
+					`\r`, `\n` { w.sb.write_string(le) }
+					else {}
+				}
+				field = field[1..]
+			}
+		}
+		w.sb.write_string('"')
+	}
+	w.sb.write_string(le)
+	return true
+}
+
+// Once we have multi dimensional array
+// pub fn (w &Writer) write_all(records [][]string) {
+// 	for _, record in records {
+// 		w.write(record)
+// 	}
+// }
+
+// field_needs_quotes reports whether `field` has to be quoted: true when it contains the
+// delimiter, a double quote, '\r' or '\n'. An empty field is never quoted.
+fn (w &Writer) field_needs_quotes(field string) bool {
+	if field == '' {
+		return false
+	}
+	if field.contains(w.delimiter.ascii_str()) || (field.index_any('"\r\n') != -1) {
+		return true
+	}
+	return false
+}
+
+// str returns the result of calling str() on the underlying builder,
+// i.e. the CSV text written so far.
+pub fn (mut w Writer) str() string {
+	return w.sb.str()
+}
diff --git a/v_windows/v/old/vlib/encoding/csv/writer_test.v b/v_windows/v/old/vlib/encoding/csv/writer_test.v
new file mode 100644
index 0000000..92882dd
--- /dev/null
+++ b/v_windows/v/old/vlib/encoding/csv/writer_test.v
@@ -0,0 +1,11 @@
+import encoding.csv
+
+// Writes three records; only the field containing the delimiter must come out quoted.
+fn test_encoding_csv_writer() {
+	mut csv_writer := csv.new_writer()
+
+	csv_writer.write(['name', 'email', 'phone', 'other']) or {}
+	csv_writer.write(['joe', 'joe@blow.com', '0400000000', 'test']) or {}
+	csv_writer.write(['sam', 'sam@likesham.com', '0433000000', 'needs, quoting']) or {}
+
+	assert csv_writer.str() == 'name,email,phone,other\njoe,joe@blow.com,0400000000,test\nsam,sam@likesham.com,0433000000,"needs, quoting"\n'
+}  | 
