diff options
Diffstat (limited to 'v_windows/v/vlib/encoding/csv')
-rw-r--r-- | v_windows/v/vlib/encoding/csv/README.md | 19 | ||||
-rw-r--r-- | v_windows/v/vlib/encoding/csv/reader.v | 196 | ||||
-rw-r--r-- | v_windows/v/vlib/encoding/csv/reader_test.v | 253 | ||||
-rw-r--r-- | v_windows/v/vlib/encoding/csv/writer.v | 80 | ||||
-rw-r--r-- | v_windows/v/vlib/encoding/csv/writer_test.v | 11 |
5 files changed, 559 insertions, 0 deletions
diff --git a/v_windows/v/vlib/encoding/csv/README.md b/v_windows/v/vlib/encoding/csv/README.md
new file mode 100644
index 0000000..01f3e4e
--- /dev/null
+++ b/v_windows/v/vlib/encoding/csv/README.md
@@ -0,0 +1,19 @@
+## Reader example
+
+```v
+import encoding.csv
+
+data := 'x,y\na,b,c\n'
+mut parser := csv.new_reader(data)
+// read each line
+for {
+	items := parser.read() or { break }
+	println(items)
+}
+```
+
+It prints:
+```
+['x', 'y']
+['a', 'b', 'c']
+```
diff --git a/v_windows/v/vlib/encoding/csv/reader.v b/v_windows/v/vlib/encoding/csv/reader.v
new file mode 100644
index 0000000..dafd022
--- /dev/null
+++ b/v_windows/v/vlib/encoding/csv/reader.v
@@ -0,0 +1,196 @@
+// Copyright (c) 2019-2021 Alexander Medvednikov. All rights reserved.
+// Use of this source code is governed by an MIT license
+// that can be found in the LICENSE file.
+module csv
+
+// Once interfaces are further along, the idea would be to have something similar to
+// Go's io.Reader & bufio.Reader rather than reading the whole file into a string; this
+// would then satisfy that interface. I designed it this way to be easily adapted.
+
+// ErrCommentIsDelimiter is returned by read_record when the reader's
+// `comment` byte is equal to its `delimiter` byte.
+struct ErrCommentIsDelimiter {
+	msg string = 'encoding.csv: comment cannot be the same as delimiter'
+	code int
+}
+
+// ErrInvalidDelimiter is returned when valid_delim rejects the configured
+// delimiter, and by read_record when it cannot split a record into fields.
+struct ErrInvalidDelimiter {
+	msg string = 'encoding.csv: invalid delimiter'
+	code int
+}
+
+// ErrEndOfFile is returned by read_line once `row_pos` has reached the end
+// of the data; callers of `read` use it as the signal to stop iterating.
+struct ErrEndOfFile {
+	msg string = 'encoding.csv: end of file'
+	code int
+}
+
+// ErrInvalidLineEnding is returned by read_line when, scanning from the
+// start of the data, neither a '\n' nor a '\r' can be found.
+struct ErrInvalidLineEnding {
+	msg string = 'encoding.csv: could not find any valid line endings'
+	code int
+}
+
+// Reader parses CSV records out of a string that is held fully in memory.
+struct Reader {
+	// not used yet
+	// has_header bool
+	// headings []string
+	// data is the complete CSV text being parsed
+	data string
+pub mut:
+	// delimiter is the byte that separates fields (new_reader sets `,`)
+	delimiter byte
+	// comment marks lines to skip: a line whose first byte equals it is ignored
+	// by read_record (new_reader sets `#`)
+	comment byte
+	// is_mac_pre_osx_le is switched on by read_line when the data only
+	// contains '\r' line endings; lines are then split on '\r' instead of '\n'
+	is_mac_pre_osx_le bool
+	// row_pos is the byte offset in `data` at which the next line starts
+	row_pos int
+}
+
+// new_reader initializes a Reader with string data to parse.
+// The delimiter defaults to `,` and the comment byte to `#`; both can be
+// changed on the returned Reader before the first call to `read`.
+pub fn new_reader(data string) &Reader {
+	return &Reader{
+		delimiter: `,`
+		comment: `#`
+		data: data
+	}
+}
+
+// read reads a row from the CSV data.
+// If successful, the result holds an array of each column's data.
+pub fn (mut r Reader) read() ?[]string {
+	// any error (including end of file) from read_record is propagated to the caller
+	l := r.read_record() ?
+	return l
+}
+
+// Once we have multi dimensional array
+// pub fn (mut r Reader) read_all() ?[][]string {
+// 	mut records := []string{}
+// 	for {
+// 		record := r.read_record() or {
+// 			if err.error == err_eof.error {
+// 				return records
+// 			} else {
+// 				return err
+// 			}
+// 		}
+// 		records << record
+// 	}
+// 	return records
+// }
+
+// read_line returns the next physical line of `data`, without its line
+// ending, and advances `row_pos` past it.
+// Lines are split on '\n' (a trailing '\r' is stripped, so CRLF works), or
+// on '\r' alone when no '\n' exists in the data (pre OS X Mac files).
+// The final line does not need a terminating line ending.
+// Errors: ErrEndOfFile when all data has been consumed, ErrInvalidLineEnding
+// when the data contains no line ending at all.
+fn (mut r Reader) read_line() ?string {
+	// All data consumed. `>=` is required because after an unterminated final
+	// line `row_pos` ends up one past `data.len`.
+	if r.row_pos >= r.data.len {
+		return IError(&ErrEndOfFile{})
+	}
+	le := if r.is_mac_pre_osx_le { '\r' } else { '\n' }
+	mut i := r.data.index_after(le, r.row_pos)
+	if i == -1 {
+		if r.row_pos == 0 {
+			// check for pre osx mac line endings
+			i = r.data.index_after('\r', r.row_pos)
+			if i != -1 {
+				r.is_mac_pre_osx_le = true
+			} else {
+				// no valid line endings found
+				return IError(&ErrInvalidLineEnding{})
+			}
+		} else {
+			// No line ending on the last line of the file: the line runs up to the
+			// end of the data. `i` is the exclusive end of the slice below, so it
+			// must be `data.len` (using `data.len - 1` would drop the last byte).
+			i = r.data.len
+		}
+	}
+	mut line := r.data[r.row_pos..i]
+	r.row_pos = i + 1
+	// normalize win line endings (remove extra \r)
+	if !r.is_mac_pre_osx_le && (line.len >= 1 && line[line.len - 1] == `\r`) {
+		line = line[..line.len - 1]
+	}
+	return line
+}
+
+// read_record parses the next record and returns its fields.
+// Empty lines and lines whose first byte is the comment byte are skipped,
+// unless a quoted field is still open, in which case they are kept as part
+// of that field. Quoted fields may span several lines and use `""` for a
+// literal quote character.
+// Errors: ErrCommentIsDelimiter, ErrInvalidDelimiter, or any error returned
+// by read_line (ErrEndOfFile when there is no further record).
+fn (mut r Reader) read_record() ?[]string {
+	if r.delimiter == r.comment {
+		return IError(&ErrCommentIsDelimiter{})
+	}
+	if !valid_delim(r.delimiter) {
+		return IError(&ErrInvalidDelimiter{})
+	}
+	// need_read: another physical line has to be appended to `line`
+	mut need_read := true
+	// keep_raw: we are inside an open quoted field, so line breaks are preserved
+	mut keep_raw := false
+	// line holds the not yet consumed remainder of the current record
+	mut line := ''
+	mut fields := []string{}
+	mut i := -1
+	for {
+		if need_read {
+			l := r.read_line() ?
+			if l.len <= 0 {
+				if keep_raw {
+					line += '\n'
+				}
+				continue
+			} else if l[0] == r.comment {
+				if keep_raw {
+					line += '\n' + l
+				}
+				continue
+			} else {
+				if keep_raw {
+					line += '\n'
+				}
+				line += l
+			}
+			need_read = false
+			keep_raw = false
+		}
+		if line.len == 0 || line[0] != `"` { // not quoted
+			j := line.index(r.delimiter.ascii_str()) or {
+				// last
+				fields << line[..line.len]
+				break
+			}
+			i = j
+			fields << line[..i]
+			line = line[i + 1..]
+			continue
+		} else { // quoted
+			mut need_more := true
+			mut has_double_quotes := false
+			mut j := 0
+			mut n := 1
+			// look for the closing quote; a doubled quote is an escaped literal quote
+			for n < line.len {
+				if line[n] == `"` {
+					if n == line.len - 1 || line[n + 1] != `"` {
+						need_more = false
+						// j is the index of the closing quote once the opening quote is cut off
+						j = n - 1
+						break
+					} else {
+						has_double_quotes = true
+						n++
+					}
+				}
+				n++
+			}
+			if need_more {
+				// the quoted field continues on the next physical line
+				need_read = true
+				keep_raw = true
+				continue
+			}
+			line = line[1..]
+			if j + 1 == line.len {
+				// last record
+				fields << if has_double_quotes { line[..j].replace('""', '"') } else { line[..j] }
+				break
+			}
+			next := line[j + 1]
+			if next == r.delimiter {
+				fields << if has_double_quotes { line[..j].replace('""', '"') } else { line[..j] }
+				if j + 2 == line.len {
+					line = ''
+				} else {
+					line = line[j + 2..]
+				}
+				continue
+			}
+		}
+		if i <= -1 && fields.len == 0 {
+			return IError(&ErrInvalidDelimiter{})
+		}
+	}
+	return fields
+}
+
+// valid_delim reports whether `b` may be used as a field delimiter:
+// it must not be NUL, a double quote, '\r' or '\n'.
+fn valid_delim(b byte) bool {
+	return b != 0 && b != `"` && b != `\r` && b != `\n`
+}
diff --git a/v_windows/v/vlib/encoding/csv/reader_test.v b/v_windows/v/vlib/encoding/csv/reader_test.v
new file mode 100644
index 0000000..cd54827
--- /dev/null
+++ b/v_windows/v/vlib/encoding/csv/reader_test.v
@@ -0,0 +1,253 @@
+import encoding.csv
+
+fn test_encoding_csv_reader() {
+	data := 'name,email,phone,other\njoe,joe@blow.com,0400000000,test\nsam,sam@likesham.com,0433000000,"test quoted field"\n#chris,chris@nomail.com,94444444,"commented row"\n'
+	mut csv_reader := csv.new_reader(data)
+	mut row_count := 0
+	for {
+		row := csv_reader.read() or { break }
+		row_count++
+		if row_count == 1 {
+			assert row[0] == 'name'
+			assert row[1] == 'email'
+			assert row[2] == 'phone'
+			assert row[3] == 'other'
+		} else if row_count == 2 {
+			assert row[0] == 'joe'
+			assert row[1] == 'joe@blow.com'
+			assert row[2] == '0400000000'
+			assert row[3] == 'test'
+		} else if row_count == 3 {
+			assert row[0] == 'sam'
+			assert row[1] == 'sam@likesham.com'
+			assert row[2] == '0433000000'
+			// quoted field
+			assert row[3] == 'test quoted field'
+		}
+	}
+	assert row_count == 3
+}
+
+fn test_line_break_lf() {
+	lf_data := 'name,email\njoe,joe@blow.com\n'
+	mut csv_reader := csv.new_reader(lf_data)
+	mut row_count := 0
+	for {
+		row := csv_reader.read() or { break }
+		row_count++
+		if row_count == 1 {
+			assert row[0] == 'name'
+			assert row[1] == 'email'
+		} else if row_count == 2 {
+			assert row[0] == 'joe'
+			assert row[1] == 'joe@blow.com'
+		}
+	}
+	assert row_count == 2
+}
+
+fn test_line_break_cr() {
+	cr_data := 'name,email\rjoe,joe@blow.com\r'
+	mut csv_reader := csv.new_reader(cr_data)
+	mut row_count := 0
+	for {
+		row := csv_reader.read() or { break }
+		row_count++
+		if row_count == 1 {
+			assert row[0] == 'name'
+			assert row[1] == 'email'
+		} else if row_count == 2 {
+			assert row[0] == 'joe'
+			assert row[1] == 'joe@blow.com'
+		}
+	}
+	assert row_count == 2
+}
+
+fn test_line_break_crlf() {
+	crlf_data := 'name,email\r\njoe,joe@blow.com\r\n'
+	mut csv_reader := csv.new_reader(crlf_data)
+	mut row_count := 0
+	for {
+		row := csv_reader.read() or { break }
+		row_count++
+		if row_count == 1 {
+			assert row[0] == 'name'
+			assert row[1] == 'email'
+		} else if row_count == 2 {
+			assert row[0] == 'joe'
+			assert row[1] == 'joe@blow.com'
+		}
+	}
+	assert row_count == 2
+}
+
+fn test_no_line_ending() {
+	data := 'name,email,phone,other\njoe,joe@blow.com,0400000000,test'
+	mut csv_reader := csv.new_reader(data)
+	mut row_count := 0
+	for {
+		row := csv_reader.read() or { break }
+		row_count++
+		if row_count == 2 {
+			// the unterminated final record must keep its very last character
+			assert row[0] == 'joe'
+			assert row[3] == 'test'
+		}
+	}
+	assert row_count == 2
+}
+
+fn test_last_field_empty() {
+	data := '"name","description","value"\n"one","first","1"\n"two","second",\n'
+	mut csv_reader := csv.new_reader(data)
+	mut row_count := 0
+	for {
+		row := csv_reader.read() or { break }
+		row_count++
+		if row_count == 1 {
+			assert row[0] == 'name'
+			assert row[1] == 'description'
+			assert row[2] == 'value'
+		} else if row_count == 2 {
+			assert row[0] == 'one'
+			assert row[1] == 'first'
+			assert row[2] == '1'
+		} else if row_count == 3 {
+			assert row[0] == 'two'
+			assert row[1] == 'second'
+			assert row[2] == ''
+		}
+	}
+	assert row_count == 3
+}
+
+fn test_empty_fields_no_quotes() {
+	data := '1,2,3,4\n,6,7,8\n9,,11,12\n13,14,,16\n17,18,19,\n'
+
+	mut csv_reader := csv.new_reader(data)
+	mut row_count := 0
+	for {
+		row := csv_reader.read() or { break }
+		row_count++
+		if row_count == 1 {
+			assert row[0] == '1'
+			assert row[1] == '2'
+			assert row[2] == '3'
+			assert row[3] == '4'
+		} else if row_count == 2 {
+			assert row[0] == ''
+			assert row[1] == '6'
+			assert row[2] == '7'
+			assert row[3] == '8'
+		} else if row_count == 3 {
+			assert row[0] == '9'
+			assert row[1] == ''
+			assert row[2] == '11'
+			assert row[3] == '12'
+		} else if row_count == 4 {
+			assert row[0] == '13'
+			assert row[1] == '14'
+			assert row[2] == ''
+			assert row[3] == '16'
+		} else if row_count == 5 {
+			assert row[0] == '17'
+			assert row[1] == '18'
+			assert row[2] == '19'
+			assert row[3] == ''
+		}
+	}
+	assert row_count == 5
+}
+
+fn test_empty_line() {
+	data := '"name","description","value"\n\n\n"one","first","1"\n\n"two","second",\n'
+	mut csv_reader := csv.new_reader(data)
+	mut row_count := 0
+	for {
+		row := csv_reader.read() or { break }
+		row_count++
+		if row_count == 1 {
+			assert row[0] == 'name'
+			assert row[1] == 'description'
+			assert row[2] == 'value'
+		} else if row_count == 2 {
+			assert row[0] == 'one'
+			assert row[1] == 'first'
+			assert row[2] == '1'
+		} else if row_count == 3 {
+			assert row[0] == 'two'
+			assert row[1] == 'second'
+		}
+	}
+	assert row_count == 3
+}
+
+fn test_field_multiple_line() {
+	data := '"name","multiple
+
+ line","value"\n"one","first","1"\n'
+	mut csv_reader := csv.new_reader(data)
+	mut row_count := 0
+	for {
+		row := csv_reader.read() or { break }
+		row_count++
+		if row_count == 1 {
+			assert row[0] == 'name'
+			assert row[1] == 'multiple\n\n line'
+			assert row[2] == 'value'
+		} else if row_count == 2 {
+			assert row[0] == 'one'
+			assert row[1] == 'first'
+			assert row[2] == '1'
+		}
+	}
+	assert row_count == 2
+}
+
+fn test_field_quotes_for_parts() {
+	data := 'a1,"b1",c1\n"a2",b2,c2\na3,b3,"c3"\na4,b4,c4\n'
+	mut csv_reader := csv.new_reader(data)
+	mut row_count := 0
+	for {
+		row := csv_reader.read() or { break }
+		row_count++
+		if row_count == 1 {
+			assert row[0] == 'a1'
+			assert row[1] == 'b1'
+			assert row[2] == 'c1'
+		} else if row_count == 2 {
+			assert row[0] == 'a2'
+			assert row[1] == 'b2'
+			assert row[2] == 'c2'
+		} else if row_count == 3 {
+			assert row[0] == 'a3'
+			assert row[1] == 'b3'
+			assert row[2] == 'c3'
+		} else if row_count == 4 {
+			assert row[0] == 'a4'
+			assert row[1] == 'b4'
+			assert row[2] == 'c4'
+		}
+	}
+	assert row_count == 4
+}
+
+fn test_field_double_quotes() {
+	row1 := '11,"12\n13"\n'
+	row2 := '21,"2""2""\n23"\n'
+	row3 := '"3""1""",32\n'
+	data := row1 + row2 + row3
+	mut csv_reader := csv.new_reader(data)
+	mut row_count := 0
+	for {
+		row := csv_reader.read() or { break }
+		row_count++
+		if row_count == 1 {
+			assert row[0] == '11'
+			assert row[1] == '12\n13'
+		} else if row_count == 2 {
+			assert row[0] == '21'
+			assert row[1] == '2"2"\n23'
+		} else if row_count == 3 {
+			assert row[0] == '3"1"'
+			assert row[1] == '32'
+		}
+	}
+	assert row_count == 3
+}
diff --git a/v_windows/v/vlib/encoding/csv/writer.v b/v_windows/v/vlib/encoding/csv/writer.v
new file mode 100644
index 0000000..735ca20
--- /dev/null
+++ b/v_windows/v/vlib/encoding/csv/writer.v
@@ -0,0 +1,80 @@
+// Copyright (c) 2019-2021 Alexander Medvednikov. All rights reserved.
+// Use of this source code is governed by an MIT license
+// that can be found in the LICENSE file.
+module csv
+
+import strings
+
+// Writer builds CSV text in memory, one record per call to `write`.
+struct Writer {
+mut:
+	// sb accumulates everything written so far
+	sb strings.Builder
+pub mut:
+	// use_crlf selects '\r\n' instead of '\n' as the record terminator
+	use_crlf bool
+	// delimiter is the byte written between fields (new_writer sets `,`)
+	delimiter byte
+}
+
+// new_writer returns a Writer that uses `,` as the delimiter and whose
+// buffer is created with strings.new_builder(200).
+pub fn new_writer() &Writer {
+	return &Writer{
+		delimiter: `,`
+		sb: strings.new_builder(200)
+	}
+}
+
+// write writes a single record, followed by the line ending selected by
+// `use_crlf`. Fields for which field_needs_quotes is true are wrapped in
+// double quotes; inside them every `"` is doubled and every '\r' or '\n'
+// byte is replaced by the selected line ending.
+// Returns true on success, or ErrInvalidDelimiter when valid_delim rejects
+// the configured delimiter (nothing is written in that case).
+pub fn (mut w Writer) write(record []string) ?bool {
+	if !valid_delim(w.delimiter) {
+		return IError(&ErrInvalidDelimiter{})
+	}
+	le := if w.use_crlf { '\r\n' } else { '\n' }
+	for n, field_ in record {
+		mut field := field_
+		if n > 0 {
+			w.sb.write_string(w.delimiter.ascii_str())
+		}
+		if !w.field_needs_quotes(field) {
+			w.sb.write_string(field)
+			continue
+		}
+		w.sb.write_string('"')
+		// emit the field chunk by chunk, stopping at each byte that needs escaping
+		for field.len > 0 {
+			mut i := field.index_any('"\r\n')
+			if i < 0 {
+				i = field.len
+			}
+			w.sb.write_string(field[..i])
+			field = field[i..]
+			if field.len > 0 {
+				z := field[0]
+				match z {
+					`"` { w.sb.write_string('""') }
+					`\r`, `\n` { w.sb.write_string(le) }
+					else {}
+				}
+				field = field[1..]
+			}
+		}
+		w.sb.write_string('"')
+	}
+	w.sb.write_string(le)
+	return true
+}
+
+// Once we have multi dimensional array
+// pub fn (w &Writer) write_all(records [][]string) {
+// 	for _, record in records {
+// 		w.write(record)
+// 	}
+// }
+
+// field_needs_quotes reports whether `field` has to be quoted: it contains
+// the delimiter, a double quote, '\r' or '\n'. An empty field is never quoted.
+fn (w &Writer) field_needs_quotes(field string) bool {
+	if field == '' {
+		return false
+	}
+	if field.contains(w.delimiter.ascii_str()) || (field.index_any('"\r\n') != -1) {
+		return true
+	}
+	return false
+}
+
+// str returns the CSV text accumulated so far, as produced by the builder's str().
+// NOTE(review): strings.Builder.str() may reset the builder - confirm before
+// calling str() more than once or writing again afterwards.
+pub fn (mut w Writer) str() string {
+	return w.sb.str()
+}
diff --git a/v_windows/v/vlib/encoding/csv/writer_test.v b/v_windows/v/vlib/encoding/csv/writer_test.v
new file mode 100644
index 0000000..92882dd
--- /dev/null
+++ b/v_windows/v/vlib/encoding/csv/writer_test.v
@@ -0,0 +1,11 @@
+import encoding.csv
+
+fn test_encoding_csv_writer() {
+	mut csv_writer := csv.new_writer()
+
+	csv_writer.write(['name', 'email', 'phone', 'other']) or {}
+	csv_writer.write(['joe', 'joe@blow.com', '0400000000', 'test']) or {}
+	csv_writer.write(['sam', 'sam@likesham.com', '0433000000', 'needs, quoting']) or {}
+
+	assert csv_writer.str() == 'name,email,phone,other\njoe,joe@blow.com,0400000000,test\nsam,sam@likesham.com,0433000000,"needs, quoting"\n'
+} |