aboutsummaryrefslogtreecommitdiff
path: root/v_windows/v/vlib/encoding/csv
diff options
context:
space:
mode:
Diffstat (limited to 'v_windows/v/vlib/encoding/csv')
-rw-r--r--v_windows/v/vlib/encoding/csv/README.md19
-rw-r--r--v_windows/v/vlib/encoding/csv/reader.v196
-rw-r--r--v_windows/v/vlib/encoding/csv/reader_test.v253
-rw-r--r--v_windows/v/vlib/encoding/csv/writer.v80
-rw-r--r--v_windows/v/vlib/encoding/csv/writer_test.v11
5 files changed, 559 insertions, 0 deletions
diff --git a/v_windows/v/vlib/encoding/csv/README.md b/v_windows/v/vlib/encoding/csv/README.md
new file mode 100644
index 0000000..01f3e4e
--- /dev/null
+++ b/v_windows/v/vlib/encoding/csv/README.md
@@ -0,0 +1,19 @@
+## Reader example
+
+```v
+import encoding.csv
+
+data := 'x,y\na,b,c\n'
+mut parser := csv.new_reader(data)
+// read each line
+for {
+ items := parser.read() or { break }
+ println(items)
+}
+```
+
+It prints:
+```
+['x', 'y']
+['a', 'b', 'c']
+```
diff --git a/v_windows/v/vlib/encoding/csv/reader.v b/v_windows/v/vlib/encoding/csv/reader.v
new file mode 100644
index 0000000..dafd022
--- /dev/null
+++ b/v_windows/v/vlib/encoding/csv/reader.v
@@ -0,0 +1,196 @@
+// Copyright (c) 2019-2021 Alexander Medvednikov. All rights reserved.
+// Use of this source code is governed by an MIT license
+// that can be found in the LICENSE file.
+module csv
+
+// Once interfaces are further along, the idea is to have something similar to
+// Go's io.Reader & bufio.Reader instead of reading the whole file into a string;
+// Reader would then satisfy that interface. It was designed to be easily adapted.
+// ErrCommentIsDelimiter is the error read_record returns (wrapped in IError)
+// when the reader's comment character is equal to its delimiter.
+struct ErrCommentIsDelimiter {
+ msg string = 'encoding.csv: comment cannot be the same as delimiter'
+ code int
+}
+
+// ErrInvalidDelimiter is the error returned (wrapped in IError) by read_record
+// and by Writer.write when the configured delimiter is rejected by valid_delim.
+// read_record also returns it from its final fall-through check, when no
+// delimiter and no field has been found yet.
+struct ErrInvalidDelimiter {
+ msg string = 'encoding.csv: invalid delimiter'
+ code int
+}
+
+// ErrEndOfFile is the error read_line returns (wrapped in IError) once the
+// reader's row_pos has reached the end of the data.
+struct ErrEndOfFile {
+ msg string = 'encoding.csv: end of file'
+ code int
+}
+
+// ErrInvalidLineEnding is the error read_line returns (wrapped in IError) when
+// it is at the start of the data and finds neither a '\n' nor a '\r' in it.
+struct ErrInvalidLineEnding {
+ msg string = 'encoding.csv: could not find any valid line endings'
+ code int
+}
+
+// Reader parses CSV records out of a string that is held entirely in memory.
+struct Reader {
+ // not used yet
+ // has_header bool
+ // headings []string
+ // the complete CSV text being parsed
+ data string
+pub mut:
+ // field separator; new_reader sets it to `,`
+ delimiter byte
+ // a line whose first byte is this character is treated as a comment by
+ // read_record; new_reader sets it to `#`
+ comment byte
+ // set by read_line when, at the start of the data, no '\n' is found but a
+ // '\r' is; from then on lines are split on '\r'
+ is_mac_pre_osx_le bool
+ // byte offset into data at which the next read_line call starts
+ row_pos int
+}
+
+// new_reader returns a reference to a Reader that will parse `data`.
+// The reader starts with `,` as its delimiter and `#` as its comment character;
+// both are public fields and can be changed before the first read.
+pub fn new_reader(data string) &Reader {
+	reader := &Reader{
+		data: data
+		delimiter: `,`
+		comment: `#`
+	}
+	return reader
+}
+
+// read returns the next row of the CSV data as an array with one string per column.
+// Any error produced by read_record (including the end-of-file error) is
+// propagated to the caller unchanged.
+pub fn (mut r Reader) read() ?[]string {
+	return r.read_record() ?
+}
+
+// Once we have multi dimensional array
+// pub fn (mut r Reader) read_all() ?[][]string {
+// mut records := []string{}
+// for {
+// record := r.read_record() or {
+// if err.error == err_eof.error {
+// return records
+// } else {
+// return err
+// }
+// }
+// records << record
+// }
+// return records
+// }
+// read_line returns the next physical line of `r.data`, starting at `r.row_pos`,
+// without its line terminator, and moves `r.row_pos` past that terminator.
+// Lines are split on '\n'; a trailing '\r' (Windows line ending) is stripped.
+// If the very first search finds no '\n' but does find a '\r', the reader
+// switches to lone-'\r' line endings for the rest of the data.
+// It returns ErrEndOfFile once all data has been consumed, and
+// ErrInvalidLineEnding when the data contains no line ending at all.
+fn (mut r Reader) read_line() ?string {
+	// all data consumed; `>=` because row_pos ends up one past the end of the
+	// data after a final line that has no terminator
+	if r.row_pos >= r.data.len {
+		return IError(&ErrEndOfFile{})
+	}
+	le := if r.is_mac_pre_osx_le { '\r' } else { '\n' }
+	mut i := r.data.index_after(le, r.row_pos)
+	if i == -1 {
+		if r.row_pos == 0 {
+			// check for pre osx mac line endings
+			i = r.data.index_after('\r', r.row_pos)
+			if i != -1 {
+				r.is_mac_pre_osx_le = true
+			} else {
+				// no valid line endings found
+				return IError(&ErrInvalidLineEnding{})
+			}
+		} else {
+			// the last line has no line ending: it runs to the very end of the
+			// data (`i` is an exclusive slice bound, so it must be data.len,
+			// otherwise the final byte of the file is lost)
+			i = r.data.len
+		}
+	}
+	mut line := r.data[r.row_pos..i]
+	r.row_pos = i + 1
+	// normalize win line endings (remove extra \r)
+	if !r.is_mac_pre_osx_le && (line.len >= 1 && line[line.len - 1] == `\r`) {
+		line = line[..line.len - 1]
+	}
+	return line
+}
+
+// read_record parses the next CSV record and returns its fields.
+// It pulls physical lines from read_line, skipping empty lines and lines whose
+// first byte is the comment character, and splits the line on the delimiter.
+// A field that starts with `"` is read up to its closing quote; `""` inside it
+// stands for one literal quote, and if the closing quote is not on the current
+// line further lines are appended (joined with '\n') until it is found.
+// Errors: ErrCommentIsDelimiter, ErrInvalidDelimiter, and whatever read_line
+// returns (e.g. the end-of-file error).
+fn (mut r Reader) read_record() ?[]string {
+ if r.delimiter == r.comment {
+ return IError(&ErrCommentIsDelimiter{})
+ }
+ if !valid_delim(r.delimiter) {
+ return IError(&ErrInvalidDelimiter{})
+ }
+ // need_read: a further physical line has to be fetched before parsing goes on
+ mut need_read := true
+ // keep_raw: the line being fetched continues an unterminated quoted field
+ mut keep_raw := false
+ // the not yet consumed remainder of the current record
+ mut line := ''
+ mut fields := []string{}
+ // position of the last delimiter found by the unquoted branch (-1: none yet)
+ mut i := -1
+ for {
+ if need_read {
+ l := r.read_line() ?
+ if l.len <= 0 {
+ // empty line: skipped, but inside a quoted field its line break is kept
+ if keep_raw {
+ line += '\n'
+ }
+ continue
+ } else if l[0] == r.comment {
+ // comment line: skipped, but inside a quoted field it is kept verbatim
+ if keep_raw {
+ line += '\n' + l
+ }
+ continue
+ } else {
+ if keep_raw {
+ line += '\n'
+ }
+ line += l
+ }
+ need_read = false
+ keep_raw = false
+ }
+ if line.len == 0 || line[0] != `"` { // not quoted
+ j := line.index(r.delimiter.ascii_str()) or {
+ // last
+ fields << line[..line.len]
+ break
+ }
+ i = j
+ fields << line[..i]
+ line = line[i + 1..]
+ continue
+ } else { // quoted
+ mut need_more := true
+ mut has_double_quotes := false
+ mut j := 0
+ // scan for the closing quote, starting after the opening one
+ mut n := 1
+ for n < line.len {
+ if line[n] == `"` {
+ if n == line.len - 1 || line[n + 1] != `"` {
+ // closing quote found; j is its index once the opening quote is
+ // stripped from line, i.e. the length of the field's raw content
+ need_more = false
+ j = n - 1
+ break
+ } else {
+ // `""` is an escaped quote: step over both characters
+ has_double_quotes = true
+ n++
+ }
+ }
+ n++
+ }
+ if need_more {
+ // no closing quote on this line: append the next physical line and rescan
+ need_read = true
+ keep_raw = true
+ continue
+ }
+ // drop the opening quote
+ line = line[1..]
+ if j + 1 == line.len {
+ // last record
+ fields << if has_double_quotes { line[..j].replace('""', '"') } else { line[..j] }
+ break
+ }
+ next := line[j + 1]
+ if next == r.delimiter {
+ fields << if has_double_quotes { line[..j].replace('""', '"') } else { line[..j] }
+ if j + 2 == line.len {
+ line = ''
+ } else {
+ line = line[j + 2..]
+ }
+ continue
+ }
+ // NOTE(review): when the closing quote is followed by something other than
+ // the delimiter, control falls through to the check below with line already
+ // stripped of its opening quote - confirm this is the intended handling.
+ }
+ // only reached from the quoted branch above (the unquoted branch always
+ // continues or breaks)
+ if i <= -1 && fields.len == 0 {
+ return IError(&ErrInvalidDelimiter{})
+ }
+ }
+ return fields
+}
+
+// valid_delim reports whether `b` may be used as a field delimiter.
+// The NUL byte, the double quote, '\r' and '\n' are rejected; every other
+// byte is accepted.
+fn valid_delim(b byte) bool {
+	if b == 0 || b == `"` {
+		return false
+	}
+	is_line_break := b == `\r` || b == `\n`
+	return !is_line_break
+}
diff --git a/v_windows/v/vlib/encoding/csv/reader_test.v b/v_windows/v/vlib/encoding/csv/reader_test.v
new file mode 100644
index 0000000..cd54827
--- /dev/null
+++ b/v_windows/v/vlib/encoding/csv/reader_test.v
@@ -0,0 +1,253 @@
+import encoding.csv
+
+// test_encoding_csv_reader reads a header row and two data rows (one with a
+// quoted field) and checks that the trailing `#` comment row is not returned.
+fn test_encoding_csv_reader() {
+	data := 'name,email,phone,other\njoe,joe@blow.com,0400000000,test\nsam,sam@likesham.com,0433000000,"test quoted field"\n#chris,chris@nomail.com,94444444,"commented row"\n'
+	mut reader := csv.new_reader(data)
+	mut rows_read := 0
+	for {
+		fields := reader.read() or { break }
+		rows_read++
+		match rows_read {
+			1 {
+				assert fields[0] == 'name'
+				assert fields[1] == 'email'
+				assert fields[2] == 'phone'
+				assert fields[3] == 'other'
+			}
+			2 {
+				assert fields[0] == 'joe'
+				assert fields[1] == 'joe@blow.com'
+				assert fields[2] == '0400000000'
+				assert fields[3] == 'test'
+			}
+			3 {
+				assert fields[0] == 'sam'
+				assert fields[1] == 'sam@likesham.com'
+				assert fields[2] == '0433000000'
+				// quoted field
+				assert fields[3] == 'test quoted field'
+			}
+			else {}
+		}
+	}
+	assert rows_read == 3
+}
+
+// test_line_break_lf checks that rows terminated by '\n' are split correctly.
+fn test_line_break_lf() {
+	lf_data := 'name,email\njoe,joe@blow.com\n'
+	mut reader := csv.new_reader(lf_data)
+	mut rows_read := 0
+	for {
+		fields := reader.read() or { break }
+		rows_read++
+		match rows_read {
+			1 {
+				assert fields[0] == 'name'
+				assert fields[1] == 'email'
+			}
+			2 {
+				assert fields[0] == 'joe'
+				assert fields[1] == 'joe@blow.com'
+			}
+			else {}
+		}
+	}
+	assert rows_read == 2
+}
+
+// test_line_break_cr checks that rows terminated by a lone '\r' are split correctly.
+fn test_line_break_cr() {
+	cr_data := 'name,email\rjoe,joe@blow.com\r'
+	mut reader := csv.new_reader(cr_data)
+	mut rows_read := 0
+	for {
+		fields := reader.read() or { break }
+		rows_read++
+		match rows_read {
+			1 {
+				assert fields[0] == 'name'
+				assert fields[1] == 'email'
+			}
+			2 {
+				assert fields[0] == 'joe'
+				assert fields[1] == 'joe@blow.com'
+			}
+			else {}
+		}
+	}
+	assert rows_read == 2
+}
+
+// test_line_break_crlf checks that rows terminated by '\r\n' are split correctly
+// and that the '\r' does not end up in the last field.
+fn test_line_break_crlf() {
+	crlf_data := 'name,email\r\njoe,joe@blow.com\r\n'
+	mut reader := csv.new_reader(crlf_data)
+	mut rows_read := 0
+	for {
+		fields := reader.read() or { break }
+		rows_read++
+		match rows_read {
+			1 {
+				assert fields[0] == 'name'
+				assert fields[1] == 'email'
+			}
+			2 {
+				assert fields[0] == 'joe'
+				assert fields[1] == 'joe@blow.com'
+			}
+			else {}
+		}
+	}
+	assert rows_read == 2
+}
+
+// test_no_line_ending checks that a final row without a line terminator is
+// still returned as a row.
+// NOTE(review): only the number of rows is asserted here; the content of the
+// unterminated last row is not checked.
+fn test_no_line_ending() {
+	data := 'name,email,phone,other\njoe,joe@blow.com,0400000000,test'
+	mut reader := csv.new_reader(data)
+	mut rows_read := 0
+	for {
+		reader.read() or { break }
+		rows_read++
+	}
+	assert rows_read == 2
+}
+
+// test_last_field_empty checks that a row ending in a delimiter yields an
+// empty last field when the other fields are quoted.
+fn test_last_field_empty() {
+	data := '"name","description","value"\n"one","first","1"\n"two","second",\n'
+	mut reader := csv.new_reader(data)
+	mut rows_read := 0
+	for {
+		fields := reader.read() or { break }
+		rows_read++
+		match rows_read {
+			1 {
+				assert fields[0] == 'name'
+				assert fields[1] == 'description'
+				assert fields[2] == 'value'
+			}
+			2 {
+				assert fields[0] == 'one'
+				assert fields[1] == 'first'
+				assert fields[2] == '1'
+			}
+			3 {
+				assert fields[0] == 'two'
+				assert fields[1] == 'second'
+				assert fields[2] == ''
+			}
+			else {}
+		}
+	}
+	assert rows_read == 3
+}
+
+// test_empty_fields_no_quotes checks that an empty unquoted field is returned
+// as '' in the first, middle and last column positions.
+fn test_empty_fields_no_quotes() {
+	data := '1,2,3,4\n,6,7,8\n9,,11,12\n13,14,,16\n17,18,19,\n'
+
+	mut reader := csv.new_reader(data)
+	mut rows_read := 0
+	for {
+		fields := reader.read() or { break }
+		rows_read++
+		match rows_read {
+			1 {
+				assert fields[0] == '1'
+				assert fields[1] == '2'
+				assert fields[2] == '3'
+				assert fields[3] == '4'
+			}
+			2 {
+				assert fields[0] == ''
+				assert fields[1] == '6'
+				assert fields[2] == '7'
+				assert fields[3] == '8'
+			}
+			3 {
+				assert fields[0] == '9'
+				assert fields[1] == ''
+				assert fields[2] == '11'
+				assert fields[3] == '12'
+			}
+			4 {
+				assert fields[0] == '13'
+				assert fields[1] == '14'
+				assert fields[2] == ''
+				assert fields[3] == '16'
+			}
+			5 {
+				assert fields[0] == '17'
+				assert fields[1] == '18'
+				assert fields[2] == '19'
+				assert fields[3] == ''
+			}
+			else {}
+		}
+	}
+	assert rows_read == 5
+}
+
+// test_empty_line checks that empty lines between rows are skipped and do not
+// count as rows.
+fn test_empty_line() {
+	data := '"name","description","value"\n\n\n"one","first","1"\n\n"two","second",\n'
+	mut reader := csv.new_reader(data)
+	mut rows_read := 0
+	for {
+		fields := reader.read() or { break }
+		rows_read++
+		match rows_read {
+			1 {
+				assert fields[0] == 'name'
+				assert fields[1] == 'description'
+				assert fields[2] == 'value'
+			}
+			2 {
+				assert fields[0] == 'one'
+				assert fields[1] == 'first'
+				assert fields[2] == '1'
+			}
+			3 {
+				assert fields[0] == 'two'
+				assert fields[1] == 'second'
+			}
+			else {}
+		}
+	}
+	assert rows_read == 3
+}
+
+// test_field_multiple_line checks that a quoted field spanning several physical
+// lines (including an empty one) is returned as one field with its line breaks
+// preserved.
+fn test_field_multiple_line() {
+ // the literal below deliberately contains raw line breaks, and the space
+ // before `line` is part of the expected field value
+ data := '"name","multiple
+
+ line","value"\n"one","first","1"\n'
+ mut csv_reader := csv.new_reader(data)
+ mut row_count := 0
+ for {
+ row := csv_reader.read() or { break }
+ row_count++
+ if row_count == 1 {
+ assert row[0] == 'name'
+ assert row[1] == 'multiple\n\n line'
+ assert row[2] == 'value'
+ } else if row_count == 2 {
+ assert row[0] == 'one'
+ assert row[1] == 'first'
+ assert row[2] == '1'
+ }
+ }
+ assert row_count == 2
+}
+
+// test_field_quotes_for_parts checks rows in which only some of the fields are
+// quoted, with the quoted field in the first, middle and last position.
+fn test_field_quotes_for_parts() {
+	data := 'a1,"b1",c1\n"a2",b2,c2\na3,b3,"c3"\na4,b4,c4\n'
+	mut reader := csv.new_reader(data)
+	mut rows_read := 0
+	for {
+		fields := reader.read() or { break }
+		rows_read++
+		match rows_read {
+			1 {
+				assert fields[0] == 'a1'
+				assert fields[1] == 'b1'
+				assert fields[2] == 'c1'
+			}
+			2 {
+				assert fields[0] == 'a2'
+				assert fields[1] == 'b2'
+				assert fields[2] == 'c2'
+			}
+			3 {
+				assert fields[0] == 'a3'
+				assert fields[1] == 'b3'
+				assert fields[2] == 'c3'
+			}
+			4 {
+				assert fields[0] == 'a4'
+				assert fields[1] == 'b4'
+				assert fields[2] == 'c4'
+			}
+			else {}
+		}
+	}
+	assert rows_read == 4
+}
+
+// test_field_double_quotes checks that `""` inside a quoted field is returned
+// as a single `"`, also when the quoted field contains a line break.
+fn test_field_double_quotes() {
+	row1 := '11,"12\n13"\n'
+	row2 := '21,"2""2""\n23"\n'
+	row3 := '"3""1""",32\n'
+	data := row1 + row2 + row3
+	mut reader := csv.new_reader(data)
+	mut rows_read := 0
+	for {
+		fields := reader.read() or { break }
+		rows_read++
+		match rows_read {
+			1 {
+				assert fields[0] == '11'
+				assert fields[1] == '12\n13'
+			}
+			2 {
+				assert fields[0] == '21'
+				assert fields[1] == '2"2"\n23'
+			}
+			3 {
+				assert fields[0] == '3"1"'
+				assert fields[1] == '32'
+			}
+			else {}
+		}
+	}
+	assert rows_read == 3
+}
diff --git a/v_windows/v/vlib/encoding/csv/writer.v b/v_windows/v/vlib/encoding/csv/writer.v
new file mode 100644
index 0000000..735ca20
--- /dev/null
+++ b/v_windows/v/vlib/encoding/csv/writer.v
@@ -0,0 +1,80 @@
+// Copyright (c) 2019-2021 Alexander Medvednikov. All rights reserved.
+// Use of this source code is governed by an MIT license
+// that can be found in the LICENSE file.
+module csv
+
+import strings
+
+// Writer builds CSV text in an in-memory string builder.
+struct Writer {
+mut:
+ // buffer that write appends to and str reads from
+ sb strings.Builder
+pub mut:
+ // when true, write uses '\r\n' instead of '\n' as the line ending
+ use_crlf bool
+ // field separator; new_writer sets it to `,`
+ delimiter byte
+}
+
+// new_writer returns a reference to a Writer that separates fields with `,`
+// and whose buffer is created with strings.new_builder(200).
+pub fn new_writer() &Writer {
+	writer := &Writer{
+		sb: strings.new_builder(200)
+		delimiter: `,`
+	}
+	return writer
+}
+
+// write appends `record` to the writer's buffer as a single CSV row.
+// Fields are separated by `w.delimiter`; the row ends with '\r\n' when
+// `w.use_crlf` is set and with '\n' otherwise.
+// A non-empty field containing the delimiter, a double quote or a line break
+// is wrapped in double quotes: embedded quotes are doubled and each embedded
+// line break ('\r', '\n' or the pair '\r\n') is written as one row line ending.
+// It returns true on success and ErrInvalidDelimiter when `w.delimiter` is
+// rejected by valid_delim.
+pub fn (mut w Writer) write(record []string) ?bool {
+	if !valid_delim(w.delimiter) {
+		return IError(&ErrInvalidDelimiter{})
+	}
+	le := if w.use_crlf { '\r\n' } else { '\n' }
+	for n, field_ in record {
+		mut field := field_
+		if n > 0 {
+			w.sb.write_string(w.delimiter.ascii_str())
+		}
+		if !w.field_needs_quotes(field) {
+			w.sb.write_string(field)
+			continue
+		}
+		w.sb.write_string('"')
+		for field.len > 0 {
+			// copy everything up to the next character that needs special handling
+			mut i := field.index_any('"\r\n')
+			if i < 0 {
+				i = field.len
+			}
+			w.sb.write_string(field[..i])
+			field = field[i..]
+			if field.len > 0 {
+				z := field[0]
+				// number of input bytes the special character occupies
+				mut consumed := 1
+				match z {
+					`"` {
+						w.sb.write_string('""')
+					}
+					`\r`, `\n` {
+						w.sb.write_string(le)
+						// '\r\n' is ONE line break: swallow the '\n' too, otherwise
+						// the line ending would be emitted twice
+						if z == `\r` && field.len > 1 && field[1] == `\n` {
+							consumed = 2
+						}
+					}
+					else {}
+				}
+				field = field[consumed..]
+			}
+		}
+		w.sb.write_string('"')
+	}
+	w.sb.write_string(le)
+	return true
+}
+
+// Once we have multi dimensional array
+// pub fn (w &Writer) write_all(records [][]string) {
+// for _, record in records {
+// w.write(record)
+// }
+// }
+// field_needs_quotes reports whether `field` has to be wrapped in double quotes
+// when written: true for a non-empty field that contains the writer's
+// delimiter, a double quote, '\r' or '\n'; false otherwise.
+fn (w &Writer) field_needs_quotes(field string) bool {
+	if field == '' {
+		return false
+	}
+	return field.contains(w.delimiter.ascii_str()) || field.index_any('"\r\n') != -1
+}
+
+// str returns the CSV text accumulated so far, as produced by the builder's str().
+// NOTE(review): strings.Builder.str() may reset the builder as a side effect -
+// confirm before calling str more than once on the same Writer.
+pub fn (mut w Writer) str() string {
+	contents := w.sb.str()
+	return contents
+}
diff --git a/v_windows/v/vlib/encoding/csv/writer_test.v b/v_windows/v/vlib/encoding/csv/writer_test.v
new file mode 100644
index 0000000..92882dd
--- /dev/null
+++ b/v_windows/v/vlib/encoding/csv/writer_test.v
@@ -0,0 +1,11 @@
+import encoding.csv
+
+// test_encoding_csv_writer writes three rows and checks the produced text,
+// including the quoting of a field that contains the delimiter.
+fn test_encoding_csv_writer() {
+	mut w := csv.new_writer()
+
+	w.write(['name', 'email', 'phone', 'other']) or {}
+	w.write(['joe', 'joe@blow.com', '0400000000', 'test']) or {}
+	w.write(['sam', 'sam@likesham.com', '0433000000', 'needs, quoting']) or {}
+
+	header_row := 'name,email,phone,other\n'
+	joe_row := 'joe,joe@blow.com,0400000000,test\n'
+	sam_row := 'sam,sam@likesham.com,0433000000,"needs, quoting"\n'
+	assert w.str() == header_row + joe_row + sam_row
+}