aboutsummaryrefslogtreecommitdiff
path: root/v_windows/v/vlib/encoding/csv/reader.v
diff options
context:
space:
mode:
Diffstat (limited to 'v_windows/v/vlib/encoding/csv/reader.v')
-rw-r--r--v_windows/v/vlib/encoding/csv/reader.v196
1 files changed, 196 insertions, 0 deletions
diff --git a/v_windows/v/vlib/encoding/csv/reader.v b/v_windows/v/vlib/encoding/csv/reader.v
new file mode 100644
index 0000000..dafd022
--- /dev/null
+++ b/v_windows/v/vlib/encoding/csv/reader.v
@@ -0,0 +1,196 @@
+// Copyright (c) 2019-2021 Alexander Medvednikov. All rights reserved.
+// Use of this source code is governed by an MIT license
+// that can be found in the LICENSE file.
+module csv
+
+// Once interfaces are further along, the idea would be to have something similar to
+// Go's io.Reader & bufio.Reader rather than reading the whole file into a string; this
+// Reader would then satisfy that interface. I designed it this way to be easily adapted.
+// ErrCommentIsDelimiter is the error returned by read_record when the reader's
+// comment byte is equal to its delimiter byte.
+struct ErrCommentIsDelimiter {
+ msg string = 'encoding.csv: comment cannot be the same as delimiter'
+ code int
+}
+
+// ErrInvalidDelimiter is the error returned by read_record when valid_delim rejects
+// the reader's delimiter. read_record also returns it when the first field of a
+// record is quoted and its closing quote is not followed by the delimiter.
+struct ErrInvalidDelimiter {
+ msg string = 'encoding.csv: invalid delimiter'
+ code int
+}
+
+// ErrEndOfFile is the error returned by read_line once all of the reader's data
+// has been consumed; read_record and read pass it on to their callers.
+struct ErrEndOfFile {
+ msg string = 'encoding.csv: end of file'
+ code int
+}
+
+// ErrInvalidLineEnding is the error returned by read_line when the very first read
+// finds neither a '\n' nor a '\r' anywhere in the data.
+struct ErrInvalidLineEnding {
+ msg string = 'encoding.csv: could not find any valid line endings'
+ code int
+}
+
+// Reader parses CSV records out of a string that is held completely in memory.
+// Create one with new_reader and fetch rows one at a time with read.
+struct Reader {
+ // not used yet
+ // has_header bool
+ // headings []string
+ // data is the complete CSV text being parsed
+ data string
+pub mut:
+ // delimiter is the byte that separates fields; new_reader sets it to `,`
+ delimiter byte
+ // comment marks lines to skip: read_record ignores lines whose first byte equals it; new_reader sets it to `#`
+ comment byte
+ // is_mac_pre_osx_le is set by read_line when the data turns out to use a lone '\r' as line ending
+ is_mac_pre_osx_le bool
+ // row_pos is the byte offset into data at which the next line starts
+ row_pos int
+}
+
+// new_reader returns a reference to a new Reader that parses `data`,
+// using `,` as the field delimiter and `#` as the comment marker.
+pub fn new_reader(data string) &Reader {
+	reader := &Reader{
+		data: data
+		comment: `#`
+		delimiter: `,`
+	}
+	return reader
+}
+
+// read returns the next row of the CSV data as an array with one string per column.
+// Any error raised while parsing the row is handed back to the caller unchanged.
+pub fn (mut r Reader) read() ?[]string {
+	record := r.read_record() or { return err }
+	return record
+}
+
+// Once we have multi dimensional array
+// pub fn (mut r Reader) read_all() ?[][]string {
+// mut records := []string{}
+// for {
+// record := r.read_record() or {
+// if err.error == err_eof.error {
+// return records
+// } else {
+// return err
+// }
+// }
+// records << record
+// }
+// return records
+// }
+// read_line returns the next raw line of `r.data`, starting at `r.row_pos`, without
+// its line terminator, and moves `r.row_pos` past that terminator.
+// Lines end in '\n' (a trailing '\r' is stripped, which normalizes '\r\n'), or in a
+// lone '\r' once pre-OSX Mac line endings have been detected on the first read.
+// A final line without a terminator is returned in full.
+// It returns ErrEndOfFile when all data has been consumed, and ErrInvalidLineEnding
+// when the first read finds no line ending of either kind in the data.
+fn (mut r Reader) read_line() ?string {
+	// All data consumed. `>=` rather than `==`: after an unterminated final line
+	// has been read, row_pos ends up one past r.data.len.
+	if r.row_pos >= r.data.len {
+		return IError(&ErrEndOfFile{})
+	}
+	le := if r.is_mac_pre_osx_le { '\r' } else { '\n' }
+	mut i := r.data.index_after(le, r.row_pos)
+	if i == -1 {
+		if r.row_pos == 0 {
+			// check for pre osx mac line endings
+			i = r.data.index_after('\r', r.row_pos)
+			if i != -1 {
+				r.is_mac_pre_osx_le = true
+			} else {
+				// no valid line endings found
+				return IError(&ErrInvalidLineEnding{})
+			}
+		} else {
+			// No line ending at the end of the data: the last line runs up to
+			// r.data.len. The slice end below is exclusive, so using `len - 1`
+			// here would silently drop the final byte of the data.
+			i = r.data.len
+		}
+	}
+	mut line := r.data[r.row_pos..i]
+	r.row_pos = i + 1
+	// normalize win line endings (remove extra \r)
+	if !r.is_mac_pre_osx_le && (line.len >= 1 && line[line.len - 1] == `\r`) {
+		line = line[..line.len - 1]
+	}
+	return line
+}
+
+// read_record parses the next record (row) of the data and returns its fields.
+// It returns ErrCommentIsDelimiter or ErrInvalidDelimiter when the reader's
+// delimiter/comment settings are unusable, and passes on any error from read_line
+// (such as ErrEndOfFile). Empty lines and lines whose first byte is the comment byte
+// are skipped. A field wrapped in double quotes may contain the delimiter, may contain
+// `""` as an escaped quote, and may span several lines, which are re-joined with '\n'.
+fn (mut r Reader) read_record() ?[]string {
+ if r.delimiter == r.comment {
+ return IError(&ErrCommentIsDelimiter{})
+ }
+ if !valid_delim(r.delimiter) {
+ return IError(&ErrInvalidDelimiter{})
+ }
+ // need_read: another line has to be fetched before parsing can go on
+ mut need_read := true
+ // keep_raw: a quoted field is still open, so fetched lines are appended to it as-is
+ mut keep_raw := false
+ // line: the not yet consumed remainder of the current record
+ mut line := ''
+ mut fields := []string{}
+ // i: position of the most recent delimiter found by the unquoted branch, -1 until then
+ mut i := -1
+ for {
+ if need_read {
+ l := r.read_line() ?
+ if l.len <= 0 {
+ // empty line: skipped, unless it belongs to an open quoted field
+ if keep_raw {
+ line += '\n'
+ }
+ continue
+ } else if l[0] == r.comment {
+ // comment line: skipped, unless it belongs to an open quoted field,
+ // in which case it is kept as part of that field
+ if keep_raw {
+ line += '\n' + l
+ }
+ continue
+ } else {
+ if keep_raw {
+ line += '\n'
+ }
+ line += l
+ }
+ need_read = false
+ keep_raw = false
+ }
+ if line.len == 0 || line[0] != `"` { // not quoted
+ j := line.index(r.delimiter.ascii_str()) or {
+ // no further delimiter: the remainder is the last field of the record
+ fields << line[..line.len]
+ break
+ }
+ i = j
+ fields << line[..i]
+ line = line[i + 1..]
+ continue
+ } else { // quoted
+ mut need_more := true
+ mut has_double_quotes := false
+ mut j := 0
+ // scan for the closing quote, starting right after the opening one
+ mut n := 1
+ for n < line.len {
+ if line[n] == `"` {
+ if n == line.len - 1 || line[n + 1] != `"` {
+ // a quote that is not followed by another quote closes the field;
+ // j is the index of the byte just before it
+ need_more = false
+ j = n - 1
+ break
+ } else {
+ // `""` is an escaped quote: remember it and step over both bytes
+ has_double_quotes = true
+ n++
+ }
+ }
+ n++
+ }
+ if need_more {
+ // no closing quote on what has been read so far: append the next line and rescan
+ need_read = true
+ keep_raw = true
+ continue
+ }
+ // drop the opening quote; from here on line[..j] is the field content
+ // and line[j] is the closing quote
+ line = line[1..]
+ if j + 1 == line.len {
+ // the closing quote is the last byte: this is the last field of the record
+ fields << if has_double_quotes { line[..j].replace('""', '"') } else { line[..j] }
+ break
+ }
+ next := line[j + 1]
+ if next == r.delimiter {
+ fields << if has_double_quotes { line[..j].replace('""', '"') } else { line[..j] }
+ if j + 2 == line.len {
+ line = ''
+ } else {
+ line = line[j + 2..]
+ }
+ continue
+ }
+ // NOTE(review): when the closing quote is followed by something other than the
+ // delimiter, no field is pushed here and control falls through to the check below;
+ // line has already lost its opening quote at this point.
+ }
+ // only reached from the quoted branch above (every other path continues or breaks)
+ if i <= -1 && fields.len == 0 {
+ return IError(&ErrInvalidDelimiter{})
+ }
+ }
+ return fields
+}
+
+// valid_delim reports whether `b` can be used as a field delimiter:
+// it must not be the zero byte, a double quote, or a line ending byte ('\r' or '\n').
+fn valid_delim(b byte) bool {
+	if b == 0 || b == `"` {
+		return false
+	}
+	if b == `\r` || b == `\n` {
+		return false
+	}
+	return true
+}