v_windows/v/vlib/encoding/csv/reader.v


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196

// Copyright (c) 2019-2021 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module csv

// Once interfaces are further along the idea would be to have something similar to
// go's io.reader & bufio.reader rather than reading the whole file into string, this
// would then satisfy that interface. I designed it this way to be easily adapted.
struct ErrCommentIsDelimiter {
	msg  string = 'encoding.csv: comment cannot be the same as delimiter'
	code int
}

struct ErrInvalidDelimiter {
	msg  string = 'encoding.csv: invalid delimiter'
	code int
}

struct ErrEndOfFile {
	msg  string = 'encoding.csv: end of file'
	code int
}

struct ErrInvalidLineEnding {
	msg  string = 'encoding.csv: could not find any valid line endings'
	code int
}

struct Reader {
	// not used yet
	// has_header        bool
	// headings          []string
	data string
pub mut:
	delimiter         byte
	comment           byte
	is_mac_pre_osx_le bool
	row_pos           int
}

// new_reader initializes a Reader with string data to parse
pub fn new_reader(data string) &Reader {
	return &Reader{
		delimiter: `,`
		comment: `#`
		data: data
	}
}

// read reads a row from the CSV data.
// If successful, the result holds an array of each column's data.
pub fn (mut r Reader) read() ?[]string {
	l := r.read_record() ?
	return l
}

// Once we have multi dimensional array
// pub fn (mut r Reader) read_all() ?[][]string {
// 	mut records := []string{}
// 	for {
// 		record := r.read_record() or {
// 			if err.error == err_eof.error {
// 				return records
// 			} else {
// 				return err
// 			}
// 		}
// 		records << record
// 	}
// 	return records
// }
fn (mut r Reader) read_line() ?string {
	// last record
	if r.row_pos == r.data.len {
		return IError(&ErrEndOfFile{})
	}
	le := if r.is_mac_pre_osx_le { '\r' } else { '\n' }
	mut i := r.data.index_after(le, r.row_pos)
	if i == -1 {
		if r.row_pos == 0 {
			// check for pre osx mac line endings
			i = r.data.index_after('\r', r.row_pos)
			if i != -1 {
				r.is_mac_pre_osx_le = true
			} else {
				// no valid line endings found
				return IError(&ErrInvalidLineEnding{})
			}
		} else {
			// No line ending on file
			i = r.data.len - 1
		}
	}
	mut line := r.data[r.row_pos..i]
	r.row_pos = i + 1
	// normalize win line endings (remove extra \r)
	if !r.is_mac_pre_osx_le && (line.len >= 1 && line[line.len - 1] == `\r`) {
		line = line[..line.len - 1]
	}
	return line
}

fn (mut r Reader) read_record() ?[]string {
	if r.delimiter == r.comment {
		return IError(&ErrCommentIsDelimiter{})
	}
	if !valid_delim(r.delimiter) {
		return IError(&ErrInvalidDelimiter{})
	}
	mut need_read := true
	mut keep_raw := false
	mut line := ''
	mut fields := []string{}
	mut i := -1
	for {
		if need_read {
			l := r.read_line() ?
			if l.len <= 0 {
				if keep_raw {
					line += '\n'
				}
				continue
			} else if l[0] == r.comment {
				if keep_raw {
					line += '\n' + l
				}
				continue
			} else {
				if keep_raw {
					line += '\n'
				}
				line += l
			}
			need_read = false
			keep_raw = false
		}
		if line.len == 0 || line[0] != `"` { // not quoted
			j := line.index(r.delimiter.ascii_str()) or {
				// last
				fields << line[..line.len]
				break
			}
			i = j
			fields << line[..i]
			line = line[i + 1..]
			continue
		} else { // quoted
			mut need_more := true
			mut has_double_quotes := false
			mut j := 0
			mut n := 1
			for n < line.len {
				if line[n] == `"` {
					if n == line.len - 1 || line[n + 1] != `"` {
						need_more = false
						j = n - 1
						break
					} else {
						has_double_quotes = true
						n++
					}
				}
				n++
			}
			if need_more {
				need_read = true
				keep_raw = true
				continue
			}
			line = line[1..]
			if j + 1 == line.len {
				// last record
				fields << if has_double_quotes { line[..j].replace('""', '"') } else { line[..j] }
				break
			}
			next := line[j + 1]
			if next == r.delimiter {
				fields << if has_double_quotes { line[..j].replace('""', '"') } else { line[..j] }
				if j + 2 == line.len {
					line = ''
				} else {
					line = line[j + 2..]
				}
				continue
			}
		}
		if i <= -1 && fields.len == 0 {
			return IError(&ErrInvalidDelimiter{})
		}
	}
	return fields
}

fn valid_delim(b byte) bool {
	return b != 0 && b != `"` && b != `\r` && b != `\n`
}