blob: dafd02217b39dd6eab19fe53e111f74cfb7dc756 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
|
// Copyright (c) 2019-2021 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module csv
// Once interfaces are further along, the idea is to have something similar to
// Go's io.Reader & bufio.Reader rather than reading the whole file into a string;
// this reader would then satisfy that interface. It was designed to be easily adapted.
// ErrCommentIsDelimiter is the error returned (wrapped as an IError) by
// `read_record` when the reader's `comment` byte equals its `delimiter` byte.
struct ErrCommentIsDelimiter {
// msg is the human-readable description; it defaults to the text below.
msg string = 'encoding.csv: comment cannot be the same as delimiter'
// code is left at its zero value by every construction site in this file.
code int
}
// ErrInvalidDelimiter is the error returned (wrapped as an IError) by
// `read_record` when `valid_delim` rejects the reader's delimiter, and also
// when a quoted first field is not followed by the delimiter.
struct ErrInvalidDelimiter {
// msg is the human-readable description; it defaults to the text below.
msg string = 'encoding.csv: invalid delimiter'
// code is left at its zero value by every construction site in this file.
code int
}
// ErrEndOfFile is the error returned (wrapped as an IError) by `read_line`
// once the reader has no more data left to read.
struct ErrEndOfFile {
// msg is the human-readable description; it defaults to the text below.
msg string = 'encoding.csv: end of file'
// code is left at its zero value by every construction site in this file.
code int
}
// ErrInvalidLineEnding is the error returned (wrapped as an IError) by
// `read_line` when, reading from the very start of the data, neither a `\n`
// nor a `\r` line terminator can be found.
struct ErrInvalidLineEnding {
// msg is the human-readable description; it defaults to the text below.
msg string = 'encoding.csv: could not find any valid line endings'
// code is left at its zero value by every construction site in this file.
code int
}
// Reader parses CSV records out of an in-memory string, one record per call.
// Create one with `new_reader`; the public mutable fields may be adjusted
// before reading.
struct Reader {
// not used yet
// has_header bool
// headings []string
// data is the complete CSV text being parsed.
data string
pub mut:
// delimiter is the byte that separates fields within a record.
delimiter byte
// comment marks comment lines: a line whose first byte equals it is skipped.
comment byte
// is_mac_pre_osx_le is set by `read_line` when the data turns out to use bare
// `\r` line endings; lines are then split on `\r` instead of `\n`.
is_mac_pre_osx_le bool
// row_pos is the byte offset into `data` at which the next line starts.
row_pos int
}
// new_reader returns a reference to a fresh Reader that will parse the CSV
// text in `data`. The reader starts out with `,` as its field delimiter and
// `#` as its comment marker.
pub fn new_reader(data string) &Reader {
	reader := &Reader{
		data: data
		comment: `#`
		delimiter: `,`
	}
	return reader
}
// read parses and returns the next record (row) of the CSV data.
// On success the result is an array holding the text of each column;
// any error raised while reading the record is passed on to the caller.
pub fn (mut r Reader) read() ?[]string {
	return r.read_record()
}
// TODO: enable read_all once multi-dimensional arrays are available:
// pub fn (mut r Reader) read_all() ?[][]string {
// mut records := []string{}
// for {
// record := r.read_record() or {
// if err.error == err_eof.error {
// return records
// } else {
// return err
// }
// }
// records << record
// }
// return records
// }
// read_line returns the next physical line of `r.data`, starting at
// `r.row_pos`, with its line terminator removed, and advances `r.row_pos`
// to the start of the following line.
//
// Lines are split on `\n` (a trailing `\r` is stripped, so `\r\n` works too).
// If the very first search finds no `\n`, the data is assumed to use bare
// `\r` line endings and `r.is_mac_pre_osx_le` is switched on.
//
// Errors:
// - ErrEndOfFile when all of the data has been consumed.
// - ErrInvalidLineEnding when, at the start of the data, neither `\n` nor
//   `\r` can be found.
fn (mut r Reader) read_line() ?string {
	// Nothing left to read. `>=` also covers a `row_pos` that was moved past
	// the end through the public field.
	if r.row_pos >= r.data.len {
		return IError(&ErrEndOfFile{})
	}
	le := if r.is_mac_pre_osx_le { '\r' } else { '\n' }
	mut i := r.data.index_after(le, r.row_pos)
	if i == -1 {
		if r.row_pos == 0 {
			// check for pre osx mac line endings
			i = r.data.index_after('\r', r.row_pos)
			if i != -1 {
				r.is_mac_pre_osx_le = true
			} else {
				// no valid line endings found
				return IError(&ErrInvalidLineEnding{})
			}
		} else {
			// The final line has no terminator: it runs up to the end of the
			// data. (Using `len - 1` here would cut off its last byte.)
			i = r.data.len
		}
	}
	mut line := r.data[r.row_pos..i]
	// Step over the terminator, but never beyond the end of the data.
	r.row_pos = if i < r.data.len { i + 1 } else { r.data.len }
	// normalize win line endings (remove extra \r)
	if !r.is_mac_pre_osx_le && (line.len >= 1 && line[line.len - 1] == `\r`) {
		line = line[..line.len - 1]
	}
	return line
}
// read_record reads lines via `read_line` until one complete CSV record has
// been assembled, and returns that record's fields.
//
// Blank lines and lines starting with the `comment` byte are skipped between
// records. Fields may be wrapped in double quotes; inside a quoted field a
// doubled quote (`""`) stands for one literal quote, and the field may span
// several physical lines (they are rejoined with `\n`).
//
// Errors:
// - ErrCommentIsDelimiter when `comment` and `delimiter` are the same byte.
// - ErrInvalidDelimiter when `valid_delim` rejects the delimiter, or when a
//   quoted first field is followed by something other than the delimiter.
// - whatever `read_line` returns (e.g. end of file) is propagated unchanged.
fn (mut r Reader) read_record() ?[]string {
if r.delimiter == r.comment {
return IError(&ErrCommentIsDelimiter{})
}
if !valid_delim(r.delimiter) {
return IError(&ErrInvalidDelimiter{})
}
// need_read: another physical line must be fetched before parsing continues.
mut need_read := true
// keep_raw: we are inside an unterminated quoted field, so fetched lines are
// appended to `line` (joined by '\n') instead of being filtered out.
mut keep_raw := false
// line: the not-yet-consumed remainder of the current record.
mut line := ''
mut fields := []string{}
// i: position of the last delimiter found in an unquoted field; -1 until then.
mut i := -1
for {
if need_read {
l := r.read_line() ?
if l.len <= 0 {
// blank line: skipped, unless it is part of a multi-line quoted field
if keep_raw {
line += '\n'
}
continue
} else if l[0] == r.comment {
// comment line: skipped, unless it is part of a multi-line quoted field,
// in which case it is kept verbatim and reading continues
if keep_raw {
line += '\n' + l
}
continue
} else {
if keep_raw {
line += '\n'
}
line += l
}
need_read = false
keep_raw = false
}
if line.len == 0 || line[0] != `"` { // not quoted
j := line.index(r.delimiter.ascii_str()) or {
// last
// no further delimiter: the rest of the line is the final field
fields << line[..line.len]
break
}
i = j
fields << line[..i]
// drop the consumed field together with its delimiter
line = line[i + 1..]
continue
} else { // quoted
// need_more stays true until a closing quote is found in `line`
mut need_more := true
mut has_double_quotes := false
// j: index of the closing quote, counted after the opening quote is removed
mut j := 0
// n: scan position; starts at 1 to step over the opening quote
mut n := 1
for n < line.len {
if line[n] == `"` {
if n == line.len - 1 || line[n + 1] != `"` {
// a quote that is not doubled closes the field
need_more = false
j = n - 1
break
} else {
// doubled quote: remember it and step over the pair
has_double_quotes = true
n++
}
}
n++
}
if need_more {
// closing quote not on this line: fetch the next one and append it raw
need_read = true
keep_raw = true
continue
}
// strip the opening quote; line[..j] is now the field content
line = line[1..]
if j + 1 == line.len {
// last record
fields << if has_double_quotes { line[..j].replace('""', '"') } else { line[..j] }
break
}
// the byte right after the closing quote
next := line[j + 1]
if next == r.delimiter {
fields << if has_double_quotes { line[..j].replace('""', '"') } else { line[..j] }
if j + 2 == line.len {
line = ''
} else {
// continue after the closing quote and the delimiter
line = line[j + 2..]
}
continue
}
}
// Only reached when a quoted field is followed by a byte other than the
// delimiter. With no field parsed yet this is reported as an error.
// NOTE(review): otherwise the loop goes on with `line` already stripped of
// its opening quote, so the rest is parsed as unquoted text - confirm intended.
if i <= -1 && fields.len == 0 {
return IError(&ErrInvalidDelimiter{})
}
}
return fields
}
// valid_delim reports whether `b` may be used as a field delimiter.
// The NUL byte, the double quote, and the two line-ending bytes (`\r`, `\n`)
// are rejected; every other byte is accepted.
fn valid_delim(b byte) bool {
	if b == 0 || b == `"` {
		return false
	}
	if b == `\r` || b == `\n` {
		return false
	}
	return true
}
|