Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
seleniumhq
GitHub Repository: seleniumhq/selenium
Path: blob/trunk/third_party/closure/goog/labs/format/csv.js
2868 views
1
// Copyright 2012 The Closure Library Authors. All Rights Reserved.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
// http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS-IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
/**
16
* @fileoverview Provides a parser that turns a string of well-formed CSV data
17
* into an array of objects or an array of arrays. All values are returned as
18
* strings; the user has to convert data into numbers or Dates as required.
19
* Empty fields (adjacent commas) are returned as empty strings.
20
*
21
* This parser uses http://tools.ietf.org/html/rfc4180 as the definition of CSV.
22
*
23
* @author [email protected] (Nathan Naze) Ported to Closure
24
*/
25
goog.provide('goog.labs.format.csv');
26
goog.provide('goog.labs.format.csv.ParseError');
27
goog.provide('goog.labs.format.csv.Token');
28
29
goog.require('goog.array');
30
goog.require('goog.asserts');
31
goog.require('goog.debug.Error');
32
goog.require('goog.object');
33
goog.require('goog.string');
34
goog.require('goog.string.newlines');
35
36
37
/**
38
* @define {boolean} Enable verbose debugging. This is a flag so it can be
39
* enabled in production if necessary post-compilation. Otherwise, debug
40
* information will be stripped to minimize final code size.
41
*/
42
// Defaults to goog.DEBUG. The ParseError constructor only builds its message
// and its line/column position when this flag is true.
goog.labs.format.csv.ENABLE_VERBOSE_DEBUGGING = goog.DEBUG;
43
44
45
46
/**
 * Error raised when CSV parsing fails.
 *
 * When goog.labs.format.csv.ENABLE_VERBOSE_DEBUGGING is true, the error message
 * is the supplied description followed by the 1-based line and column of the
 * failure and a copy of the offending line with a caret under the column. When
 * the flag is false no message is built and {@code position} stays null.
 *
 * @param {string} text The CSV source text being parsed.
 * @param {number} index The index, in the string, of the position of the
 *     error.
 * @param {string=} opt_message A description of the violated parse expectation.
 * @constructor
 * @extends {goog.debug.Error}
 * @final
 */
goog.labs.format.csv.ParseError = function(text, index, opt_message) {

  /**
   * @type {?{line: number, column: number}} The line and column of the parse
   * error.
   */
  this.position = null;

  // Left undefined on purpose when verbose debugging is disabled.
  var description;

  if (goog.labs.format.csv.ENABLE_VERBOSE_DEBUGGING) {
    description = opt_message || '';

    var located = goog.labs.format.csv.ParseError.findLineInfo_(text, index);
    if (located) {
      // Both values are reported 1-based.
      var oneBasedLine = located.lineIndex + 1;
      var oneBasedColumn = index - located.line.startLineIndex + 1;

      this.position = {line: oneBasedLine, column: oneBasedColumn};

      var where = goog.string.subs(
          ' at line %s column %s', oneBasedLine, oneBasedColumn);
      var excerpt = goog.labs.format.csv.ParseError.getLineDebugString_(
          located.line.getContent(), oneBasedColumn);

      description = description + where + '\n' + excerpt;
    }
  }

  goog.labs.format.csv.ParseError.base(this, 'constructor', description);
};
goog.inherits(goog.labs.format.csv.ParseError, goog.debug.Error);


/** @inheritDoc */
goog.labs.format.csv.ParseError.prototype.name = 'ParseError';
92
93
94
/**
 * Calculate the line and column for an index in a string.
 *
 * A line matches when its half-open range [startLineIndex, endLineIndex)
 * contains the index.
 *
 * TODO(nnaze): Consider moving to goog.string.newlines.
 * @param {string} str A string.
 * @param {number} index An index into the string.
 * @return {?{line: !goog.string.newlines.Line, lineIndex: number}} The line
 *     and index of the line, or null if no line of the string contains the
 *     index.
 * @private
 */
goog.labs.format.csv.ParseError.findLineInfo_ = function(str, index) {
  var lines = goog.string.newlines.getLines(str);
  var lineIndex = goog.array.findIndex(lines, function(line) {
    return line.startLineIndex <= index && line.endLineIndex > index;
  });

  // goog.array.findIndex reports "no match" as -1, which is still a number.
  // Test the sign rather than the type, so callers get the documented null
  // instead of an info object whose line is undefined.
  if (lineIndex < 0) {
    return null;
  }

  return {line: lines[lineIndex], lineIndex: lineIndex};
};
116
117
118
/**
 * Builds a two-line debug string: the given line, followed by a second line
 * holding a caret positioned under the requested column.
 * @param {string} str The string.
 * @param {number} column The column to point at (1-indexed).
 * @return {string} The debug line.
 * @private
 */
goog.labs.format.csv.ParseError.getLineDebugString_ = function(str, column) {
  // column is 1-indexed, so the caret is preceded by (column - 1) spaces.
  var caretLine = goog.string.repeat(' ', column - 1) + '^';
  return str + '\n' + caretLine;
};
130
131
132
/**
133
* A token -- a single-character string or a sentinel.
134
* @typedef {string|!goog.labs.format.csv.Sentinels_}
135
*/
136
// Declaration only: no value is assigned here; the statement carries the
// typedef annotation that precedes it.
goog.labs.format.csv.Token;
137
138
139
/**
 * Parses a CSV string to create a two-dimensional array.
 *
 * This function does not process header lines, etc -- such transformations can
 * be made on the resulting array.
 *
 * A delimiter that is immediately followed by a newline or by the end of the
 * text produces a trailing empty-string field, regardless of whether the field
 * before the delimiter was quoted or unquoted.
 *
 * @param {string} text The entire CSV text to be parsed.
 * @param {boolean=} opt_ignoreErrors Whether to ignore parsing errors and
 *     instead try to recover and keep going.
 * @param {string=} opt_delimiter The delimiter to use. Defaults to ','
 * @return {!Array<!Array<string>>} The parsed CSV.
 * @throws {goog.labs.format.csv.ParseError} On a stray quote mark, an
 *     unexpected character after a closing quote, or an unterminated quoted
 *     field, unless opt_ignoreErrors is set.
 */
goog.labs.format.csv.parse = function(text, opt_ignoreErrors, opt_delimiter) {

  var index = 0;  // current char offset being considered

  var delimiter = opt_delimiter || ',';
  goog.asserts.assert(
      delimiter.length == 1, 'Delimiter must be a single character.');
  // Validate the resolved delimiter (not the raw optional argument) on both
  // sides of the check.
  goog.asserts.assert(
      delimiter != '\r' && delimiter != '\n',
      'Cannot use newline or carriage return as delimiter.');

  var EOF = goog.labs.format.csv.Sentinels_.EOF;
  var EOR = goog.labs.format.csv.Sentinels_.EOR;
  var NEWLINE = goog.labs.format.csv.Sentinels_.NEWLINE;  // \r?\n
  var EMPTY = goog.labs.format.csv.Sentinels_.EMPTY;

  var pushBackToken = null;  // A single-token pushback.
  var sawComma = false;      // Special case for terminal comma.

  /**
   * Push a single token into the push-back variable.
   * @param {goog.labs.format.csv.Token} t Single token.
   */
  function pushBack(t) {
    goog.labs.format.csv.assertToken_(t);
    // Only one token of look-behind is supported.
    goog.asserts.assert(goog.isNull(pushBackToken));
    pushBackToken = t;
  }

  /**
   * @return {goog.labs.format.csv.Token} The next token in the stream.
   */
  function nextToken() {
    // Give the push back token if present.
    if (pushBackToken != null) {
      var c = pushBackToken;
      pushBackToken = null;
      return c;
    }

    // We're done. EOF.
    if (index >= text.length) {
      return EOF;
    }

    // Give the next character.
    var chr = text.charAt(index++);
    goog.labs.format.csv.assertToken_(chr);

    // Check if this is a newline. If so, give the new line sentinel.
    var isNewline = false;
    if (chr == '\n') {
      isNewline = true;
    } else if (chr == '\r') {
      // A '\r\n' pair is a single newline token, so also step over the '\n'.
      // A '\r' that is not followed by '\n' is reported as a newline too.
      if (index < text.length && text.charAt(index) == '\n') {
        index++;
      }

      isNewline = true;
    }

    if (isNewline) {
      return NEWLINE;
    }

    return chr;
  }

  /**
   * Read a quoted field from input.
   * @return {string} The field, as a string.
   */
  function readQuotedField() {
    // We've already consumed the first quote by the time we get here.
    var start = index;
    var end = null;

    for (var token = nextToken(); token != EOF; token = nextToken()) {
      if (token == '"') {
        // Tentatively treat this quote as the closing one.
        end = index - 1;
        token = nextToken();

        // Two double quotes in a row. Keep scanning.
        if (token == '"') {
          end = null;
          continue;
        }

        // End of field. Break out.
        if (token == delimiter || token == EOF || token == NEWLINE) {
          if (token == NEWLINE) {
            pushBack(token);
          }
          if (token == delimiter) {
            // Record the delimiter so that a quoted field followed by a
            // terminal delimiter yields a trailing empty field, exactly as
            // readField() does for unquoted fields.
            sawComma = true;
          }
          break;
        }

        if (!opt_ignoreErrors) {
          // Ignoring errors here means keep going in current field after
          // closing quote. E.g. "ab"c,d splits into abc,d
          throw new goog.labs.format.csv.ParseError(
              text, index - 1,
              'Unexpected character "' + token + '" after quote mark');
        } else {
          // Fall back to reading the rest of this field as unquoted.
          // Note: the rest is guaranteed not start with ", as that case is
          // eliminated above.
          var prefix = '"' + text.substring(start, index);
          var suffix = readField();
          if (suffix == EOR) {
            // readField() consumed the end of the record; re-signal it so the
            // caller's next readField() call ends the record.
            pushBack(NEWLINE);
            return prefix;
          } else {
            return prefix + suffix;
          }
        }
      }
    }

    // No closing quote was found before the end of the text.
    if (goog.isNull(end)) {
      if (!opt_ignoreErrors) {
        throw new goog.labs.format.csv.ParseError(
            text, text.length - 1, 'Unexpected end of text after open quote');
      } else {
        end = text.length;
      }
    }

    // Take substring, combine double quotes.
    return text.substring(start, end).replace(/""/g, '"');
  }

  /**
   * Read a field from input.
   * @return {string|!goog.labs.format.csv.Sentinels_} The field, as a string,
   *     or a sentinel (if applicable).
   */
  function readField() {
    var start = index;
    var didSeeComma = sawComma;
    sawComma = false;
    var token = nextToken();
    if (token == EMPTY) {
      // The trailing empty field was already emitted; the record is over.
      return EOR;
    }
    if (token == EOF || token == NEWLINE) {
      if (didSeeComma) {
        // Terminal delimiter: emit one empty field now, and leave a marker so
        // the following call ends the record.
        pushBack(EMPTY);
        return '';
      }
      return EOR;
    }

    // This is the beginning of a quoted field.
    if (token == '"') {
      return readQuotedField();
    }

    while (true) {
      // This is the end of line or file.
      if (token == EOF || token == NEWLINE) {
        pushBack(token);
        break;
      }

      // This is the end of record.
      if (token == delimiter) {
        sawComma = true;
        break;
      }

      if (token == '"' && !opt_ignoreErrors) {
        throw new goog.labs.format.csv.ParseError(
            text, index - 1, 'Unexpected quote mark');
      }

      token = nextToken();
    }


    var returnString = (token == EOF) ?
        text.substring(start) :  // Return to end of file.
        text.substring(start, index - 1);

    return returnString.replace(/[\r\n]+/g, '');  // Squash any CRLFs.
  }

  /**
   * Read the next record.
   * @return {!Array<string>|!goog.labs.format.csv.Sentinels_} A single record
   *     with multiple fields.
   */
  function readRecord() {
    if (index >= text.length) {
      return EOF;
    }
    var record = [];
    for (var field = readField(); field != EOR; field = readField()) {
      record.push(field);
    }
    return record;
  }

  // Read all records and return.
  var records = [];
  for (var record = readRecord(); record != EOF; record = readRecord()) {
    records.push(record);
  }
  return records;
};
362
363
364
/**
 * Marker objects used by the parser as non-character tokens and return
 * values. Each member is a distinct empty object.
 * @enum {!Object}
 * @private
 */
goog.labs.format.csv.Sentinels_ = {
  EMPTY: {},   // Empty field.
  EOF: {},     // End of file.
  EOR: {},     // End of record.
  NEWLINE: {}  // Newline. \r?\n
};
382
383
384
/**
 * Tests whether a value is a string holding exactly one character.
 * @param {string} str A string.
 * @return {boolean} Whether the string is a single character.
 * @private
 */
goog.labs.format.csv.isCharacterString_ = function(str) {
  if (!goog.isString(str)) {
    return false;
  }
  return str.length == 1;
};
392
393
394
/**
 * Asserts that the argument is a token: either a one-character string or one
 * of the goog.labs.format.csv.Sentinels_ values.
 * @param {*} o What should be a token.
 * @throws {goog.asserts.AssertionError} If {@code o} is not a token.
 * @private
 */
goog.labs.format.csv.assertToken_ = function(o) {
  var failureMessage = 'Should be a string of length 1 or a sentinel.';

  // Non-strings are only acceptable when they are one of the sentinels.
  if (!goog.isString(o)) {
    goog.asserts.assert(
        goog.object.containsValue(goog.labs.format.csv.Sentinels_, o),
        failureMessage);
    return;
  }

  goog.asserts.assertString(o);
  goog.asserts.assert(
      goog.labs.format.csv.isCharacterString_(o), failureMessage);
};
412
413