// Copyright 2009 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


// Matches exactly one well-formed escape sequence: \n, \\, \xHH or \uHHHH.
// Group 1 holds the single-character identifier ('n' or '\'), group 2 the two
// hex digits of a \x escape, group 3 the four hex digits of a \u escape.
// Sharing one global regex is safe here because String.prototype.replace
// resets lastIndex before it starts scanning.
const ESCAPE_SEQUENCE_PATTERN =
    /\\(?:([n\\])|x([0-9a-fA-F]{2})|u([0-9a-fA-F]{4}))/g;

/**
 * Parser for single lines of comma-separated values whose fields may contain
 * backslash escape sequences.
 */
export class CsvParser {
  /**
   * Decodes the backslash escape sequences in a single field.
   *
   * Recognized sequences are \n (newline), \\ (backslash), \xHH (two hex
   * digits) and \uHHHH (four hex digits); each is replaced by the one
   * character it denotes, so \x2C becomes a comma. Malformed sequences
   * (a trailing backslash, an unknown identifier, or missing / non-hex
   * digits) are left in the output unchanged instead of being decoded into
   * NUL or other unrelated characters.
   *
   * @param {string} string Raw field text.
   * @returns {string} The field with all well-formed escapes decoded.
   */
  escapeField(string) {
    // Fast path: most fields contain no escapes at all.
    if (!string.includes('\\')) return string;
    return string.replace(
        ESCAPE_SEQUENCE_PATTERN,
        (match, simple, asciiHex, unicodeHex) => {
          if (simple === 'n') return '\n';
          if (simple === '\\') return '\\';
          // Exactly one of the two hex groups participated in the match.
          const hexDigits = asciiHex ?? unicodeHex;
          return String.fromCharCode(Number.parseInt(hexDigits, 16));
        });
  }

  /**
   * Parses a line of CSV-encoded values. Returns an array of fields.
   *
   * The line is split on every ',' first and each field is unescaped
   * afterwards, so a comma written as \x2C stays inside its field. An empty
   * line yields an empty array; a trailing comma yields a trailing empty
   * field.
   *
   * @param {string} line Input line.
   * @returns {string[]} The decoded fields, in order of appearance.
   */
  parseLine(line) {
    if (line.length === 0) return [];
    return line.split(',').map((field) => this.escapeField(field));
  }
}