/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.powermodel;

import java.io.InputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;

/**
 * Parses CSV.
 * <p>
 * Call parse() with an InputStream.
 * <p>
 * LineProcessor.onLine() will be called for each non-blank line in the source document.
 * <p>
 * To simplify parsing and to protect against using too much memory for bad
 * data, the maximum field length is {@link #MAX_FIELD_SIZE}.
 */
class CsvParser {
    /**
     * The maximum size of a single field in bytes.
     */
    public static final int MAX_FIELD_SIZE = (8*1024)-1;

    /**
     * Callback interface for each line of CSV as it is parsed.
     */
    interface LineProcessor {
        /**
         * A line of CSV was parsed.
         *
         * @param lineNumber the line number in the file, starting at 1
         * @param fields the comma separated fields for the line
         */
        void onLine(int lineNumber, ArrayList<String> fields) throws ParseException;
    }

    /**
     * Parse the CSV text in input, calling onto processor for each row.
     * <p>
     * The input is read as bytes and each field is decoded as UTF-8. Lines are
     * terminated by CR, LF or CRLF; blank lines are counted for line numbering
     * but are not reported. A field whose first byte is a double quote is
     * treated as quoted: a doubled quote inside it yields one literal quote, and
     * commas inside it do not split the field. A line terminator always ends the
     * line, even inside a quoted field.
     * <p>
     * This method does not close {@code input}.
     *
     * @param input the stream to read the CSV bytes from
     * @param processor receives each non-blank line together with its line number
     * @throws IOException if reading from {@code input} fails
     * @throws ParseException if a single field does not fit in the
     *         {@code MAX_FIELD_SIZE + 1} byte buffer, or if {@code processor} throws it
     */
    public static void parse(InputStream input, LineProcessor processor)
            throws IOException, ParseException {
        final Charset utf8 = StandardCharsets.UTF_8;
        final byte[] buf = new byte[MAX_FIELD_SIZE+1];
        int lineNumber = 1;
        // Number of bytes at the start of buf carried over from the previous pass
        // (the unfinished field).
        int readPos = 0;
        // The previously examined byte; used to recognize "\r\n" pairs.
        int prev = 0;
        ArrayList<String> fields = new ArrayList<String>();
        boolean finalBuffer = false;
        // True while inside a field that started with a double quote.
        boolean escaping = false;
        // True when the previous byte of a quoted field was a not-yet-resolved quote.
        boolean sawQuote = false;

        while (!finalBuffer) {
            int amt = input.read(buf, readPos, buf.length-readPos);
            if (amt < 0) {
                // No more data. Process whatever's left from before.
                amt = readPos;
                finalBuffer = true;
            } else {
                // Process whatever's left from before, plus the new data.
                amt += readPos;
                finalBuffer = false;
            }

            // Process as much of this buffer as we can.
            int fieldStart = 0;
            int index = readPos;
            // Write cursor for the unescaped content of a quoted field. The content
            // is compacted in place in buf, so escapeIndex never exceeds index.
            int escapeIndex = escaping ? readPos : -1;
            while (index < amt) {
                byte c = buf[index];
                if (c == '\r' || c == '\n') {
                    // Fields are split on ASCII bytes only. Bytes of a multi-byte
                    // UTF-8 sequence are never ASCII, so a field boundary cannot
                    // fall inside a character and each field decodes on its own.
                    if (escaping) {
                        // TODO: Quotes do not escape newlines in our CSV dialect,
                        // but we actually see some data where it should.
                        fields.add(new String(buf, fieldStart, escapeIndex-fieldStart, utf8));
                        escapeIndex = -1;
                        escaping = false;
                        sawQuote = false;
                    } else {
                        fields.add(new String(buf, fieldStart, index-fieldStart, utf8));
                    }
                    // Don't report blank lines
                    if (fields.size() > 1 || (fields.size() == 1 && fields.get(0).length() > 0)) {
                        processor.onLine(lineNumber, fields);
                    }
                    fields = new ArrayList<String>();
                    if (!(c == '\n' && prev == '\r')) {
                        // Don't double increment for dos line endings.
                        lineNumber++;
                    }
                    fieldStart = index = index + 1;
                } else {
                    if (escaping) {
                        // Field started with a " so quotes are escaped with " and commas
                        // don't matter except when following a single quote.
                        if (c == '"') {
                            if (sawQuote) {
                                // Second quote of a "" pair: emit one literal quote.
                                buf[escapeIndex] = buf[index];
                                escapeIndex++;
                                sawQuote = false;
                            } else {
                                sawQuote = true;
                            }
                            index++;
                        } else if (sawQuote && c == ',') {
                            // A lone quote followed by a comma closes the quoted field.
                            fields.add(new String(buf, fieldStart, escapeIndex-fieldStart, utf8));
                            fieldStart = index = index + 1;
                            escapeIndex = -1;
                            escaping = false;
                            sawQuote = false;
                        } else {
                            buf[escapeIndex] = buf[index];
                            escapeIndex++;
                            index++;
                            sawQuote = false;
                        }
                    } else {
                        if (c == ',') {
                            fields.add(new String(buf, fieldStart, index-fieldStart, utf8));
                            fieldStart = index + 1;
                        } else if (c == '"' && fieldStart == index) {
                            // First character is a "
                            escaping = true;
                            fieldStart = escapeIndex = index + 1;
                        }
                        index++;
                    }
                }
                prev = c;
            }

            // A single field is greater than buf.length, so fail.
            if (fieldStart == 0 && index == buf.length) {
                throw new ParseException(lineNumber, "Line is too long: "
                        + new String(buf, 0, 20, utf8) + "...");
            }

            // Move whatever we didn't process to the beginning of the buffer
            // and try again.
            if (fieldStart != amt) {
                // For a quoted field only the compacted (unescaped) bytes are kept.
                readPos = (escaping ? escapeIndex : index) - fieldStart;
                System.arraycopy(buf, fieldStart, buf, 0, readPos);
            } else {
                readPos = 0;
            }

            // Process whatever's left over
            if (finalBuffer) {
                fields.add(new String(buf, 0, readPos, utf8));
                // If there is any content, return the last line.
                if (fields.size() > 1 || (fields.size() == 1 && fields.get(0).length() > 0)) {
                    processor.onLine(lineNumber, fields);
                }
            }
        }
    }
}