/**
 * A small CSV parser driven by a character-at-a-time state machine.
 *
 * <p>Fields are separated by commas and records by newlines. A field may be wrapped in
 * double quotes ("), in which case it may contain commas and newlines, and a literal
 * double quote is written as two consecutive double quotes.
 *
 * <p>This class only has static members and cannot be instantiated.
 */
public class CsvParser {
    /**
     * Initial capacity, in chars, of the buffer that accumulates the current field.
     */
    private static final int CHUNK_SIZE = 64 * 1024;

    /**
     * Marks the one-character lookahead slot as empty. Must differ from every value
     * {@link Reader#read()} can return, including the end-of-stream value -1.
     */
    private static final int NO_LOOKAHEAD = -2;

    /**
     * Thrown when the input is not well-formed CSV.
     */
    public static class ParseException extends Exception {
        private final int mLine;
        private final int mColumn;

        /**
         * @param line 1-based line number at which the problem was detected.
         * @param column 1-based column at which the problem was detected.
         * @param message description of the problem.
         */
        public ParseException(int line, int column, String message) {
            super(message);
            mLine = line;
            mColumn = column;
        }

        /**
         * Line number in source file.
         */
        public int getLine() {
            return mLine;
        }

        /**
         * Column in source file.
         */
        public int getColumn() {
            return mColumn;
        }
    }

    /**
     * One parsed record: its fields plus the input line number it was completed on.
     */
    public static class Line {
        private final int mLineNumber;
        private final List<String> mFields;

        Line(int lineno, List<String> fields) {
            mLineNumber = lineno;
            mFields = fields;
        }

        /**
         * 1-based number of the input line on which this record ended. For a record
         * whose quoted fields contain newlines this is the last physical line.
         */
        public int getLine() {
            return mLineNumber;
        }

        /**
         * The fields of this record, in order. Empty fields are empty strings.
         */
        public List<String> getFields() {
            return mFields;
        }
    }

    // Parser States
    /** At the start of a record; nothing of the record has been read yet. */
    private static final int STATE_START_LINE = 0;
    /** Just after a comma; the next field has not started yet. */
    private static final int STATE_START_FIELD = 1;
    /** Between the opening and closing quotation marks of a quoted field. */
    private static final int STATE_INSIDE_QUOTED_FIELD = 2;
    /** Saw a '"' inside a quoted field: either the closing quote or the first half of "". */
    private static final int STATE_FIRST_QUOTATION_MARK = 3;
    /** Inside a field that did not start with a quotation mark. */
    private static final int STATE_INSIDE_UNQUOTED_FIELD = 4;
    /** End of input reached; parsing stops. */
    private static final int STATE_DONE = 5;

    // Parser Actions (bit flags, may be combined)
    /** Append the current character to the field buffer. */
    private static final int ACTION_APPEND_CHAR = 1;
    /** The field buffer holds a complete field; add it to the current record. */
    private static final int ACTION_FIELD_COMPLETE = 2;
    /** The current record is complete; add it to the result. */
    private static final int ACTION_LINE_COMPLETE = 4;

    /**
     * Not instantiable; use {@link #parse(Reader)}.
     */
    private CsvParser() {
    }

    /**
     * Reads CSV and returns a list of Line objects.
     *
     * <p>Handles newlines inside fields quoted with double quotes ("). Records may be
     * terminated by either LF or CRLF; a CR that is immediately followed by LF outside
     * of a quoted field is treated as part of the line terminator. Characters inside
     * quoted fields, including CR and LF, are preserved verbatim.
     *
     * <p>Doesn't report blank lines, but does include empty fields.
     *
     * <p>Equal field values within one call share a single String instance to reduce
     * peak memory usage.
     *
     * @param reader source of the CSV text; read until end of stream, not closed.
     * @return the non-blank records, in input order.
     * @throws ParseException if the input ends inside a quoted field, or if a closing
     *         quotation mark is followed by anything other than a comma, a line
     *         terminator, another quotation mark or end of input.
     * @throws IOException if reading from {@code reader} fails.
     */
    public static List<Line> parse(Reader reader)
            throws ParseException, IOException {
        final List<Line> result = new ArrayList<Line>();
        final Map<String, String> stringPool = new HashMap<String, String>();
        final StringBuilder buf = new StringBuilder(CHUNK_SIZE);
        List<String> fields = new ArrayList<String>();
        int line = 1;
        int column = 1;
        int lookahead = NO_LOOKAHEAD;

        int state = STATE_START_LINE;
        while (state != STATE_DONE) {
            // Take the character read ahead on the previous iteration, if any.
            int c;
            if (lookahead != NO_LOOKAHEAD) {
                c = lookahead;
                lookahead = NO_LOOKAHEAD;
            } else {
                c = reader.read();
            }

            // Outside of quoted fields, fold CRLF into a single '\n'. A CR that is not
            // followed by LF is left alone and handled like any other character.
            if (c == '\r' && state != STATE_INSIDE_QUOTED_FIELD) {
                final int next = reader.read();
                if (next == '\n') {
                    c = '\n';
                } else {
                    lookahead = next;
                }
            }

            // Reader.read() returns -1 at end of stream. U+0000 is ordinary data, so
            // the test is "c < 0" rather than "c <= 0".
            final boolean eof = c < 0;
            int action = 0;

            switch (state) {
                case STATE_START_LINE:
                    if (eof) {
                        // No data, skip ACTION_LINE_COMPLETE.
                        state = STATE_DONE;
                    } else if (c == '"') {
                        state = STATE_INSIDE_QUOTED_FIELD;
                    } else if (c == ',') {
                        action = ACTION_FIELD_COMPLETE;
                        state = STATE_START_FIELD;
                    } else if (c == '\n') {
                        // Blank line: consume the newline, state stays STATE_START_LINE.
                    } else {
                        action = ACTION_APPEND_CHAR;
                        state = STATE_INSIDE_UNQUOTED_FIELD;
                    }
                    break;

                case STATE_START_FIELD:
                    if (eof) {
                        // Input ended right after a comma: the last field is empty.
                        action = ACTION_FIELD_COMPLETE | ACTION_LINE_COMPLETE;
                        state = STATE_DONE;
                    } else if (c == '"') {
                        state = STATE_INSIDE_QUOTED_FIELD;
                    } else if (c == ',') {
                        action = ACTION_FIELD_COMPLETE;
                    } else if (c == '\n') {
                        action = ACTION_FIELD_COMPLETE | ACTION_LINE_COMPLETE;
                        state = STATE_START_LINE;
                    } else {
                        action = ACTION_APPEND_CHAR;
                        state = STATE_INSIDE_UNQUOTED_FIELD;
                    }
                    break;

                case STATE_INSIDE_QUOTED_FIELD:
                    if (eof) {
                        throw new ParseException(line, column,
                                "Bad input: End of input inside quoted field.");
                    } else if (c == '"') {
                        state = STATE_FIRST_QUOTATION_MARK;
                    } else {
                        action = ACTION_APPEND_CHAR;
                    }
                    break;

                case STATE_FIRST_QUOTATION_MARK:
                    if (eof) {
                        action = ACTION_FIELD_COMPLETE | ACTION_LINE_COMPLETE;
                        state = STATE_DONE;
                    } else if (c == '"') {
                        // "" inside a quoted field is an escaped quotation mark.
                        action = ACTION_APPEND_CHAR;
                        state = STATE_INSIDE_QUOTED_FIELD;
                    } else if (c == ',') {
                        action = ACTION_FIELD_COMPLETE;
                        state = STATE_START_FIELD;
                    } else if (c == '\n') {
                        action = ACTION_FIELD_COMPLETE | ACTION_LINE_COMPLETE;
                        state = STATE_START_LINE;
                    } else {
                        throw new ParseException(line, column,
                                "Bad input: Character after field ended or unquoted '\"'.");
                    }
                    break;

                case STATE_INSIDE_UNQUOTED_FIELD:
                    if (eof) {
                        action = ACTION_FIELD_COMPLETE | ACTION_LINE_COMPLETE;
                        state = STATE_DONE;
                    } else if (c == ',') {
                        action = ACTION_FIELD_COMPLETE;
                        state = STATE_START_FIELD;
                    } else if (c == '\n') {
                        action = ACTION_FIELD_COMPLETE | ACTION_LINE_COMPLETE;
                        state = STATE_START_LINE;
                    } else {
                        action = ACTION_APPEND_CHAR;
                    }
                    break;

                default:
                    break;
            }

            if ((action & ACTION_APPEND_CHAR) != 0) {
                buf.append((char) c);
            }
            if ((action & ACTION_FIELD_COMPLETE) != 0) {
                // A lot of the strings are duplicated, so pool them to reduce peak memory
                // usage. This could be made slightly better by having a custom key class
                // that does the lookup without making a new String that gets immediately
                // thrown away.
                String field = buf.toString();
                final String cached = stringPool.get(field);
                if (cached == null) {
                    stringPool.put(field, field);
                } else {
                    field = cached;
                }
                fields.add(field);
                buf.setLength(0);
            }
            if ((action & ACTION_LINE_COMPLETE) != 0) {
                // Only report lines with any contents
                if (!fields.isEmpty()) {
                    result.add(new Line(line, fields));
                    fields = new ArrayList<String>();
                }
            }

            // Position tracking for error reporting.
            if (c == '\n') {
                line++;
                column = 1;
            } else {
                column++;
            }
        }

        return result;
    }
}