1 // Copyright 2024 The Pigweed Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not 4 // use this file except in compliance with the License. You may obtain a copy of 5 // the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 12 // License for the specific language governing permissions and limitations under 13 // the License. 14 15 #include "pw_tokenizer_private/csv.h" 16 17 #include "pw_log/log.h" 18 19 namespace pw::tokenizer::internal { 20 namespace { 21 22 constexpr char kSeparator = ','; IsLineEnd(char ch)23[[nodiscard]] constexpr bool IsLineEnd(char ch) { 24 return ch == '\r' || ch == '\n'; 25 } 26 27 } // namespace 28 ParseCharacterOrEof(int ch)29std::optional<std::vector<std::string>> CsvParser::ParseCharacterOrEof(int ch) { 30 switch (state_) { 31 case kNewEntry: 32 if (ch == '"') { 33 state_ = kQuotedEntry; 34 } else if (IsLineEnd(ch)) { 35 if (line_.size() > 1) { // Ignore empty lines 36 return FinishLine(); 37 } 38 } else if (ch == kSeparator) { 39 line_.emplace_back(); // Append the empty entry, start the next 40 } else { 41 state_ = kUnquotedEntry; 42 line_.back().push_back(ch); 43 } 44 break; 45 case kUnquotedEntry: 46 if (ch == kEndOfFile || IsLineEnd(ch)) { 47 return FinishLine(); 48 } 49 if (ch == kSeparator) { 50 state_ = kNewEntry; 51 line_.emplace_back(); 52 } else { 53 line_.back().push_back(ch); 54 } 55 break; 56 case kQuotedEntry: 57 if (ch == kEndOfFile) { 58 PW_LOG_WARN("Unexpected end-of-file in quoted entry; ignoring line"); 59 } else if (ch == '"') { 60 state_ = kQuotedEntryQuote; 61 } else { 62 line_.back().push_back(ch); 63 } 64 break; 65 case kQuotedEntryQuote: 66 if (ch == '"') { 67 state_ = kQuotedEntry; 68 line_.back().push_back('"'); 69 } else if (ch == kEndOfFile || IsLineEnd(ch)) { 70 return FinishLine(); 71 } else if (ch == kSeparator) { 72 state_ = kNewEntry; 73 line_.emplace_back(); 74 } else { 75 PW_LOG_WARN( 76 "Unexpected character '%c' after quoted entry; expected ',' or a " 77 "line ending; skipping line", 78 ch); 79 state_ = kError; 80 line_.clear(); 81 line_.emplace_back(); 82 } 83 break; 84 case kError: 85 if (IsLineEnd(ch)) { // Skip chars until end-of-line 86 state_ = kNewEntry; 87 } 88 break; 89 } 90 return std::nullopt; 91 } 92 FinishLine()93std::optional<std::vector<std::string>> CsvParser::FinishLine() { 94 state_ = kNewEntry; 95 std::optional<std::vector<std::string>> completed_line = std::move(line_); 96 line_.clear(); 97 line_.emplace_back(); 98 return completed_line; 99 } 100 101 } // namespace pw::tokenizer::internal 102