• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2024 The Pigweed Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4 // use this file except in compliance with the License. You may obtain a copy of
5 // the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 // License for the specific language governing permissions and limitations under
13 // the License.
14 
15 #include "pw_tokenizer_private/csv.h"
16 
17 #include "pw_log/log.h"
18 
19 namespace pw::tokenizer::internal {
namespace {

// Character that separates adjacent entries within a single CSV line.
constexpr char kSeparator = ',';

// Returns true if `ch` is a carriage return ('\r') or a line feed ('\n').
//
// The parameter is an int rather than a char so that callers holding an int
// (a character value or an out-of-range end-of-input sentinel) are compared
// as-is. With a char parameter the argument would be implicitly narrowed, and
// an out-of-range value such as 0x10A could alias to '\n'.
[[nodiscard]] constexpr bool IsLineEnd(int ch) {
  return ch == '\r' || ch == '\n';
}

}  // namespace
28 
ParseCharacterOrEof(int ch)29 std::optional<std::vector<std::string>> CsvParser::ParseCharacterOrEof(int ch) {
30   switch (state_) {
31     case kNewEntry:
32       if (ch == '"') {
33         state_ = kQuotedEntry;
34       } else if (IsLineEnd(ch)) {
35         if (line_.size() > 1) {  // Ignore empty lines
36           return FinishLine();
37         }
38       } else if (ch == kSeparator) {
39         line_.emplace_back();  // Append the empty entry, start the next
40       } else {
41         state_ = kUnquotedEntry;
42         line_.back().push_back(ch);
43       }
44       break;
45     case kUnquotedEntry:
46       if (ch == kEndOfFile || IsLineEnd(ch)) {
47         return FinishLine();
48       }
49       if (ch == kSeparator) {
50         state_ = kNewEntry;
51         line_.emplace_back();
52       } else {
53         line_.back().push_back(ch);
54       }
55       break;
56     case kQuotedEntry:
57       if (ch == kEndOfFile) {
58         PW_LOG_WARN("Unexpected end-of-file in quoted entry; ignoring line");
59       } else if (ch == '"') {
60         state_ = kQuotedEntryQuote;
61       } else {
62         line_.back().push_back(ch);
63       }
64       break;
65     case kQuotedEntryQuote:
66       if (ch == '"') {
67         state_ = kQuotedEntry;
68         line_.back().push_back('"');
69       } else if (ch == kEndOfFile || IsLineEnd(ch)) {
70         return FinishLine();
71       } else if (ch == kSeparator) {
72         state_ = kNewEntry;
73         line_.emplace_back();
74       } else {
75         PW_LOG_WARN(
76             "Unexpected character '%c' after quoted entry; expected ',' or a "
77             "line ending; skipping line",
78             ch);
79         state_ = kError;
80         line_.clear();
81         line_.emplace_back();
82       }
83       break;
84     case kError:
85       if (IsLineEnd(ch)) {  // Skip chars until end-of-line
86         state_ = kNewEntry;
87       }
88       break;
89   }
90   return std::nullopt;
91 }
92 
FinishLine()93 std::optional<std::vector<std::string>> CsvParser::FinishLine() {
94   state_ = kNewEntry;
95   std::optional<std::vector<std::string>> completed_line = std::move(line_);
96   line_.clear();
97   line_.emplace_back();
98   return completed_line;
99 }
100 
101 }  // namespace pw::tokenizer::internal
102