1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <stddef.h>
6
7 #include "gn/input_file.h"
8 #include "gn/token.h"
9 #include "gn/tokenizer.h"
10 #include "util/test/test.h"
11
12 namespace {
13
14 struct TokenExpectation {
15 Token::Type type;
16 const char* value;
17 };
18
19 template <size_t len>
CheckTokenizer(const char * input,const TokenExpectation (& expect)[len])20 bool CheckTokenizer(const char* input, const TokenExpectation (&expect)[len]) {
21 InputFile input_file(SourceFile("/test"));
22 input_file.SetContents(input);
23
24 Err err;
25 std::vector<Token> results = Tokenizer::Tokenize(&input_file, &err);
26
27 if (results.size() != len)
28 return false;
29 for (size_t i = 0; i < len; i++) {
30 if (expect[i].type != results[i].type())
31 return false;
32 if (expect[i].value != results[i].value())
33 return false;
34 }
35 return true;
36 }
37
38 } // namespace
39
TEST(Tokenizer, Empty) {
  Err err;

  // A file with no content at all yields no tokens.
  InputFile no_content(SourceFile("/test"));
  no_content.SetContents("");
  std::vector<Token> tokens = Tokenizer::Tokenize(&no_content, &err);
  EXPECT_TRUE(tokens.empty());

  // A file containing nothing but whitespace also yields no tokens.
  InputFile only_whitespace(SourceFile("/test"));
  only_whitespace.SetContents("  \r \n \r\n");
  tokens = Tokenizer::Tokenize(&only_whitespace, &err);
  EXPECT_TRUE(tokens.empty());
}
54
TEST(Tokenizer, Identifier) {
  // Surrounding whitespace is skipped; only the identifier itself remains.
  static const TokenExpectation kExpected[] = {{Token::IDENTIFIER, "foo"}};
  EXPECT_TRUE(CheckTokenizer(" foo ", kExpected));
}
59
TEST(Tokenizer, Integer) {
  // A leading '-' is folded into the integer token.
  static const TokenExpectation kExpected[] = {
      {Token::INTEGER, "123"},
      {Token::INTEGER, "-123"},
  };
  EXPECT_TRUE(CheckTokenizer(" 123 -123 ", kExpected));
}
65
TEST(Tokenizer, IntegerNoSpace) {
  // Even with no separating space, "123-123" splits into two integer
  // tokens rather than an integer followed by a binary minus.
  static const TokenExpectation kExpected[] = {
      {Token::INTEGER, "123"},
      {Token::INTEGER, "-123"},
  };
  EXPECT_TRUE(CheckTokenizer(" 123-123 ", kExpected));
}
71
TEST(Tokenizer, String) {
  // Token values keep the surrounding quotes, and backslash escapes are
  // passed through unprocessed at the tokenizing stage.
  static const TokenExpectation kExpected[] = {
      {Token::STRING, "\"foo\""},
      {Token::STRING, "\"bar\\\"baz\""},
      {Token::STRING, "\"asdf\\\\\""},
  };
  EXPECT_TRUE(
      CheckTokenizer(" \"foo\" \"bar\\\"baz\" \"asdf\\\\\" ", kExpected));
}
79
TEST(Tokenizer, Operator) {
  // Every operator the tokenizer recognizes, in one stream.
  static const TokenExpectation kExpected[] = {
      {Token::MINUS, "-"},          {Token::PLUS, "+"},
      {Token::EQUAL, "="},          {Token::PLUS_EQUALS, "+="},
      {Token::MINUS_EQUALS, "-="},  {Token::NOT_EQUAL, "!="},
      {Token::EQUAL_EQUAL, "=="},   {Token::LESS_THAN, "<"},
      {Token::GREATER_THAN, ">"},   {Token::LESS_EQUAL, "<="},
      {Token::GREATER_EQUAL, ">="}, {Token::BANG, "!"},
      {Token::BOOLEAN_OR, "||"},    {Token::BOOLEAN_AND, "&&"},
      {Token::DOT, "."},            {Token::COMMA, ","},
  };
  EXPECT_TRUE(
      CheckTokenizer("- + = += -= != == < > <= >= ! || && . ,", kExpected));
}
102
TEST(Tokenizer, Scoper) {
  // Braces, brackets, and parens each form their own token, with or
  // without separating whitespace.
  static const TokenExpectation kExpected[] = {
      {Token::LEFT_BRACE, "{"},
      {Token::LEFT_BRACKET, "["},
      {Token::RIGHT_BRACKET, "]"},
      {Token::RIGHT_BRACE, "}"},
      {Token::LEFT_PAREN, "("},
      {Token::RIGHT_PAREN, ")"},
  };
  EXPECT_TRUE(CheckTokenizer("{[ ]} ()", kExpected));
}
111
TEST(Tokenizer, FunctionCall) {
  // A realistic snippet: a call with a string argument followed by a block
  // that assigns an integer.
  static const TokenExpectation kExpected[] = {
      {Token::IDENTIFIER, "fun"},
      {Token::LEFT_PAREN, "("},
      {Token::STRING, "\"foo\""},
      {Token::RIGHT_PAREN, ")"},
      {Token::LEFT_BRACE, "{"},
      {Token::IDENTIFIER, "foo"},
      {Token::EQUAL, "="},
      {Token::INTEGER, "12"},
      {Token::RIGHT_BRACE, "}"},
  };
  EXPECT_TRUE(CheckTokenizer("fun(\"foo\") {\nfoo = 12}", kExpected));
}
122
TEST(Tokenizer, Locations) {
  InputFile input(SourceFile("/test"));
  // Note the two spaces of indent before "4": the final expectation below
  // requires that token to sit at column 3 of line 2 (the literal as
  // previously pasted had lost this alignment).
  input.SetContents("1 2 \"three\"\n  4");
  Err err;
  std::vector<Token> tokens = Tokenizer::Tokenize(&input, &err);

  ASSERT_EQ(4u, tokens.size());
  // Expected (line, column) of each token; both are 1-based.
  const Location kExpected[] = {
      Location(&input, 1, 1),
      Location(&input, 1, 3),
      Location(&input, 1, 5),
      Location(&input, 2, 3),
  };
  for (size_t i = 0; i < 4; i++)
    ASSERT_TRUE(tokens[i].location() == kExpected[i]);
}
135
TEST(Tokenizer, ByteOffsetOfNthLine) {
  // Line 1 always begins at offset 0.
  EXPECT_EQ(0u, Tokenizer::ByteOffsetOfNthLine("foo", 1));

  // Line endings differ across platforms, so verify by inspecting the
  // character found at the returned offset rather than the offset itself.
  char multi_line[] = "aaa\nxaa\n\nya";
  EXPECT_EQ('x', multi_line[Tokenizer::ByteOffsetOfNthLine(multi_line, 2)]);
  EXPECT_EQ('y', multi_line[Tokenizer::ByteOffsetOfNthLine(multi_line, 4)]);

  // Build this buffer byte-by-byte so no toolchain can substitute a
  // two-byte "\r\n" ending.
  char tiny[3];
  tiny[0] = 'a';
  tiny[1] = '\n';
  tiny[2] = 0;
  EXPECT_EQ(0u, Tokenizer::ByteOffsetOfNthLine(tiny, 1));
  EXPECT_EQ(2u, Tokenizer::ByteOffsetOfNthLine(tiny, 2));
}
152
TEST(Tokenizer, Comments) {
  // A comment alone on its line becomes a LINE_COMMENT token; a comment
  // following code on the same line becomes a SUFFIX_COMMENT token.
  static const TokenExpectation kExpected[] = {
      {Token::LINE_COMMENT, "# Stuff"},
      {Token::IDENTIFIER, "fun"},
      {Token::LEFT_PAREN, "("},
      {Token::STRING, "\"foo\""},
      {Token::RIGHT_PAREN, ")"},
      {Token::LEFT_BRACE, "{"},
      {Token::SUFFIX_COMMENT, "# Things"},
      {Token::LINE_COMMENT, "#Wee"},
      {Token::IDENTIFIER, "foo"},
      {Token::EQUAL, "="},
      {Token::INTEGER, "12"},
      {Token::SUFFIX_COMMENT, "#Zip"},
      {Token::RIGHT_BRACE, "}"},
  };
  EXPECT_TRUE(
      CheckTokenizer("# Stuff\n"
                     "fun(\"foo\") { # Things\n"
                     "#Wee\n"
                     "foo = 12 #Zip\n"
                     "}",
                     kExpected));
}
177
TEST(Tokenizer, CommentsContinued) {
  // A comment on the line after a suffix comment is treated as a
  // continuation of that suffix comment only when the two '#' characters
  // line up in the same column; otherwise it is an ordinary line comment.
  TokenExpectation fn1[] = {
      {Token::IDENTIFIER, "fun"},   {Token::LEFT_PAREN, "("},
      {Token::STRING, "\"foo\""},   {Token::RIGHT_PAREN, ")"},
      {Token::LEFT_BRACE, "{"},     {Token::SUFFIX_COMMENT, "# A"},
      {Token::LINE_COMMENT, "# B"}, {Token::RIGHT_BRACE, "}"},
  };
  // "# B" is indented differently from "# A", so it is NOT a continuation.
  EXPECT_TRUE(
      CheckTokenizer("fun(\"foo\") { # A\n"
                     " # B\n"
                     "}",
                     fn1));

  TokenExpectation fn2[] = {
      {Token::IDENTIFIER, "fun"},     {Token::LEFT_PAREN, "("},
      {Token::STRING, "\"foo\""},     {Token::RIGHT_PAREN, ")"},
      {Token::LEFT_BRACE, "{"},       {Token::SUFFIX_COMMENT, "# A"},
      {Token::SUFFIX_COMMENT, "# B"}, {Token::RIGHT_BRACE, "}"},
  };
  // Here "# B" is padded with 13 spaces so its '#' lands in column 14,
  // the same column as "# A"'s '#' above, making it a continuation (the
  // pasted literal had lost this alignment, contradicting the
  // SUFFIX_COMMENT expectation).
  EXPECT_TRUE(CheckTokenizer(
      "fun(\"foo\") { # A\n"
      "             # B\n"
      "}",
      fn2));
}
206
TEST(Tokenizer, WhitespaceTransformMaintain) {
  InputFile input(SourceFile("/test"));
  input.SetContents("a\t2\v\"st\tuff\"\f{");

  // In kMaintainOriginalInput mode the tab after "a" is left untouched,
  // so tokenizing fails and the error points at column 2 (the tab).
  Err err;
  std::vector<Token> tokens = Tokenizer::Tokenize(
      &input, &err, WhitespaceTransform::kMaintainOriginalInput);
  EXPECT_TRUE(err.has_error());
  EXPECT_EQ(err.location().column_number(), 2);
}
217
TEST(Tokenizer, WhitespaceTransformToSpace) {
  InputFile input(SourceFile("/test"));
  input.SetContents("a\t2\v\"st\tuff\"\f{");

  // In kInvalidToSpace mode the stray tab, vertical tab, and form feed
  // between tokens are rewritten to spaces, so tokenizing succeeds.
  Err err;
  std::vector<Token> tokens =
      Tokenizer::Tokenize(&input, &err, WhitespaceTransform::kInvalidToSpace);
  EXPECT_FALSE(err.has_error());
  ASSERT_EQ(tokens.size(), 4u);

  static const TokenExpectation kExpected[] = {
      {Token::IDENTIFIER, "a"},
      {Token::INTEGER, "2"},
      // The tab embedded inside the string literal is NOT rewritten.
      {Token::STRING, "\"st\tuff\""},
      {Token::LEFT_BRACE, "{"},
  };
  for (size_t i = 0; i < 4; i++) {
    EXPECT_EQ(tokens[i].type(), kExpected[i].type);
    EXPECT_EQ(tokens[i].value(), kExpected[i].value);
  }
}
237