• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2017 The Bazel Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package syntax
6
7import (
8	"bytes"
9	"fmt"
10	"go/build"
11	"io/ioutil"
12	"path/filepath"
13	"strings"
14	"testing"
15)
16
17func scan(src interface{}) (tokens string, err error) {
18	sc, err := newScanner("foo.star", src, false)
19	if err != nil {
20		return "", err
21	}
22
23	defer sc.recover(&err)
24
25	var buf bytes.Buffer
26	var val tokenValue
27	for {
28		tok := sc.nextToken(&val)
29
30		if buf.Len() > 0 {
31			buf.WriteByte(' ')
32		}
33		switch tok {
34		case EOF:
35			buf.WriteString("EOF")
36		case IDENT:
37			buf.WriteString(val.raw)
38		case INT:
39			if val.bigInt != nil {
40				fmt.Fprintf(&buf, "%d", val.bigInt)
41			} else {
42				fmt.Fprintf(&buf, "%d", val.int)
43			}
44		case FLOAT:
45			fmt.Fprintf(&buf, "%e", val.float)
46		case STRING, BYTES:
47			buf.WriteString(Quote(val.string, tok == BYTES))
48		default:
49			buf.WriteString(tok.String())
50		}
51		if tok == EOF {
52			break
53		}
54	}
55	return buf.String(), nil
56}
57
58func TestScanner(t *testing.T) {
59	for _, test := range []struct {
60		input, want string
61	}{
62		{``, "EOF"},
63		{`123`, "123 EOF"},
64		{`x.y`, "x . y EOF"},
65		{`chocolateclair`, `chocolate . éclair EOF`},
66		{`123 "foo" hello x.y`, `123 "foo" hello x . y EOF`},
67		{`print(x)`, "print ( x ) EOF"},
68		{`print(x); print(y)`, "print ( x ) ; print ( y ) EOF"},
69		{"\nprint(\n1\n)\n", "print ( 1 ) newline EOF"}, // final \n is at toplevel on non-blank line => token
70		{`/ // /= //= ///=`, "/ // /= //= // /= EOF"},
71		{`# hello
72print(x)`, "print ( x ) EOF"},
73		{`# hello
74print(1)
75cc_binary(name="foo")
76def f(x):
77		return x+1
78print(1)
79`,
80			`print ( 1 ) newline ` +
81				`cc_binary ( name = "foo" ) newline ` +
82				`def f ( x ) : newline ` +
83				`indent return x + 1 newline ` +
84				`outdent print ( 1 ) newline ` +
85				`EOF`},
86		// EOF should act line an implicit newline.
87		{`def f(): pass`,
88			"def f ( ) : pass EOF"},
89		{`def f():
90	pass`,
91			"def f ( ) : newline indent pass newline outdent EOF"},
92		{`def f():
93	pass
94# oops`,
95			"def f ( ) : newline indent pass newline outdent EOF"},
96		{`def f():
97	pass \
98`,
99			"def f ( ) : newline indent pass newline outdent EOF"},
100		{`def f():
101	pass
102`,
103			"def f ( ) : newline indent pass newline outdent EOF"},
104		{`pass
105
106
107pass`, "pass newline pass EOF"}, // consecutive newlines are consolidated
108		{`def f():
109    pass
110    `, "def f ( ) : newline indent pass newline outdent EOF"},
111		{`def f():
112    pass
113    ` + "\n", "def f ( ) : newline indent pass newline outdent EOF"},
114		{"pass", "pass EOF"},
115		{"pass\n", "pass newline EOF"},
116		{"pass\n ", "pass newline EOF"},
117		{"pass\n \n", "pass newline EOF"},
118		{"if x:\n  pass\n ", "if x : newline indent pass newline outdent EOF"},
119		{`x = 1 + \
1202`, `x = 1 + 2 EOF`},
121		{`x = 'a\nb'`, `x = "a\nb" EOF`},
122		{`x = r'a\nb'`, `x = "a\\nb" EOF`},
123		{"x = 'a\\\nb'", `x = "ab" EOF`},
124		{`x = '\''`, `x = "'" EOF`},
125		{`x = "\""`, `x = "\"" EOF`},
126		{`x = r'\''`, `x = "\\'" EOF`},
127		{`x = '''\''''`, `x = "'" EOF`},
128		{`x = r'''\''''`, `x = "\\'" EOF`},
129		{`x = ''''a'b'c'''`, `x = "'a'b'c" EOF`},
130		{"x = '''a\nb'''", `x = "a\nb" EOF`},
131		{"x = '''a\rb'''", `x = "a\nb" EOF`},
132		{"x = '''a\r\nb'''", `x = "a\nb" EOF`},
133		{"x = '''a\n\rb'''", `x = "a\n\nb" EOF`},
134		{"x = r'a\\\nb'", `x = "a\\\nb" EOF`},
135		{"x = r'a\\\rb'", `x = "a\\\nb" EOF`},
136		{"x = r'a\\\r\nb'", `x = "a\\\nb" EOF`},
137		{"a\rb", `a newline b EOF`},
138		{"a\nb", `a newline b EOF`},
139		{"a\r\nb", `a newline b EOF`},
140		{"a\n\nb", `a newline b EOF`},
141		// numbers
142		{"0", `0 EOF`},
143		{"00", `0 EOF`},
144		{"0.", `0.000000e+00 EOF`},
145		{"0.e1", `0.000000e+00 EOF`},
146		{".0", `0.000000e+00 EOF`},
147		{"0.0", `0.000000e+00 EOF`},
148		{".e1", `. e1 EOF`},
149		{"1", `1 EOF`},
150		{"1.", `1.000000e+00 EOF`},
151		{".1", `1.000000e-01 EOF`},
152		{".1e1", `1.000000e+00 EOF`},
153		{".1e+1", `1.000000e+00 EOF`},
154		{".1e-1", `1.000000e-02 EOF`},
155		{"1e1", `1.000000e+01 EOF`},
156		{"1e+1", `1.000000e+01 EOF`},
157		{"1e-1", `1.000000e-01 EOF`},
158		{"123", `123 EOF`},
159		{"123e45", `1.230000e+47 EOF`},
160		{"999999999999999999999999999999999999999999999999999", `999999999999999999999999999999999999999999999999999 EOF`},
161		{"12345678901234567890", `12345678901234567890 EOF`},
162		// hex
163		{"0xA", `10 EOF`},
164		{"0xAAG", `170 G EOF`},
165		{"0xG", `foo.star:1:1: invalid hex literal`},
166		{"0XA", `10 EOF`},
167		{"0XG", `foo.star:1:1: invalid hex literal`},
168		{"0xA.", `10 . EOF`},
169		{"0xA.e1", `10 . e1 EOF`},
170		{"0x12345678deadbeef12345678", `5634002672576678570168178296 EOF`},
171		// binary
172		{"0b1010", `10 EOF`},
173		{"0B111101", `61 EOF`},
174		{"0b3", `foo.star:1:3: invalid binary literal`},
175		{"0b1010201", `10 201 EOF`},
176		{"0b1010.01", `10 1.000000e-02 EOF`},
177		{"0b0000", `0 EOF`},
178		// octal
179		{"0o123", `83 EOF`},
180		{"0o12834", `10 834 EOF`},
181		{"0o12934", `10 934 EOF`},
182		{"0o12934.", `10 9.340000e+02 EOF`},
183		{"0o12934.1", `10 9.341000e+02 EOF`},
184		{"0o12934e1", `10 9.340000e+03 EOF`},
185		{"0o123.", `83 . EOF`},
186		{"0o123.1", `83 1.000000e-01 EOF`},
187		{"0123", `foo.star:1:5: obsolete form of octal literal; use 0o123`},
188		{"012834", `foo.star:1:1: invalid int literal`},
189		{"012934", `foo.star:1:1: invalid int literal`},
190		{"i = 012934", `foo.star:1:5: invalid int literal`},
191		// octal escapes in string literals
192		{`"\037"`, `"\x1f" EOF`},
193		{`"\377"`, `foo.star:1:1: non-ASCII octal escape \377 (use \u00FF for the UTF-8 encoding of U+00FF)`},
194		{`"\378"`, `"\x1f8" EOF`},                               // = '\37' + '8'
195		{`"\400"`, `foo.star:1:1: non-ASCII octal escape \400`}, // unlike Python 2 and 3
196		// hex escapes
197		{`"\x00\x20\x09\x41\x7e\x7f"`, `"\x00 \tA~\x7f" EOF`}, // DEL is non-printable
198		{`"\x80"`, `foo.star:1:1: non-ASCII hex escape`},
199		{`"\xff"`, `foo.star:1:1: non-ASCII hex escape`},
200		{`"\xFf"`, `foo.star:1:1: non-ASCII hex escape`},
201		{`"\xF"`, `foo.star:1:1: truncated escape sequence \xF`},
202		{`"\x"`, `foo.star:1:1: truncated escape sequence \x`},
203		{`"\xfg"`, `foo.star:1:1: invalid escape sequence \xfg`},
204		// Unicode escapes
205		// \uXXXX
206		{`"\u0400"`, `"Ѐ" EOF`},
207		{`"\u100"`, `foo.star:1:1: truncated escape sequence \u100`},
208		{`"\u04000"`, `"Ѐ0" EOF`}, // = U+0400 + '0'
209		{`"\u100g"`, `foo.star:1:1: invalid escape sequence \u100g`},
210		{`"\u4E16"`, `"世" EOF`},
211		{`"\udc00"`, `foo.star:1:1: invalid Unicode code point U+DC00`}, // surrogate
212		// \UXXXXXXXX
213		{`"\U00000400"`, `"Ѐ" EOF`},
214		{`"\U0000400"`, `foo.star:1:1: truncated escape sequence \U0000400`},
215		{`"\U000004000"`, `"Ѐ0" EOF`}, // = U+0400 + '0'
216		{`"\U1000000g"`, `foo.star:1:1: invalid escape sequence \U1000000g`},
217		{`"\U0010FFFF"`, `"\U0010ffff" EOF`},
218		{`"\U00110000"`, `foo.star:1:1: code point out of range: \U00110000 (max \U00110000)`},
219		{`"\U0001F63F"`, `"��" EOF`},
220		{`"\U0000dc00"`, `foo.star:1:1: invalid Unicode code point U+DC00`}, // surrogate
221
222		// backslash escapes
223		// As in Go, a backslash must escape something.
224		// (Python started issuing a deprecation warning in 3.6.)
225		{`"foo\(bar"`, `foo.star:1:1: invalid escape sequence \(`},
226		{`"\+"`, `foo.star:1:1: invalid escape sequence \+`},
227		{`"\w"`, `foo.star:1:1: invalid escape sequence \w`},
228		{`"\""`, `"\"" EOF`},
229		{`"\'"`, `"'" EOF`},
230		{`'\w'`, `foo.star:1:1: invalid escape sequence \w`},
231		{`'\''`, `"'" EOF`},
232		{`'\"'`, `"\"" EOF`},
233		{`"""\w"""`, `foo.star:1:1: invalid escape sequence \w`},
234		{`"""\""""`, `"\"" EOF`},
235		{`"""\'"""`, `"'" EOF`},
236		{`'''\w'''`, `foo.star:1:1: invalid escape sequence \w`},
237		{`'''\''''`, `"'" EOF`},
238		{`'''\"'''`, `"\"" EOF`},
239		{`r"\w"`, `"\\w" EOF`},
240		{`r"\""`, `"\\\"" EOF`},
241		{`r"\'"`, `"\\'" EOF`},
242		{`r'\w'`, `"\\w" EOF`},
243		{`r'\''`, `"\\'" EOF`},
244		{`r'\"'`, `"\\\"" EOF`},
245		{`'a\zb'`, `foo.star:1:1: invalid escape sequence \z`},
246		{`"\o123"`, `foo.star:1:1: invalid escape sequence \o`},
247		// bytes literals (where they differ from text strings)
248		{`b"AЀ世��"`, `b"AЀ世��`},                                       // 1-4 byte encodings, literal
249		{`b"\x41\u0400\u4e16\U0001F63F"`, `b"AЀ世��"`},                // same, as escapes
250		{`b"\377\378\x80\xff\xFf"`, `b"\xff\x1f8\x80\xff\xff" EOF`}, // hex/oct escapes allow non-ASCII
251		{`b"\400"`, `foo.star:1:2: invalid escape sequence \400`},
252		{`b"\udc00"`, `foo.star:1:2: invalid Unicode code point U+DC00`}, // (same as string)
253		// floats starting with octal digits
254		{"012934.", `1.293400e+04 EOF`},
255		{"012934.1", `1.293410e+04 EOF`},
256		{"012934e1", `1.293400e+05 EOF`},
257		{"0123.", `1.230000e+02 EOF`},
258		{"0123.1", `1.231000e+02 EOF`},
259		// github.com/google/skylark/issues/16
260		{"x ! 0", "foo.star:1:3: unexpected input character '!'"},
261		// github.com/google/starlark-go/issues/80
262		{"([{<>}])", "( [ { < > } ] ) EOF"},
263		{"f();", "f ( ) ; EOF"},
264		// github.com/google/starlark-go/issues/104
265		{"def f():\n  if x:\n    pass\n  ", `def f ( ) : newline indent if x : newline indent pass newline outdent outdent EOF`},
266		{`while cond: pass`, "while cond : pass EOF"},
267		// github.com/google/starlark-go/issues/107
268		{"~= ~= 5", "~ = ~ = 5 EOF"},
269		{"0in", "0 in EOF"},
270		{"0or", "foo.star:1:3: invalid octal literal"},
271		{"6in", "6 in EOF"},
272		{"6or", "6 or EOF"},
273	} {
274		got, err := scan(test.input)
275		if err != nil {
276			got = err.(Error).Error()
277		}
278		// Prefix match allows us to truncate errors in expecations.
279		// Success cases all end in EOF.
280		if !strings.HasPrefix(got, test.want) {
281			t.Errorf("scan `%s` = [%s], want [%s]", test.input, got, test.want)
282		}
283	}
284}
285
// dataFile is the same as starlarktest.DataFile.
// We make a copy to avoid a dependency cycle.
//
// It returns the path formed by joining build.Default.GOPATH,
// "src/go.starlark.net", pkgdir, and filename. It is a variable rather
// than a plain function, so it can be reassigned.
var dataFile = func(pkgdir, filename string) string {
	elems := []string{
		build.Default.GOPATH,
		"src/go.starlark.net",
		pkgdir,
		filename,
	}
	return filepath.Join(elems...)
}
291
292func BenchmarkScan(b *testing.B) {
293	filename := dataFile("syntax", "testdata/scan.star")
294	b.StopTimer()
295	data, err := ioutil.ReadFile(filename)
296	if err != nil {
297		b.Fatal(err)
298	}
299	b.StartTimer()
300
301	for i := 0; i < b.N; i++ {
302		sc, err := newScanner(filename, data, false)
303		if err != nil {
304			b.Fatal(err)
305		}
306		var val tokenValue
307		for sc.nextToken(&val) != EOF {
308		}
309	}
310}
311