1// Copyright 2017 The Bazel Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package syntax 6 7import ( 8 "bytes" 9 "fmt" 10 "go/build" 11 "io/ioutil" 12 "path/filepath" 13 "strings" 14 "testing" 15) 16 17func scan(src interface{}) (tokens string, err error) { 18 sc, err := newScanner("foo.star", src, false) 19 if err != nil { 20 return "", err 21 } 22 23 defer sc.recover(&err) 24 25 var buf bytes.Buffer 26 var val tokenValue 27 for { 28 tok := sc.nextToken(&val) 29 30 if buf.Len() > 0 { 31 buf.WriteByte(' ') 32 } 33 switch tok { 34 case EOF: 35 buf.WriteString("EOF") 36 case IDENT: 37 buf.WriteString(val.raw) 38 case INT: 39 if val.bigInt != nil { 40 fmt.Fprintf(&buf, "%d", val.bigInt) 41 } else { 42 fmt.Fprintf(&buf, "%d", val.int) 43 } 44 case FLOAT: 45 fmt.Fprintf(&buf, "%e", val.float) 46 case STRING, BYTES: 47 buf.WriteString(Quote(val.string, tok == BYTES)) 48 default: 49 buf.WriteString(tok.String()) 50 } 51 if tok == EOF { 52 break 53 } 54 } 55 return buf.String(), nil 56} 57 58func TestScanner(t *testing.T) { 59 for _, test := range []struct { 60 input, want string 61 }{ 62 {``, "EOF"}, 63 {`123`, "123 EOF"}, 64 {`x.y`, "x . y EOF"}, 65 {`chocolate.éclair`, `chocolate . éclair EOF`}, 66 {`123 "foo" hello x.y`, `123 "foo" hello x . y EOF`}, 67 {`print(x)`, "print ( x ) EOF"}, 68 {`print(x); print(y)`, "print ( x ) ; print ( y ) EOF"}, 69 {"\nprint(\n1\n)\n", "print ( 1 ) newline EOF"}, // final \n is at toplevel on non-blank line => token 70 {`/ // /= //= ///=`, "/ // /= //= // /= EOF"}, 71 {`# hello 72print(x)`, "print ( x ) EOF"}, 73 {`# hello 74print(1) 75cc_binary(name="foo") 76def f(x): 77 return x+1 78print(1) 79`, 80 `print ( 1 ) newline ` + 81 `cc_binary ( name = "foo" ) newline ` + 82 `def f ( x ) : newline ` + 83 `indent return x + 1 newline ` + 84 `outdent print ( 1 ) newline ` + 85 `EOF`}, 86 // EOF should act line an implicit newline. 87 {`def f(): pass`, 88 "def f ( ) : pass EOF"}, 89 {`def f(): 90 pass`, 91 "def f ( ) : newline indent pass newline outdent EOF"}, 92 {`def f(): 93 pass 94# oops`, 95 "def f ( ) : newline indent pass newline outdent EOF"}, 96 {`def f(): 97 pass \ 98`, 99 "def f ( ) : newline indent pass newline outdent EOF"}, 100 {`def f(): 101 pass 102`, 103 "def f ( ) : newline indent pass newline outdent EOF"}, 104 {`pass 105 106 107pass`, "pass newline pass EOF"}, // consecutive newlines are consolidated 108 {`def f(): 109 pass 110 `, "def f ( ) : newline indent pass newline outdent EOF"}, 111 {`def f(): 112 pass 113 ` + "\n", "def f ( ) : newline indent pass newline outdent EOF"}, 114 {"pass", "pass EOF"}, 115 {"pass\n", "pass newline EOF"}, 116 {"pass\n ", "pass newline EOF"}, 117 {"pass\n \n", "pass newline EOF"}, 118 {"if x:\n pass\n ", "if x : newline indent pass newline outdent EOF"}, 119 {`x = 1 + \ 1202`, `x = 1 + 2 EOF`}, 121 {`x = 'a\nb'`, `x = "a\nb" EOF`}, 122 {`x = r'a\nb'`, `x = "a\\nb" EOF`}, 123 {"x = 'a\\\nb'", `x = "ab" EOF`}, 124 {`x = '\''`, `x = "'" EOF`}, 125 {`x = "\""`, `x = "\"" EOF`}, 126 {`x = r'\''`, `x = "\\'" EOF`}, 127 {`x = '''\''''`, `x = "'" EOF`}, 128 {`x = r'''\''''`, `x = "\\'" EOF`}, 129 {`x = ''''a'b'c'''`, `x = "'a'b'c" EOF`}, 130 {"x = '''a\nb'''", `x = "a\nb" EOF`}, 131 {"x = '''a\rb'''", `x = "a\nb" EOF`}, 132 {"x = '''a\r\nb'''", `x = "a\nb" EOF`}, 133 {"x = '''a\n\rb'''", `x = "a\n\nb" EOF`}, 134 {"x = r'a\\\nb'", `x = "a\\\nb" EOF`}, 135 {"x = r'a\\\rb'", `x = "a\\\nb" EOF`}, 136 {"x = r'a\\\r\nb'", `x = "a\\\nb" EOF`}, 137 {"a\rb", `a newline b EOF`}, 138 {"a\nb", `a newline b EOF`}, 139 {"a\r\nb", `a newline b EOF`}, 140 {"a\n\nb", `a newline b EOF`}, 141 // numbers 142 {"0", `0 EOF`}, 143 {"00", `0 EOF`}, 144 {"0.", `0.000000e+00 EOF`}, 145 {"0.e1", `0.000000e+00 EOF`}, 146 {".0", `0.000000e+00 EOF`}, 147 {"0.0", `0.000000e+00 EOF`}, 148 {".e1", `. e1 EOF`}, 149 {"1", `1 EOF`}, 150 {"1.", `1.000000e+00 EOF`}, 151 {".1", `1.000000e-01 EOF`}, 152 {".1e1", `1.000000e+00 EOF`}, 153 {".1e+1", `1.000000e+00 EOF`}, 154 {".1e-1", `1.000000e-02 EOF`}, 155 {"1e1", `1.000000e+01 EOF`}, 156 {"1e+1", `1.000000e+01 EOF`}, 157 {"1e-1", `1.000000e-01 EOF`}, 158 {"123", `123 EOF`}, 159 {"123e45", `1.230000e+47 EOF`}, 160 {"999999999999999999999999999999999999999999999999999", `999999999999999999999999999999999999999999999999999 EOF`}, 161 {"12345678901234567890", `12345678901234567890 EOF`}, 162 // hex 163 {"0xA", `10 EOF`}, 164 {"0xAAG", `170 G EOF`}, 165 {"0xG", `foo.star:1:1: invalid hex literal`}, 166 {"0XA", `10 EOF`}, 167 {"0XG", `foo.star:1:1: invalid hex literal`}, 168 {"0xA.", `10 . EOF`}, 169 {"0xA.e1", `10 . e1 EOF`}, 170 {"0x12345678deadbeef12345678", `5634002672576678570168178296 EOF`}, 171 // binary 172 {"0b1010", `10 EOF`}, 173 {"0B111101", `61 EOF`}, 174 {"0b3", `foo.star:1:3: invalid binary literal`}, 175 {"0b1010201", `10 201 EOF`}, 176 {"0b1010.01", `10 1.000000e-02 EOF`}, 177 {"0b0000", `0 EOF`}, 178 // octal 179 {"0o123", `83 EOF`}, 180 {"0o12834", `10 834 EOF`}, 181 {"0o12934", `10 934 EOF`}, 182 {"0o12934.", `10 9.340000e+02 EOF`}, 183 {"0o12934.1", `10 9.341000e+02 EOF`}, 184 {"0o12934e1", `10 9.340000e+03 EOF`}, 185 {"0o123.", `83 . EOF`}, 186 {"0o123.1", `83 1.000000e-01 EOF`}, 187 {"0123", `foo.star:1:5: obsolete form of octal literal; use 0o123`}, 188 {"012834", `foo.star:1:1: invalid int literal`}, 189 {"012934", `foo.star:1:1: invalid int literal`}, 190 {"i = 012934", `foo.star:1:5: invalid int literal`}, 191 // octal escapes in string literals 192 {`"\037"`, `"\x1f" EOF`}, 193 {`"\377"`, `foo.star:1:1: non-ASCII octal escape \377 (use \u00FF for the UTF-8 encoding of U+00FF)`}, 194 {`"\378"`, `"\x1f8" EOF`}, // = '\37' + '8' 195 {`"\400"`, `foo.star:1:1: non-ASCII octal escape \400`}, // unlike Python 2 and 3 196 // hex escapes 197 {`"\x00\x20\x09\x41\x7e\x7f"`, `"\x00 \tA~\x7f" EOF`}, // DEL is non-printable 198 {`"\x80"`, `foo.star:1:1: non-ASCII hex escape`}, 199 {`"\xff"`, `foo.star:1:1: non-ASCII hex escape`}, 200 {`"\xFf"`, `foo.star:1:1: non-ASCII hex escape`}, 201 {`"\xF"`, `foo.star:1:1: truncated escape sequence \xF`}, 202 {`"\x"`, `foo.star:1:1: truncated escape sequence \x`}, 203 {`"\xfg"`, `foo.star:1:1: invalid escape sequence \xfg`}, 204 // Unicode escapes 205 // \uXXXX 206 {`"\u0400"`, `"Ѐ" EOF`}, 207 {`"\u100"`, `foo.star:1:1: truncated escape sequence \u100`}, 208 {`"\u04000"`, `"Ѐ0" EOF`}, // = U+0400 + '0' 209 {`"\u100g"`, `foo.star:1:1: invalid escape sequence \u100g`}, 210 {`"\u4E16"`, `"世" EOF`}, 211 {`"\udc00"`, `foo.star:1:1: invalid Unicode code point U+DC00`}, // surrogate 212 // \UXXXXXXXX 213 {`"\U00000400"`, `"Ѐ" EOF`}, 214 {`"\U0000400"`, `foo.star:1:1: truncated escape sequence \U0000400`}, 215 {`"\U000004000"`, `"Ѐ0" EOF`}, // = U+0400 + '0' 216 {`"\U1000000g"`, `foo.star:1:1: invalid escape sequence \U1000000g`}, 217 {`"\U0010FFFF"`, `"\U0010ffff" EOF`}, 218 {`"\U00110000"`, `foo.star:1:1: code point out of range: \U00110000 (max \U00110000)`}, 219 {`"\U0001F63F"`, `"" EOF`}, 220 {`"\U0000dc00"`, `foo.star:1:1: invalid Unicode code point U+DC00`}, // surrogate 221 222 // backslash escapes 223 // As in Go, a backslash must escape something. 224 // (Python started issuing a deprecation warning in 3.6.) 225 {`"foo\(bar"`, `foo.star:1:1: invalid escape sequence \(`}, 226 {`"\+"`, `foo.star:1:1: invalid escape sequence \+`}, 227 {`"\w"`, `foo.star:1:1: invalid escape sequence \w`}, 228 {`"\""`, `"\"" EOF`}, 229 {`"\'"`, `"'" EOF`}, 230 {`'\w'`, `foo.star:1:1: invalid escape sequence \w`}, 231 {`'\''`, `"'" EOF`}, 232 {`'\"'`, `"\"" EOF`}, 233 {`"""\w"""`, `foo.star:1:1: invalid escape sequence \w`}, 234 {`"""\""""`, `"\"" EOF`}, 235 {`"""\'"""`, `"'" EOF`}, 236 {`'''\w'''`, `foo.star:1:1: invalid escape sequence \w`}, 237 {`'''\''''`, `"'" EOF`}, 238 {`'''\"'''`, `"\"" EOF`}, 239 {`r"\w"`, `"\\w" EOF`}, 240 {`r"\""`, `"\\\"" EOF`}, 241 {`r"\'"`, `"\\'" EOF`}, 242 {`r'\w'`, `"\\w" EOF`}, 243 {`r'\''`, `"\\'" EOF`}, 244 {`r'\"'`, `"\\\"" EOF`}, 245 {`'a\zb'`, `foo.star:1:1: invalid escape sequence \z`}, 246 {`"\o123"`, `foo.star:1:1: invalid escape sequence \o`}, 247 // bytes literals (where they differ from text strings) 248 {`b"AЀ世"`, `b"AЀ世`}, // 1-4 byte encodings, literal 249 {`b"\x41\u0400\u4e16\U0001F63F"`, `b"AЀ世"`}, // same, as escapes 250 {`b"\377\378\x80\xff\xFf"`, `b"\xff\x1f8\x80\xff\xff" EOF`}, // hex/oct escapes allow non-ASCII 251 {`b"\400"`, `foo.star:1:2: invalid escape sequence \400`}, 252 {`b"\udc00"`, `foo.star:1:2: invalid Unicode code point U+DC00`}, // (same as string) 253 // floats starting with octal digits 254 {"012934.", `1.293400e+04 EOF`}, 255 {"012934.1", `1.293410e+04 EOF`}, 256 {"012934e1", `1.293400e+05 EOF`}, 257 {"0123.", `1.230000e+02 EOF`}, 258 {"0123.1", `1.231000e+02 EOF`}, 259 // github.com/google/skylark/issues/16 260 {"x ! 0", "foo.star:1:3: unexpected input character '!'"}, 261 // github.com/google/starlark-go/issues/80 262 {"([{<>}])", "( [ { < > } ] ) EOF"}, 263 {"f();", "f ( ) ; EOF"}, 264 // github.com/google/starlark-go/issues/104 265 {"def f():\n if x:\n pass\n ", `def f ( ) : newline indent if x : newline indent pass newline outdent outdent EOF`}, 266 {`while cond: pass`, "while cond : pass EOF"}, 267 // github.com/google/starlark-go/issues/107 268 {"~= ~= 5", "~ = ~ = 5 EOF"}, 269 {"0in", "0 in EOF"}, 270 {"0or", "foo.star:1:3: invalid octal literal"}, 271 {"6in", "6 in EOF"}, 272 {"6or", "6 or EOF"}, 273 } { 274 got, err := scan(test.input) 275 if err != nil { 276 got = err.(Error).Error() 277 } 278 // Prefix match allows us to truncate errors in expecations. 279 // Success cases all end in EOF. 280 if !strings.HasPrefix(got, test.want) { 281 t.Errorf("scan `%s` = [%s], want [%s]", test.input, got, test.want) 282 } 283 } 284} 285 286// dataFile is the same as starlarktest.DataFile. 287// We make a copy to avoid a dependency cycle. 288var dataFile = func(pkgdir, filename string) string { 289 return filepath.Join(build.Default.GOPATH, "src/go.starlark.net", pkgdir, filename) 290} 291 292func BenchmarkScan(b *testing.B) { 293 filename := dataFile("syntax", "testdata/scan.star") 294 b.StopTimer() 295 data, err := ioutil.ReadFile(filename) 296 if err != nil { 297 b.Fatal(err) 298 } 299 b.StartTimer() 300 301 for i := 0; i < b.N; i++ { 302 sc, err := newScanner(filename, data, false) 303 if err != nil { 304 b.Fatal(err) 305 } 306 var val tokenValue 307 for sc.nextToken(&val) != EOF { 308 } 309 } 310} 311