1package compile 2 3// This file defines functions to read and write a compile.Program to a file. 4// 5// It is the client's responsibility to avoid version skew between the 6// compiler used to produce a file and the interpreter that consumes it. 7// The version number is provided as a constant. 8// Incompatible protocol changes should also increment the version number. 9// 10// Encoding 11// 12// Program: 13// "sky!" [4]byte # magic number 14// str uint32le # offset of <strings> section 15// version varint # must match Version 16// filename string 17// numloads varint 18// loads []Ident 19// numnames varint 20// names []string 21// numconsts varint 22// consts []Constant 23// numglobals varint 24// globals []Ident 25// toplevel Funcode 26// numfuncs varint 27// funcs []Funcode 28// <strings> []byte # concatenation of all referenced strings 29// EOF 30// 31// Funcode: 32// id Ident 33// code []byte 34// pclinetablen varint 35// pclinetab []varint 36// numlocals varint 37// locals []Ident 38// numcells varint 39// cells []int 40// numfreevars varint 41// freevar []Ident 42// maxstack varint 43// numparams varint 44// numkwonlyparams varint 45// hasvarargs varint (0 or 1) 46// haskwargs varint (0 or 1) 47// 48// Ident: 49// filename string 50// line, col varint 51// 52// Constant: # type data 53// type varint # 0=string string 54// data ... # 1=bytes string 55// # 2=int varint 56// # 3=float varint (bits as uint64) 57// # 4=bigint string (decimal ASCII text) 58// 59// The encoding starts with a four-byte magic number. 60// The next four bytes are a little-endian uint32 61// that provides the offset of the string section 62// at the end of the file, which contains the ordered 63// concatenation of all strings referenced by the 64// program. This design permits the decoder to read 65// the first and second parts of the file into different 66// memory allocations: the first (the encoded program) 67// is transient, but the second (the strings) persists 68// for the life of the Program. 69// 70// Within the encoded program, all strings are referred 71// to by their length. As the encoder and decoder process 72// the entire file sequentially, they are in lock step, 73// so the start offset of each string is implicit. 74// 75// Program.Code is represented as a []byte slice to permit 76// modification when breakpoints are set. All other strings 77// are represented as strings. They all (unsafely) share the 78// same backing byte slice. 79// 80// Aside from the str field, all integers are encoded as varints. 81 82import ( 83 "encoding/binary" 84 "fmt" 85 "math" 86 "math/big" 87 debugpkg "runtime/debug" 88 "unsafe" 89 90 "go.starlark.net/syntax" 91) 92 93const magic = "!sky" 94 95// Encode encodes a compiled Starlark program. 96func (prog *Program) Encode() []byte { 97 var e encoder 98 e.p = append(e.p, magic...) 99 e.p = append(e.p, "????"...) // string data offset; filled in later 100 e.int(Version) 101 e.string(prog.Toplevel.Pos.Filename()) 102 e.bindings(prog.Loads) 103 e.int(len(prog.Names)) 104 for _, name := range prog.Names { 105 e.string(name) 106 } 107 e.int(len(prog.Constants)) 108 for _, c := range prog.Constants { 109 switch c := c.(type) { 110 case string: 111 e.int(0) 112 e.string(c) 113 case Bytes: 114 e.int(1) 115 e.string(string(c)) 116 case int64: 117 e.int(2) 118 e.int64(c) 119 case float64: 120 e.int(3) 121 e.uint64(math.Float64bits(c)) 122 case *big.Int: 123 e.int(4) 124 e.string(c.Text(10)) 125 } 126 } 127 e.bindings(prog.Globals) 128 e.function(prog.Toplevel) 129 e.int(len(prog.Functions)) 130 for _, fn := range prog.Functions { 131 e.function(fn) 132 } 133 134 // Patch in the offset of the string data section. 135 binary.LittleEndian.PutUint32(e.p[4:8], uint32(len(e.p))) 136 137 return append(e.p, e.s...) 138} 139 140type encoder struct { 141 p []byte // encoded program 142 s []byte // strings 143 tmp [binary.MaxVarintLen64]byte 144} 145 146func (e *encoder) int(x int) { 147 e.int64(int64(x)) 148} 149 150func (e *encoder) int64(x int64) { 151 n := binary.PutVarint(e.tmp[:], x) 152 e.p = append(e.p, e.tmp[:n]...) 153} 154 155func (e *encoder) uint64(x uint64) { 156 n := binary.PutUvarint(e.tmp[:], x) 157 e.p = append(e.p, e.tmp[:n]...) 158} 159 160func (e *encoder) string(s string) { 161 e.int(len(s)) 162 e.s = append(e.s, s...) 163} 164 165func (e *encoder) bytes(b []byte) { 166 e.int(len(b)) 167 e.s = append(e.s, b...) 168} 169 170func (e *encoder) binding(bind Binding) { 171 e.string(bind.Name) 172 e.int(int(bind.Pos.Line)) 173 e.int(int(bind.Pos.Col)) 174} 175 176func (e *encoder) bindings(binds []Binding) { 177 e.int(len(binds)) 178 for _, bind := range binds { 179 e.binding(bind) 180 } 181} 182 183func (e *encoder) function(fn *Funcode) { 184 e.binding(Binding{fn.Name, fn.Pos}) 185 e.string(fn.Doc) 186 e.bytes(fn.Code) 187 e.int(len(fn.pclinetab)) 188 for _, x := range fn.pclinetab { 189 e.int64(int64(x)) 190 } 191 e.bindings(fn.Locals) 192 e.int(len(fn.Cells)) 193 for _, index := range fn.Cells { 194 e.int(index) 195 } 196 e.bindings(fn.Freevars) 197 e.int(fn.MaxStack) 198 e.int(fn.NumParams) 199 e.int(fn.NumKwonlyParams) 200 e.int(b2i(fn.HasVarargs)) 201 e.int(b2i(fn.HasKwargs)) 202} 203 204func b2i(b bool) int { 205 if b { 206 return 1 207 } else { 208 return 0 209 } 210} 211 212// DecodeProgram decodes a compiled Starlark program from data. 213func DecodeProgram(data []byte) (_ *Program, err error) { 214 if len(data) < len(magic) { 215 return nil, fmt.Errorf("not a compiled module: no magic number") 216 } 217 if got := string(data[:4]); got != magic { 218 return nil, fmt.Errorf("not a compiled module: got magic number %q, want %q", 219 got, magic) 220 } 221 defer func() { 222 if x := recover(); x != nil { 223 debugpkg.PrintStack() 224 err = fmt.Errorf("internal error while decoding program: %v", x) 225 } 226 }() 227 228 offset := binary.LittleEndian.Uint32(data[4:8]) 229 d := decoder{ 230 p: data[8:offset], 231 s: append([]byte(nil), data[offset:]...), // allocate a copy, which will persist 232 } 233 234 if v := d.int(); v != Version { 235 return nil, fmt.Errorf("version mismatch: read %d, want %d", v, Version) 236 } 237 238 filename := d.string() 239 d.filename = &filename 240 241 loads := d.bindings() 242 243 names := make([]string, d.int()) 244 for i := range names { 245 names[i] = d.string() 246 } 247 248 // constants 249 constants := make([]interface{}, d.int()) 250 for i := range constants { 251 var c interface{} 252 switch d.int() { 253 case 0: 254 c = d.string() 255 case 1: 256 c = Bytes(d.string()) 257 case 2: 258 c = d.int64() 259 case 3: 260 c = math.Float64frombits(d.uint64()) 261 case 4: 262 c, _ = new(big.Int).SetString(d.string(), 10) 263 } 264 constants[i] = c 265 } 266 267 globals := d.bindings() 268 toplevel := d.function() 269 funcs := make([]*Funcode, d.int()) 270 for i := range funcs { 271 funcs[i] = d.function() 272 } 273 274 prog := &Program{ 275 Loads: loads, 276 Names: names, 277 Constants: constants, 278 Globals: globals, 279 Functions: funcs, 280 Toplevel: toplevel, 281 } 282 toplevel.Prog = prog 283 for _, f := range funcs { 284 f.Prog = prog 285 } 286 287 if len(d.p)+len(d.s) > 0 { 288 return nil, fmt.Errorf("internal error: unconsumed data during decoding") 289 } 290 291 return prog, nil 292} 293 294type decoder struct { 295 p []byte // encoded program 296 s []byte // strings 297 filename *string // (indirect to avoid keeping decoder live) 298} 299 300func (d *decoder) int() int { 301 return int(d.int64()) 302} 303 304func (d *decoder) int64() int64 { 305 x, len := binary.Varint(d.p[:]) 306 d.p = d.p[len:] 307 return x 308} 309 310func (d *decoder) uint64() uint64 { 311 x, len := binary.Uvarint(d.p[:]) 312 d.p = d.p[len:] 313 return x 314} 315 316func (d *decoder) string() (s string) { 317 if slice := d.bytes(); len(slice) > 0 { 318 // Avoid a memory allocation for each string 319 // by unsafely aliasing slice. 320 type string struct { 321 data *byte 322 len int 323 } 324 ptr := (*string)(unsafe.Pointer(&s)) 325 ptr.data = &slice[0] 326 ptr.len = len(slice) 327 } 328 return s 329} 330 331func (d *decoder) bytes() []byte { 332 len := d.int() 333 r := d.s[:len:len] 334 d.s = d.s[len:] 335 return r 336} 337 338func (d *decoder) binding() Binding { 339 name := d.string() 340 line := int32(d.int()) 341 col := int32(d.int()) 342 return Binding{Name: name, Pos: syntax.MakePosition(d.filename, line, col)} 343} 344 345func (d *decoder) bindings() []Binding { 346 bindings := make([]Binding, d.int()) 347 for i := range bindings { 348 bindings[i] = d.binding() 349 } 350 return bindings 351} 352 353func (d *decoder) ints() []int { 354 ints := make([]int, d.int()) 355 for i := range ints { 356 ints[i] = d.int() 357 } 358 return ints 359} 360 361func (d *decoder) bool() bool { return d.int() != 0 } 362 363func (d *decoder) function() *Funcode { 364 id := d.binding() 365 doc := d.string() 366 code := d.bytes() 367 pclinetab := make([]uint16, d.int()) 368 for i := range pclinetab { 369 pclinetab[i] = uint16(d.int()) 370 } 371 locals := d.bindings() 372 cells := d.ints() 373 freevars := d.bindings() 374 maxStack := d.int() 375 numParams := d.int() 376 numKwonlyParams := d.int() 377 hasVarargs := d.int() != 0 378 hasKwargs := d.int() != 0 379 return &Funcode{ 380 // Prog is filled in later. 381 Pos: id.Pos, 382 Name: id.Name, 383 Doc: doc, 384 Code: code, 385 pclinetab: pclinetab, 386 Locals: locals, 387 Cells: cells, 388 Freevars: freevars, 389 MaxStack: maxStack, 390 NumParams: numParams, 391 NumKwonlyParams: numKwonlyParams, 392 HasVarargs: hasVarargs, 393 HasKwargs: hasKwargs, 394 } 395} 396