• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2020 The Bazel Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package proto defines a module of utilities for constructing and
6// accessing protocol messages within Starlark programs.
7//
8// THIS PACKAGE IS EXPERIMENTAL AND ITS INTERFACE MAY CHANGE.
9//
10// This package defines several types of Starlark value:
11//
12//      Message                 -- a protocol message
13//      RepeatedField           -- a repeated field of a message, like a list
14//
15//      FileDescriptor          -- information about a .proto file
16//      FieldDescriptor         -- information about a message field (or extension field)
17//      MessageDescriptor       -- information about the type of a message
18//      EnumDescriptor          -- information about an enumerated type
19//      EnumValueDescriptor     -- a value of an enumerated type
20//
21// A Message value is a wrapper around a protocol message instance.
22// Starlark programs may access and update Messages using dot notation:
23//
24//      x = msg.field
25//      msg.field = x + 1
26//      msg.field += 1
27//
28// Assignments to message fields perform dynamic checks on the type and
29// range of the value to ensure that the message is at all times valid.
30//
31// The value of a repeated field of a message is represented by the
32// list-like data type, RepeatedField.  Its elements may be accessed,
33// iterated, and updated in the usual ways.  As with assignments to
34// message fields, an assignment to an element of a RepeatedField
35// performs a dynamic check to ensure that the RepeatedField holds
36// only elements of the correct type.
37//
38//      type(msg.uint32s)       # "proto.repeated<uint32>"
39//      msg.uint32s[0] = 1
40//      msg.uint32s[0] = -1     # error: invalid uint32: -1
41//
42// Any iterable may be assigned to a repeated field of a message.  If
43// the iterable is itself a value of type RepeatedField, the message
44// field holds a reference to it.
45//
46//      msg2.uint32s = msg.uint32s      # both messages share one RepeatedField
47//      msg.uint32s[0] = 123
48//      print(msg2.uint32s[0])          # "123"
49//
50// The RepeatedFields' element types must match.
51// It is not enough for the values to be merely valid:
52//
53//      msg.uint32s = [1, 2, 3]         # makes a copy
54//      msg.uint64s = msg.uint32s       # error: repeated field has wrong type
55//      msg.uint64s = list(msg.uint32s) # ok; makes a copy
56//
57// For all other iterables, a new RepeatedField is constructed from the
58// elements of the iterable.
59//
60//      msg.uint32s = [1, 2, 3]
61//      print(type(msg.uint32s))       # "proto.repeated<uint32>"
62//
63//
64// To construct a Message from encoded binary or text data, call
65// Unmarshal or UnmarshalText.  These two functions are exposed to
66// Starlark programs as proto.unmarshal{,_text}.
67//
68// To construct a Message from an existing Go proto.Message instance,
69// you must first encode the Go message to binary, then decode it using
70// Unmarshal. This ensures that messages visible to Starlark are
71// encapsulated and cannot be mutated once their Starlark wrapper values
72// are frozen.
73//
74// TODO(adonovan): document descriptors, enums, message instantiation.
75//
76// See proto_test.go for an example of how to use the 'proto'
77// module in an application that embeds Starlark.
78//
79package proto
80
81// TODO(adonovan): Go and Starlark API improvements:
82// - Make Message and RepeatedField comparable.
83//   (NOTE: proto.Equal works only with generated message types.)
84// - Support maps, oneof, any. But not messageset if we can avoid it.
85// - Support "well-known types".
86// - Defend against cycles in object graph.
87// - Test missing required fields in marshalling.
88
89import (
90	"bytes"
91	"fmt"
92	"sort"
93	"strings"
94	"unsafe"
95	_ "unsafe" // for linkname hack
96
97	"google.golang.org/protobuf/encoding/prototext"
98	"google.golang.org/protobuf/proto"
99	"google.golang.org/protobuf/reflect/protoreflect"
100	"google.golang.org/protobuf/reflect/protoregistry"
101	"google.golang.org/protobuf/types/dynamicpb"
102
103	"go.starlark.net/starlark"
104	"go.starlark.net/starlarkstruct"
105	"go.starlark.net/syntax"
106)
107
108// SetPool associates with the specified Starlark thread the
109// descriptor pool used to find descriptors for .proto files and to
110// instantiate messages from descriptors.  Clients must call SetPool
111// for a Starlark thread to use this package.
112//
113// For example:
114//	SetPool(thread, protoregistry.GlobalFiles)
115//
116func SetPool(thread *starlark.Thread, pool DescriptorPool) {
117	thread.SetLocal(contextKey, pool)
118}
119
120// Pool returns the descriptor pool previously associated with this thread.
121func Pool(thread *starlark.Thread) DescriptorPool {
122	pool, _ := thread.Local(contextKey).(DescriptorPool)
123	return pool
124}
125
126const contextKey = "proto.DescriptorPool"
127
128// A DescriptorPool loads FileDescriptors by path name or package name,
129// possibly on demand.
130//
131// It is a superinterface of protodesc.Resolver, so any Resolver
132// implementation is a valid pool. For example.
133// protoregistry.GlobalFiles, which loads FileDescriptors from the
134// compressed binary information in all the *.pb.go files linked into
135// the process; and protodesc.NewFiles, which holds a set of
136// FileDescriptorSet messages. See star2proto for example usage.
137type DescriptorPool interface {
138	FindFileByPath(string) (protoreflect.FileDescriptor, error)
139}
140
141var Module = &starlarkstruct.Module{
142	Name: "proto",
143	Members: starlark.StringDict{
144		"file":           starlark.NewBuiltin("proto.file", file),
145		"has":            starlark.NewBuiltin("proto.has", has),
146		"marshal":        starlark.NewBuiltin("proto.marshal", marshal),
147		"marshal_text":   starlark.NewBuiltin("proto.marshal_text", marshal),
148		"set_field":      starlark.NewBuiltin("proto.set_field", setFieldStarlark),
149		"get_field":      starlark.NewBuiltin("proto.get_field", getFieldStarlark),
150		"unmarshal":      starlark.NewBuiltin("proto.unmarshal", unmarshal),
151		"unmarshal_text": starlark.NewBuiltin("proto.unmarshal_text", unmarshal_text),
152
153		// TODO(adonovan):
154		// - merge(msg, msg) -> msg
155		// - equals(msg, msg) -> bool
156		// - diff(msg, msg) -> string
157		// - clone(msg) -> msg
158	},
159}
160
161// file(filename) loads the FileDescriptor of the given name, or the
162// first if the pool contains more than one.
163//
164// It's unfortunate that renaming a .proto file in effect breaks the
165// interface it presents to Starlark. Ideally one would import
166// descriptors by package name, but there may be many FileDescriptors
167// for the same package name, and there is no "package descriptor".
168// (Technically a pool may also have many FileDescriptors with the same
169// file name, but this can't happen with a single consistent snapshot.)
170func file(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
171	var filename string
172	if err := starlark.UnpackPositionalArgs(fn.Name(), args, kwargs, 1, &filename); err != nil {
173		return nil, err
174	}
175
176	pool := Pool(thread)
177	if pool == nil {
178		return nil, fmt.Errorf("internal error: SetPool was not called")
179	}
180
181	desc, err := pool.FindFileByPath(filename)
182	if err != nil {
183		return nil, err
184	}
185
186	return FileDescriptor{Desc: desc}, nil
187}
188
189// has(msg, field) reports whether the specified field of the message is present.
190// A field may be specified by name (string) or FieldDescriptor.
191// has reports an error if the message type has no such field.
192func has(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
193	var x, field starlark.Value
194	if err := starlark.UnpackPositionalArgs(fn.Name(), args, kwargs, 2, &x, &field); err != nil {
195		return nil, err
196	}
197	msg, ok := x.(*Message)
198	if !ok {
199		return nil, fmt.Errorf("%s: got %s, want proto.Message", fn.Name(), x.Type())
200	}
201
202	var fdesc protoreflect.FieldDescriptor
203	switch field := field.(type) {
204	case starlark.String:
205		var err error
206		fdesc, err = fieldDesc(msg.desc(), string(field))
207		if err != nil {
208			return nil, err
209		}
210
211	case FieldDescriptor:
212		if field.Desc.ContainingMessage() != msg.desc() {
213			return nil, fmt.Errorf("%s: %v does not have field %v", fn.Name(), msg.desc().FullName(), field)
214		}
215		fdesc = field.Desc
216
217	default:
218		return nil, fmt.Errorf("%s: for field argument, got %s, want string or proto.FieldDescriptor", fn.Name(), field.Type())
219	}
220
221	return starlark.Bool(msg.msg.Has(fdesc)), nil
222}
223
224// marshal{,_text}(msg) encodes a Message value to binary or text form.
225func marshal(_ *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
226	var m *Message
227	if err := starlark.UnpackPositionalArgs(fn.Name(), args, kwargs, 1, &m); err != nil {
228		return nil, err
229	}
230	if fn.Name() == "proto.marshal" {
231		data, err := proto.Marshal(m.Message())
232		if err != nil {
233			return nil, fmt.Errorf("%s: %v", fn.Name(), err)
234		}
235		return starlark.Bytes(data), nil
236	} else {
237		text, err := prototext.MarshalOptions{Indent: "  "}.Marshal(m.Message())
238		if err != nil {
239			return nil, fmt.Errorf("%s: %v", fn.Name(), err)
240		}
241		return starlark.String(text), nil
242	}
243}
244
245// unmarshal(msg) decodes a binary protocol message to a Message.
246func unmarshal(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
247	var desc MessageDescriptor
248	var data starlark.Bytes
249	if err := starlark.UnpackPositionalArgs(fn.Name(), args, kwargs, 2, &desc, &data); err != nil {
250		return nil, err
251	}
252	return unmarshalData(desc.Desc, []byte(data), true)
253}
254
255// unmarshal_text(msg) decodes a text protocol message to a Message.
256func unmarshal_text(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
257	var desc MessageDescriptor
258	var data string
259	if err := starlark.UnpackPositionalArgs(fn.Name(), args, kwargs, 2, &desc, &data); err != nil {
260		return nil, err
261	}
262	return unmarshalData(desc.Desc, []byte(data), false)
263}
264
265// set_field(msg, field, value) updates the value of a field.
266// It is typically used for extensions, which cannot be updated using msg.field = v notation.
267func setFieldStarlark(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
268	// TODO(adonovan): allow field to be specified by name (for non-extension fields), like has?
269	var m *Message
270	var field FieldDescriptor
271	var v starlark.Value
272	if err := starlark.UnpackPositionalArgs(fn.Name(), args, kwargs, 3, &m, &field, &v); err != nil {
273		return nil, err
274	}
275
276	if *m.frozen {
277		return nil, fmt.Errorf("%s: cannot set %v field of frozen %v message", fn.Name(), field, m.desc().FullName())
278	}
279
280	if field.Desc.ContainingMessage() != m.desc() {
281		return nil, fmt.Errorf("%s: %v does not have field %v", fn.Name(), m.desc().FullName(), field)
282	}
283
284	return starlark.None, setField(m.msg, field.Desc, v)
285}
286
287// get_field(msg, field) retrieves the value of a field.
288// It is typically used for extension fields, which cannot be accessed using msg.field notation.
289func getFieldStarlark(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
290	// TODO(adonovan): allow field to be specified by name (for non-extension fields), like has?
291	var msg *Message
292	var field FieldDescriptor
293	if err := starlark.UnpackPositionalArgs(fn.Name(), args, kwargs, 2, &msg, &field); err != nil {
294		return nil, err
295	}
296
297	if field.Desc.ContainingMessage() != msg.desc() {
298		return nil, fmt.Errorf("%s: %v does not have field %v", fn.Name(), msg.desc().FullName(), field)
299	}
300
301	return msg.getField(field.Desc), nil
302}
303
304// The Call method implements the starlark.Callable interface.
305// When a message descriptor is called, it returns a new instance of the
306// protocol message it describes.
307//
308//      Message(msg)            -- return a shallow copy of an existing message
309//      Message(k=v, ...)       -- return a new message with the specified fields
310//      Message(dict(...))      -- return a new message with the specified fields
311//
312func (d MessageDescriptor) CallInternal(thread *starlark.Thread, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
313	dest := &Message{
314		msg:    newMessage(d.Desc),
315		frozen: new(bool),
316	}
317
318	// Single positional argument?
319	if len(args) > 0 {
320		if len(kwargs) > 0 {
321			return nil, fmt.Errorf("%s: got both positional and named arguments", d.Desc.Name())
322		}
323		if len(args) > 1 {
324			return nil, fmt.Errorf("%s: got %d positional arguments, want at most 1", d.Desc.Name(), len(args))
325		}
326
327		// Keep consistent with MessageKind case of toProto.
328		// (support the same argument types).
329		switch src := args[0].(type) {
330		case *Message:
331			if dest.desc() != src.desc() {
332				return nil, fmt.Errorf("%s: got message of type %s, want type %s", d.Desc.Name(), src.desc().FullName(), dest.desc().FullName())
333			}
334
335			// Make shallow copy of message.
336			// TODO(adonovan): How does frozen work if we have shallow copy?
337			src.msg.Range(func(fdesc protoreflect.FieldDescriptor, v protoreflect.Value) bool {
338				dest.msg.Set(fdesc, v)
339				return true
340			})
341			return dest, nil
342
343		case *starlark.Dict:
344			kwargs = src.Items()
345			// fall through
346
347		default:
348			return nil, fmt.Errorf("%s: got %s, want dict or message", d.Desc.Name(), src.Type())
349		}
350	}
351
352	// Convert named arguments to field values.
353	err := setFields(dest.msg, kwargs)
354	return dest, err
355}
356
357// setFields updates msg as if by msg.name=value for each (name, value) in items.
358func setFields(msg protoreflect.Message, items []starlark.Tuple) error {
359	for _, item := range items {
360		name, ok := starlark.AsString(item[0])
361		if !ok {
362			return fmt.Errorf("got %s, want string", item[0].Type())
363		}
364		fdesc, err := fieldDesc(msg.Descriptor(), name)
365		if err != nil {
366			return err
367		}
368		if err := setField(msg, fdesc, item[1]); err != nil {
369			return err
370		}
371	}
372	return nil
373}
374
375// setField validates a Starlark field value, converts it to canonical form,
376// and assigns to the field of msg.  If value is None, the field is unset.
377func setField(msg protoreflect.Message, fdesc protoreflect.FieldDescriptor, value starlark.Value) error {
378	// None unsets a field.
379	if value == starlark.None {
380		msg.Clear(fdesc)
381		return nil
382	}
383
384	// Assigning to a repeated field must make a copy,
385	// because the fields.Set doesn't specify whether
386	// it aliases the list or not, so we cannot assume.
387	//
388	// This is potentially surprising as
389	//  x = []; msg.x = x; y = msg.x
390	// causes x and y not to alias.
391	if fdesc.IsList() {
392		iter := starlark.Iterate(value)
393		if iter == nil {
394			return fmt.Errorf("got %s for .%s field, want iterable", value.Type(), fdesc.Name())
395		}
396		defer iter.Done()
397
398		// TODO(adonovan): handle maps
399		list := msg.Mutable(fdesc).List()
400		var x starlark.Value
401		for i := 0; iter.Next(&x); i++ {
402			v, err := toProto(fdesc, x)
403			if err != nil {
404				return fmt.Errorf("index %d: %v", i, err)
405			}
406			list.Append(v)
407		}
408		return nil
409	}
410
411	v, err := toProto(fdesc, value)
412	if err != nil {
413		return fmt.Errorf("in field %s: %v", fdesc.Name(), err)
414	}
415
416	if fdesc.IsExtension() {
417		// The protoreflect.Message.NewField method must be able
418		// to return a new instance of the field type. Without
419		// having the Go type information available for extensions,
420		// the implementation of NewField won't know what to do.
421		//
422		// Thus we must augment the FieldDescriptor to one that
423		// additional holds Go representation type information
424		// (based in this case on dynamicpb).
425		fdesc = dynamicpb.NewExtensionType(fdesc).TypeDescriptor()
426		_ = fdesc.(protoreflect.ExtensionTypeDescriptor)
427	}
428
429	msg.Set(fdesc, v)
430	return nil
431}
432
433// toProto converts a Starlark value for a message field into protoreflect form.
434func toProto(fdesc protoreflect.FieldDescriptor, v starlark.Value) (protoreflect.Value, error) {
435	switch fdesc.Kind() {
436	case protoreflect.BoolKind:
437		// To avoid mistakes, we require v be exactly a bool.
438		if v, ok := v.(starlark.Bool); ok {
439			return protoreflect.ValueOfBool(bool(v)), nil
440		}
441
442	case protoreflect.Fixed32Kind,
443		protoreflect.Uint32Kind:
444		// uint32
445		if i, ok := v.(starlark.Int); ok {
446			if u, ok := i.Uint64(); ok && uint64(uint32(u)) == u {
447				return protoreflect.ValueOfUint32(uint32(u)), nil
448			}
449			return noValue, fmt.Errorf("invalid %s: %v", typeString(fdesc), i)
450		}
451
452	case protoreflect.Int32Kind,
453		protoreflect.Sfixed32Kind,
454		protoreflect.Sint32Kind:
455		// int32
456		if i, ok := v.(starlark.Int); ok {
457			if i, ok := i.Int64(); ok && int64(int32(i)) == i {
458				return protoreflect.ValueOfInt32(int32(i)), nil
459			}
460			return noValue, fmt.Errorf("invalid %s: %v", typeString(fdesc), i)
461		}
462
463	case protoreflect.Uint64Kind,
464		protoreflect.Fixed64Kind:
465		// uint64
466		if i, ok := v.(starlark.Int); ok {
467			if u, ok := i.Uint64(); ok {
468				return protoreflect.ValueOfUint64(u), nil
469			}
470			return noValue, fmt.Errorf("invalid %s: %v", typeString(fdesc), i)
471		}
472
473	case protoreflect.Int64Kind,
474		protoreflect.Sfixed64Kind,
475		protoreflect.Sint64Kind:
476		// int64
477		if i, ok := v.(starlark.Int); ok {
478			if i, ok := i.Int64(); ok {
479				return protoreflect.ValueOfInt64(i), nil
480			}
481			return noValue, fmt.Errorf("invalid %s: %v", typeString(fdesc), i)
482		}
483
484	case protoreflect.StringKind:
485		if s, ok := starlark.AsString(v); ok {
486			return protoreflect.ValueOfString(s), nil
487		} else if b, ok := v.(starlark.Bytes); ok {
488			// TODO(adonovan): allow bytes for string? Not friendly to a Java port.
489			return protoreflect.ValueOfBytes([]byte(b)), nil
490		}
491
492	case protoreflect.BytesKind:
493		if s, ok := starlark.AsString(v); ok {
494			// TODO(adonovan): don't allow string for bytes: it's hostile to a Java port.
495			// Instead provide b"..." literals in the core
496			// and a bytes(str) conversion.
497			return protoreflect.ValueOfBytes([]byte(s)), nil
498		} else if b, ok := v.(starlark.Bytes); ok {
499			return protoreflect.ValueOfBytes([]byte(b)), nil
500		}
501
502	case protoreflect.DoubleKind:
503		switch v := v.(type) {
504		case starlark.Float:
505			return protoreflect.ValueOfFloat64(float64(v)), nil
506		case starlark.Int:
507			return protoreflect.ValueOfFloat64(float64(v.Float())), nil
508		}
509
510	case protoreflect.FloatKind:
511		switch v := v.(type) {
512		case starlark.Float:
513			return protoreflect.ValueOfFloat32(float32(v)), nil
514		case starlark.Int:
515			return protoreflect.ValueOfFloat32(float32(v.Float())), nil
516		}
517
518	case protoreflect.GroupKind,
519		protoreflect.MessageKind:
520		// Keep consistent with MessageDescriptor.CallInternal!
521		desc := fdesc.Message()
522		switch v := v.(type) {
523		case *Message:
524			if desc != v.desc() {
525				return noValue, fmt.Errorf("got %s, want %s", v.desc().FullName(), desc.FullName())
526			}
527			return protoreflect.ValueOfMessage(v.msg), nil // alias it directly
528
529		case *starlark.Dict:
530			dest := newMessage(desc)
531			err := setFields(dest, v.Items())
532			return protoreflect.ValueOfMessage(dest), err
533		}
534
535	case protoreflect.EnumKind:
536		enumval, err := enumValueOf(fdesc.Enum(), v)
537		if err != nil {
538			return noValue, err
539		}
540		return protoreflect.ValueOfEnum(enumval.Number()), nil
541	}
542
543	return noValue, fmt.Errorf("got %s, want %s", v.Type(), typeString(fdesc))
544}
545
546var noValue protoreflect.Value
547
548// toStarlark returns a Starlark value for the value x of a message field.
549// If the result is a repeated field or message,
550// the result aliases the original and has the specified "frozenness" flag.
551//
552// fdesc is only used for the type, not other properties of the field.
553func toStarlark(typ protoreflect.FieldDescriptor, x protoreflect.Value, frozen *bool) starlark.Value {
554	if list, ok := x.Interface().(protoreflect.List); ok {
555		return &RepeatedField{
556			typ:    typ,
557			list:   list,
558			frozen: frozen,
559		}
560	}
561	return toStarlark1(typ, x, frozen)
562}
563
564// toStarlark1, for scalar (non-repeated) values only.
565func toStarlark1(typ protoreflect.FieldDescriptor, x protoreflect.Value, frozen *bool) starlark.Value {
566
567	switch typ.Kind() {
568	case protoreflect.BoolKind:
569		return starlark.Bool(x.Bool())
570
571	case protoreflect.Fixed32Kind,
572		protoreflect.Uint32Kind,
573		protoreflect.Uint64Kind,
574		protoreflect.Fixed64Kind:
575		return starlark.MakeUint64(x.Uint())
576
577	case protoreflect.Int32Kind,
578		protoreflect.Sfixed32Kind,
579		protoreflect.Sint32Kind,
580		protoreflect.Int64Kind,
581		protoreflect.Sfixed64Kind,
582		protoreflect.Sint64Kind:
583		return starlark.MakeInt64(x.Int())
584
585	case protoreflect.StringKind:
586		return starlark.String(x.String())
587
588	case protoreflect.BytesKind:
589		return starlark.Bytes(x.Bytes())
590
591	case protoreflect.DoubleKind, protoreflect.FloatKind:
592		return starlark.Float(x.Float())
593
594	case protoreflect.GroupKind, protoreflect.MessageKind:
595		return &Message{
596			msg:    x.Message(),
597			frozen: frozen,
598		}
599
600	case protoreflect.EnumKind:
601		// Invariant: only EnumValueDescriptor may appear here.
602		enumval := typ.Enum().Values().ByNumber(x.Enum())
603		return EnumValueDescriptor{Desc: enumval}
604	}
605
606	panic(fmt.Sprintf("got %T, want %s", x, typeString(typ)))
607}
608
609// A Message is a Starlark value that wraps a protocol message.
610//
611// Two Messages are equivalent if and only if they are identical.
612//
613// When a Message value becomes frozen, a Starlark program may
614// not modify the underlying protocol message, nor any Message
615// or RepeatedField wrapper values derived from it.
616type Message struct {
617	msg    protoreflect.Message // any concrete type is allowed
618	frozen *bool                // shared by a group of related Message/RepeatedField wrappers
619}
620
621// Message returns the wrapped message.
622func (m *Message) Message() protoreflect.ProtoMessage { return m.msg.Interface() }
623
624func (m *Message) desc() protoreflect.MessageDescriptor { return m.msg.Descriptor() }
625
626var _ starlark.HasSetField = (*Message)(nil)
627
628// Unmarshal parses the data as a binary protocol message of the specified type,
629// and returns it as a new Starlark message value.
630func Unmarshal(desc protoreflect.MessageDescriptor, data []byte) (*Message, error) {
631	return unmarshalData(desc, data, true)
632}
633
634// UnmarshalText parses the data as a text protocol message of the specified type,
635// and returns it as a new Starlark message value.
636func UnmarshalText(desc protoreflect.MessageDescriptor, data []byte) (*Message, error) {
637	return unmarshalData(desc, data, false)
638}
639
640// unmarshalData constructs a Starlark proto.Message by decoding binary or text data.
641func unmarshalData(desc protoreflect.MessageDescriptor, data []byte, binary bool) (*Message, error) {
642	m := &Message{
643		msg:    newMessage(desc),
644		frozen: new(bool),
645	}
646	var err error
647	if binary {
648		err = proto.Unmarshal(data, m.Message())
649	} else {
650		err = prototext.Unmarshal(data, m.Message())
651	}
652	if err != nil {
653		return nil, fmt.Errorf("unmarshalling %s failed: %v", desc.FullName(), err)
654	}
655	return m, nil
656}
657
658func (m *Message) String() string {
659	buf := new(bytes.Buffer)
660	buf.WriteString(string(m.desc().FullName()))
661	buf.WriteByte('(')
662
663	// Sort fields (including extensions) by number.
664	var fields []protoreflect.FieldDescriptor
665	m.msg.Range(func(fdesc protoreflect.FieldDescriptor, v protoreflect.Value) bool {
666		// TODO(adonovan): opt: save v in table too.
667		fields = append(fields, fdesc)
668		return true
669	})
670	sort.Slice(fields, func(i, j int) bool {
671		return fields[i].Number() < fields[j].Number()
672	})
673
674	for i, fdesc := range fields {
675		if i > 0 {
676			buf.WriteString(", ")
677		}
678		if fdesc.IsExtension() {
679			// extension field: "[pkg.Msg.field]"
680			buf.WriteString(string(fdesc.FullName()))
681		} else if fdesc.Kind() != protoreflect.GroupKind {
682			// ordinary field: "field"
683			buf.WriteString(string(fdesc.Name()))
684		} else {
685			// group field: "MyGroup"
686			//
687			// The name of a group is the mangled version,
688			// while the true name of a group is the message itself.
689			// For example, for a group called "MyGroup",
690			// the inlined message will be called "MyGroup",
691			// but the field will be named "mygroup".
692			// This rule complicates name logic everywhere.
693			buf.WriteString(string(fdesc.Message().Name()))
694		}
695		buf.WriteString("=")
696		writeString(buf, fdesc, m.msg.Get(fdesc))
697	}
698	buf.WriteByte(')')
699	return buf.String()
700}
701
702func (m *Message) Type() string                { return "proto.Message" }
703func (m *Message) Truth() starlark.Bool        { return true }
704func (m *Message) Freeze()                     { *m.frozen = true }
705func (m *Message) Hash() (h uint32, err error) { return uint32(uintptr(unsafe.Pointer(m))), nil } // identity hash
706
707// Attr returns the value of this message's field of the specified name.
708// Extension fields are not accessible this way as their names are not unique.
709func (m *Message) Attr(name string) (starlark.Value, error) {
710	// The name 'descriptor' is already effectively reserved
711	// by the Go API for generated message types.
712	if name == "descriptor" {
713		return MessageDescriptor{Desc: m.desc()}, nil
714	}
715
716	fdesc, err := fieldDesc(m.desc(), name)
717	if err != nil {
718		return nil, err
719	}
720	return m.getField(fdesc), nil
721}
722
723func (m *Message) getField(fdesc protoreflect.FieldDescriptor) starlark.Value {
724	if fdesc.IsExtension() {
725		// See explanation in setField.
726		fdesc = dynamicpb.NewExtensionType(fdesc).TypeDescriptor()
727	}
728
729	if m.msg.Has(fdesc) {
730		return toStarlark(fdesc, m.msg.Get(fdesc), m.frozen)
731	}
732	return defaultValue(fdesc)
733}
734
// detrandDisable is bound by the linkname directive below to the
// Disable function of the protobuf module's internal detrand package.

//go:linkname detrandDisable google.golang.org/protobuf/internal/detrand.Disable
func detrandDisable()

func init() {
	// Nasty hack to disable the randomization of output that occurs in textproto.
	// TODO(adonovan): once go/proto-proposals/canonical-serialization
	// is resolved the need for the hack should go away. See also go/go-proto-stability.
	// If the proposal is rejected, we will need our own text-mode formatter.
	detrandDisable()
}
745
746// defaultValue returns the (frozen) default Starlark value for a given message field.
747func defaultValue(fdesc protoreflect.FieldDescriptor) starlark.Value {
748	frozen := true
749
750	// The default value of a repeated field is an empty list.
751	if fdesc.IsList() {
752		return &RepeatedField{typ: fdesc, list: emptyList{}, frozen: &frozen}
753	}
754
755	// The zero value for a message type is an empty instance of that message.
756	if desc := fdesc.Message(); desc != nil {
757		return &Message{msg: newMessage(desc), frozen: &frozen}
758	}
759
760	// Convert the default value, which is not necessarily zero, to Starlark.
761	// The frozenness isn't used as the remaining types are all immutable.
762	return toStarlark1(fdesc, fdesc.Default(), &frozen)
763}
764
765// A frozen empty implementation of protoreflect.List.
766type emptyList struct{ protoreflect.List }
767
768func (emptyList) Len() int { return 0 }
769
770// newMessage returns a new empty instance of the message type described by desc.
771func newMessage(desc protoreflect.MessageDescriptor) protoreflect.Message {
772	// If desc refers to a built-in message,
773	// use the more efficient generated type descriptor (a Go struct).
774	mt, err := protoregistry.GlobalTypes.FindMessageByName(desc.FullName())
775	if err == nil && mt.Descriptor() == desc {
776		return mt.New()
777	}
778
779	// For all others, use the generic dynamicpb representation.
780	return dynamicpb.NewMessage(desc).ProtoReflect()
781}
782
783// fieldDesc returns the descriptor for the named non-extension field.
784func fieldDesc(desc protoreflect.MessageDescriptor, name string) (protoreflect.FieldDescriptor, error) {
785	if fdesc := desc.Fields().ByName(protoreflect.Name(name)); fdesc != nil {
786		return fdesc, nil
787	}
788	return nil, starlark.NoSuchAttrError(fmt.Sprintf("%s has no .%s field", desc.FullName(), name))
789}
790
791// SetField updates a non-extension field of this message.
792// It implements the HasSetField interface.
793func (m *Message) SetField(name string, v starlark.Value) error {
794	fdesc, err := fieldDesc(m.desc(), name)
795	if err != nil {
796		return err
797	}
798	if *m.frozen {
799		return fmt.Errorf("cannot set .%s field of frozen %s message",
800			name, m.desc().FullName())
801	}
802	return setField(m.msg, fdesc, v)
803}
804
805// AttrNames returns the set of field names defined for this message.
806// It satisfies the starlark.HasAttrs interface.
807func (m *Message) AttrNames() []string {
808	seen := make(map[string]bool)
809
810	// standard fields
811	seen["descriptor"] = true
812
813	// non-extension fields
814	fields := m.desc().Fields()
815	for i := 0; i < fields.Len(); i++ {
816		fdesc := fields.Get(i)
817		if !fdesc.IsExtension() {
818			seen[string(fdesc.Name())] = true
819		}
820	}
821
822	names := make([]string, 0, len(seen))
823	for name := range seen {
824		names = append(names, name)
825	}
826	sort.Strings(names)
827	return names
828}
829
830// typeString returns a user-friendly description of the type of a
831// protocol message field (or element of a repeated field).
832func typeString(fdesc protoreflect.FieldDescriptor) string {
833	switch fdesc.Kind() {
834	case protoreflect.GroupKind,
835		protoreflect.MessageKind:
836		return string(fdesc.Message().FullName())
837
838	case protoreflect.EnumKind:
839		return string(fdesc.Enum().FullName())
840
841	default:
842		return strings.ToLower(strings.TrimPrefix(fdesc.Kind().String(), "TYPE_"))
843	}
844}
845
846// A RepeatedField is a Starlark value that wraps a repeated field of a protocol message.
847//
848// An assignment to an element of a repeated field incurs a dynamic
849// check that the new value has (or can be converted to) the correct
850// type using conversions similar to those done when calling a
851// MessageDescriptor to construct a message.
852//
853// TODO(adonovan): make RepeatedField implement starlark.Comparable.
854// Should the comparison include type, or be defined on the elements alone?
855type RepeatedField struct {
856	typ       protoreflect.FieldDescriptor // only for type information, not field name
857	list      protoreflect.List
858	frozen    *bool
859	itercount int
860}
861
862var _ starlark.HasSetIndex = (*RepeatedField)(nil)
863
864func (rf *RepeatedField) Type() string {
865	return fmt.Sprintf("proto.repeated<%s>", typeString(rf.typ))
866}
867
868func (rf *RepeatedField) SetIndex(i int, v starlark.Value) error {
869	if *rf.frozen {
870		return fmt.Errorf("cannot insert value in frozen repeated field")
871	}
872	if rf.itercount > 0 {
873		return fmt.Errorf("cannot insert value in repeated field with active iterators")
874	}
875	x, err := toProto(rf.typ, v)
876	if err != nil {
877		// The repeated field value cannot know which field it
878		// belongs to---it might be shared by several of the
879		// same type---so the error message is suboptimal.
880		return fmt.Errorf("setting element of repeated field: %v", err)
881	}
882	rf.list.Set(i, x)
883	return nil
884}
885
886func (rf *RepeatedField) Freeze()               { *rf.frozen = true }
887func (rf *RepeatedField) Hash() (uint32, error) { return 0, fmt.Errorf("unhashable: %s", rf.Type()) }
888func (rf *RepeatedField) Index(i int) starlark.Value {
889	return toStarlark1(rf.typ, rf.list.Get(i), rf.frozen)
890}
891func (rf *RepeatedField) Iterate() starlark.Iterator {
892	if !*rf.frozen {
893		rf.itercount++
894	}
895	return &repeatedFieldIterator{rf, 0}
896}
897func (rf *RepeatedField) Len() int { return rf.list.Len() }
898func (rf *RepeatedField) String() string {
899	// We use list [...] notation even though it not exactly a list.
900	buf := new(bytes.Buffer)
901	buf.WriteByte('[')
902	for i := 0; i < rf.list.Len(); i++ {
903		if i > 0 {
904			buf.WriteString(", ")
905		}
906		writeString(buf, rf.typ, rf.list.Get(i))
907	}
908	buf.WriteByte(']')
909	return buf.String()
910}
911func (rf *RepeatedField) Truth() starlark.Bool { return rf.list.Len() > 0 }
912
// A repeatedFieldIterator iterates over the elements of a
// RepeatedField in index order.
type repeatedFieldIterator struct {
	rf *RepeatedField // the field being iterated
	i  int            // index of the next element to yield
}
917
918func (it *repeatedFieldIterator) Next(p *starlark.Value) bool {
919	if it.i < it.rf.Len() {
920		*p = it.rf.Index(it.i)
921		it.i++
922		return true
923	}
924	return false
925}
926
927func (it *repeatedFieldIterator) Done() {
928	if !*it.rf.frozen {
929		it.rf.itercount--
930	}
931}
932
933func writeString(buf *bytes.Buffer, fdesc protoreflect.FieldDescriptor, v protoreflect.Value) {
934	// TODO(adonovan): opt: don't materialize the Starlark value.
935	// TODO(adonovan): skip message type when printing submessages? {...}?
936	var frozen bool // ignored
937	x := toStarlark(fdesc, v, &frozen)
938	buf.WriteString(x.String())
939}
940
941// -------- descriptor values --------
942
// A FileDescriptor is an immutable Starlark value that describes a
// .proto file.  It is a reference to a protoreflect.FileDescriptor.
// Two FileDescriptor values compare equal if and only if they refer to
// the same protoreflect.FileDescriptor.
//
// Its fields are the names of the message types (MessageDescriptor),
// extension fields (FieldDescriptor), and enum types (EnumDescriptor)
// declared at the top level of the file.
type FileDescriptor struct {
	Desc protoreflect.FileDescriptor // TODO(adonovan): hide field, expose method?
}

// Compile-time check that FileDescriptor exposes attributes.
var _ starlark.HasAttrs = FileDescriptor{}
955
956func (f FileDescriptor) String() string              { return string(f.Desc.Path()) }
957func (f FileDescriptor) Type() string                { return "proto.FileDescriptor" }
958func (f FileDescriptor) Truth() starlark.Bool        { return true }
959func (f FileDescriptor) Freeze()                     {} // immutable
960func (f FileDescriptor) Hash() (h uint32, err error) { return starlark.String(f.Desc.Path()).Hash() }
961func (f FileDescriptor) Attr(name string) (starlark.Value, error) {
962	if desc := f.Desc.Messages().ByName(protoreflect.Name(name)); desc != nil {
963		return MessageDescriptor{Desc: desc}, nil
964	}
965	if desc := f.Desc.Extensions().ByName(protoreflect.Name(name)); desc != nil {
966		return FieldDescriptor{desc}, nil
967	}
968	if enum := f.Desc.Enums().ByName(protoreflect.Name(name)); enum != nil {
969		return EnumDescriptor{Desc: enum}, nil
970	}
971	return nil, nil
972}
973func (f FileDescriptor) AttrNames() []string {
974	var names []string
975	messages := f.Desc.Messages()
976	for i, n := 0, messages.Len(); i < n; i++ {
977		names = append(names, string(messages.Get(i).Name()))
978	}
979	extensions := f.Desc.Extensions()
980	for i, n := 0, extensions.Len(); i < n; i++ {
981		names = append(names, string(extensions.Get(i).Name()))
982	}
983	enums := f.Desc.Enums()
984	for i, n := 0, enums.Len(); i < n; i++ {
985		names = append(names, string(enums.Get(i).Name()))
986	}
987	sort.Strings(names)
988	return names
989}
990
// A MessageDescriptor is an immutable Starlark value that describes a protocol
// message type.
//
// A MessageDescriptor value contains a reference to a protoreflect.MessageDescriptor.
// Two MessageDescriptor values compare equal if and only if they refer to the
// same protoreflect.MessageDescriptor.
//
// The fields of a MessageDescriptor value are the names of any message types
// (MessageDescriptor), fields or extension fields (FieldDescriptor),
// and enum types (EnumDescriptor) nested within the declaration of this message type.
type MessageDescriptor struct {
	Desc protoreflect.MessageDescriptor
}

// Compile-time checks that MessageDescriptor is callable and exposes
// attributes.
var (
	_ starlark.Callable = MessageDescriptor{}
	_ starlark.HasAttrs = MessageDescriptor{}
)
1009
1010func (d MessageDescriptor) String() string       { return string(d.Desc.FullName()) }
1011func (d MessageDescriptor) Type() string         { return "proto.MessageDescriptor" }
1012func (d MessageDescriptor) Truth() starlark.Bool { return true }
1013func (d MessageDescriptor) Freeze()              {} // immutable
1014func (d MessageDescriptor) Hash() (h uint32, err error) {
1015	return starlark.String(d.Desc.FullName()).Hash()
1016}
1017func (d MessageDescriptor) Attr(name string) (starlark.Value, error) {
1018	if desc := d.Desc.Messages().ByName(protoreflect.Name(name)); desc != nil {
1019		return MessageDescriptor{desc}, nil
1020	}
1021	if desc := d.Desc.Extensions().ByName(protoreflect.Name(name)); desc != nil {
1022		return FieldDescriptor{desc}, nil
1023	}
1024	if desc := d.Desc.Fields().ByName(protoreflect.Name(name)); desc != nil {
1025		return FieldDescriptor{desc}, nil
1026	}
1027	if desc := d.Desc.Enums().ByName(protoreflect.Name(name)); desc != nil {
1028		return EnumDescriptor{desc}, nil
1029	}
1030	return nil, nil
1031}
1032func (d MessageDescriptor) AttrNames() []string {
1033	var names []string
1034	messages := d.Desc.Messages()
1035	for i, n := 0, messages.Len(); i < n; i++ {
1036		names = append(names, string(messages.Get(i).Name()))
1037	}
1038	enums := d.Desc.Enums()
1039	for i, n := 0, enums.Len(); i < n; i++ {
1040		names = append(names, string(enums.Get(i).Name()))
1041	}
1042	sort.Strings(names)
1043	return names
1044}
1045func (d MessageDescriptor) Name() string { return string(d.Desc.Name()) } // for Callable
1046
// A FieldDescriptor is an immutable Starlark value that describes
// a field (possibly an extension field) of a protocol message.
//
// A FieldDescriptor value contains a reference to a protoreflect.FieldDescriptor.
// Two FieldDescriptor values compare equal if and only if they refer to the
// same protoreflect.FieldDescriptor.
//
// The primary use for FieldDescriptors is to access extension fields of a message.
//
// A FieldDescriptor value has no attributes.
// TODO(adonovan): expose metadata fields (e.g. name, type).
type FieldDescriptor struct {
	Desc protoreflect.FieldDescriptor
}

// Compile-time check that FieldDescriptor satisfies starlark.HasAttrs.
var (
	_ starlark.HasAttrs = FieldDescriptor{}
)
1065
1066func (d FieldDescriptor) String() string       { return string(d.Desc.FullName()) }
1067func (d FieldDescriptor) Type() string         { return "proto.FieldDescriptor" }
1068func (d FieldDescriptor) Truth() starlark.Bool { return true }
1069func (d FieldDescriptor) Freeze()              {} // immutable
1070func (d FieldDescriptor) Hash() (h uint32, err error) {
1071	return starlark.String(d.Desc.FullName()).Hash()
1072}
1073func (d FieldDescriptor) Attr(name string) (starlark.Value, error) {
1074	// TODO(adonovan): expose metadata fields of Desc?
1075	return nil, nil
1076}
1077func (d FieldDescriptor) AttrNames() []string {
1078	var names []string
1079	// TODO(adonovan): expose metadata fields of Desc?
1080	sort.Strings(names)
1081	return names
1082}
1083
// An EnumDescriptor is an immutable Starlark value that describes a
// protocol enum type.
//
// An EnumDescriptor contains a reference to a protoreflect.EnumDescriptor.
// Two EnumDescriptor values compare equal if and only if they
// refer to the same protoreflect.EnumDescriptor.
//
// An EnumDescriptor may be called like a function.  It converts its
// sole argument, which must be an int, string, or EnumValueDescriptor,
// to an EnumValueDescriptor.
//
// The fields of an EnumDescriptor value are the values of the
// enumeration, each of type EnumValueDescriptor.
type EnumDescriptor struct {
	Desc protoreflect.EnumDescriptor
}

// Compile-time checks that EnumDescriptor exposes attributes and is
// callable.
var (
	_ starlark.HasAttrs = EnumDescriptor{}
	_ starlark.Callable = EnumDescriptor{}
)
1105
1106func (e EnumDescriptor) String() string              { return string(e.Desc.FullName()) }
1107func (e EnumDescriptor) Type() string                { return "proto.EnumDescriptor" }
1108func (e EnumDescriptor) Truth() starlark.Bool        { return true }
1109func (e EnumDescriptor) Freeze()                     {}                // immutable
1110func (e EnumDescriptor) Hash() (h uint32, err error) { return 0, nil } // TODO(adonovan): number?
1111func (e EnumDescriptor) Attr(name string) (starlark.Value, error) {
1112	if v := e.Desc.Values().ByName(protoreflect.Name(name)); v != nil {
1113		return EnumValueDescriptor{v}, nil
1114	}
1115	return nil, nil
1116}
1117func (e EnumDescriptor) AttrNames() []string {
1118	var names []string
1119	values := e.Desc.Values()
1120	for i, n := 0, values.Len(); i < n; i++ {
1121		names = append(names, string(values.Get(i).Name()))
1122	}
1123	sort.Strings(names)
1124	return names
1125}
1126func (e EnumDescriptor) Name() string { return string(e.Desc.Name()) } // for Callable
1127
1128// The Call method implements the starlark.Callable interface.
1129// A call to an enum descriptor converts its argument to a value of that enum type.
1130func (e EnumDescriptor) CallInternal(_ *starlark.Thread, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
1131	var x starlark.Value
1132	if err := starlark.UnpackPositionalArgs(string(e.Desc.Name()), args, kwargs, 1, &x); err != nil {
1133		return nil, err
1134	}
1135	v, err := enumValueOf(e.Desc, x)
1136	if err != nil {
1137		return nil, fmt.Errorf("%s: %v", e.Desc.Name(), err)
1138	}
1139	return EnumValueDescriptor{Desc: v}, nil
1140}
1141
1142// enumValueOf converts an int, string, or enum value to a value of the specified enum type.
1143func enumValueOf(enum protoreflect.EnumDescriptor, x starlark.Value) (protoreflect.EnumValueDescriptor, error) {
1144	switch x := x.(type) {
1145	case starlark.Int:
1146		i, err := starlark.AsInt32(x)
1147		if err != nil {
1148			return nil, fmt.Errorf("invalid number %s for %s enum", x, enum.Name())
1149		}
1150		desc := enum.Values().ByNumber(protoreflect.EnumNumber(i))
1151		if desc == nil {
1152			return nil, fmt.Errorf("invalid number %d for %s enum", i, enum.Name())
1153		}
1154		return desc, nil
1155
1156	case starlark.String:
1157		name := protoreflect.Name(x)
1158		desc := enum.Values().ByName(name)
1159		if desc == nil {
1160			return nil, fmt.Errorf("invalid name %q for %s enum", name, enum.Name())
1161		}
1162		return desc, nil
1163
1164	case EnumValueDescriptor:
1165		if parent := x.Desc.Parent(); parent != enum {
1166			return nil, fmt.Errorf("invalid value %s.%s for %s enum",
1167				parent.Name(), x.Desc.Name(), enum.Name())
1168		}
1169		return x.Desc, nil
1170	}
1171
1172	return nil, fmt.Errorf("cannot convert %s to %s enum", x.Type(), enum.Name())
1173}
1174
// An EnumValueDescriptor is an immutable Starlark value that represents one value of an enumeration.
//
// An EnumValueDescriptor contains a reference to a protoreflect.EnumValueDescriptor.
// Two EnumValueDescriptor values compare equal if and only if they
// refer to the same protoreflect.EnumValueDescriptor.
// Only the == and != comparisons are supported.
//
// An EnumValueDescriptor has the following fields:
//
//      index   -- int, index of this value within the enum sequence
//      name    -- string, name of this enum value
//      number  -- int, numeric value of this enum value
//      type    -- EnumDescriptor, the enum type to which this value belongs
type EnumValueDescriptor struct {
	Desc protoreflect.EnumValueDescriptor
}

// Compile-time checks that EnumValueDescriptor exposes attributes and
// supports comparison.
var (
	_ starlark.HasAttrs   = EnumValueDescriptor{}
	_ starlark.Comparable = EnumValueDescriptor{}
)
1196
1197func (e EnumValueDescriptor) String() string {
1198	enum := e.Desc.Parent()
1199	return string(enum.Name() + "." + e.Desc.Name()) // "Enum.EnumValue"
1200}
1201func (e EnumValueDescriptor) Type() string                { return "proto.EnumValueDescriptor" }
1202func (e EnumValueDescriptor) Truth() starlark.Bool        { return true }
1203func (e EnumValueDescriptor) Freeze()                     {} // immutable
1204func (e EnumValueDescriptor) Hash() (h uint32, err error) { return uint32(e.Desc.Number()), nil }
1205func (e EnumValueDescriptor) AttrNames() []string {
1206	return []string{"index", "name", "number", "type"}
1207}
1208func (e EnumValueDescriptor) Attr(name string) (starlark.Value, error) {
1209	switch name {
1210	case "index":
1211		return starlark.MakeInt(e.Desc.Index()), nil
1212	case "name":
1213		return starlark.String(e.Desc.Name()), nil
1214	case "number":
1215		return starlark.MakeInt(int(e.Desc.Number())), nil
1216	case "type":
1217		enum := e.Desc.Parent()
1218		return EnumDescriptor{Desc: enum.(protoreflect.EnumDescriptor)}, nil
1219	}
1220	return nil, nil
1221}
1222func (x EnumValueDescriptor) CompareSameType(op syntax.Token, y_ starlark.Value, depth int) (bool, error) {
1223	y := y_.(EnumValueDescriptor)
1224	switch op {
1225	case syntax.EQL:
1226		return x.Desc == y.Desc, nil
1227	case syntax.NEQ:
1228		return x.Desc != y.Desc, nil
1229	default:
1230		return false, fmt.Errorf("%s %s %s not implemented", x.Type(), op, y_.Type())
1231	}
1232}
1233