/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// This proto file defines messages which represent the HLO module. This is a
// full-fidelity serialization of the C++ HLO constructs.
//
// Many of the protos below are simple 1-to-1 serializations of the
// corresponding C++ classes, e.g., HloModule, HloComputation, and
// HloInstruction.
//
// FIELD NAMES ARE IMPORTANT
//
// Unlike most protos, you can't safely change the names of fields, even if you
// keep the numeric ids the same. This is because we sometimes serialize these
// protos as JSON, which includes the field names in the serialization.

syntax = "proto3";

package xla;

import "tensorflow/compiler/xla/xla_data.proto";

option cc_enable_arenas = true;

enum CustomCallSchedule {
  SCHEDULE_NONE = 0;
  SCHEDULE_LATEST = 1;
  SCHEDULE_EARLIEST = 2;
}

// The version of the API used by the custom call function. The signatures for
// each version are given below.
// TODO(b/189822916): Remove this enum when all clients are migrated to the
// status-returning API.
enum CustomCallApiVersion {
  API_VERSION_UNSPECIFIED = 0;

  // The first version of the API, with the following signatures:
  //
  // CPU:
  //   void do_custom_call(void* out, const void** in);
  //
  // GPU:
  //   void do_custom_call(CUstream stream, void** buffers,
  //                       const char* opaque, size_t opaque_len);
  API_VERSION_ORIGINAL = 1;

  // When the ability to return success/failure status was added:
  //
  // CPU:
  //   void do_custom_call(void* out, const void** in,
  //                       XlaCustomCallStatus* status);
  //
  // GPU:
  //   void do_custom_call(CUstream stream, void** buffers,
  //                       const char* opaque, size_t opaque_len,
  //                       XlaCustomCallStatus* status);
  //
  API_VERSION_STATUS_RETURNING = 2;

  // Fixes the CPU-side API signature of the STATUS_RETURNING version by adding
  // the opaque string, so that the custom call API is consistent across CPU
  // and GPU. For GPU, the behaviors invoked by API_VERSION_STATUS_RETURNING
  // and API_VERSION_STATUS_RETURNING_UNIFIED are the same.
  //
  // CPU:
  //   void do_custom_call(void* out, const void** in,
  //                       const char* opaque, size_t opaque_len,
  //                       XlaCustomCallStatus* status);
  //
  // GPU:
  //   void do_custom_call(CUstream stream, void** buffers,
  //                       const char* opaque, size_t opaque_len,
  //                       XlaCustomCallStatus* status);
  //
  API_VERSION_STATUS_RETURNING_UNIFIED = 3;
}
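
// For illustration (not part of the serialized format), a CPU custom call
// implementing the API_VERSION_STATUS_RETURNING_UNIFIED signature might look
// like the sketch below. The computation is a made-up example; the
// XlaCustomCallStatus helpers come from XLA's custom-call status API.
//
//   void do_custom_call(void* out, const void** in,
//                       const char* opaque, size_t opaque_len,
//                       XlaCustomCallStatus* status) {
//     const float* input = reinterpret_cast<const float*>(in[0]);
//     float* output = reinterpret_cast<float*>(out);
//     if (input == nullptr) {
//       XlaCustomCallStatusSetFailure(status, "null input", 10);
//       return;
//     }
//     output[0] = input[0] * 2.0f;  // Example computation.
//   }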

// Serialization of HloInstruction.
// Next ID: 80
message HloInstructionProto {
  reserved 10;
  reserved "parameter_name";
  reserved 12;
  reserved "fused_instructions_computation";
  reserved 4;
  reserved "operand_names";
  reserved 5;
  reserved "control_predecessor_names";
  reserved 6;
  reserved "called_computation_names";
  reserved 44;
  reserved "replica_group_ids";
  // Use backend_config instead for custom_call_opaque.
  reserved 53;
  reserved "custom_call_opaque";
  // Use backend_config instead for all_reduce_barrier.
  reserved 46;
  reserved "all_reduce_barrier";

  string name = 1;
  string opcode = 2;
  xla.ShapeProto shape = 3;

  xla.OpMetadata metadata = 7;

  // Literal, only present for kConstant.
  xla.LiteralProto literal = 8;

  // Parameter number is only present for kParameter.
  int64 parameter_number = 9;

  // Fusion state, only present for kFusion.
  string fusion_kind = 11;

  // Index for kGetTupleElement.
  int64 tuple_index = 13;

  // Dimensions present for some operations that require reshaping or
  // broadcasting, including Reshape, Reduce, ReduceWindow, and Reverse.
  repeated int64 dimensions = 14;

  // Describes the window in a windowed operation such as convolution.
  xla.Window window = 15;

  // Describes the dimension numbers used for a convolution.
  xla.ConvolutionDimensionNumbers convolution_dimension_numbers = 16;

  // The number of feature groups. Used for a convolution. Must be a divisor of
  // the input feature dimension and output feature dimension. If not
  // specified, it defaults to 1.
  int64 feature_group_count = 50;

  int64 batch_group_count = 58;

  // Describes the [start, limit) index range and stride for slices.
  message SliceDimensions {
    int64 start = 1;
    int64 limit = 2;
    int64 stride = 3;
  }
  repeated SliceDimensions slice_dimensions = 17;
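
  // For example, slicing a dimension of size 8 with start=1, limit=7, stride=2
  // keeps the elements at indices 1, 3, and 5, so the output dimension has
  // size 3 (the number of strided steps within [start, limit)).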

  // The bit sizes for a reduce-precision operation.
  int32 exponent_bits = 18;
  int32 mantissa_bits = 19;

  // Describes the [start, start + size) range size for a dynamic slice
  // ('start' is specified dynamically in the second operand of the operation).
  repeated int64 dynamic_slice_sizes = 20;

  // The padding configuration that describes the edge padding and interior
  // padding of this pad instruction. Only set for pad instructions.
  xla.PaddingConfig padding_config = 21;

  // Outfeed configuration information, only present for kOutfeed.
  bytes outfeed_config = 22;

  // The distribution requested for random number generation.
  // Only present for kRng.
  xla.RandomDistribution distribution = 23;

  // A small float number added to the variance to avoid divide-by-zero error.
  // Only present for kBatchNormTraining.
  float epsilon = 24;

  // An integer value representing the index of the feature dimension.
  // Only present for kBatchNormTraining.
  int64 feature_index = 25;

  // Represents a unique identifier for each Send/Recv instruction pair or
  // optionally for collective instructions (AllReduce, CollectivePermute,
  // AllToAll). A non-positive channel_id is equivalent to no channel id.
  int64 channel_id = 26;

  // The string representation of the infeed configuration.
  bytes infeed_config = 27;

  // Name of an external target (e.g., a global symbol) to call, only present
  // for kCustomCall.
  string custom_call_target = 28;

  // Shape of outfeed request.
  xla.ShapeProto outfeed_shape = 29;

  // Describes the dimension numbers used for a dot operation.
  xla.DotDimensionNumbers dot_dimension_numbers = 30;

  // FFT type (FFT, IFFT, etc).
  xla.FftType fft_type = 31;

  // FFT length.
  repeated int64 fft_length = 32;

  // Comparison direction, only used for kCompare.
  string comparison_direction = 63;

  // Gather dimension numbers.
  xla.GatherDimensionNumbers gather_dimension_numbers = 33;
  repeated int64 gather_slice_sizes = 34;

  // Compute Host.
  string channel_name = 41;
  int64 cost_estimate_ns = 42;

  // The id of this instruction.
  int64 id = 35;

  repeated int64 operand_ids = 36;
  repeated int64 control_predecessor_ids = 37;
  repeated int64 called_computation_ids = 38;

  xla.OpSharding sharding = 40;

  // Backend configuration for the instruction. Has backend-specific meaning.
  bytes backend_config = 43;

  // Cross-replica op fields.
  repeated ReplicaGroup replica_groups = 49;
  // Deprecated, but kept for backward compatibility. Use channel_id instead.
  // A non-positive all_reduce_id is equivalent to no all_reduce_id.
  int64 all_reduce_id = 45 [deprecated = true];

  // If true, interprets ids in ReplicaGroup as global device ids, which is
  // the linearized id `replica_id * partition_count + partition_id`.
  bool use_global_device_ids = 71;
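
  // For example, with 2 replicas and 2 partitions, the device of (replica 1,
  // partition 0) has global id 1 * 2 + 0 = 2, so replica groups
  // {{0, 1}, {2, 3}} would group together the two partitions of each replica.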

  // Whether this Send/Recv instruction transfers data to/from the host. Only
  // present for Send and Recv instructions and their SendDone and RecvDone
  // partners.
  bool is_host_transfer = 47;

  // Whether this Sort instruction should be stable.
  bool is_stable = 60;

  xla.ScatterDimensionNumbers scatter_dimension_numbers = 48;

  // Precision configuration for the instruction. Has backend-specific meaning.
  xla.PrecisionConfig precision_config = 51;

  // Collective permute field.
  repeated SourceTarget source_target_pairs = 52;

  // Sharding for kDomain instructions.
  xla.OpSharding domain_entry_sharding = 54;
  xla.OpSharding domain_exit_sharding = 55;

  // For custom call, this indicates that the layouts are constrained. If
  // constrain_layout is true then the 'shape' field must contain a layout, and
  // 'operand_shapes_with_layout' must contain a shape with layout for each
  // operand.
  bool constrain_layout = 56;
  repeated xla.ShapeProto operand_shapes_with_layout = 57;

  // Options for TriangularSolve.
  xla.TriangularSolveOptions triangular_solve_options = 59;

  // Options for Cholesky.
  xla.CholeskyOptions cholesky_options = 62;

  // Describes how parameters behave with regard to replicas.
  xla.ParameterReplication parameter_replication = 61;

  // If set, the given instruction is run in parallel on e.g. multiple CPU
  // cores. The outermost dimension gets split up into
  // outer_dimension_partitions[0] pieces, the next-outermost dim gets split
  // into outer_dimension_partitions[1] pieces, etc.
  //
  // It's illegal to partition a dimension into more shards than there are
  // elements in that dimension.
  repeated int64 outer_dimension_partitions = 64;
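
  // For example, an instruction producing f32[8,9,16] with
  // outer_dimension_partitions = {2, 3} would be split into 2 * 3 = 6 shards,
  // each covering 4 elements of the outermost dimension and 3 of the
  // next-outermost one.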

  // Whether the kCustomCall instruction has side effects, only present for
  // kCustomCall.
  bool custom_call_has_side_effect = 65;

  // A list of CustomCallOutputOperandAliasing pairs that specifies aliasing
  // buffers between output and operands for kCustomCall.
  repeated xla.CustomCallOutputOperandAliasing
      custom_call_output_operand_aliasing = 74;

  // Specifies the desired schedule for the custom call. The field is only
  // present for custom call.
  CustomCallSchedule custom_call_schedule = 76;

  // The delta value for kRngGetAndUpdateState.
  int64 delta = 66;

  // Specifies if the gather/scatter indices are guaranteed to be sorted by the
  // caller.
  bool indices_are_sorted = 67;

  // Frontend attributes to pass to the XLA backend.
  xla.FrontendAttributes frontend_attributes = 68;

  // Specifies if all elements updated are guaranteed to be unique by
  // the caller.
  bool unique_indices = 69;

  // RNG algorithm used by kRngBitGenerator.
  xla.RandomAlgorithm rng_algorithm = 70;

  // The comparison type used for kCompare.
  string comparison_type = 72;

  // Specifies if this is a cross-program prefetch, used by kCopyStart.
  bool is_cross_program_prefetch = 73;

  // If a convolution is dynamic, a dynamic padding type will be specified.
  xla.PaddingType padding_type = 75;

  // The API version used by the custom call function. This field is only
  // present for custom call.
  // TODO(b/189822916): Remove this field when all clients are migrated to the
  // status-returning API.
  CustomCallApiVersion custom_call_api_version = 77;

  // Represents a unique identifier for an async group, which consists of an
  // async start, an async done, and zero or more async update operations.
  // A negative async_group_id is equivalent to no async group id.
  int64 async_group_id = 78;

  // Represents a unique execution thread name for one or more async groups.
  // Each HLO module may contain a main thread and one or more parallel
  // threads. An empty async_execution_thread is equivalent to the main thread.
  string async_execution_thread = 79;
}

// Serialization of HloComputation.
message HloComputationProto {
  reserved 3;
  reserved "root_name";

  string name = 1;

  // The array of instructions is always in a valid dependency order, where
  // operands appear before their users.
  repeated HloInstructionProto instructions = 2;

  // The program shape (with layout) of this computation.
  xla.ProgramShapeProto program_shape = 4;

  // The id of this computation.
  int64 id = 5;

  // The id of the root of the computation.
  int64 root_id = 6;

  // Whether this is a fusion computation. Fusion computations should use this
  // to determine whether they are a fusion in CreateFromProto, since the
  // parent fusion_instruction_ may get removed and become nullptr.
  bool is_fusion_computation = 7;

  // The name of the execution thread this computation belongs to.
  string execution_thread = 8;
}

// Serialization of an HLO schedule. An HLO schedule contains a total order of
// instructions for each non-fusion computation in the module.
message HloScheduleProto {
  message InstructionSequence {
    repeated int64 instruction_ids = 1;
  }

  // Map from computation id to sequence.
  map<int64, InstructionSequence> sequences = 1;
}
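
// For example, a module with a single computation (id 1) whose instructions
// have ids 10, 11, and 12 could carry the schedule (in textproto-like
// notation):
//
//   sequences { key: 1 value { instruction_ids: [10, 11, 12] } }
//
// Any total order consistent with the data dependencies is a valid sequence.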

enum Kind {
  // Define an UNDEFINED_ALIAS equal to zero to get around the default-0 proto3
  // behavior and missing has_*() APIs.
  UNDEFINED_ALIAS = 0;
  // The buffers may or may not alias at runtime.
  MAY_ALIAS = 1;
  // The buffers must alias at runtime.
  MUST_ALIAS = 2;
}

message HloInputOutputAliasProto {
  // The following proto describes a pair of an aliased input (described by
  // parameter number and a ShapeIndex of the parameter) and an output
  // (described by a ShapeIndex of the root instruction). For example:
  //
  // entry = {
  //   output_shape_index={1},
  //   parameter_number=0,
  //   parameter_shape_index={1, 2},
  // }
  //
  // This entry indicates that the first parameter's {1, 2} element is
  // aliased with the {1} element of the root instruction.
  message AliasEntryProto {
    // ShapeIndex of the root hlo.
    repeated int64 output_shape_index = 1;
    // Number of the parameter in the entry computation.
    int64 parameter_number = 2;
    // ShapeIndex of the parameter instruction.
    repeated int64 parameter_shape_index = 3;
    // The kind of alias to be set up.
    Kind kind = 4;
  }

  repeated AliasEntryProto entries = 1;
}

message DynamicParameterBindingProto {
  // A list of bindings which indicates that the `target_param_dim_num` in
  // the subshape `target_param_index` of parameter `target_param_num`
  // is a dynamic dimension and its real dynamic size is represented
  // by `dynamic_param_index` in parameter `dynamic_param_num`.
  //
  // As an example, imagine we have a program:
  //
  // ENTRY main {
  //   a = f32[] parameter(0)
  //   b = f32[10] parameter(1)
  //   ROOT root = (f32[], f32[10]) tuple(%a, %b)
  // }
  //
  // Let's say 'b' (param index 1) is a dynamic shape whose input has an upper
  // bound of 10 and whose real size is determined at runtime, and 'a'
  // represents the real size of b's first dimension.
  //
  // In this case, the fields are set in the following way:
  // dynamic_param_num = 0
  // dynamic_param_index = {}
  // target_param_num = 1
  // target_param_index = {}
  // target_param_dim_num = 0
  message Binding {
    int64 dynamic_param_num = 1;
    repeated int64 dynamic_param_index = 2;
    int64 target_param_num = 3;
    repeated int64 target_param_index = 4;
    int64 target_param_dim_num = 5;
  }

  repeated Binding entries = 1;
}

message CrossProgramPrefetch {
  int64 parameter = 1;
  repeated int64 index = 2;
}

// Serialization of HloModule.
message HloModuleProto {
  string name = 1;
  string entry_computation_name = 2;
  int64 entry_computation_id = 6;

  // The array of computations is always in a valid dependency order, where
  // callees appear before their callers.
  repeated HloComputationProto computations = 3;

  // The host program shape (with layout) of the entry computation.
  xla.ProgramShapeProto host_program_shape = 4;

  // The id of this module.
  int64 id = 5;

  // The schedule for this module.
  HloScheduleProto schedule = 7;

  // Describes alias information between inputs and outputs.
  HloInputOutputAliasProto input_output_alias = 8;

  DynamicParameterBindingProto dynamic_parameter_binding = 9;

  repeated CrossProgramPrefetch cross_program_prefetches = 10;

  // True if the module contains dynamic computation.
  bool is_dynamic = 11;

  xla.OpSharding spmd_output_sharding = 12;

  repeated xla.OpSharding spmd_parameters_shardings = 14;

  // Whether the AutoSharding pass is used.
  bool use_auto_spmd_partitioning = 16;

  // The type of optimization profile in use for module-level optimizations.
  enum ProfileType {
    INVALID = 0;
    FLAG = 1;
    FUSION = 2;
    LAYOUT = 3;
  }

  // Information about the optimization profile that this module contains.
  message ProfileInfo {
    // The optimization profiles that this module contains.
    ProfileType profile_type = 1;
    // Speedup of the tuned config compared to the default config.
    double relative_speedup = 2;
    // The source of the optimization profile that this module contains.
    xla.ProfileSource profile_source = 3;
    // The compilation event that triggered the use of the profile.
    xla.CompilationEvent compilation_event = 4;
  }

  // Profile information for the HLO module.
  repeated ProfileInfo profile_info = 13;

  // DeviceAssignment object information.
  DeviceAssignmentProto device_assignment = 15;
}

// Serialization of LogicalBuffer.
message LogicalBufferProto {
  // Location represents an instruction and its shape index, which uniquely
  // identifies a point where a buffer is needed.
  message Location {
    // NOTE: module_name isn't necessary, since all LogicalBuffers are
    // associated with a single HloModule.
    // TODO(b/239098765): Remove instruction_name and computation_name.
    string computation_name = 1 [deprecated = true];
    string instruction_name = 2 [deprecated = true];
    int64 instruction_id = 4;
    repeated int64 shape_index = 3;
  }

  int64 id = 1;
  int64 size = 2;

  // The location where the buffer is defined.
  Location defined_at = 3;

  int64 color = 4;
}

// Serialization of BufferAllocation.
message BufferAllocationProto {
  // Assigned represents a single LogicalBuffer that is assigned to this
  // BufferAllocation.
  message Assigned {
    int64 logical_buffer_id = 1;
    int64 offset = 2;
    int64 size = 3;
  }

  int64 index = 1;
  int64 size = 2;
  bool is_thread_local = 3;
  bool is_tuple = 11;
  bool is_entry_computation_parameter = 5;
  bool is_constant = 12;
  int64 parameter_number = 6;
  repeated int64 parameter_shape_index = 10;
  bool maybe_live_out = 7;
  int64 color = 8;
  repeated Assigned assigned = 9;
}

// A trace of a HeapSimulator run.
message HeapSimulatorTrace {
  // The trace includes a list of events, where each event describes one action
  // performed by the heap simulator.
  message Event {
    enum Kind {
      ALLOC = 0;  // A memory region was allocated for the buffer.
      FREE = 1;   // A memory region was freed for the buffer.

      // A buffer was shared with another (canonical) buffer. This is similar
      // to ALLOC, except that instead of allocating a new region of memory,
      // the memory region of the canonical buffer is directly re-used.
      // Multiple buffers may share with the same canonical buffer. The
      // lifetime of the canonical buffer is extended to the union of all
      // lifetimes.
      SHARE_WITH = 2;
    }
    Kind kind = 1;

    // The id of the LogicalBuffer that the event applies to.
    int64 buffer_id = 2;

    // The HloInstruction that the simulation was processing that caused this
    // event to occur, identified by its computation and instruction name.
    // E.g. buffers defined by instruction A are allocated when processing A.
    string computation_name = 3;
    string instruction_name = 4;

    // The id of the canonical LogicalBuffer that the buffer shares with. Only
    // set for SHARE_WITH events.
    int64 share_with_canonical_id = 5;
  }
  repeated Event events = 1;
  bool whole_module_simulation = 2;
  int64 buffer_allocation_index = 3;
}
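
// For example, a trace in which buffer 1 is allocated, buffer 2 then re-uses
// buffer 1's memory region, and both are eventually freed might look like
// (textproto-like notation, instruction/computation names omitted):
//
//   events { kind: ALLOC buffer_id: 1 }
//   events { kind: SHARE_WITH buffer_id: 2 share_with_canonical_id: 1 }
//   events { kind: FREE buffer_id: 2 }
//   events { kind: FREE buffer_id: 1 }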

// An abstraction representing a set of HLO modules built to run concurrently
// across different devices.
message HloModuleGroupProto {
  string name = 1;
  repeated HloModuleProto hlo_modules = 2;
}

// Serialization of BufferAssignment.
message BufferAssignmentProto {
  // Alias represents a source LogicalBuffer, and the buffer location that
  // aliases it.
  message BufferAlias {
    int64 source_buffer_id = 1;
    LogicalBufferProto.Location location = 2;
  }

  repeated LogicalBufferProto logical_buffers = 1;
  repeated BufferAlias buffer_aliases = 2;
  repeated BufferAllocationProto buffer_allocations = 3;
  repeated HeapSimulatorTrace heap_simulator_traces = 4;
}

// Grouping message that contains all of the information above.
message HloProto {
  reserved 2;
  reserved "hlo_ordering";

  HloModuleProto hlo_module = 1;
  BufferAssignmentProto buffer_assignment = 3;
}

// Encapsulates HloProto together with the arguments, result, and
// execution_platform. This message is used for purposes such as
// analysis/replay/file-storage.
message HloSnapshot {
  // The hlo graph.
  HloProto hlo = 1;

  // The arguments passed to the graph.
  repeated LiteralProto arguments = 2;

  // The result of the graph.
  LiteralProto result = 3;

  // The name of the platform used to run the graph.
  string execution_platform = 4;
}
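
// A minimal C++ sketch of loading a dumped HloSnapshot for analysis or replay
// (the file path is hypothetical; ReadBinaryProto is the TensorFlow proto
// helper declared in tensorflow/core/platform/env.h):
//
//   xla::HloSnapshot snapshot;
//   TF_CHECK_OK(tensorflow::ReadBinaryProto(
//       tensorflow::Env::Default(), "/tmp/snapshot.pb", &snapshot));
//   const xla::HloModuleProto& module_proto = snapshot.hlo().hlo_module();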

// Metadata for an HLO module. Dumped after HLO passes and before LLO lowering
// with filename module_####.metadata.textproto, where #### is
// canonical_module_id.
message HloModuleMetadataProto {
  // Uniquely identifies an HloModuleMetadata. Equal to the first unique_id
  // of the module (a module may go through multiple unique_ids). If a module
  // is partitioned into multiple modules, those modules will each have a new
  // HloModuleMetadata with a different canonical_module_id.
  int64 canonical_module_id = 1;

  // Name of the module group that the module is part of.
  string module_group_name = 2;

  // The canonical module id of the module that this one is partitioned from,
  // if applicable.
  int64 original_module_id = 3;

  // The canonical module ids of the modules that this one is partitioned into,
  // if applicable.
  repeated int64 partitioned_module_ids = 4;

  // Metadata for the HLO passes that are run on the module.
  repeated HloPassMetadata pass_metadata = 5;
}

// Metadata for one run of an HLO pass on a module. Provides more information
// when processing debug dumps of HloProtos about the order of HLO passes and
// various other stats like duration. `pass_id` may also be used to identify a
// particular run of a pass in debug info that propagates through stages of
// compilation.
message HloPassMetadata {
  // For a given module, pass_id uniquely identifies a run of an HLO pass on
  // that module. Note that a pass_id may not always refer to the same pass
  // because the order of passes during compilation may change. For finding
  // metadata for a particular pass, pass_name and pipeline_name would be more
  // reliable, although note that they may not be unique.
  int64 pass_id = 1;
  string pass_name = 2;
  string pipeline_name = 3;

  // Filenames of the dumps of the module after this pass ran. The module may
  // be dumped in multiple formats, and the order of formats in this field will
  // stay consistent across passes.
  repeated string dump_filenames = 4;

  // Return value of pass.Run(). True if this pass changed the module, or, in
  // the case where the module was run through this pass as part of a module
  // group, true if this pass changed any module in the same module group.
  bool module_changed = 5;

  // The unique_id of the module that this pass is run on. May be different
  // from the canonical_module_id of the HloModuleMetadata that this
  // HloPassMetadata is inside.
  int64 module_id = 6;

  // If the module went through this pass as part of a module group, this is
  // set to the ids of all the modules in the module group. Empty otherwise.
  repeated int64 module_group_module_ids = 7;

  // Timestamps before and after the pass is run. Note they may be equal.
  int64 start_timestamp_usec = 8;
  int64 end_timestamp_usec = 9;
}

// Encodes attributes for an entry function.
message EntryFunctionAttributes {
  // Acts as the underlying container for an xla::ShapeIndex.
  message ShapeIndex {
    repeated int64 indices = 1;
  }

  // Encodes attributes for a single buffer parameter.
  message BufferParameterAttributes {
    // Represents an lmhlo.params function argument attribute.
    int64 lmhlo_params = 1;
    // TODO(hanbinyoon): Deprecate when optional fields are available in proto3
    // (Protocol Buffers v3.15.0).
    bool lmhlo_params_present = 6;

    // Represents an lmhlo.param_shape_index function argument attribute.
    ShapeIndex lmhlo_param_shape_index = 2;

    // Represents an lmhlo.constant_name function argument attribute.
    string lmhlo_constant_name = 3;

    // Represents an lmhlo.must_alias function argument attribute.
    bool lmhlo_must_alias = 4;

    // Represents an lmhlo.output_index function argument attribute.
    ShapeIndex lmhlo_output_index = 5;
  }

  repeated BufferParameterAttributes buffers = 1;

  // xla::Shape in string format.
  string result_xla_shape = 2;
}

// Encodes the underlying JitRt executable compiled from the XLA module.
message JitRtExecutableProto {
  HloModuleProto hlo_module_proto = 1;

  // XLA-specific attributes of the executable's entry function.
  EntryFunctionAttributes entry_func_attrs = 2;

  // TODO(b/232263665): We need to know the TargetMachine this executable was
  // compiled for, otherwise we can accidentally use illegal instructions (e.g.
  // use AVX512 when it's not available).

  // TODO(b/232263665): Serialized executable has to know what APIs it has to
  // be linked with, including the version. For example, a GPU executable must
  // be linked with a runtime layer that abstracts over CUDA.

  // Serialized object file compiled from the XLA module.
  bytes obj_file = 3;

  // Serialized MLIR module corresponding to the compiled object file.
  string mlir_module = 4;
}