1<html><body> 2<style> 3 4body, h1, h2, h3, div, span, p, pre, a { 5 margin: 0; 6 padding: 0; 7 border: 0; 8 font-weight: inherit; 9 font-style: inherit; 10 font-size: 100%; 11 font-family: inherit; 12 vertical-align: baseline; 13} 14 15body { 16 font-size: 13px; 17 padding: 1em; 18} 19 20h1 { 21 font-size: 26px; 22 margin-bottom: 1em; 23} 24 25h2 { 26 font-size: 24px; 27 margin-bottom: 1em; 28} 29 30h3 { 31 font-size: 20px; 32 margin-bottom: 1em; 33 margin-top: 1em; 34} 35 36pre, code { 37 line-height: 1.5; 38 font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace; 39} 40 41pre { 42 margin-top: 0.5em; 43} 44 45h1, h2, h3, p { 46 font-family: Arial, sans serif; 47} 48 49h1, h2, h3 { 50 border-bottom: solid #CCC 1px; 51} 52 53.toc_element { 54 margin-top: 0.5em; 55} 56 57.firstline { 58 margin-left: 2 em; 59} 60 61.method { 62 margin-top: 1em; 63 border: solid 1px #CCC; 64 padding: 1em; 65 background: #EEE; 66} 67 68.details { 69 font-weight: bold; 70 font-size: 14px; 71} 72 73</style> 74 75<h1><a href="dataflow_v1b3.html">Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.jobs.html">jobs</a> . <a href="dataflow_v1b3.projects.jobs.workItems.html">workItems</a></h1> 76<h2>Instance Methods</h2> 77<p class="toc_element"> 78 <code><a href="#lease">lease(projectId, jobId, body, x__xgafv=None)</a></code></p> 79<p class="firstline">Leases a dataflow WorkItem to run.</p> 80<p class="toc_element"> 81 <code><a href="#reportStatus">reportStatus(projectId, jobId, body, x__xgafv=None)</a></code></p> 82<p class="firstline">Reports the status of dataflow WorkItems leased by a worker.</p> 83<h3>Method Details</h3> 84<div class="method"> 85 <code class="details" id="lease">lease(projectId, jobId, body, x__xgafv=None)</code> 86 <pre>Leases a dataflow WorkItem to run. 87 88Args: 89 projectId: string, Identifies the project this worker belongs to. (required) 90 jobId: string, Identifies the workflow job this worker belongs to. (required) 91 body: object, The request body. (required) 92 The object takes the form of: 93 94{ # Request to lease WorkItems. 95 "workItemTypes": [ # Filter for WorkItem type. 96 "A String", 97 ], 98 "workerCapabilities": [ # Worker capabilities. WorkItems might be limited to workers with specific 99 # capabilities. 100 "A String", 101 ], 102 "currentWorkerTime": "A String", # The current timestamp at the worker. 103 "requestedLeaseDuration": "A String", # The initial lease period. 104 "workerId": "A String", # Identifies the worker leasing work -- typically the ID of the 105 # virtual machine running the worker. 106 "unifiedWorkerRequest": { # Untranslated bag-of-bytes WorkRequest from UnifiedWorker. 107 "a_key": "", # Properties of the object. Contains field @type with type URL. 108 }, 109 "location": "A String", # The [regional endpoint] 110 # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that 111 # contains the WorkItem's job. 112 } 113 114 x__xgafv: string, V1 error format. 115 Allowed values 116 1 - v1 error format 117 2 - v2 error format 118 119Returns: 120 An object of the form: 121 122 { # Response to a request to lease WorkItems. 123 "workItems": [ # A list of the leased WorkItems. 124 { # WorkItem represents basic information about a WorkItem to be executed 125 # in the cloud. 126 "packages": [ # Any required packages that need to be fetched in order to execute 127 # this WorkItem. 128 { # The packages that must be installed in order for a worker to run the 129 # steps of the Cloud Dataflow job that will be assigned to its worker 130 # pool. 131 # 132 # This is the mechanism by which the Cloud Dataflow SDK causes code to 133 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK 134 # might use this to install jars containing the user's code and all of the 135 # various dependencies (libraries, data files, etc.) required in order 136 # for that code to run. 137 "location": "A String", # The resource to read the package from. The supported resource type is: 138 # 139 # Google Cloud Storage: 140 # 141 # storage.googleapis.com/{bucket} 142 # bucket.storage.googleapis.com/ 143 "name": "A String", # The name of the package. 144 }, 145 ], 146 "leaseExpireTime": "A String", # Time when the lease on this Work will expire. 147 "seqMapTask": { # Describes a particular function to invoke. # Additional information for SeqMapTask WorkItems. 148 "inputs": [ # Information about each of the inputs. 149 { # Information about a side input of a DoFn or an input of a SeqDoFn. 150 "sources": [ # The source(s) to read element(s) from to get the value of this side input. 151 # If more than one source, then the elements are taken from the 152 # sources, in the specified order if order matters. 153 # At least one source is required. 154 { # A source that records can be read and decoded from. 155 "codec": { # The codec to use to decode data read from the source. 156 "a_key": "", # Properties of the object. 157 }, 158 "baseSpecs": [ # While splitting, sources may specify the produced bundles 159 # as differences against another source, in order to save backend-side 160 # memory and allow bigger jobs. For details, see SourceSplitRequest. 161 # To support this use case, the full set of parameters of the source 162 # is logically obtained by taking the latest explicitly specified value 163 # of each parameter in the order: 164 # base_specs (later items win), spec (overrides anything in base_specs). 165 { 166 "a_key": "", # Properties of the object. 167 }, 168 ], 169 "spec": { # The source to read from, plus its parameters. 170 "a_key": "", # Properties of the object. 171 }, 172 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 173 # doesn't need splitting, and using SourceSplitRequest on it would 174 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 175 # 176 # E.g. a file splitter may set this to true when splitting a single file 177 # into a set of byte ranges of appropriate size, and set this 178 # to false when splitting a filepattern into individual files. 179 # However, for efficiency, a file splitter may decide to produce 180 # file subranges directly from the filepattern to avoid a splitting 181 # round-trip. 182 # 183 # See SourceSplitRequest for an overview of the splitting process. 184 # 185 # This field is meaningful only in the Source objects populated 186 # by the user (e.g. when filling in a DerivedSource). 187 # Source objects supplied by the framework to the user don't have 188 # this field populated. 189 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 190 # avoiding a SourceGetMetadataOperation roundtrip 191 # (see SourceOperationRequest). 192 # 193 # This field is meaningful only in the Source objects populated 194 # by the user (e.g. when filling in a DerivedSource). 195 # Source objects supplied by the framework to the user don't have 196 # this field populated. 197 # and tuning the pipeline, etc. 198 "infinite": True or False, # Specifies that the size of this source is known to be infinite 199 # (this is a streaming source). 200 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 201 # read from this source. This estimate is in terms of external storage 202 # size, before any decompression or other processing done by the reader. 203 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 204 # the (encoded) keys in lexicographically sorted order. 205 }, 206 }, 207 ], 208 "kind": { # How to interpret the source element(s) as a side input value. 209 "a_key": "", # Properties of the object. 210 }, 211 "tag": "A String", # The id of the tag the user code will access this side input by; 212 # this should correspond to the tag of some MultiOutputInfo. 213 }, 214 ], 215 "name": "A String", # The user-provided name of the SeqDo operation. 216 "stageName": "A String", # System-defined name of the stage containing the SeqDo operation. 217 # Unique across the workflow. 218 "systemName": "A String", # System-defined name of the SeqDo operation. 219 # Unique across the workflow. 220 "userFn": { # The user function to invoke. 221 "a_key": "", # Properties of the object. 222 }, 223 "outputInfos": [ # Information about each of the outputs. 224 { # Information about an output of a SeqMapTask. 225 "tag": "A String", # The id of the TupleTag the user code will tag the output value by. 226 "sink": { # A sink that records can be encoded and written to. # The sink to write the output value to. 227 "codec": { # The codec to use to encode data written to the sink. 228 "a_key": "", # Properties of the object. 229 }, 230 "spec": { # The sink to write to, plus its parameters. 231 "a_key": "", # Properties of the object. 232 }, 233 }, 234 }, 235 ], 236 }, 237 "projectId": "A String", # Identifies the cloud project this WorkItem belongs to. 238 "streamingComputationTask": { # A task which describes what action should be performed for the specified # Additional information for StreamingComputationTask WorkItems. 239 # streaming computation ranges. 240 "taskType": "A String", # A type of streaming computation task. 241 "computationRanges": [ # Contains ranges of a streaming computation this task should apply to. 242 { # Describes full or partial data disk assignment information of the computation 243 # ranges. 244 "rangeAssignments": [ # Data disk assignments for ranges from this computation. 245 { # Data disk assignment information for a specific key-range of a sharded 246 # computation. 247 # Currently we only support UTF-8 character splits to simplify encoding into 248 # JSON. 249 "start": "A String", # The start (inclusive) of the key range. 250 "end": "A String", # The end (exclusive) of the key range. 251 "dataDisk": "A String", # The name of the data disk where data for this range is stored. 252 # This name is local to the Google Cloud Platform project and uniquely 253 # identifies the disk within that project, for example 254 # "myproject-1014-104817-4c2-harness-0-disk-1". 255 }, 256 ], 257 "computationId": "A String", # The ID of the computation. 258 }, 259 ], 260 "dataDisks": [ # Describes the set of data disks this task should apply to. 261 { # Describes mounted data disk. 262 "dataDisk": "A String", # The name of the data disk. 263 # This name is local to the Google Cloud Platform project and uniquely 264 # identifies the disk within that project, for example 265 # "myproject-1014-104817-4c2-harness-0-disk-1". 266 }, 267 ], 268 }, 269 "initialReportIndex": "A String", # The initial index to use when reporting the status of the WorkItem. 270 "mapTask": { # MapTask consists of an ordered set of instructions, each of which # Additional information for MapTask WorkItems. 271 # describes one particular low-level operation for the worker to 272 # perform in order to accomplish the MapTask's WorkItem. 273 # 274 # Each instruction must appear in the list before any instructions which 275 # depends on its output. 276 "systemName": "A String", # System-defined name of this MapTask. 277 # Unique across the workflow. 278 "counterPrefix": "A String", # Counter prefix that can be used to prefix counters. Not currently used in 279 # Dataflow. 280 "stageName": "A String", # System-defined name of the stage containing this MapTask. 281 # Unique across the workflow. 282 "instructions": [ # The instructions in the MapTask. 283 { # Describes a particular operation comprising a MapTask. 284 "name": "A String", # User-provided name of this operation. 285 "read": { # An instruction that reads records. # Additional information for Read instructions. 286 # Takes no inputs, produces one output. 287 "source": { # A source that records can be read and decoded from. # The source to read from. 288 "codec": { # The codec to use to decode data read from the source. 289 "a_key": "", # Properties of the object. 290 }, 291 "baseSpecs": [ # While splitting, sources may specify the produced bundles 292 # as differences against another source, in order to save backend-side 293 # memory and allow bigger jobs. For details, see SourceSplitRequest. 294 # To support this use case, the full set of parameters of the source 295 # is logically obtained by taking the latest explicitly specified value 296 # of each parameter in the order: 297 # base_specs (later items win), spec (overrides anything in base_specs). 298 { 299 "a_key": "", # Properties of the object. 300 }, 301 ], 302 "spec": { # The source to read from, plus its parameters. 303 "a_key": "", # Properties of the object. 304 }, 305 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 306 # doesn't need splitting, and using SourceSplitRequest on it would 307 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 308 # 309 # E.g. a file splitter may set this to true when splitting a single file 310 # into a set of byte ranges of appropriate size, and set this 311 # to false when splitting a filepattern into individual files. 312 # However, for efficiency, a file splitter may decide to produce 313 # file subranges directly from the filepattern to avoid a splitting 314 # round-trip. 315 # 316 # See SourceSplitRequest for an overview of the splitting process. 317 # 318 # This field is meaningful only in the Source objects populated 319 # by the user (e.g. when filling in a DerivedSource). 320 # Source objects supplied by the framework to the user don't have 321 # this field populated. 322 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 323 # avoiding a SourceGetMetadataOperation roundtrip 324 # (see SourceOperationRequest). 325 # 326 # This field is meaningful only in the Source objects populated 327 # by the user (e.g. when filling in a DerivedSource). 328 # Source objects supplied by the framework to the user don't have 329 # this field populated. 330 # and tuning the pipeline, etc. 331 "infinite": True or False, # Specifies that the size of this source is known to be infinite 332 # (this is a streaming source). 333 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 334 # read from this source. This estimate is in terms of external storage 335 # size, before any decompression or other processing done by the reader. 336 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 337 # the (encoded) keys in lexicographically sorted order. 338 }, 339 }, 340 }, 341 "outputs": [ # Describes the outputs of the instruction. 342 { # An output of an instruction. 343 "name": "A String", # The user-provided name of this output. 344 "onlyCountKeyBytes": True or False, # For system-generated byte and mean byte metrics, certain instructions 345 # should only report the key size. 346 "codec": { # The codec to use to encode data being written via this output. 347 "a_key": "", # Properties of the object. 348 }, 349 "systemName": "A String", # System-defined name of this output. 350 # Unique across the workflow. 351 "originalName": "A String", # System-defined name for this output in the original workflow graph. 352 # Outputs that do not contribute to an original instruction do not set this. 353 "onlyCountValueBytes": True or False, # For system-generated byte and mean byte metrics, certain instructions 354 # should only report the value size. 355 }, 356 ], 357 "partialGroupByKey": { # An instruction that does a partial group-by-key. # Additional information for PartialGroupByKey instructions. 358 # One input and one output. 359 "sideInputs": [ # Zero or more side inputs. 360 { # Information about a side input of a DoFn or an input of a SeqDoFn. 361 "sources": [ # The source(s) to read element(s) from to get the value of this side input. 362 # If more than one source, then the elements are taken from the 363 # sources, in the specified order if order matters. 364 # At least one source is required. 365 { # A source that records can be read and decoded from. 366 "codec": { # The codec to use to decode data read from the source. 367 "a_key": "", # Properties of the object. 368 }, 369 "baseSpecs": [ # While splitting, sources may specify the produced bundles 370 # as differences against another source, in order to save backend-side 371 # memory and allow bigger jobs. For details, see SourceSplitRequest. 372 # To support this use case, the full set of parameters of the source 373 # is logically obtained by taking the latest explicitly specified value 374 # of each parameter in the order: 375 # base_specs (later items win), spec (overrides anything in base_specs). 376 { 377 "a_key": "", # Properties of the object. 378 }, 379 ], 380 "spec": { # The source to read from, plus its parameters. 381 "a_key": "", # Properties of the object. 382 }, 383 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 384 # doesn't need splitting, and using SourceSplitRequest on it would 385 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 386 # 387 # E.g. a file splitter may set this to true when splitting a single file 388 # into a set of byte ranges of appropriate size, and set this 389 # to false when splitting a filepattern into individual files. 390 # However, for efficiency, a file splitter may decide to produce 391 # file subranges directly from the filepattern to avoid a splitting 392 # round-trip. 393 # 394 # See SourceSplitRequest for an overview of the splitting process. 395 # 396 # This field is meaningful only in the Source objects populated 397 # by the user (e.g. when filling in a DerivedSource). 398 # Source objects supplied by the framework to the user don't have 399 # this field populated. 400 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 401 # avoiding a SourceGetMetadataOperation roundtrip 402 # (see SourceOperationRequest). 403 # 404 # This field is meaningful only in the Source objects populated 405 # by the user (e.g. when filling in a DerivedSource). 406 # Source objects supplied by the framework to the user don't have 407 # this field populated. 408 # and tuning the pipeline, etc. 409 "infinite": True or False, # Specifies that the size of this source is known to be infinite 410 # (this is a streaming source). 411 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 412 # read from this source. This estimate is in terms of external storage 413 # size, before any decompression or other processing done by the reader. 414 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 415 # the (encoded) keys in lexicographically sorted order. 416 }, 417 }, 418 ], 419 "kind": { # How to interpret the source element(s) as a side input value. 420 "a_key": "", # Properties of the object. 421 }, 422 "tag": "A String", # The id of the tag the user code will access this side input by; 423 # this should correspond to the tag of some MultiOutputInfo. 424 }, 425 ], 426 "originalCombineValuesInputStoreName": "A String", # If this instruction includes a combining function this is the name of the 427 # intermediate store between the GBK and the CombineValues. 428 "originalCombineValuesStepName": "A String", # If this instruction includes a combining function, this is the name of the 429 # CombineValues instruction lifted into this instruction. 430 "valueCombiningFn": { # The value combining function to invoke. 431 "a_key": "", # Properties of the object. 432 }, 433 "input": { # An input of an instruction, as a reference to an output of a # Describes the input to the partial group-by-key instruction. 434 # producer instruction. 435 "outputNum": 42, # The output index (origin zero) within the producer. 436 "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces 437 # the output to be consumed by this input. This index is relative 438 # to the list of instructions in this input's instruction's 439 # containing MapTask. 440 }, 441 "inputElementCodec": { # The codec to use for interpreting an element in the input PTable. 442 "a_key": "", # Properties of the object. 443 }, 444 }, 445 "write": { # An instruction that writes records. # Additional information for Write instructions. 446 # Takes one input, produces no outputs. 447 "input": { # An input of an instruction, as a reference to an output of a # The input. 448 # producer instruction. 449 "outputNum": 42, # The output index (origin zero) within the producer. 450 "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces 451 # the output to be consumed by this input. This index is relative 452 # to the list of instructions in this input's instruction's 453 # containing MapTask. 454 }, 455 "sink": { # A sink that records can be encoded and written to. # The sink to write to. 456 "codec": { # The codec to use to encode data written to the sink. 457 "a_key": "", # Properties of the object. 458 }, 459 "spec": { # The sink to write to, plus its parameters. 460 "a_key": "", # Properties of the object. 461 }, 462 }, 463 }, 464 "systemName": "A String", # System-defined name of this operation. 465 # Unique across the workflow. 466 "flatten": { # An instruction that copies its inputs (zero or more) to its (single) output. # Additional information for Flatten instructions. 467 "inputs": [ # Describes the inputs to the flatten instruction. 468 { # An input of an instruction, as a reference to an output of a 469 # producer instruction. 470 "outputNum": 42, # The output index (origin zero) within the producer. 471 "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces 472 # the output to be consumed by this input. This index is relative 473 # to the list of instructions in this input's instruction's 474 # containing MapTask. 475 }, 476 ], 477 }, 478 "originalName": "A String", # System-defined name for the operation in the original workflow graph. 479 "parDo": { # An instruction that does a ParDo operation. # Additional information for ParDo instructions. 480 # Takes one main input and zero or more side inputs, and produces 481 # zero or more outputs. 482 # Runs user code. 483 "sideInputs": [ # Zero or more side inputs. 484 { # Information about a side input of a DoFn or an input of a SeqDoFn. 485 "sources": [ # The source(s) to read element(s) from to get the value of this side input. 486 # If more than one source, then the elements are taken from the 487 # sources, in the specified order if order matters. 488 # At least one source is required. 489 { # A source that records can be read and decoded from. 490 "codec": { # The codec to use to decode data read from the source. 491 "a_key": "", # Properties of the object. 492 }, 493 "baseSpecs": [ # While splitting, sources may specify the produced bundles 494 # as differences against another source, in order to save backend-side 495 # memory and allow bigger jobs. For details, see SourceSplitRequest. 496 # To support this use case, the full set of parameters of the source 497 # is logically obtained by taking the latest explicitly specified value 498 # of each parameter in the order: 499 # base_specs (later items win), spec (overrides anything in base_specs). 500 { 501 "a_key": "", # Properties of the object. 502 }, 503 ], 504 "spec": { # The source to read from, plus its parameters. 505 "a_key": "", # Properties of the object. 506 }, 507 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 508 # doesn't need splitting, and using SourceSplitRequest on it would 509 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 510 # 511 # E.g. a file splitter may set this to true when splitting a single file 512 # into a set of byte ranges of appropriate size, and set this 513 # to false when splitting a filepattern into individual files. 514 # However, for efficiency, a file splitter may decide to produce 515 # file subranges directly from the filepattern to avoid a splitting 516 # round-trip. 517 # 518 # See SourceSplitRequest for an overview of the splitting process. 519 # 520 # This field is meaningful only in the Source objects populated 521 # by the user (e.g. when filling in a DerivedSource). 522 # Source objects supplied by the framework to the user don't have 523 # this field populated. 524 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 525 # avoiding a SourceGetMetadataOperation roundtrip 526 # (see SourceOperationRequest). 527 # 528 # This field is meaningful only in the Source objects populated 529 # by the user (e.g. when filling in a DerivedSource). 530 # Source objects supplied by the framework to the user don't have 531 # this field populated. 532 # and tuning the pipeline, etc. 533 "infinite": True or False, # Specifies that the size of this source is known to be infinite 534 # (this is a streaming source). 535 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 536 # read from this source. This estimate is in terms of external storage 537 # size, before any decompression or other processing done by the reader. 538 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 539 # the (encoded) keys in lexicographically sorted order. 540 }, 541 }, 542 ], 543 "kind": { # How to interpret the source element(s) as a side input value. 544 "a_key": "", # Properties of the object. 545 }, 546 "tag": "A String", # The id of the tag the user code will access this side input by; 547 # this should correspond to the tag of some MultiOutputInfo. 548 }, 549 ], 550 "input": { # An input of an instruction, as a reference to an output of a # The input. 551 # producer instruction. 552 "outputNum": 42, # The output index (origin zero) within the producer. 553 "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces 554 # the output to be consumed by this input. This index is relative 555 # to the list of instructions in this input's instruction's 556 # containing MapTask. 557 }, 558 "multiOutputInfos": [ # Information about each of the outputs, if user_fn is a MultiDoFn. 559 { # Information about an output of a multi-output DoFn. 560 "tag": "A String", # The id of the tag the user code will emit to this output by; this 561 # should correspond to the tag of some SideInputInfo. 562 }, 563 ], 564 "numOutputs": 42, # The number of outputs. 565 "userFn": { # The user function to invoke. 566 "a_key": "", # Properties of the object. 567 }, 568 }, 569 }, 570 ], 571 }, 572 "jobId": "A String", # Identifies the workflow job this WorkItem belongs to. 573 "reportStatusInterval": "A String", # Recommended reporting interval. 574 "sourceOperationTask": { # A work item that represents the different operations that can be # Additional information for source operation WorkItems. 575 # performed on a user-defined Source specification. 576 "name": "A String", # User-provided name of the Read instruction for this source. 577 "stageName": "A String", # System-defined name of the stage containing the source operation. 578 # Unique across the workflow. 579 "getMetadata": { # A request to compute the SourceMetadata of a Source. # Information about a request to get metadata about a source. 580 "source": { # A source that records can be read and decoded from. # Specification of the source whose metadata should be computed. 581 "codec": { # The codec to use to decode data read from the source. 582 "a_key": "", # Properties of the object. 583 }, 584 "baseSpecs": [ # While splitting, sources may specify the produced bundles 585 # as differences against another source, in order to save backend-side 586 # memory and allow bigger jobs. For details, see SourceSplitRequest. 587 # To support this use case, the full set of parameters of the source 588 # is logically obtained by taking the latest explicitly specified value 589 # of each parameter in the order: 590 # base_specs (later items win), spec (overrides anything in base_specs). 591 { 592 "a_key": "", # Properties of the object. 593 }, 594 ], 595 "spec": { # The source to read from, plus its parameters. 596 "a_key": "", # Properties of the object. 597 }, 598 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 599 # doesn't need splitting, and using SourceSplitRequest on it would 600 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 601 # 602 # E.g. a file splitter may set this to true when splitting a single file 603 # into a set of byte ranges of appropriate size, and set this 604 # to false when splitting a filepattern into individual files. 605 # However, for efficiency, a file splitter may decide to produce 606 # file subranges directly from the filepattern to avoid a splitting 607 # round-trip. 608 # 609 # See SourceSplitRequest for an overview of the splitting process. 610 # 611 # This field is meaningful only in the Source objects populated 612 # by the user (e.g. when filling in a DerivedSource). 613 # Source objects supplied by the framework to the user don't have 614 # this field populated. 615 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 616 # avoiding a SourceGetMetadataOperation roundtrip 617 # (see SourceOperationRequest). 618 # 619 # This field is meaningful only in the Source objects populated 620 # by the user (e.g. when filling in a DerivedSource). 621 # Source objects supplied by the framework to the user don't have 622 # this field populated. 623 # and tuning the pipeline, etc. 624 "infinite": True or False, # Specifies that the size of this source is known to be infinite 625 # (this is a streaming source). 626 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 627 # read from this source. This estimate is in terms of external storage 628 # size, before any decompression or other processing done by the reader. 629 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 630 # the (encoded) keys in lexicographically sorted order. 631 }, 632 }, 633 }, 634 "systemName": "A String", # System-defined name of the Read instruction for this source. 635 # Unique across the workflow. 636 "split": { # Represents the operation to split a high-level Source specification # Information about a request to split a source. 637 # into bundles (parts for parallel processing). 638 # 639 # At a high level, splitting of a source into bundles happens as follows: 640 # SourceSplitRequest is applied to the source. If it returns 641 # SOURCE_SPLIT_OUTCOME_USE_CURRENT, no further splitting happens and the source 642 # is used "as is". Otherwise, splitting is applied recursively to each 643 # produced DerivedSource. 644 # 645 # As an optimization, for any Source, if its does_not_need_splitting is 646 # true, the framework assumes that splitting this source would return 647 # SOURCE_SPLIT_OUTCOME_USE_CURRENT, and doesn't initiate a SourceSplitRequest. 648 # This applies both to the initial source being split and to bundles 649 # produced from it. 650 "source": { # A source that records can be read and decoded from. # Specification of the source to be split. 651 "codec": { # The codec to use to decode data read from the source. 652 "a_key": "", # Properties of the object. 653 }, 654 "baseSpecs": [ # While splitting, sources may specify the produced bundles 655 # as differences against another source, in order to save backend-side 656 # memory and allow bigger jobs. For details, see SourceSplitRequest. 657 # To support this use case, the full set of parameters of the source 658 # is logically obtained by taking the latest explicitly specified value 659 # of each parameter in the order: 660 # base_specs (later items win), spec (overrides anything in base_specs). 661 { 662 "a_key": "", # Properties of the object. 663 }, 664 ], 665 "spec": { # The source to read from, plus its parameters. 666 "a_key": "", # Properties of the object. 667 }, 668 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 669 # doesn't need splitting, and using SourceSplitRequest on it would 670 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 671 # 672 # E.g. a file splitter may set this to true when splitting a single file 673 # into a set of byte ranges of appropriate size, and set this 674 # to false when splitting a filepattern into individual files. 675 # However, for efficiency, a file splitter may decide to produce 676 # file subranges directly from the filepattern to avoid a splitting 677 # round-trip. 678 # 679 # See SourceSplitRequest for an overview of the splitting process. 680 # 681 # This field is meaningful only in the Source objects populated 682 # by the user (e.g. when filling in a DerivedSource). 683 # Source objects supplied by the framework to the user don't have 684 # this field populated. 685 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 686 # avoiding a SourceGetMetadataOperation roundtrip 687 # (see SourceOperationRequest). 688 # 689 # This field is meaningful only in the Source objects populated 690 # by the user (e.g. when filling in a DerivedSource). 691 # Source objects supplied by the framework to the user don't have 692 # this field populated. 693 # and tuning the pipeline, etc. 694 "infinite": True or False, # Specifies that the size of this source is known to be infinite 695 # (this is a streaming source). 696 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 697 # read from this source. This estimate is in terms of external storage 698 # size, before any decompression or other processing done by the reader. 699 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 700 # the (encoded) keys in lexicographically sorted order. 701 }, 702 }, 703 "options": { # Hints for splitting a Source into bundles (parts for parallel # Hints for tuning the splitting process. 704 # processing) using SourceSplitRequest. 705 "desiredShardSizeBytes": "A String", # DEPRECATED in favor of desired_bundle_size_bytes. 706 "desiredBundleSizeBytes": "A String", # The source should be split into a set of bundles where the estimated size 707 # of each is approximately this many bytes. 708 }, 709 }, 710 "originalName": "A String", # System-defined name for the Read instruction for this source 711 # in the original workflow graph. 712 }, 713 "streamingSetupTask": { # A task which initializes part of a streaming Dataflow job. # Additional information for StreamingSetupTask WorkItems. 714 "snapshotConfig": { # Streaming appliance snapshot configuration. # Configures streaming appliance snapshot. 715 "snapshotId": "A String", # If set, indicates the snapshot id for the snapshot being performed. 716 "importStateEndpoint": "A String", # Indicates which endpoint is used to import appliance state. 717 }, 718 "workerHarnessPort": 42, # The TCP port used by the worker to communicate with the Dataflow 719 # worker harness. 720 "drain": True or False, # The user has requested drain. 721 "streamingComputationTopology": { # Global topology of the streaming Dataflow job, including all # The global topology of the streaming Dataflow job. 722 # computations and their sharded locations. 723 "computations": [ # The computations associated with a streaming Dataflow job. 724 { # All configuration data for a particular Computation. 725 "inputs": [ # The inputs to the computation. 726 { # Describes a stream of data, either as input to be processed or as 727 # output of a streaming Dataflow job. 728 "streamingStageLocation": { # Identifies the location of a streaming computation stage, for # The stream is part of another computation within the current 729 # streaming Dataflow job. 730 # stage-to-stage communication. 731 "streamId": "A String", # Identifies the particular stream within the streaming Dataflow 732 # job. 733 }, 734 "pubsubLocation": { # Identifies a pubsub location to use for transferring data into or # The stream is a pubsub stream. 735 # out of a streaming Dataflow job. 736 "idLabel": "A String", # If set, contains a pubsub label from which to extract record ids. 737 # If left empty, record deduplication will be strictly best effort. 738 "timestampLabel": "A String", # If set, contains a pubsub label from which to extract record timestamps. 739 # If left empty, record timestamps will be generated upon arrival. 740 "dropLateData": True or False, # Indicates whether the pipeline allows late-arriving data. 741 "topic": "A String", # A pubsub topic, in the form of 742 # "pubsub.googleapis.com/topics/<project-id>/<topic-name>" 743 "trackingSubscription": "A String", # If set, specifies the pubsub subscription that will be used for tracking 744 # custom time timestamps for watermark estimation. 745 "withAttributes": True or False, # If true, then the client has requested to get pubsub attributes. 746 "subscription": "A String", # A pubsub subscription, in the form of 747 # "pubsub.googleapis.com/subscriptions/<project-id>/<subscription-name>" 748 }, 749 "customSourceLocation": { # Identifies the location of a custom souce. # The stream is a custom source. 750 "stateful": True or False, # Whether this source is stateful. 751 }, 752 "sideInputLocation": { # Identifies the location of a streaming side input. # The stream is a streaming side input. 753 "stateFamily": "A String", # Identifies the state family where this side input is stored. 754 "tag": "A String", # Identifies the particular side input within the streaming Dataflow job. 755 }, 756 }, 757 ], 758 "outputs": [ # The outputs from the computation. 759 { # Describes a stream of data, either as input to be processed or as 760 # output of a streaming Dataflow job. 761 "streamingStageLocation": { # Identifies the location of a streaming computation stage, for # The stream is part of another computation within the current 762 # streaming Dataflow job. 763 # stage-to-stage communication. 764 "streamId": "A String", # Identifies the particular stream within the streaming Dataflow 765 # job. 766 }, 767 "pubsubLocation": { # Identifies a pubsub location to use for transferring data into or # The stream is a pubsub stream. 768 # out of a streaming Dataflow job. 769 "idLabel": "A String", # If set, contains a pubsub label from which to extract record ids. 770 # If left empty, record deduplication will be strictly best effort. 771 "timestampLabel": "A String", # If set, contains a pubsub label from which to extract record timestamps. 772 # If left empty, record timestamps will be generated upon arrival. 773 "dropLateData": True or False, # Indicates whether the pipeline allows late-arriving data. 774 "topic": "A String", # A pubsub topic, in the form of 775 # "pubsub.googleapis.com/topics/<project-id>/<topic-name>" 776 "trackingSubscription": "A String", # If set, specifies the pubsub subscription that will be used for tracking 777 # custom time timestamps for watermark estimation. 778 "withAttributes": True or False, # If true, then the client has requested to get pubsub attributes. 779 "subscription": "A String", # A pubsub subscription, in the form of 780 # "pubsub.googleapis.com/subscriptions/<project-id>/<subscription-name>" 781 }, 782 "customSourceLocation": { # Identifies the location of a custom souce. # The stream is a custom source. 783 "stateful": True or False, # Whether this source is stateful. 784 }, 785 "sideInputLocation": { # Identifies the location of a streaming side input. # The stream is a streaming side input. 786 "stateFamily": "A String", # Identifies the state family where this side input is stored. 787 "tag": "A String", # Identifies the particular side input within the streaming Dataflow job. 788 }, 789 }, 790 ], 791 "keyRanges": [ # The key ranges processed by the computation. 792 { # Location information for a specific key-range of a sharded computation. 793 # Currently we only support UTF-8 character splits to simplify encoding into 794 # JSON. 795 "deprecatedPersistentDirectory": "A String", # DEPRECATED. The location of the persistent state for this range, as a 796 # persistent directory in the worker local filesystem. 797 "start": "A String", # The start (inclusive) of the key range. 798 "deliveryEndpoint": "A String", # The physical location of this range assignment to be used for 799 # streaming computation cross-worker message delivery. 800 "end": "A String", # The end (exclusive) of the key range. 801 "dataDisk": "A String", # The name of the data disk where data for this range is stored. 802 # This name is local to the Google Cloud Platform project and uniquely 803 # identifies the disk within that project, for example 804 # "myproject-1014-104817-4c2-harness-0-disk-1". 805 }, 806 ], 807 "computationId": "A String", # The ID of the computation. 808 "systemStageName": "A String", # The system stage name. 809 "stateFamilies": [ # The state family values. 810 { # State family configuration. 811 "stateFamily": "A String", # The state family value. 812 "isRead": True or False, # If true, this family corresponds to a read operation. 813 }, 814 ], 815 }, 816 ], 817 "dataDiskAssignments": [ # The disks assigned to a streaming Dataflow job. 818 { # Data disk assignment for a given VM instance. 819 "vmInstance": "A String", # VM instance name the data disks mounted to, for example 820 # "myproject-1014-104817-4c2-harness-0". 821 "dataDisks": [ # Mounted data disks. The order is important a data disk's 0-based index in 822 # this list defines which persistent directory the disk is mounted to, for 823 # example the list of { "myproject-1014-104817-4c2-harness-0-disk-0" }, 824 # { "myproject-1014-104817-4c2-harness-0-disk-1" }. 825 "A String", 826 ], 827 }, 828 ], 829 "persistentStateVersion": 42, # Version number for persistent state. 830 "userStageToComputationNameMap": { # Maps user stage names to stable computation names. 831 "a_key": "A String", 832 }, 833 "forwardingKeyBits": 42, # The size (in bits) of keys that will be assigned to source messages. 834 }, 835 "receiveWorkPort": 42, # The TCP port on which the worker should listen for messages from 836 # other streaming computation workers. 837 }, 838 "streamingConfigTask": { # A task that carries configuration information for streaming computations. # Additional information for StreamingConfigTask WorkItems. 839 "userStepToStateFamilyNameMap": { # Map from user step names to state families. 840 "a_key": "A String", 841 }, 842 "windmillServicePort": "A String", # If present, the worker must use this port to communicate with Windmill 843 # Service dispatchers. Only applicable when windmill_service_endpoint is 844 # specified. 845 "streamingComputationConfigs": [ # Set of computation configuration information. 846 { # Configuration information for a single streaming computation. 847 "transformUserNameToStateFamily": { # Map from user name of stateful transforms in this stage to their state 848 # family. 849 "a_key": "A String", 850 }, 851 "computationId": "A String", # Unique identifier for this computation. 852 "systemName": "A String", # System defined name for this computation. 853 "stageName": "A String", # Stage name of this computation. 854 "instructions": [ # Instructions that comprise the computation. 855 { # Describes a particular operation comprising a MapTask. 856 "name": "A String", # User-provided name of this operation. 857 "read": { # An instruction that reads records. # Additional information for Read instructions. 858 # Takes no inputs, produces one output. 859 "source": { # A source that records can be read and decoded from. # The source to read from. 860 "codec": { # The codec to use to decode data read from the source. 861 "a_key": "", # Properties of the object. 862 }, 863 "baseSpecs": [ # While splitting, sources may specify the produced bundles 864 # as differences against another source, in order to save backend-side 865 # memory and allow bigger jobs. For details, see SourceSplitRequest. 866 # To support this use case, the full set of parameters of the source 867 # is logically obtained by taking the latest explicitly specified value 868 # of each parameter in the order: 869 # base_specs (later items win), spec (overrides anything in base_specs). 870 { 871 "a_key": "", # Properties of the object. 872 }, 873 ], 874 "spec": { # The source to read from, plus its parameters. 875 "a_key": "", # Properties of the object. 876 }, 877 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 878 # doesn't need splitting, and using SourceSplitRequest on it would 879 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 880 # 881 # E.g. a file splitter may set this to true when splitting a single file 882 # into a set of byte ranges of appropriate size, and set this 883 # to false when splitting a filepattern into individual files. 884 # However, for efficiency, a file splitter may decide to produce 885 # file subranges directly from the filepattern to avoid a splitting 886 # round-trip. 887 # 888 # See SourceSplitRequest for an overview of the splitting process. 889 # 890 # This field is meaningful only in the Source objects populated 891 # by the user (e.g. when filling in a DerivedSource). 892 # Source objects supplied by the framework to the user don't have 893 # this field populated. 894 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 895 # avoiding a SourceGetMetadataOperation roundtrip 896 # (see SourceOperationRequest). 897 # 898 # This field is meaningful only in the Source objects populated 899 # by the user (e.g. when filling in a DerivedSource). 900 # Source objects supplied by the framework to the user don't have 901 # this field populated. 902 # and tuning the pipeline, etc. 903 "infinite": True or False, # Specifies that the size of this source is known to be infinite 904 # (this is a streaming source). 905 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 906 # read from this source. This estimate is in terms of external storage 907 # size, before any decompression or other processing done by the reader. 908 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 909 # the (encoded) keys in lexicographically sorted order. 910 }, 911 }, 912 }, 913 "outputs": [ # Describes the outputs of the instruction. 914 { # An output of an instruction. 915 "name": "A String", # The user-provided name of this output. 916 "onlyCountKeyBytes": True or False, # For system-generated byte and mean byte metrics, certain instructions 917 # should only report the key size. 918 "codec": { # The codec to use to encode data being written via this output. 919 "a_key": "", # Properties of the object. 920 }, 921 "systemName": "A String", # System-defined name of this output. 922 # Unique across the workflow. 923 "originalName": "A String", # System-defined name for this output in the original workflow graph. 924 # Outputs that do not contribute to an original instruction do not set this. 925 "onlyCountValueBytes": True or False, # For system-generated byte and mean byte metrics, certain instructions 926 # should only report the value size. 927 }, 928 ], 929 "partialGroupByKey": { # An instruction that does a partial group-by-key. # Additional information for PartialGroupByKey instructions. 930 # One input and one output. 931 "sideInputs": [ # Zero or more side inputs. 932 { # Information about a side input of a DoFn or an input of a SeqDoFn. 933 "sources": [ # The source(s) to read element(s) from to get the value of this side input. 934 # If more than one source, then the elements are taken from the 935 # sources, in the specified order if order matters. 936 # At least one source is required. 937 { # A source that records can be read and decoded from. 938 "codec": { # The codec to use to decode data read from the source. 939 "a_key": "", # Properties of the object. 940 }, 941 "baseSpecs": [ # While splitting, sources may specify the produced bundles 942 # as differences against another source, in order to save backend-side 943 # memory and allow bigger jobs. For details, see SourceSplitRequest. 944 # To support this use case, the full set of parameters of the source 945 # is logically obtained by taking the latest explicitly specified value 946 # of each parameter in the order: 947 # base_specs (later items win), spec (overrides anything in base_specs). 948 { 949 "a_key": "", # Properties of the object. 950 }, 951 ], 952 "spec": { # The source to read from, plus its parameters. 953 "a_key": "", # Properties of the object. 954 }, 955 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 956 # doesn't need splitting, and using SourceSplitRequest on it would 957 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 958 # 959 # E.g. a file splitter may set this to true when splitting a single file 960 # into a set of byte ranges of appropriate size, and set this 961 # to false when splitting a filepattern into individual files. 962 # However, for efficiency, a file splitter may decide to produce 963 # file subranges directly from the filepattern to avoid a splitting 964 # round-trip. 965 # 966 # See SourceSplitRequest for an overview of the splitting process. 967 # 968 # This field is meaningful only in the Source objects populated 969 # by the user (e.g. when filling in a DerivedSource). 970 # Source objects supplied by the framework to the user don't have 971 # this field populated. 972 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 973 # avoiding a SourceGetMetadataOperation roundtrip 974 # (see SourceOperationRequest). 975 # 976 # This field is meaningful only in the Source objects populated 977 # by the user (e.g. when filling in a DerivedSource). 978 # Source objects supplied by the framework to the user don't have 979 # this field populated. 980 # and tuning the pipeline, etc. 981 "infinite": True or False, # Specifies that the size of this source is known to be infinite 982 # (this is a streaming source). 983 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 984 # read from this source. This estimate is in terms of external storage 985 # size, before any decompression or other processing done by the reader. 986 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 987 # the (encoded) keys in lexicographically sorted order. 988 }, 989 }, 990 ], 991 "kind": { # How to interpret the source element(s) as a side input value. 992 "a_key": "", # Properties of the object. 993 }, 994 "tag": "A String", # The id of the tag the user code will access this side input by; 995 # this should correspond to the tag of some MultiOutputInfo. 996 }, 997 ], 998 "originalCombineValuesInputStoreName": "A String", # If this instruction includes a combining function this is the name of the 999 # intermediate store between the GBK and the CombineValues. 1000 "originalCombineValuesStepName": "A String", # If this instruction includes a combining function, this is the name of the 1001 # CombineValues instruction lifted into this instruction. 1002 "valueCombiningFn": { # The value combining function to invoke. 1003 "a_key": "", # Properties of the object. 1004 }, 1005 "input": { # An input of an instruction, as a reference to an output of a # Describes the input to the partial group-by-key instruction. 1006 # producer instruction. 1007 "outputNum": 42, # The output index (origin zero) within the producer. 1008 "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces 1009 # the output to be consumed by this input. This index is relative 1010 # to the list of instructions in this input's instruction's 1011 # containing MapTask. 1012 }, 1013 "inputElementCodec": { # The codec to use for interpreting an element in the input PTable. 1014 "a_key": "", # Properties of the object. 1015 }, 1016 }, 1017 "write": { # An instruction that writes records. # Additional information for Write instructions. 1018 # Takes one input, produces no outputs. 1019 "input": { # An input of an instruction, as a reference to an output of a # The input. 1020 # producer instruction. 1021 "outputNum": 42, # The output index (origin zero) within the producer. 1022 "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces 1023 # the output to be consumed by this input. This index is relative 1024 # to the list of instructions in this input's instruction's 1025 # containing MapTask. 1026 }, 1027 "sink": { # A sink that records can be encoded and written to. # The sink to write to. 1028 "codec": { # The codec to use to encode data written to the sink. 1029 "a_key": "", # Properties of the object. 1030 }, 1031 "spec": { # The sink to write to, plus its parameters. 1032 "a_key": "", # Properties of the object. 1033 }, 1034 }, 1035 }, 1036 "systemName": "A String", # System-defined name of this operation. 1037 # Unique across the workflow. 1038 "flatten": { # An instruction that copies its inputs (zero or more) to its (single) output. # Additional information for Flatten instructions. 1039 "inputs": [ # Describes the inputs to the flatten instruction. 1040 { # An input of an instruction, as a reference to an output of a 1041 # producer instruction. 1042 "outputNum": 42, # The output index (origin zero) within the producer. 1043 "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces 1044 # the output to be consumed by this input. This index is relative 1045 # to the list of instructions in this input's instruction's 1046 # containing MapTask. 1047 }, 1048 ], 1049 }, 1050 "originalName": "A String", # System-defined name for the operation in the original workflow graph. 1051 "parDo": { # An instruction that does a ParDo operation. # Additional information for ParDo instructions. 1052 # Takes one main input and zero or more side inputs, and produces 1053 # zero or more outputs. 1054 # Runs user code. 1055 "sideInputs": [ # Zero or more side inputs. 1056 { # Information about a side input of a DoFn or an input of a SeqDoFn. 1057 "sources": [ # The source(s) to read element(s) from to get the value of this side input. 1058 # If more than one source, then the elements are taken from the 1059 # sources, in the specified order if order matters. 1060 # At least one source is required. 1061 { # A source that records can be read and decoded from. 1062 "codec": { # The codec to use to decode data read from the source. 1063 "a_key": "", # Properties of the object. 1064 }, 1065 "baseSpecs": [ # While splitting, sources may specify the produced bundles 1066 # as differences against another source, in order to save backend-side 1067 # memory and allow bigger jobs. For details, see SourceSplitRequest. 1068 # To support this use case, the full set of parameters of the source 1069 # is logically obtained by taking the latest explicitly specified value 1070 # of each parameter in the order: 1071 # base_specs (later items win), spec (overrides anything in base_specs). 1072 { 1073 "a_key": "", # Properties of the object. 1074 }, 1075 ], 1076 "spec": { # The source to read from, plus its parameters. 1077 "a_key": "", # Properties of the object. 1078 }, 1079 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 1080 # doesn't need splitting, and using SourceSplitRequest on it would 1081 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 1082 # 1083 # E.g. a file splitter may set this to true when splitting a single file 1084 # into a set of byte ranges of appropriate size, and set this 1085 # to false when splitting a filepattern into individual files. 1086 # However, for efficiency, a file splitter may decide to produce 1087 # file subranges directly from the filepattern to avoid a splitting 1088 # round-trip. 1089 # 1090 # See SourceSplitRequest for an overview of the splitting process. 1091 # 1092 # This field is meaningful only in the Source objects populated 1093 # by the user (e.g. when filling in a DerivedSource). 1094 # Source objects supplied by the framework to the user don't have 1095 # this field populated. 1096 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 1097 # avoiding a SourceGetMetadataOperation roundtrip 1098 # (see SourceOperationRequest). 1099 # 1100 # This field is meaningful only in the Source objects populated 1101 # by the user (e.g. when filling in a DerivedSource). 1102 # Source objects supplied by the framework to the user don't have 1103 # this field populated. 1104 # and tuning the pipeline, etc. 1105 "infinite": True or False, # Specifies that the size of this source is known to be infinite 1106 # (this is a streaming source). 1107 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 1108 # read from this source. This estimate is in terms of external storage 1109 # size, before any decompression or other processing done by the reader. 1110 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 1111 # the (encoded) keys in lexicographically sorted order. 1112 }, 1113 }, 1114 ], 1115 "kind": { # How to interpret the source element(s) as a side input value. 1116 "a_key": "", # Properties of the object. 1117 }, 1118 "tag": "A String", # The id of the tag the user code will access this side input by; 1119 # this should correspond to the tag of some MultiOutputInfo. 1120 }, 1121 ], 1122 "input": { # An input of an instruction, as a reference to an output of a # The input. 1123 # producer instruction. 1124 "outputNum": 42, # The output index (origin zero) within the producer. 1125 "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces 1126 # the output to be consumed by this input. This index is relative 1127 # to the list of instructions in this input's instruction's 1128 # containing MapTask. 1129 }, 1130 "multiOutputInfos": [ # Information about each of the outputs, if user_fn is a MultiDoFn. 1131 { # Information about an output of a multi-output DoFn. 1132 "tag": "A String", # The id of the tag the user code will emit to this output by; this 1133 # should correspond to the tag of some SideInputInfo. 1134 }, 1135 ], 1136 "numOutputs": 42, # The number of outputs. 1137 "userFn": { # The user function to invoke. 1138 "a_key": "", # Properties of the object. 1139 }, 1140 }, 1141 }, 1142 ], 1143 }, 1144 ], 1145 "maxWorkItemCommitBytes": "A String", # Maximum size for work item commit supported windmill storage layer. 1146 "windmillServiceEndpoint": "A String", # If present, the worker must use this endpoint to communicate with Windmill 1147 # Service dispatchers, otherwise the worker must continue to use whatever 1148 # endpoint it had been using. 1149 }, 1150 "configuration": "A String", # Work item-specific configuration as an opaque blob. 1151 "shellTask": { # A task which consists of a shell command for the worker to execute. # Additional information for ShellTask WorkItems. 1152 "command": "A String", # The shell command to run. 1153 "exitCode": 42, # Exit code for the task. 1154 }, 1155 "id": "A String", # Identifies this WorkItem. 1156 }, 1157 ], 1158 "unifiedWorkerResponse": { # Untranslated bag-of-bytes WorkResponse for UnifiedWorker. 1159 "a_key": "", # Properties of the object. Contains field @type with type URL. 1160 }, 1161 }</pre> 1162</div> 1163 1164<div class="method"> 1165 <code class="details" id="reportStatus">reportStatus(projectId, jobId, body, x__xgafv=None)</code> 1166 <pre>Reports the status of dataflow WorkItems leased by a worker. 1167 1168Args: 1169 projectId: string, The project which owns the WorkItem's job. (required) 1170 jobId: string, The job which the WorkItem is part of. (required) 1171 body: object, The request body. (required) 1172 The object takes the form of: 1173 1174{ # Request to report the status of WorkItems. 1175 "workerId": "A String", # The ID of the worker reporting the WorkItem status. If this 1176 # does not match the ID of the worker which the Dataflow service 1177 # believes currently has the lease on the WorkItem, the report 1178 # will be dropped (with an error response). 1179 "unifiedWorkerRequest": { # Untranslated bag-of-bytes WorkProgressUpdateRequest from UnifiedWorker. 1180 "a_key": "", # Properties of the object. Contains field @type with type URL. 1181 }, 1182 "currentWorkerTime": "A String", # The current timestamp at the worker. 1183 "workItemStatuses": [ # The order is unimportant, except that the order of the 1184 # WorkItemServiceState messages in the ReportWorkItemStatusResponse 1185 # corresponds to the order of WorkItemStatus messages here. 1186 { # Conveys a worker's progress through the work described by a WorkItem. 1187 "reportIndex": "A String", # The report index. When a WorkItem is leased, the lease will 1188 # contain an initial report index. When a WorkItem's status is 1189 # reported to the system, the report should be sent with 1190 # that report index, and the response will contain the index the 1191 # worker should use for the next report. Reports received with 1192 # unexpected index values will be rejected by the service. 1193 # 1194 # In order to preserve idempotency, the worker should not alter the 1195 # contents of a report, even if the worker must submit the same 1196 # report multiple times before getting back a response. The worker 1197 # should not submit a subsequent report until the response for the 1198 # previous report had been received from the service. 1199 "errors": [ # Specifies errors which occurred during processing. If errors are 1200 # provided, and completed = true, then the WorkItem is considered 1201 # to have failed. 1202 { # The `Status` type defines a logical error model that is suitable for 1203 # different programming environments, including REST APIs and RPC APIs. It is 1204 # used by [gRPC](https://github.com/grpc). The error model is designed to be: 1205 # 1206 # - Simple to use and understand for most users 1207 # - Flexible enough to meet unexpected needs 1208 # 1209 # # Overview 1210 # 1211 # The `Status` message contains three pieces of data: error code, error 1212 # message, and error details. The error code should be an enum value of 1213 # google.rpc.Code, but it may accept additional error codes if needed. The 1214 # error message should be a developer-facing English message that helps 1215 # developers *understand* and *resolve* the error. If a localized user-facing 1216 # error message is needed, put the localized message in the error details or 1217 # localize it in the client. The optional error details may contain arbitrary 1218 # information about the error. There is a predefined set of error detail types 1219 # in the package `google.rpc` that can be used for common error conditions. 1220 # 1221 # # Language mapping 1222 # 1223 # The `Status` message is the logical representation of the error model, but it 1224 # is not necessarily the actual wire format. When the `Status` message is 1225 # exposed in different client libraries and different wire protocols, it can be 1226 # mapped differently. For example, it will likely be mapped to some exceptions 1227 # in Java, but more likely mapped to some error codes in C. 1228 # 1229 # # Other uses 1230 # 1231 # The error model and the `Status` message can be used in a variety of 1232 # environments, either with or without APIs, to provide a 1233 # consistent developer experience across different environments. 1234 # 1235 # Example uses of this error model include: 1236 # 1237 # - Partial errors. If a service needs to return partial errors to the client, 1238 # it may embed the `Status` in the normal response to indicate the partial 1239 # errors. 1240 # 1241 # - Workflow errors. A typical workflow has multiple steps. Each step may 1242 # have a `Status` message for error reporting. 1243 # 1244 # - Batch operations. If a client uses batch request and batch response, the 1245 # `Status` message should be used directly inside batch response, one for 1246 # each error sub-response. 1247 # 1248 # - Asynchronous operations. If an API call embeds asynchronous operation 1249 # results in its response, the status of those operations should be 1250 # represented directly using the `Status` message. 1251 # 1252 # - Logging. If some API errors are stored in logs, the message `Status` could 1253 # be used directly after any stripping needed for security/privacy reasons. 1254 "message": "A String", # A developer-facing error message, which should be in English. Any 1255 # user-facing error message should be localized and sent in the 1256 # google.rpc.Status.details field, or localized by the client. 1257 "code": 42, # The status code, which should be an enum value of google.rpc.Code. 1258 "details": [ # A list of messages that carry the error details. There is a common set of 1259 # message types for APIs to use. 1260 { 1261 "a_key": "", # Properties of the object. Contains field @type with type URL. 1262 }, 1263 ], 1264 }, 1265 ], 1266 "sourceOperationResponse": { # The result of a SourceOperationRequest, specified in # If the work item represented a SourceOperationRequest, and the work 1267 # is completed, contains the result of the operation. 1268 # ReportWorkItemStatusRequest.source_operation when the work item 1269 # is completed. 1270 "getMetadata": { # The result of a SourceGetMetadataOperation. # A response to a request to get metadata about a source. 1271 "metadata": { # Metadata about a Source useful for automatically optimizing # The computed metadata. 1272 # and tuning the pipeline, etc. 1273 "infinite": True or False, # Specifies that the size of this source is known to be infinite 1274 # (this is a streaming source). 1275 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 1276 # read from this source. This estimate is in terms of external storage 1277 # size, before any decompression or other processing done by the reader. 1278 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 1279 # the (encoded) keys in lexicographically sorted order. 1280 }, 1281 }, 1282 "split": { # The response to a SourceSplitRequest. # A response to a request to split a source. 1283 "outcome": "A String", # Indicates whether splitting happened and produced a list of bundles. 1284 # If this is USE_CURRENT_SOURCE_AS_IS, the current source should 1285 # be processed "as is" without splitting. "bundles" is ignored in this case. 1286 # If this is SPLITTING_HAPPENED, then "bundles" contains a list of 1287 # bundles into which the source was split. 1288 "bundles": [ # If outcome is SPLITTING_HAPPENED, then this is a list of bundles 1289 # into which the source was split. Otherwise this field is ignored. 1290 # This list can be empty, which means the source represents an empty input. 1291 { # Specification of one of the bundles produced as a result of splitting 1292 # a Source (e.g. when executing a SourceSplitRequest, or when 1293 # splitting an active task using WorkItemStatus.dynamic_source_split), 1294 # relative to the source being split. 1295 "derivationMode": "A String", # What source to base the produced source on (if any). 1296 "source": { # A source that records can be read and decoded from. # Specification of the source. 1297 "codec": { # The codec to use to decode data read from the source. 1298 "a_key": "", # Properties of the object. 1299 }, 1300 "baseSpecs": [ # While splitting, sources may specify the produced bundles 1301 # as differences against another source, in order to save backend-side 1302 # memory and allow bigger jobs. For details, see SourceSplitRequest. 1303 # To support this use case, the full set of parameters of the source 1304 # is logically obtained by taking the latest explicitly specified value 1305 # of each parameter in the order: 1306 # base_specs (later items win), spec (overrides anything in base_specs). 1307 { 1308 "a_key": "", # Properties of the object. 1309 }, 1310 ], 1311 "spec": { # The source to read from, plus its parameters. 1312 "a_key": "", # Properties of the object. 1313 }, 1314 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 1315 # doesn't need splitting, and using SourceSplitRequest on it would 1316 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 1317 # 1318 # E.g. a file splitter may set this to true when splitting a single file 1319 # into a set of byte ranges of appropriate size, and set this 1320 # to false when splitting a filepattern into individual files. 1321 # However, for efficiency, a file splitter may decide to produce 1322 # file subranges directly from the filepattern to avoid a splitting 1323 # round-trip. 1324 # 1325 # See SourceSplitRequest for an overview of the splitting process. 1326 # 1327 # This field is meaningful only in the Source objects populated 1328 # by the user (e.g. when filling in a DerivedSource). 1329 # Source objects supplied by the framework to the user don't have 1330 # this field populated. 1331 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 1332 # avoiding a SourceGetMetadataOperation roundtrip 1333 # (see SourceOperationRequest). 1334 # 1335 # This field is meaningful only in the Source objects populated 1336 # by the user (e.g. when filling in a DerivedSource). 1337 # Source objects supplied by the framework to the user don't have 1338 # this field populated. 1339 # and tuning the pipeline, etc. 1340 "infinite": True or False, # Specifies that the size of this source is known to be infinite 1341 # (this is a streaming source). 1342 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 1343 # read from this source. This estimate is in terms of external storage 1344 # size, before any decompression or other processing done by the reader. 1345 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 1346 # the (encoded) keys in lexicographically sorted order. 1347 }, 1348 }, 1349 }, 1350 ], 1351 "shards": [ # DEPRECATED in favor of bundles. 1352 { # DEPRECATED in favor of DerivedSource. 1353 "derivationMode": "A String", # DEPRECATED 1354 "source": { # A source that records can be read and decoded from. # DEPRECATED 1355 "codec": { # The codec to use to decode data read from the source. 1356 "a_key": "", # Properties of the object. 1357 }, 1358 "baseSpecs": [ # While splitting, sources may specify the produced bundles 1359 # as differences against another source, in order to save backend-side 1360 # memory and allow bigger jobs. For details, see SourceSplitRequest. 1361 # To support this use case, the full set of parameters of the source 1362 # is logically obtained by taking the latest explicitly specified value 1363 # of each parameter in the order: 1364 # base_specs (later items win), spec (overrides anything in base_specs). 1365 { 1366 "a_key": "", # Properties of the object. 1367 }, 1368 ], 1369 "spec": { # The source to read from, plus its parameters. 1370 "a_key": "", # Properties of the object. 1371 }, 1372 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 1373 # doesn't need splitting, and using SourceSplitRequest on it would 1374 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 1375 # 1376 # E.g. a file splitter may set this to true when splitting a single file 1377 # into a set of byte ranges of appropriate size, and set this 1378 # to false when splitting a filepattern into individual files. 1379 # However, for efficiency, a file splitter may decide to produce 1380 # file subranges directly from the filepattern to avoid a splitting 1381 # round-trip. 1382 # 1383 # See SourceSplitRequest for an overview of the splitting process. 1384 # 1385 # This field is meaningful only in the Source objects populated 1386 # by the user (e.g. when filling in a DerivedSource). 1387 # Source objects supplied by the framework to the user don't have 1388 # this field populated. 1389 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 1390 # avoiding a SourceGetMetadataOperation roundtrip 1391 # (see SourceOperationRequest). 1392 # 1393 # This field is meaningful only in the Source objects populated 1394 # by the user (e.g. when filling in a DerivedSource). 1395 # Source objects supplied by the framework to the user don't have 1396 # this field populated. 1397 # and tuning the pipeline, etc. 1398 "infinite": True or False, # Specifies that the size of this source is known to be infinite 1399 # (this is a streaming source). 1400 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 1401 # read from this source. This estimate is in terms of external storage 1402 # size, before any decompression or other processing done by the reader. 1403 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 1404 # the (encoded) keys in lexicographically sorted order. 1405 }, 1406 }, 1407 }, 1408 ], 1409 }, 1410 }, 1411 "stopPosition": { # Position defines a position within a collection of data. The value # A worker may split an active map task in two parts, "primary" and 1412 # "residual", continuing to process the primary part and returning the 1413 # residual part into the pool of available work. 1414 # This event is called a "dynamic split" and is critical to the dynamic 1415 # work rebalancing feature. The two obtained sub-tasks are called 1416 # "parts" of the split. 1417 # The parts, if concatenated, must represent the same input as would 1418 # be read by the current task if the split did not happen. 1419 # The exact way in which the original task is decomposed into the two 1420 # parts is specified either as a position demarcating them 1421 # (stop_position), or explicitly as two DerivedSources, if this 1422 # task consumes a user-defined source type (dynamic_source_split). 1423 # 1424 # The "current" task is adjusted as a result of the split: after a task 1425 # with range [A, B) sends a stop_position update at C, its range is 1426 # considered to be [A, C), e.g.: 1427 # * Progress should be interpreted relative to the new range, e.g. 1428 # "75% completed" means "75% of [A, C) completed" 1429 # * The worker should interpret proposed_stop_position relative to the 1430 # new range, e.g. "split at 68%" should be interpreted as 1431 # "split at 68% of [A, C)". 1432 # * If the worker chooses to split again using stop_position, only 1433 # stop_positions in [A, C) will be accepted. 1434 # * Etc. 1435 # dynamic_source_split has similar semantics: e.g., if a task with 1436 # source S splits using dynamic_source_split into {P, R} 1437 # (where P and R must be together equivalent to S), then subsequent 1438 # progress and proposed_stop_position should be interpreted relative 1439 # to P, and in a potential subsequent dynamic_source_split into {P', R'}, 1440 # P' and R' must be together equivalent to P, etc. 1441 # can be either the end position, a key (used with ordered 1442 # collections), a byte offset, or a record index. 1443 "end": True or False, # Position is past all other positions. Also useful for the end 1444 # position of an unbounded range. 1445 "recordIndex": "A String", # Position is a record index. 1446 "byteOffset": "A String", # Position is a byte offset. 1447 "key": "A String", # Position is a string key, ordered lexicographically. 1448 "concatPosition": { # A position that encapsulates an inner position and an index for the inner # CloudPosition is a concat position. 1449 # position. A ConcatPosition can be used by a reader of a source that 1450 # encapsulates a set of other sources. 1451 "position": # Object with schema name: Position # Position within the inner source. 1452 "index": 42, # Index of the inner source. 1453 }, 1454 "shufflePosition": "A String", # CloudPosition is a base64 encoded BatchShufflePosition (with FIXED 1455 # sharding). 1456 }, 1457 "sourceFork": { # DEPRECATED in favor of DynamicSourceSplit. # DEPRECATED in favor of dynamic_source_split. 1458 "residualSource": { # Specification of one of the bundles produced as a result of splitting # DEPRECATED 1459 # a Source (e.g. when executing a SourceSplitRequest, or when 1460 # splitting an active task using WorkItemStatus.dynamic_source_split), 1461 # relative to the source being split. 1462 "derivationMode": "A String", # What source to base the produced source on (if any). 1463 "source": { # A source that records can be read and decoded from. # Specification of the source. 1464 "codec": { # The codec to use to decode data read from the source. 1465 "a_key": "", # Properties of the object. 1466 }, 1467 "baseSpecs": [ # While splitting, sources may specify the produced bundles 1468 # as differences against another source, in order to save backend-side 1469 # memory and allow bigger jobs. For details, see SourceSplitRequest. 1470 # To support this use case, the full set of parameters of the source 1471 # is logically obtained by taking the latest explicitly specified value 1472 # of each parameter in the order: 1473 # base_specs (later items win), spec (overrides anything in base_specs). 1474 { 1475 "a_key": "", # Properties of the object. 1476 }, 1477 ], 1478 "spec": { # The source to read from, plus its parameters. 1479 "a_key": "", # Properties of the object. 1480 }, 1481 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 1482 # doesn't need splitting, and using SourceSplitRequest on it would 1483 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 1484 # 1485 # E.g. a file splitter may set this to true when splitting a single file 1486 # into a set of byte ranges of appropriate size, and set this 1487 # to false when splitting a filepattern into individual files. 1488 # However, for efficiency, a file splitter may decide to produce 1489 # file subranges directly from the filepattern to avoid a splitting 1490 # round-trip. 1491 # 1492 # See SourceSplitRequest for an overview of the splitting process. 1493 # 1494 # This field is meaningful only in the Source objects populated 1495 # by the user (e.g. when filling in a DerivedSource). 1496 # Source objects supplied by the framework to the user don't have 1497 # this field populated. 1498 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 1499 # avoiding a SourceGetMetadataOperation roundtrip 1500 # (see SourceOperationRequest). 1501 # 1502 # This field is meaningful only in the Source objects populated 1503 # by the user (e.g. when filling in a DerivedSource). 1504 # Source objects supplied by the framework to the user don't have 1505 # this field populated. 1506 # and tuning the pipeline, etc. 1507 "infinite": True or False, # Specifies that the size of this source is known to be infinite 1508 # (this is a streaming source). 1509 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 1510 # read from this source. This estimate is in terms of external storage 1511 # size, before any decompression or other processing done by the reader. 1512 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 1513 # the (encoded) keys in lexicographically sorted order. 1514 }, 1515 }, 1516 }, 1517 "primarySource": { # Specification of one of the bundles produced as a result of splitting # DEPRECATED 1518 # a Source (e.g. when executing a SourceSplitRequest, or when 1519 # splitting an active task using WorkItemStatus.dynamic_source_split), 1520 # relative to the source being split. 1521 "derivationMode": "A String", # What source to base the produced source on (if any). 1522 "source": { # A source that records can be read and decoded from. # Specification of the source. 1523 "codec": { # The codec to use to decode data read from the source. 1524 "a_key": "", # Properties of the object. 1525 }, 1526 "baseSpecs": [ # While splitting, sources may specify the produced bundles 1527 # as differences against another source, in order to save backend-side 1528 # memory and allow bigger jobs. For details, see SourceSplitRequest. 1529 # To support this use case, the full set of parameters of the source 1530 # is logically obtained by taking the latest explicitly specified value 1531 # of each parameter in the order: 1532 # base_specs (later items win), spec (overrides anything in base_specs). 1533 { 1534 "a_key": "", # Properties of the object. 1535 }, 1536 ], 1537 "spec": { # The source to read from, plus its parameters. 1538 "a_key": "", # Properties of the object. 1539 }, 1540 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 1541 # doesn't need splitting, and using SourceSplitRequest on it would 1542 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 1543 # 1544 # E.g. a file splitter may set this to true when splitting a single file 1545 # into a set of byte ranges of appropriate size, and set this 1546 # to false when splitting a filepattern into individual files. 1547 # However, for efficiency, a file splitter may decide to produce 1548 # file subranges directly from the filepattern to avoid a splitting 1549 # round-trip. 1550 # 1551 # See SourceSplitRequest for an overview of the splitting process. 1552 # 1553 # This field is meaningful only in the Source objects populated 1554 # by the user (e.g. when filling in a DerivedSource). 1555 # Source objects supplied by the framework to the user don't have 1556 # this field populated. 1557 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 1558 # avoiding a SourceGetMetadataOperation roundtrip 1559 # (see SourceOperationRequest). 1560 # 1561 # This field is meaningful only in the Source objects populated 1562 # by the user (e.g. when filling in a DerivedSource). 1563 # Source objects supplied by the framework to the user don't have 1564 # this field populated. 1565 # and tuning the pipeline, etc. 1566 "infinite": True or False, # Specifies that the size of this source is known to be infinite 1567 # (this is a streaming source). 1568 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 1569 # read from this source. This estimate is in terms of external storage 1570 # size, before any decompression or other processing done by the reader. 1571 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 1572 # the (encoded) keys in lexicographically sorted order. 1573 }, 1574 }, 1575 }, 1576 "primary": { # DEPRECATED in favor of DerivedSource. # DEPRECATED 1577 "derivationMode": "A String", # DEPRECATED 1578 "source": { # A source that records can be read and decoded from. # DEPRECATED 1579 "codec": { # The codec to use to decode data read from the source. 1580 "a_key": "", # Properties of the object. 1581 }, 1582 "baseSpecs": [ # While splitting, sources may specify the produced bundles 1583 # as differences against another source, in order to save backend-side 1584 # memory and allow bigger jobs. For details, see SourceSplitRequest. 1585 # To support this use case, the full set of parameters of the source 1586 # is logically obtained by taking the latest explicitly specified value 1587 # of each parameter in the order: 1588 # base_specs (later items win), spec (overrides anything in base_specs). 1589 { 1590 "a_key": "", # Properties of the object. 1591 }, 1592 ], 1593 "spec": { # The source to read from, plus its parameters. 1594 "a_key": "", # Properties of the object. 1595 }, 1596 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 1597 # doesn't need splitting, and using SourceSplitRequest on it would 1598 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 1599 # 1600 # E.g. a file splitter may set this to true when splitting a single file 1601 # into a set of byte ranges of appropriate size, and set this 1602 # to false when splitting a filepattern into individual files. 1603 # However, for efficiency, a file splitter may decide to produce 1604 # file subranges directly from the filepattern to avoid a splitting 1605 # round-trip. 1606 # 1607 # See SourceSplitRequest for an overview of the splitting process. 1608 # 1609 # This field is meaningful only in the Source objects populated 1610 # by the user (e.g. when filling in a DerivedSource). 1611 # Source objects supplied by the framework to the user don't have 1612 # this field populated. 1613 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 1614 # avoiding a SourceGetMetadataOperation roundtrip 1615 # (see SourceOperationRequest). 1616 # 1617 # This field is meaningful only in the Source objects populated 1618 # by the user (e.g. when filling in a DerivedSource). 1619 # Source objects supplied by the framework to the user don't have 1620 # this field populated. 1621 # and tuning the pipeline, etc. 1622 "infinite": True or False, # Specifies that the size of this source is known to be infinite 1623 # (this is a streaming source). 1624 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 1625 # read from this source. This estimate is in terms of external storage 1626 # size, before any decompression or other processing done by the reader. 1627 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 1628 # the (encoded) keys in lexicographically sorted order. 1629 }, 1630 }, 1631 }, 1632 "residual": { # DEPRECATED in favor of DerivedSource. # DEPRECATED 1633 "derivationMode": "A String", # DEPRECATED 1634 "source": { # A source that records can be read and decoded from. # DEPRECATED 1635 "codec": { # The codec to use to decode data read from the source. 1636 "a_key": "", # Properties of the object. 1637 }, 1638 "baseSpecs": [ # While splitting, sources may specify the produced bundles 1639 # as differences against another source, in order to save backend-side 1640 # memory and allow bigger jobs. For details, see SourceSplitRequest. 1641 # To support this use case, the full set of parameters of the source 1642 # is logically obtained by taking the latest explicitly specified value 1643 # of each parameter in the order: 1644 # base_specs (later items win), spec (overrides anything in base_specs). 1645 { 1646 "a_key": "", # Properties of the object. 1647 }, 1648 ], 1649 "spec": { # The source to read from, plus its parameters. 1650 "a_key": "", # Properties of the object. 1651 }, 1652 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 1653 # doesn't need splitting, and using SourceSplitRequest on it would 1654 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 1655 # 1656 # E.g. a file splitter may set this to true when splitting a single file 1657 # into a set of byte ranges of appropriate size, and set this 1658 # to false when splitting a filepattern into individual files. 1659 # However, for efficiency, a file splitter may decide to produce 1660 # file subranges directly from the filepattern to avoid a splitting 1661 # round-trip. 1662 # 1663 # See SourceSplitRequest for an overview of the splitting process. 1664 # 1665 # This field is meaningful only in the Source objects populated 1666 # by the user (e.g. when filling in a DerivedSource). 1667 # Source objects supplied by the framework to the user don't have 1668 # this field populated. 1669 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 1670 # avoiding a SourceGetMetadataOperation roundtrip 1671 # (see SourceOperationRequest). 1672 # 1673 # This field is meaningful only in the Source objects populated 1674 # by the user (e.g. when filling in a DerivedSource). 1675 # Source objects supplied by the framework to the user don't have 1676 # this field populated. 1677 # and tuning the pipeline, etc. 1678 "infinite": True or False, # Specifies that the size of this source is known to be infinite 1679 # (this is a streaming source). 1680 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 1681 # read from this source. This estimate is in terms of external storage 1682 # size, before any decompression or other processing done by the reader. 1683 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 1684 # the (encoded) keys in lexicographically sorted order. 1685 }, 1686 }, 1687 }, 1688 }, 1689 "requestedLeaseDuration": "A String", # Amount of time the worker requests for its lease. 1690 "completed": True or False, # True if the WorkItem was completed (successfully or unsuccessfully). 1691 "workItemId": "A String", # Identifies the WorkItem. 1692 "dynamicSourceSplit": { # When a task splits using WorkItemStatus.dynamic_source_split, this # See documentation of stop_position. 1693 # message describes the two parts of the split relative to the 1694 # description of the current task's input. 1695 "residual": { # Specification of one of the bundles produced as a result of splitting # Residual part (returned to the pool of work). 1696 # Specified relative to the previously-current source. 1697 # a Source (e.g. when executing a SourceSplitRequest, or when 1698 # splitting an active task using WorkItemStatus.dynamic_source_split), 1699 # relative to the source being split. 1700 "derivationMode": "A String", # What source to base the produced source on (if any). 1701 "source": { # A source that records can be read and decoded from. # Specification of the source. 1702 "codec": { # The codec to use to decode data read from the source. 1703 "a_key": "", # Properties of the object. 1704 }, 1705 "baseSpecs": [ # While splitting, sources may specify the produced bundles 1706 # as differences against another source, in order to save backend-side 1707 # memory and allow bigger jobs. For details, see SourceSplitRequest. 1708 # To support this use case, the full set of parameters of the source 1709 # is logically obtained by taking the latest explicitly specified value 1710 # of each parameter in the order: 1711 # base_specs (later items win), spec (overrides anything in base_specs). 1712 { 1713 "a_key": "", # Properties of the object. 1714 }, 1715 ], 1716 "spec": { # The source to read from, plus its parameters. 1717 "a_key": "", # Properties of the object. 1718 }, 1719 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 1720 # doesn't need splitting, and using SourceSplitRequest on it would 1721 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 1722 # 1723 # E.g. a file splitter may set this to true when splitting a single file 1724 # into a set of byte ranges of appropriate size, and set this 1725 # to false when splitting a filepattern into individual files. 1726 # However, for efficiency, a file splitter may decide to produce 1727 # file subranges directly from the filepattern to avoid a splitting 1728 # round-trip. 1729 # 1730 # See SourceSplitRequest for an overview of the splitting process. 1731 # 1732 # This field is meaningful only in the Source objects populated 1733 # by the user (e.g. when filling in a DerivedSource). 1734 # Source objects supplied by the framework to the user don't have 1735 # this field populated. 1736 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 1737 # avoiding a SourceGetMetadataOperation roundtrip 1738 # (see SourceOperationRequest). 1739 # 1740 # This field is meaningful only in the Source objects populated 1741 # by the user (e.g. when filling in a DerivedSource). 1742 # Source objects supplied by the framework to the user don't have 1743 # this field populated. 1744 # and tuning the pipeline, etc. 1745 "infinite": True or False, # Specifies that the size of this source is known to be infinite 1746 # (this is a streaming source). 1747 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 1748 # read from this source. This estimate is in terms of external storage 1749 # size, before any decompression or other processing done by the reader. 1750 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 1751 # the (encoded) keys in lexicographically sorted order. 1752 }, 1753 }, 1754 }, 1755 "primary": { # Specification of one of the bundles produced as a result of splitting # Primary part (continued to be processed by worker). 1756 # Specified relative to the previously-current source. 1757 # Becomes current. 1758 # a Source (e.g. when executing a SourceSplitRequest, or when 1759 # splitting an active task using WorkItemStatus.dynamic_source_split), 1760 # relative to the source being split. 1761 "derivationMode": "A String", # What source to base the produced source on (if any). 1762 "source": { # A source that records can be read and decoded from. # Specification of the source. 1763 "codec": { # The codec to use to decode data read from the source. 1764 "a_key": "", # Properties of the object. 1765 }, 1766 "baseSpecs": [ # While splitting, sources may specify the produced bundles 1767 # as differences against another source, in order to save backend-side 1768 # memory and allow bigger jobs. For details, see SourceSplitRequest. 1769 # To support this use case, the full set of parameters of the source 1770 # is logically obtained by taking the latest explicitly specified value 1771 # of each parameter in the order: 1772 # base_specs (later items win), spec (overrides anything in base_specs). 1773 { 1774 "a_key": "", # Properties of the object. 1775 }, 1776 ], 1777 "spec": { # The source to read from, plus its parameters. 1778 "a_key": "", # Properties of the object. 1779 }, 1780 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 1781 # doesn't need splitting, and using SourceSplitRequest on it would 1782 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 1783 # 1784 # E.g. a file splitter may set this to true when splitting a single file 1785 # into a set of byte ranges of appropriate size, and set this 1786 # to false when splitting a filepattern into individual files. 1787 # However, for efficiency, a file splitter may decide to produce 1788 # file subranges directly from the filepattern to avoid a splitting 1789 # round-trip. 1790 # 1791 # See SourceSplitRequest for an overview of the splitting process. 1792 # 1793 # This field is meaningful only in the Source objects populated 1794 # by the user (e.g. when filling in a DerivedSource). 1795 # Source objects supplied by the framework to the user don't have 1796 # this field populated. 1797 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 1798 # avoiding a SourceGetMetadataOperation roundtrip 1799 # (see SourceOperationRequest). 1800 # 1801 # This field is meaningful only in the Source objects populated 1802 # by the user (e.g. when filling in a DerivedSource). 1803 # Source objects supplied by the framework to the user don't have 1804 # this field populated. 1805 # and tuning the pipeline, etc. 1806 "infinite": True or False, # Specifies that the size of this source is known to be infinite 1807 # (this is a streaming source). 1808 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 1809 # read from this source. This estimate is in terms of external storage 1810 # size, before any decompression or other processing done by the reader. 1811 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 1812 # the (encoded) keys in lexicographically sorted order. 1813 }, 1814 }, 1815 }, 1816 }, 1817 "totalThrottlerWaitTimeSeconds": 3.14, # Total time the worker spent being throttled by external systems. 1818 "counterUpdates": [ # Worker output counters for this WorkItem. 1819 { # An update to a Counter sent from a worker. 1820 "floatingPointList": { # A metric value representing a list of floating point numbers. # List of floating point numbers, for Set. 1821 "elements": [ # Elements of the list. 1822 3.14, 1823 ], 1824 }, 1825 "floatingPoint": 3.14, # Floating point value for Sum, Max, Min. 1826 "integerMean": { # A representation of an integer mean metric contribution. # Integer mean aggregation value for Mean. 1827 "count": { # A representation of an int64, n, that is immune to precision loss when # The number of values being aggregated. 1828 # encoded in JSON. 1829 "lowBits": 42, # The low order bits: n & 0xffffffff. 1830 "highBits": 42, # The high order bits, including the sign: n >> 32. 1831 }, 1832 "sum": { # A representation of an int64, n, that is immune to precision loss when # The sum of all values being aggregated. 1833 # encoded in JSON. 1834 "lowBits": 42, # The low order bits: n & 0xffffffff. 1835 "highBits": 42, # The high order bits, including the sign: n >> 32. 1836 }, 1837 }, 1838 "boolean": True or False, # Boolean value for And, Or. 1839 "integerList": { # A metric value representing a list of integers. # List of integers, for Set. 1840 "elements": [ # Elements of the list. 1841 { # A representation of an int64, n, that is immune to precision loss when 1842 # encoded in JSON. 1843 "lowBits": 42, # The low order bits: n & 0xffffffff. 1844 "highBits": 42, # The high order bits, including the sign: n >> 32. 1845 }, 1846 ], 1847 }, 1848 "cumulative": True or False, # True if this counter is reported as the total cumulative aggregate 1849 # value accumulated since the worker started working on this WorkItem. 1850 # By default this is false, indicating that this counter is reported 1851 # as a delta. 1852 "shortId": "A String", # The service-generated short identifier for this counter. 1853 # The short_id -> (name, metadata) mapping is constant for the lifetime of 1854 # a job. 1855 "integerGauge": { # A metric value representing temporal values of a variable. # Gauge data 1856 "timestamp": "A String", # The time at which this value was measured. Measured as msecs from epoch. 1857 "value": { # A representation of an int64, n, that is immune to precision loss when # The value of the variable represented by this gauge. 1858 # encoded in JSON. 1859 "lowBits": 42, # The low order bits: n & 0xffffffff. 1860 "highBits": 42, # The high order bits, including the sign: n >> 32. 1861 }, 1862 }, 1863 "floatingPointMean": { # A representation of a floating point mean metric contribution. # Floating point mean aggregation value for Mean. 1864 "count": { # A representation of an int64, n, that is immune to precision loss when # The number of values being aggregated. 1865 # encoded in JSON. 1866 "lowBits": 42, # The low order bits: n & 0xffffffff. 1867 "highBits": 42, # The high order bits, including the sign: n >> 32. 1868 }, 1869 "sum": 3.14, # The sum of all values being aggregated. 1870 }, 1871 "internal": "", # Value for internally-defined counters used by the Dataflow service. 1872 "structuredNameAndMetadata": { # A single message which encapsulates structured name and metadata for a given # Counter structured name and metadata. 1873 # counter. 1874 "name": { # Identifies a counter within a per-job namespace. Counters whose structured # Structured name of the counter. 1875 # names are the same get merged into a single value for the job. 1876 "origin": "A String", # One of the standard Origins defined above. 1877 "executionStepName": "A String", # Name of the stage. An execution step contains multiple component steps. 1878 "name": "A String", # Counter name. Not necessarily globally-unique, but unique within the 1879 # context of the other fields. 1880 # Required. 1881 "workerId": "A String", # ID of a particular worker. 1882 "inputIndex": 42, # Index of an input collection that's being read from/written to as a side 1883 # input. 1884 # The index identifies a step's side inputs starting by 1 (e.g. the first 1885 # side input has input_index 1, the third has input_index 3). 1886 # Side inputs are identified by a pair of (original_step_name, input_index). 1887 # This field helps uniquely identify them. 1888 "originNamespace": "A String", # A string containing a more specific namespace of the counter's origin. 1889 "originalRequestingStepName": "A String", # The step name requesting an operation, such as GBK. 1890 # I.e. the ParDo causing a read/write from shuffle to occur, or a 1891 # read from side inputs. 1892 "portion": "A String", # Portion of this counter, either key or value. 1893 "componentStepName": "A String", # Name of the optimized step being executed by the workers. 1894 "originalStepName": "A String", # System generated name of the original step in the user's graph, before 1895 # optimization. 1896 }, 1897 "metadata": { # CounterMetadata includes all static non-name non-value counter attributes. # Metadata associated with a counter 1898 "standardUnits": "A String", # System defined Units, see above enum. 1899 "kind": "A String", # Counter aggregation kind. 1900 "description": "A String", # Human-readable description of the counter semantics. 1901 "otherUnits": "A String", # A string referring to the unit type. 1902 }, 1903 }, 1904 "nameAndKind": { # Basic metadata about a counter. # Counter name and aggregation type. 1905 "kind": "A String", # Counter aggregation kind. 1906 "name": "A String", # Name of the counter. 1907 }, 1908 "integer": { # A representation of an int64, n, that is immune to precision loss when # Integer value for Sum, Max, Min. 1909 # encoded in JSON. 1910 "lowBits": 42, # The low order bits: n & 0xffffffff. 1911 "highBits": 42, # The high order bits, including the sign: n >> 32. 1912 }, 1913 "distribution": { # A metric value representing a distribution. # Distribution data 1914 "count": { # A representation of an int64, n, that is immune to precision loss when # The count of the number of elements present in the distribution. 1915 # encoded in JSON. 1916 "lowBits": 42, # The low order bits: n & 0xffffffff. 1917 "highBits": 42, # The high order bits, including the sign: n >> 32. 1918 }, 1919 "min": { # A representation of an int64, n, that is immune to precision loss when # The minimum value present in the distribution. 1920 # encoded in JSON. 1921 "lowBits": 42, # The low order bits: n & 0xffffffff. 1922 "highBits": 42, # The high order bits, including the sign: n >> 32. 1923 }, 1924 "max": { # A representation of an int64, n, that is immune to precision loss when # The maximum value present in the distribution. 1925 # encoded in JSON. 1926 "lowBits": 42, # The low order bits: n & 0xffffffff. 1927 "highBits": 42, # The high order bits, including the sign: n >> 32. 1928 }, 1929 "sum": { # A representation of an int64, n, that is immune to precision loss when # Use an int64 since we'd prefer the added precision. If overflow is a common 1930 # problem we can detect it and use an additional int64 or a double. 1931 # encoded in JSON. 1932 "lowBits": 42, # The low order bits: n & 0xffffffff. 1933 "highBits": 42, # The high order bits, including the sign: n >> 32. 1934 }, 1935 "histogram": { # Histogram of value counts for a distribution. # (Optional) Histogram of value counts for the distribution. 1936 # 1937 # Buckets have an inclusive lower bound and exclusive upper bound and use 1938 # "1,2,5 bucketing": The first bucket range is from [0,1) and all subsequent 1939 # bucket boundaries are powers of ten multiplied by 1, 2, or 5. Thus, bucket 1940 # boundaries are 0, 1, 2, 5, 10, 20, 50, 100, 200, 500, 1000, ... 1941 # Negative values are not supported. 1942 "firstBucketOffset": 42, # Starting index of first stored bucket. The non-inclusive upper-bound of 1943 # the ith bucket is given by: 1944 # pow(10,(i-first_bucket_offset)/3) * (1,2,5)[(i-first_bucket_offset)%3] 1945 "bucketCounts": [ # Counts of values in each bucket. For efficiency, prefix and trailing 1946 # buckets with count = 0 are elided. Buckets can store the full range of 1947 # values of an unsigned long, with ULLONG_MAX falling into the 59th bucket 1948 # with range [1e19, 2e19). 1949 "A String", 1950 ], 1951 }, 1952 "sumOfSquares": 3.14, # Use a double since the sum of squares is likely to overflow int64. 1953 }, 1954 "stringList": { # A metric value representing a list of strings. # List of strings, for Set. 1955 "elements": [ # Elements of the list. 1956 "A String", 1957 ], 1958 }, 1959 }, 1960 ], 1961 "progress": { # Obsolete in favor of ApproximateReportedProgress and ApproximateSplitRequest. # DEPRECATED in favor of reported_progress. 1962 "position": { # Position defines a position within a collection of data. The value # Obsolete. 1963 # can be either the end position, a key (used with ordered 1964 # collections), a byte offset, or a record index. 1965 "end": True or False, # Position is past all other positions. Also useful for the end 1966 # position of an unbounded range. 1967 "recordIndex": "A String", # Position is a record index. 1968 "byteOffset": "A String", # Position is a byte offset. 1969 "key": "A String", # Position is a string key, ordered lexicographically. 1970 "concatPosition": { # A position that encapsulates an inner position and an index for the inner # CloudPosition is a concat position. 1971 # position. A ConcatPosition can be used by a reader of a source that 1972 # encapsulates a set of other sources. 1973 "position": # Object with schema name: Position # Position within the inner source. 1974 "index": 42, # Index of the inner source. 1975 }, 1976 "shufflePosition": "A String", # CloudPosition is a base64 encoded BatchShufflePosition (with FIXED 1977 # sharding). 1978 }, 1979 "remainingTime": "A String", # Obsolete. 1980 "percentComplete": 3.14, # Obsolete. 1981 }, 1982 "metricUpdates": [ # DEPRECATED in favor of counter_updates. 1983 { # Describes the state of a metric. 1984 "meanCount": "", # Worker-computed aggregate value for the "Mean" aggregation kind. 1985 # This holds the count of the aggregated values and is used in combination 1986 # with mean_sum above to obtain the actual mean aggregate value. 1987 # The only possible value type is Long. 1988 "kind": "A String", # Metric aggregation kind. The possible metric aggregation kinds are 1989 # "Sum", "Max", "Min", "Mean", "Set", "And", "Or", and "Distribution". 1990 # The specified aggregation kind is case-insensitive. 1991 # 1992 # If omitted, this is not an aggregated value but instead 1993 # a single metric sample value. 1994 "set": "", # Worker-computed aggregate value for the "Set" aggregation kind. The only 1995 # possible value type is a list of Values whose type can be Long, Double, 1996 # or String, according to the metric's type. All Values in the list must 1997 # be of the same type. 1998 "name": { # Identifies a metric, by describing the source which generated the # Name of the metric. 1999 # metric. 2000 "origin": "A String", # Origin (namespace) of metric name. May be blank for user-define metrics; 2001 # will be "dataflow" for metrics defined by the Dataflow service or SDK. 2002 "name": "A String", # Worker-defined metric name. 2003 "context": { # Zero or more labeled fields which identify the part of the job this 2004 # metric is associated with, such as the name of a step or collection. 2005 # 2006 # For example, built-in counters associated with steps will have 2007 # context['step'] = <step-name>. Counters associated with PCollections 2008 # in the SDK will have context['pcollection'] = <pcollection-name>. 2009 "a_key": "A String", 2010 }, 2011 }, 2012 "meanSum": "", # Worker-computed aggregate value for the "Mean" aggregation kind. 2013 # This holds the sum of the aggregated values and is used in combination 2014 # with mean_count below to obtain the actual mean aggregate value. 2015 # The only possible value types are Long and Double. 2016 "cumulative": True or False, # True if this metric is reported as the total cumulative aggregate 2017 # value accumulated since the worker started working on this WorkItem. 2018 # By default this is false, indicating that this metric is reported 2019 # as a delta that is not associated with any WorkItem. 2020 "updateTime": "A String", # Timestamp associated with the metric value. Optional when workers are 2021 # reporting work progress; it will be filled in responses from the 2022 # metrics API. 2023 "scalar": "", # Worker-computed aggregate value for aggregation kinds "Sum", "Max", "Min", 2024 # "And", and "Or". The possible value types are Long, Double, and Boolean. 2025 "internal": "", # Worker-computed aggregate value for internal use by the Dataflow 2026 # service. 2027 "gauge": "", # A struct value describing properties of a Gauge. 2028 # Metrics of gauge type show the value of a metric across time, and is 2029 # aggregated based on the newest value. 2030 "distribution": "", # A struct value describing properties of a distribution of numeric values. 2031 }, 2032 ], 2033 "reportedProgress": { # A progress measurement of a WorkItem by a worker. # The worker's progress through this WorkItem. 2034 "fractionConsumed": 3.14, # Completion as fraction of the input consumed, from 0.0 (beginning, nothing 2035 # consumed), to 1.0 (end of the input, entire input consumed). 2036 "position": { # Position defines a position within a collection of data. The value # A Position within the work to represent a progress. 2037 # can be either the end position, a key (used with ordered 2038 # collections), a byte offset, or a record index. 2039 "end": True or False, # Position is past all other positions. Also useful for the end 2040 # position of an unbounded range. 2041 "recordIndex": "A String", # Position is a record index. 2042 "byteOffset": "A String", # Position is a byte offset. 2043 "key": "A String", # Position is a string key, ordered lexicographically. 2044 "concatPosition": { # A position that encapsulates an inner position and an index for the inner # CloudPosition is a concat position. 2045 # position. A ConcatPosition can be used by a reader of a source that 2046 # encapsulates a set of other sources. 2047 "position": # Object with schema name: Position # Position within the inner source. 2048 "index": 42, # Index of the inner source. 2049 }, 2050 "shufflePosition": "A String", # CloudPosition is a base64 encoded BatchShufflePosition (with FIXED 2051 # sharding). 2052 }, 2053 "remainingParallelism": { # Represents the level of parallelism in a WorkItem's input, # Total amount of parallelism in the input of this task that remains, 2054 # (i.e. can be delegated to this task and any new tasks via dynamic 2055 # splitting). Always at least 1 for non-finished work items and 0 for 2056 # finished. 2057 # 2058 # "Amount of parallelism" refers to how many non-empty parts of the input 2059 # can be read in parallel. This does not necessarily equal number 2060 # of records. An input that can be read in parallel down to the 2061 # individual records is called "perfectly splittable". 2062 # An example of non-perfectly parallelizable input is a block-compressed 2063 # file format where a block of records has to be read as a whole, 2064 # but different blocks can be read in parallel. 2065 # 2066 # Examples: 2067 # * If we are processing record #30 (starting at 1) out of 50 in a perfectly 2068 # splittable 50-record input, this value should be 21 (20 remaining + 1 2069 # current). 2070 # * If we are reading through block 3 in a block-compressed file consisting 2071 # of 5 blocks, this value should be 3 (since blocks 4 and 5 can be 2072 # processed in parallel by new tasks via dynamic splitting and the current 2073 # task remains processing block 3). 2074 # * If we are reading through the last block in a block-compressed file, 2075 # or reading or processing the last record in a perfectly splittable 2076 # input, this value should be 1, because apart from the current task, no 2077 # additional remainder can be split off. 2078 # reported by the worker. 2079 "isInfinite": True or False, # Specifies whether the parallelism is infinite. If true, "value" is 2080 # ignored. 2081 # Infinite parallelism means the service will assume that the work item 2082 # can always be split into more non-empty work items by dynamic splitting. 2083 # This is a work-around for lack of support for infinity by the current 2084 # JSON-based Java RPC stack. 2085 "value": 3.14, # Specifies the level of parallelism in case it is finite. 2086 }, 2087 "consumedParallelism": { # Represents the level of parallelism in a WorkItem's input, # Total amount of parallelism in the portion of input of this task that has 2088 # already been consumed and is no longer active. In the first two examples 2089 # above (see remaining_parallelism), the value should be 29 or 2 2090 # respectively. The sum of remaining_parallelism and consumed_parallelism 2091 # should equal the total amount of parallelism in this work item. If 2092 # specified, must be finite. 2093 # reported by the worker. 2094 "isInfinite": True or False, # Specifies whether the parallelism is infinite. If true, "value" is 2095 # ignored. 2096 # Infinite parallelism means the service will assume that the work item 2097 # can always be split into more non-empty work items by dynamic splitting. 2098 # This is a work-around for lack of support for infinity by the current 2099 # JSON-based Java RPC stack. 2100 "value": 3.14, # Specifies the level of parallelism in case it is finite. 2101 }, 2102 }, 2103 }, 2104 ], 2105 "location": "A String", # The [regional endpoint] 2106 # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that 2107 # contains the WorkItem's job. 2108 } 2109 2110 x__xgafv: string, V1 error format. 2111 Allowed values 2112 1 - v1 error format 2113 2 - v2 error format 2114 2115Returns: 2116 An object of the form: 2117 2118 { # Response from a request to report the status of WorkItems. 2119 "workItemServiceStates": [ # A set of messages indicating the service-side state for each 2120 # WorkItem whose status was reported, in the same order as the 2121 # WorkItemStatus messages in the ReportWorkItemStatusRequest which 2122 # resulting in this response. 2123 { # The Dataflow service's idea of the current state of a WorkItem 2124 # being processed by a worker. 2125 "reportStatusInterval": "A String", # New recommended reporting interval. 2126 "suggestedStopPosition": { # Position defines a position within a collection of data. The value # Obsolete, always empty. 2127 # can be either the end position, a key (used with ordered 2128 # collections), a byte offset, or a record index. 2129 "end": True or False, # Position is past all other positions. Also useful for the end 2130 # position of an unbounded range. 2131 "recordIndex": "A String", # Position is a record index. 2132 "byteOffset": "A String", # Position is a byte offset. 2133 "key": "A String", # Position is a string key, ordered lexicographically. 2134 "concatPosition": { # A position that encapsulates an inner position and an index for the inner # CloudPosition is a concat position. 2135 # position. A ConcatPosition can be used by a reader of a source that 2136 # encapsulates a set of other sources. 2137 "position": # Object with schema name: Position # Position within the inner source. 2138 "index": 42, # Index of the inner source. 2139 }, 2140 "shufflePosition": "A String", # CloudPosition is a base64 encoded BatchShufflePosition (with FIXED 2141 # sharding). 2142 }, 2143 "leaseExpireTime": "A String", # Time at which the current lease will expire. 2144 "nextReportIndex": "A String", # The index value to use for the next report sent by the worker. 2145 # Note: If the report call fails for whatever reason, the worker should 2146 # reuse this index for subsequent report attempts. 2147 "harnessData": { # Other data returned by the service, specific to the particular 2148 # worker harness. 2149 "a_key": "", # Properties of the object. 2150 }, 2151 "metricShortId": [ # The short ids that workers should use in subsequent metric updates. 2152 # Workers should strive to use short ids whenever possible, but it is ok 2153 # to request the short_id again if a worker lost track of it 2154 # (e.g. if the worker is recovering from a crash). 2155 # NOTE: it is possible that the response may have short ids for a subset 2156 # of the metrics. 2157 { # The metric short id is returned to the user alongside an offset into 2158 # ReportWorkItemStatusRequest 2159 "shortId": "A String", # The service-generated short identifier for the metric. 2160 "metricIndex": 42, # The index of the corresponding metric in 2161 # the ReportWorkItemStatusRequest. Required. 2162 }, 2163 ], 2164 "splitRequest": { # A suggestion by the service to the worker to dynamically split the WorkItem. # The progress point in the WorkItem where the Dataflow service 2165 # suggests that the worker truncate the task. 2166 "fractionConsumed": 3.14, # A fraction at which to split the work item, from 0.0 (beginning of the 2167 # input) to 1.0 (end of the input). 2168 "position": { # Position defines a position within a collection of data. The value # A Position at which to split the work item. 2169 # can be either the end position, a key (used with ordered 2170 # collections), a byte offset, or a record index. 2171 "end": True or False, # Position is past all other positions. Also useful for the end 2172 # position of an unbounded range. 2173 "recordIndex": "A String", # Position is a record index. 2174 "byteOffset": "A String", # Position is a byte offset. 2175 "key": "A String", # Position is a string key, ordered lexicographically. 2176 "concatPosition": { # A position that encapsulates an inner position and an index for the inner # CloudPosition is a concat position. 2177 # position. A ConcatPosition can be used by a reader of a source that 2178 # encapsulates a set of other sources. 2179 "position": # Object with schema name: Position # Position within the inner source. 2180 "index": 42, # Index of the inner source. 2181 }, 2182 "shufflePosition": "A String", # CloudPosition is a base64 encoded BatchShufflePosition (with FIXED 2183 # sharding). 2184 }, 2185 "fractionOfRemainder": 3.14, # The fraction of the remainder of work to split the work item at, from 0.0 2186 # (split at the current position) to 1.0 (end of the input). 2187 }, 2188 "suggestedStopPoint": { # Obsolete in favor of ApproximateReportedProgress and ApproximateSplitRequest. # DEPRECATED in favor of split_request. 2189 "position": { # Position defines a position within a collection of data. The value # Obsolete. 2190 # can be either the end position, a key (used with ordered 2191 # collections), a byte offset, or a record index. 2192 "end": True or False, # Position is past all other positions. Also useful for the end 2193 # position of an unbounded range. 2194 "recordIndex": "A String", # Position is a record index. 2195 "byteOffset": "A String", # Position is a byte offset. 2196 "key": "A String", # Position is a string key, ordered lexicographically. 2197 "concatPosition": { # A position that encapsulates an inner position and an index for the inner # CloudPosition is a concat position. 2198 # position. A ConcatPosition can be used by a reader of a source that 2199 # encapsulates a set of other sources. 2200 "position": # Object with schema name: Position # Position within the inner source. 2201 "index": 42, # Index of the inner source. 2202 }, 2203 "shufflePosition": "A String", # CloudPosition is a base64 encoded BatchShufflePosition (with FIXED 2204 # sharding). 2205 }, 2206 "remainingTime": "A String", # Obsolete. 2207 "percentComplete": 3.14, # Obsolete. 2208 }, 2209 }, 2210 ], 2211 "unifiedWorkerResponse": { # Untranslated bag-of-bytes WorkProgressUpdateResponse for UnifiedWorker. 2212 "a_key": "", # Properties of the object. Contains field @type with type URL. 2213 }, 2214 }</pre> 2215</div> 2216 2217</body></html>