1<html><body> 2<style> 3 4body, h1, h2, h3, div, span, p, pre, a { 5 margin: 0; 6 padding: 0; 7 border: 0; 8 font-weight: inherit; 9 font-style: inherit; 10 font-size: 100%; 11 font-family: inherit; 12 vertical-align: baseline; 13} 14 15body { 16 font-size: 13px; 17 padding: 1em; 18} 19 20h1 { 21 font-size: 26px; 22 margin-bottom: 1em; 23} 24 25h2 { 26 font-size: 24px; 27 margin-bottom: 1em; 28} 29 30h3 { 31 font-size: 20px; 32 margin-bottom: 1em; 33 margin-top: 1em; 34} 35 36pre, code { 37 line-height: 1.5; 38 font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace; 39} 40 41pre { 42 margin-top: 0.5em; 43} 44 45h1, h2, h3, p { 46 font-family: Arial, sans-serif; 47} 48 49h1, h2, h3 { 50 border-bottom: solid #CCC 1px; 51} 52 53.toc_element { 54 margin-top: 0.5em; 55} 56 57.firstline { 58 margin-left: 2em; 59} 60 61.method { 62 margin-top: 1em; 63 border: solid 1px #CCC; 64 padding: 1em; 65 background: #EEE; 66} 67 68.details { 69 font-weight: bold; 70 font-size: 14px; 71} 72 73</style> 74 75<h1><a href="dataflow_v1b3.html">Google Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.jobs.html">jobs</a> . <a href="dataflow_v1b3.projects.jobs.workItems.html">workItems</a></h1> 76<h2>Instance Methods</h2> 77<p class="toc_element"> 78 <code><a href="#lease">lease(projectId, jobId, body, x__xgafv=None)</a></code></p> 79<p class="firstline">Leases a dataflow WorkItem to run.</p> 80<p class="toc_element"> 81 <code><a href="#reportStatus">reportStatus(projectId, jobId, body, x__xgafv=None)</a></code></p> 82<p class="firstline">Reports the status of dataflow WorkItems leased by a worker.</p> 83<h3>Method Details</h3> 84<div class="method"> 85 <code class="details" id="lease">lease(projectId, jobId, body, x__xgafv=None)</code> 86 <pre>Leases a dataflow WorkItem to run. 87 88Args: 89 projectId: string, Identifies the project this worker belongs to. (required) 90 jobId: string, Identifies the workflow job this worker belongs to. (required) 91 body: object, The request body. (required) 92 The object takes the form of: 93 94{ # Request to lease WorkItems. 95 "workItemTypes": [ # Filter for WorkItem type. 96 "A String", 97 ], 98 "workerCapabilities": [ # Worker capabilities. WorkItems might be limited to workers with specific 99 # capabilities. 100 "A String", 101 ], 102 "requestedLeaseDuration": "A String", # The initial lease period. 103 "workerId": "A String", # Identifies the worker leasing work -- typically the ID of the 104 # virtual machine running the worker. 105 "currentWorkerTime": "A String", # The current timestamp at the worker. 106 "location": "A String", # The location which contains the WorkItem's job. 107 } 108 109 x__xgafv: string, V1 error format. 110 Allowed values 111 1 - v1 error format 112 2 - v2 error format 113 114Returns: 115 An object of the form: 116 117 { # Response to a request to lease WorkItems. 118 "workItems": [ # A list of the leased WorkItems. 119 { # WorkItem represents basic information about a WorkItem to be executed 120 # in the cloud. 121 "reportStatusInterval": "A String", # Recommended reporting interval. 122 "leaseExpireTime": "A String", # Time when the lease on this Work will expire. 123 "seqMapTask": { # Describes a particular function to invoke. # Additional information for SeqMapTask WorkItems. 124 "inputs": [ # Information about each of the inputs. 125 { # Information about a side input of a DoFn or an input of a SeqDoFn.
126 "sources": [ # The source(s) to read element(s) from to get the value of this side input. 127 # If more than one source, then the elements are taken from the 128 # sources, in the specified order if order matters. 129 # At least one source is required. 130 { # A source that records can be read and decoded from. 131 "codec": { # The codec to use to decode data read from the source. 132 "a_key": "", # Properties of the object. 133 }, 134 "baseSpecs": [ # While splitting, sources may specify the produced bundles 135 # as differences against another source, in order to save backend-side 136 # memory and allow bigger jobs. For details, see SourceSplitRequest. 137 # To support this use case, the full set of parameters of the source 138 # is logically obtained by taking the latest explicitly specified value 139 # of each parameter in the order: 140 # base_specs (later items win), spec (overrides anything in base_specs). 141 { 142 "a_key": "", # Properties of the object. 143 }, 144 ], 145 "spec": { # The source to read from, plus its parameters. 146 "a_key": "", # Properties of the object. 147 }, 148 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 149 # doesn't need splitting, and using SourceSplitRequest on it would 150 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 151 # 152 # E.g. a file splitter may set this to true when splitting a single file 153 # into a set of byte ranges of appropriate size, and set this 154 # to false when splitting a filepattern into individual files. 155 # However, for efficiency, a file splitter may decide to produce 156 # file subranges directly from the filepattern to avoid a splitting 157 # round-trip. 158 # 159 # See SourceSplitRequest for an overview of the splitting process. 160 # 161 # This field is meaningful only in the Source objects populated 162 # by the user (e.g. when filling in a DerivedSource). 163 # Source objects supplied by the framework to the user don't have 164 # this field populated. 165 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 166 # avoiding a SourceGetMetadataOperation roundtrip 167 # (see SourceOperationRequest). 168 # 169 # This field is meaningful only in the Source objects populated 170 # by the user (e.g. when filling in a DerivedSource). 171 # Source objects supplied by the framework to the user don't have 172 # this field populated. 173 # and tuning the pipeline, etc. 174 "infinite": True or False, # Specifies that the size of this source is known to be infinite 175 # (this is a streaming source). 176 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 177 # read from this source. This estimate is in terms of external storage 178 # size, before any decompression or other processing done by the reader. 179 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 180 # the (encoded) keys in lexicographically sorted order. 181 }, 182 }, 183 ], 184 "kind": { # How to interpret the source element(s) as a side input value. 185 "a_key": "", # Properties of the object. 186 }, 187 "tag": "A String", # The id of the tag the user code will access this side input by; 188 # this should correspond to the tag of some MultiOutputInfo. 189 }, 190 ], 191 "outputInfos": [ # Information about each of the outputs. 192 { # Information about an output of a SeqMapTask. 
193 "tag": "A String", # The id of the TupleTag the user code will tag the output value by. 194 "sink": { # A sink that records can be encoded and written to. # The sink to write the output value to. 195 "codec": { # The codec to use to encode data written to the sink. 196 "a_key": "", # Properties of the object. 197 }, 198 "spec": { # The sink to write to, plus its parameters. 199 "a_key": "", # Properties of the object. 200 }, 201 }, 202 }, 203 ], 204 "stageName": "A String", # System-defined name of the stage containing the SeqDo operation. 205 # Unique across the workflow. 206 "systemName": "A String", # System-defined name of the SeqDo operation. 207 # Unique across the workflow. 208 "userFn": { # The user function to invoke. 209 "a_key": "", # Properties of the object. 210 }, 211 "name": "A String", # The user-provided name of the SeqDo operation. 212 }, 213 "projectId": "A String", # Identifies the cloud project this WorkItem belongs to. 214 "sourceOperationTask": { # A work item that represents the different operations that can be # Additional information for source operation WorkItems. 215 # performed on a user-defined Source specification. 216 "getMetadata": { # A request to compute the SourceMetadata of a Source. # Information about a request to get metadata about a source. 217 "source": { # A source that records can be read and decoded from. # Specification of the source whose metadata should be computed. 218 "codec": { # The codec to use to decode data read from the source. 219 "a_key": "", # Properties of the object. 220 }, 221 "baseSpecs": [ # While splitting, sources may specify the produced bundles 222 # as differences against another source, in order to save backend-side 223 # memory and allow bigger jobs. For details, see SourceSplitRequest. 224 # To support this use case, the full set of parameters of the source 225 # is logically obtained by taking the latest explicitly specified value 226 # of each parameter in the order: 227 # base_specs (later items win), spec (overrides anything in base_specs). 228 { 229 "a_key": "", # Properties of the object. 230 }, 231 ], 232 "spec": { # The source to read from, plus its parameters. 233 "a_key": "", # Properties of the object. 234 }, 235 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 236 # doesn't need splitting, and using SourceSplitRequest on it would 237 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 238 # 239 # E.g. a file splitter may set this to true when splitting a single file 240 # into a set of byte ranges of appropriate size, and set this 241 # to false when splitting a filepattern into individual files. 242 # However, for efficiency, a file splitter may decide to produce 243 # file subranges directly from the filepattern to avoid a splitting 244 # round-trip. 245 # 246 # See SourceSplitRequest for an overview of the splitting process. 247 # 248 # This field is meaningful only in the Source objects populated 249 # by the user (e.g. when filling in a DerivedSource). 250 # Source objects supplied by the framework to the user don't have 251 # this field populated. 252 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 253 # avoiding a SourceGetMetadataOperation roundtrip 254 # (see SourceOperationRequest). 255 # 256 # This field is meaningful only in the Source objects populated 257 # by the user (e.g. when filling in a DerivedSource). 
258 # Source objects supplied by the framework to the user don't have 259 # this field populated. 260 # and tuning the pipeline, etc. 261 "infinite": True or False, # Specifies that the size of this source is known to be infinite 262 # (this is a streaming source). 263 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 264 # read from this source. This estimate is in terms of external storage 265 # size, before any decompression or other processing done by the reader. 266 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 267 # the (encoded) keys in lexicographically sorted order. 268 }, 269 }, 270 }, 271 "split": { # Represents the operation to split a high-level Source specification # Information about a request to split a source. 272 # into bundles (parts for parallel processing). 273 # 274 # At a high level, splitting of a source into bundles happens as follows: 275 # SourceSplitRequest is applied to the source. If it returns 276 # SOURCE_SPLIT_OUTCOME_USE_CURRENT, no further splitting happens and the source 277 # is used "as is". Otherwise, splitting is applied recursively to each 278 # produced DerivedSource. 279 # 280 # As an optimization, for any Source, if its does_not_need_splitting is 281 # true, the framework assumes that splitting this source would return 282 # SOURCE_SPLIT_OUTCOME_USE_CURRENT, and doesn't initiate a SourceSplitRequest. 283 # This applies both to the initial source being split and to bundles 284 # produced from it. 285 "source": { # A source that records can be read and decoded from. # Specification of the source to be split. 286 "codec": { # The codec to use to decode data read from the source. 287 "a_key": "", # Properties of the object. 288 }, 289 "baseSpecs": [ # While splitting, sources may specify the produced bundles 290 # as differences against another source, in order to save backend-side 291 # memory and allow bigger jobs. For details, see SourceSplitRequest. 292 # To support this use case, the full set of parameters of the source 293 # is logically obtained by taking the latest explicitly specified value 294 # of each parameter in the order: 295 # base_specs (later items win), spec (overrides anything in base_specs). 296 { 297 "a_key": "", # Properties of the object. 298 }, 299 ], 300 "spec": { # The source to read from, plus its parameters. 301 "a_key": "", # Properties of the object. 302 }, 303 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 304 # doesn't need splitting, and using SourceSplitRequest on it would 305 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 306 # 307 # E.g. a file splitter may set this to true when splitting a single file 308 # into a set of byte ranges of appropriate size, and set this 309 # to false when splitting a filepattern into individual files. 310 # However, for efficiency, a file splitter may decide to produce 311 # file subranges directly from the filepattern to avoid a splitting 312 # round-trip. 313 # 314 # See SourceSplitRequest for an overview of the splitting process. 315 # 316 # This field is meaningful only in the Source objects populated 317 # by the user (e.g. when filling in a DerivedSource). 318 # Source objects supplied by the framework to the user don't have 319 # this field populated. 
320 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 321 # avoiding a SourceGetMetadataOperation roundtrip 322 # (see SourceOperationRequest). 323 # 324 # This field is meaningful only in the Source objects populated 325 # by the user (e.g. when filling in a DerivedSource). 326 # Source objects supplied by the framework to the user don't have 327 # this field populated. 328 # and tuning the pipeline, etc. 329 "infinite": True or False, # Specifies that the size of this source is known to be infinite 330 # (this is a streaming source). 331 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 332 # read from this source. This estimate is in terms of external storage 333 # size, before any decompression or other processing done by the reader. 334 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 335 # the (encoded) keys in lexicographically sorted order. 336 }, 337 }, 338 "options": { # Hints for splitting a Source into bundles (parts for parallel # Hints for tuning the splitting process. 339 # processing) using SourceSplitRequest. 340 "desiredShardSizeBytes": "A String", # DEPRECATED in favor of desired_bundle_size_bytes. 341 "desiredBundleSizeBytes": "A String", # The source should be split into a set of bundles where the estimated size 342 # of each is approximately this many bytes. 343 }, 344 }, 345 }, 346 "initialReportIndex": "A String", # The initial index to use when reporting the status of the WorkItem. 347 "mapTask": { # MapTask consists of an ordered set of instructions, each of which # Additional information for MapTask WorkItems. 348 # describes one particular low-level operation for the worker to 349 # perform in order to accomplish the MapTask's WorkItem. 350 # 351 # Each instruction must appear in the list before any instructions which 352 # depends on its output. 353 "systemName": "A String", # System-defined name of this MapTask. 354 # Unique across the workflow. 355 "stageName": "A String", # System-defined name of the stage containing this MapTask. 356 # Unique across the workflow. 357 "instructions": [ # The instructions in the MapTask. 358 { # Describes a particular operation comprising a MapTask. 359 "name": "A String", # User-provided name of this operation. 360 "read": { # An instruction that reads records. # Additional information for Read instructions. 361 # Takes no inputs, produces one output. 362 "source": { # A source that records can be read and decoded from. # The source to read from. 363 "codec": { # The codec to use to decode data read from the source. 364 "a_key": "", # Properties of the object. 365 }, 366 "baseSpecs": [ # While splitting, sources may specify the produced bundles 367 # as differences against another source, in order to save backend-side 368 # memory and allow bigger jobs. For details, see SourceSplitRequest. 369 # To support this use case, the full set of parameters of the source 370 # is logically obtained by taking the latest explicitly specified value 371 # of each parameter in the order: 372 # base_specs (later items win), spec (overrides anything in base_specs). 373 { 374 "a_key": "", # Properties of the object. 375 }, 376 ], 377 "spec": { # The source to read from, plus its parameters. 378 "a_key": "", # Properties of the object. 
379 }, 380 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 381 # doesn't need splitting, and using SourceSplitRequest on it would 382 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 383 # 384 # E.g. a file splitter may set this to true when splitting a single file 385 # into a set of byte ranges of appropriate size, and set this 386 # to false when splitting a filepattern into individual files. 387 # However, for efficiency, a file splitter may decide to produce 388 # file subranges directly from the filepattern to avoid a splitting 389 # round-trip. 390 # 391 # See SourceSplitRequest for an overview of the splitting process. 392 # 393 # This field is meaningful only in the Source objects populated 394 # by the user (e.g. when filling in a DerivedSource). 395 # Source objects supplied by the framework to the user don't have 396 # this field populated. 397 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 398 # avoiding a SourceGetMetadataOperation roundtrip 399 # (see SourceOperationRequest). 400 # 401 # This field is meaningful only in the Source objects populated 402 # by the user (e.g. when filling in a DerivedSource). 403 # Source objects supplied by the framework to the user don't have 404 # this field populated. 405 # and tuning the pipeline, etc. 406 "infinite": True or False, # Specifies that the size of this source is known to be infinite 407 # (this is a streaming source). 408 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 409 # read from this source. This estimate is in terms of external storage 410 # size, before any decompression or other processing done by the reader. 411 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 412 # the (encoded) keys in lexicographically sorted order. 413 }, 414 }, 415 }, 416 "outputs": [ # Describes the outputs of the instruction. 417 { # An output of an instruction. 418 "name": "A String", # The user-provided name of this output. 419 "onlyCountKeyBytes": True or False, # For system-generated byte and mean byte metrics, certain instructions 420 # should only report the key size. 421 "codec": { # The codec to use to encode data being written via this output. 422 "a_key": "", # Properties of the object. 423 }, 424 "systemName": "A String", # System-defined name of this output. 425 # Unique across the workflow. 426 "originalName": "A String", # System-defined name for this output in the original workflow graph. 427 # Outputs that do not contribute to an original instruction do not set this. 428 "onlyCountValueBytes": True or False, # For system-generated byte and mean byte metrics, certain instructions 429 # should only report the value size. 430 }, 431 ], 432 "partialGroupByKey": { # An instruction that does a partial group-by-key. # Additional information for PartialGroupByKey instructions. 433 # One input and one output. 434 "sideInputs": [ # Zero or more side inputs. 435 { # Information about a side input of a DoFn or an input of a SeqDoFn. 436 "sources": [ # The source(s) to read element(s) from to get the value of this side input. 437 # If more than one source, then the elements are taken from the 438 # sources, in the specified order if order matters. 439 # At least one source is required. 440 { # A source that records can be read and decoded from. 
441 "codec": { # The codec to use to decode data read from the source. 442 "a_key": "", # Properties of the object. 443 }, 444 "baseSpecs": [ # While splitting, sources may specify the produced bundles 445 # as differences against another source, in order to save backend-side 446 # memory and allow bigger jobs. For details, see SourceSplitRequest. 447 # To support this use case, the full set of parameters of the source 448 # is logically obtained by taking the latest explicitly specified value 449 # of each parameter in the order: 450 # base_specs (later items win), spec (overrides anything in base_specs). 451 { 452 "a_key": "", # Properties of the object. 453 }, 454 ], 455 "spec": { # The source to read from, plus its parameters. 456 "a_key": "", # Properties of the object. 457 }, 458 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 459 # doesn't need splitting, and using SourceSplitRequest on it would 460 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 461 # 462 # E.g. a file splitter may set this to true when splitting a single file 463 # into a set of byte ranges of appropriate size, and set this 464 # to false when splitting a filepattern into individual files. 465 # However, for efficiency, a file splitter may decide to produce 466 # file subranges directly from the filepattern to avoid a splitting 467 # round-trip. 468 # 469 # See SourceSplitRequest for an overview of the splitting process. 470 # 471 # This field is meaningful only in the Source objects populated 472 # by the user (e.g. when filling in a DerivedSource). 473 # Source objects supplied by the framework to the user don't have 474 # this field populated. 475 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 476 # avoiding a SourceGetMetadataOperation roundtrip 477 # (see SourceOperationRequest). 478 # 479 # This field is meaningful only in the Source objects populated 480 # by the user (e.g. when filling in a DerivedSource). 481 # Source objects supplied by the framework to the user don't have 482 # this field populated. 483 # and tuning the pipeline, etc. 484 "infinite": True or False, # Specifies that the size of this source is known to be infinite 485 # (this is a streaming source). 486 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 487 # read from this source. This estimate is in terms of external storage 488 # size, before any decompression or other processing done by the reader. 489 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 490 # the (encoded) keys in lexicographically sorted order. 491 }, 492 }, 493 ], 494 "kind": { # How to interpret the source element(s) as a side input value. 495 "a_key": "", # Properties of the object. 496 }, 497 "tag": "A String", # The id of the tag the user code will access this side input by; 498 # this should correspond to the tag of some MultiOutputInfo. 499 }, 500 ], 501 "inputElementCodec": { # The codec to use for interpreting an element in the input PTable. 502 "a_key": "", # Properties of the object. 503 }, 504 "originalCombineValuesStepName": "A String", # If this instruction includes a combining function, this is the name of the 505 # CombineValues instruction lifted into this instruction. 506 "valueCombiningFn": { # The value combining function to invoke. 507 "a_key": "", # Properties of the object. 
508 }, 509 "input": { # An input of an instruction, as a reference to an output of a # Describes the input to the partial group-by-key instruction. 510 # producer instruction. 511 "outputNum": 42, # The output index (origin zero) within the producer. 512 "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces 513 # the output to be consumed by this input. This index is relative 514 # to the list of instructions in this input's instruction's 515 # containing MapTask. 516 }, 517 "originalCombineValuesInputStoreName": "A String", # If this instruction includes a combining function this is the name of the 518 # intermediate store between the GBK and the CombineValues. 519 }, 520 "write": { # An instruction that writes records. # Additional information for Write instructions. 521 # Takes one input, produces no outputs. 522 "input": { # An input of an instruction, as a reference to an output of a # The input. 523 # producer instruction. 524 "outputNum": 42, # The output index (origin zero) within the producer. 525 "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces 526 # the output to be consumed by this input. This index is relative 527 # to the list of instructions in this input's instruction's 528 # containing MapTask. 529 }, 530 "sink": { # A sink that records can be encoded and written to. # The sink to write to. 531 "codec": { # The codec to use to encode data written to the sink. 532 "a_key": "", # Properties of the object. 533 }, 534 "spec": { # The sink to write to, plus its parameters. 535 "a_key": "", # Properties of the object. 536 }, 537 }, 538 }, 539 "systemName": "A String", # System-defined name of this operation. 540 # Unique across the workflow. 541 "flatten": { # An instruction that copies its inputs (zero or more) to its (single) output. # Additional information for Flatten instructions. 542 "inputs": [ # Describes the inputs to the flatten instruction. 543 { # An input of an instruction, as a reference to an output of a 544 # producer instruction. 545 "outputNum": 42, # The output index (origin zero) within the producer. 546 "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces 547 # the output to be consumed by this input. This index is relative 548 # to the list of instructions in this input's instruction's 549 # containing MapTask. 550 }, 551 ], 552 }, 553 "originalName": "A String", # System-defined name for the operation in the original workflow graph. 554 "parDo": { # An instruction that does a ParDo operation. # Additional information for ParDo instructions. 555 # Takes one main input and zero or more side inputs, and produces 556 # zero or more outputs. 557 # Runs user code. 558 "sideInputs": [ # Zero or more side inputs. 559 { # Information about a side input of a DoFn or an input of a SeqDoFn. 560 "sources": [ # The source(s) to read element(s) from to get the value of this side input. 561 # If more than one source, then the elements are taken from the 562 # sources, in the specified order if order matters. 563 # At least one source is required. 564 { # A source that records can be read and decoded from. 565 "codec": { # The codec to use to decode data read from the source. 566 "a_key": "", # Properties of the object. 567 }, 568 "baseSpecs": [ # While splitting, sources may specify the produced bundles 569 # as differences against another source, in order to save backend-side 570 # memory and allow bigger jobs. 
For details, see SourceSplitRequest. 571 # To support this use case, the full set of parameters of the source 572 # is logically obtained by taking the latest explicitly specified value 573 # of each parameter in the order: 574 # base_specs (later items win), spec (overrides anything in base_specs). 575 { 576 "a_key": "", # Properties of the object. 577 }, 578 ], 579 "spec": { # The source to read from, plus its parameters. 580 "a_key": "", # Properties of the object. 581 }, 582 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 583 # doesn't need splitting, and using SourceSplitRequest on it would 584 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 585 # 586 # E.g. a file splitter may set this to true when splitting a single file 587 # into a set of byte ranges of appropriate size, and set this 588 # to false when splitting a filepattern into individual files. 589 # However, for efficiency, a file splitter may decide to produce 590 # file subranges directly from the filepattern to avoid a splitting 591 # round-trip. 592 # 593 # See SourceSplitRequest for an overview of the splitting process. 594 # 595 # This field is meaningful only in the Source objects populated 596 # by the user (e.g. when filling in a DerivedSource). 597 # Source objects supplied by the framework to the user don't have 598 # this field populated. 599 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 600 # avoiding a SourceGetMetadataOperation roundtrip 601 # (see SourceOperationRequest). 602 # 603 # This field is meaningful only in the Source objects populated 604 # by the user (e.g. when filling in a DerivedSource). 605 # Source objects supplied by the framework to the user don't have 606 # this field populated. 607 # and tuning the pipeline, etc. 608 "infinite": True or False, # Specifies that the size of this source is known to be infinite 609 # (this is a streaming source). 610 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 611 # read from this source. This estimate is in terms of external storage 612 # size, before any decompression or other processing done by the reader. 613 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 614 # the (encoded) keys in lexicographically sorted order. 615 }, 616 }, 617 ], 618 "kind": { # How to interpret the source element(s) as a side input value. 619 "a_key": "", # Properties of the object. 620 }, 621 "tag": "A String", # The id of the tag the user code will access this side input by; 622 # this should correspond to the tag of some MultiOutputInfo. 623 }, 624 ], 625 "input": { # An input of an instruction, as a reference to an output of a # The input. 626 # producer instruction. 627 "outputNum": 42, # The output index (origin zero) within the producer. 628 "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces 629 # the output to be consumed by this input. This index is relative 630 # to the list of instructions in this input's instruction's 631 # containing MapTask. 632 }, 633 "multiOutputInfos": [ # Information about each of the outputs, if user_fn is a MultiDoFn. 634 { # Information about an output of a multi-output DoFn. 635 "tag": "A String", # The id of the tag the user code will emit to this output by; this 636 # should correspond to the tag of some SideInputInfo. 
637 }, 638 ], 639 "numOutputs": 42, # The number of outputs. 640 "userFn": { # The user function to invoke. 641 "a_key": "", # Properties of the object. 642 }, 643 }, 644 }, 645 ], 646 }, 647 "jobId": "A String", # Identifies the workflow job this WorkItem belongs to. 648 "configuration": "A String", # Work item-specific configuration as an opaque blob. 649 "streamingSetupTask": { # A task which initializes part of a streaming Dataflow job. # Additional information for StreamingSetupTask WorkItems. 650 "workerHarnessPort": 42, # The TCP port used by the worker to communicate with the Dataflow 651 # worker harness. 652 "drain": True or False, # The user has requested drain. 653 "streamingComputationTopology": { # Global topology of the streaming Dataflow job, including all # The global topology of the streaming Dataflow job. 654 # computations and their sharded locations. 655 "computations": [ # The computations associated with a streaming Dataflow job. 656 { # All configuration data for a particular Computation. 657 "inputs": [ # The inputs to the computation. 658 { # Describes a stream of data, either as input to be processed or as 659 # output of a streaming Dataflow job. 660 "streamingStageLocation": { # Identifies the location of a streaming computation stage, for # The stream is part of another computation within the current 661 # streaming Dataflow job. 662 # stage-to-stage communication. 663 "streamId": "A String", # Identifies the particular stream within the streaming Dataflow 664 # job. 665 }, 666 "pubsubLocation": { # Identifies a pubsub location to use for transferring data into or # The stream is a pubsub stream. 667 # out of a streaming Dataflow job. 668 "idLabel": "A String", # If set, contains a pubsub label from which to extract record ids. 669 # If left empty, record deduplication will be strictly best effort. 670 "timestampLabel": "A String", # If set, contains a pubsub label from which to extract record timestamps. 671 # If left empty, record timestamps will be generated upon arrival. 672 "dropLateData": True or False, # Indicates whether the pipeline allows late-arriving data. 673 "topic": "A String", # A pubsub topic, in the form of 674 # "pubsub.googleapis.com/topics/<project-id>/<topic-name>" 675 "trackingSubscription": "A String", # If set, specifies the pubsub subscription that will be used for tracking 676 # custom time timestamps for watermark estimation. 677 "withAttributes": True or False, # If true, then the client has requested to get pubsub attributes. 678 "subscription": "A String", # A pubsub subscription, in the form of 679 # "pubsub.googleapis.com/subscriptions/<project-id>/<subscription-name>" 680 }, 681 "customSourceLocation": { # Identifies the location of a custom source. # The stream is a custom source. 682 "stateful": True or False, # Whether this source is stateful. 683 }, 684 "sideInputLocation": { # Identifies the location of a streaming side input. # The stream is a streaming side input. 685 "stateFamily": "A String", # Identifies the state family where this side input is stored. 686 "tag": "A String", # Identifies the particular side input within the streaming Dataflow job. 687 }, 688 }, 689 ], 690 "outputs": [ # The outputs from the computation. 691 { # Describes a stream of data, either as input to be processed or as 692 # output of a streaming Dataflow job.
693 "streamingStageLocation": { # Identifies the location of a streaming computation stage, for # The stream is part of another computation within the current 694 # streaming Dataflow job. 695 # stage-to-stage communication. 696 "streamId": "A String", # Identifies the particular stream within the streaming Dataflow 697 # job. 698 }, 699 "pubsubLocation": { # Identifies a pubsub location to use for transferring data into or # The stream is a pubsub stream. 700 # out of a streaming Dataflow job. 701 "idLabel": "A String", # If set, contains a pubsub label from which to extract record ids. 702 # If left empty, record deduplication will be strictly best effort. 703 "timestampLabel": "A String", # If set, contains a pubsub label from which to extract record timestamps. 704 # If left empty, record timestamps will be generated upon arrival. 705 "dropLateData": True or False, # Indicates whether the pipeline allows late-arriving data. 706 "topic": "A String", # A pubsub topic, in the form of 707 # "pubsub.googleapis.com/topics/<project-id>/<topic-name>" 708 "trackingSubscription": "A String", # If set, specifies the pubsub subscription that will be used for tracking 709 # custom time timestamps for watermark estimation. 710 "withAttributes": True or False, # If true, then the client has requested to get pubsub attributes. 711 "subscription": "A String", # A pubsub subscription, in the form of 712 # "pubsub.googleapis.com/subscriptions/<project-id>/<subscription-name>" 713 }, 714 "customSourceLocation": { # Identifies the location of a custom source. # The stream is a custom source. 715 "stateful": True or False, # Whether this source is stateful. 716 }, 717 "sideInputLocation": { # Identifies the location of a streaming side input. # The stream is a streaming side input. 718 "stateFamily": "A String", # Identifies the state family where this side input is stored. 719 "tag": "A String", # Identifies the particular side input within the streaming Dataflow job. 720 }, 721 }, 722 ], 723 "keyRanges": [ # The key ranges processed by the computation. 724 { # Location information for a specific key-range of a sharded computation. 725 # Currently we only support UTF-8 character splits to simplify encoding into 726 # JSON. 727 "deprecatedPersistentDirectory": "A String", # DEPRECATED. The location of the persistent state for this range, as a 728 # persistent directory in the worker local filesystem. 729 "start": "A String", # The start (inclusive) of the key range. 730 "deliveryEndpoint": "A String", # The physical location of this range assignment to be used for 731 # streaming computation cross-worker message delivery. 732 "end": "A String", # The end (exclusive) of the key range. 733 "dataDisk": "A String", # The name of the data disk where data for this range is stored. 734 # This name is local to the Google Cloud Platform project and uniquely 735 # identifies the disk within that project, for example 736 # "myproject-1014-104817-4c2-harness-0-disk-1". 737 }, 738 ], 739 "computationId": "A String", # The ID of the computation. 740 "systemStageName": "A String", # The system stage name. 741 "stateFamilies": [ # The state family values. 742 { # State family configuration. 743 "stateFamily": "A String", # The state family value. 744 "isRead": True or False, # If true, this family corresponds to a read operation. 745 }, 746 ], 747 }, 748 ], 749 "dataDiskAssignments": [ # The disks assigned to a streaming Dataflow job. 750 { # Data disk assignment for a given VM instance.
751 "vmInstance": "A String", # VM instance name the data disks are mounted to, for example 752 # "myproject-1014-104817-4c2-harness-0". 753 "dataDisks": [ # Mounted data disks. The order is important: a data disk's 0-based index in 754 # this list defines which persistent directory the disk is mounted to, for 755 # example the list of { "myproject-1014-104817-4c2-harness-0-disk-0" }, 756 # { "myproject-1014-104817-4c2-harness-0-disk-1" }. 757 "A String", 758 ], 759 }, 760 ], 761 "forwardingKeyBits": 42, # The size (in bits) of keys that will be assigned to source messages. 762 "userStageToComputationNameMap": { # Maps user stage names to stable computation names. 763 "a_key": "A String", 764 }, 765 "persistentStateVersion": 42, # Version number for persistent state. 766 }, 767 "receiveWorkPort": 42, # The TCP port on which the worker should listen for messages from 768 # other streaming computation workers. 769 }, 770 "id": "A String", # Identifies this WorkItem. 771 "streamingConfigTask": { # A task that carries configuration information for streaming computations. # Additional information for StreamingConfigTask WorkItems. 772 "userStepToStateFamilyNameMap": { # Map from user step names to state families. 773 "a_key": "A String", 774 }, 775 "windmillServicePort": "A String", # If present, the worker must use this port to communicate with Windmill 776 # Service dispatchers. Only applicable when windmill_service_endpoint is 777 # specified. 778 "streamingComputationConfigs": [ # Set of computation configuration information. 779 { # Configuration information for a single streaming computation. 780 "computationId": "A String", # Unique identifier for this computation. 781 "systemName": "A String", # System-defined name for this computation. 782 "stageName": "A String", # Stage name of this computation. 783 "instructions": [ # Instructions that comprise the computation. 784 { # Describes a particular operation comprising a MapTask. 785 "name": "A String", # User-provided name of this operation. 786 "read": { # An instruction that reads records. # Additional information for Read instructions. 787 # Takes no inputs, produces one output. 788 "source": { # A source that records can be read and decoded from. # The source to read from. 789 "codec": { # The codec to use to decode data read from the source. 790 "a_key": "", # Properties of the object. 791 }, 792 "baseSpecs": [ # While splitting, sources may specify the produced bundles 793 # as differences against another source, in order to save backend-side 794 # memory and allow bigger jobs. For details, see SourceSplitRequest. 795 # To support this use case, the full set of parameters of the source 796 # is logically obtained by taking the latest explicitly specified value 797 # of each parameter in the order: 798 # base_specs (later items win), spec (overrides anything in base_specs). 799 { 800 "a_key": "", # Properties of the object. 801 }, 802 ], 803 "spec": { # The source to read from, plus its parameters. 804 "a_key": "", # Properties of the object. 805 }, 806 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 807 # doesn't need splitting, and using SourceSplitRequest on it would 808 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 809 # 810 # E.g. a file splitter may set this to true when splitting a single file 811 # into a set of byte ranges of appropriate size, and set this 812 # to false when splitting a filepattern into individual files.
813 # However, for efficiency, a file splitter may decide to produce 814 # file subranges directly from the filepattern to avoid a splitting 815 # round-trip. 816 # 817 # See SourceSplitRequest for an overview of the splitting process. 818 # 819 # This field is meaningful only in the Source objects populated 820 # by the user (e.g. when filling in a DerivedSource). 821 # Source objects supplied by the framework to the user don't have 822 # this field populated. 823 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 824 # avoiding a SourceGetMetadataOperation roundtrip 825 # (see SourceOperationRequest). 826 # 827 # This field is meaningful only in the Source objects populated 828 # by the user (e.g. when filling in a DerivedSource). 829 # Source objects supplied by the framework to the user don't have 830 # this field populated. 831 # and tuning the pipeline, etc. 832 "infinite": True or False, # Specifies that the size of this source is known to be infinite 833 # (this is a streaming source). 834 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 835 # read from this source. This estimate is in terms of external storage 836 # size, before any decompression or other processing done by the reader. 837 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 838 # the (encoded) keys in lexicographically sorted order. 839 }, 840 }, 841 }, 842 "outputs": [ # Describes the outputs of the instruction. 843 { # An output of an instruction. 844 "name": "A String", # The user-provided name of this output. 845 "onlyCountKeyBytes": True or False, # For system-generated byte and mean byte metrics, certain instructions 846 # should only report the key size. 847 "codec": { # The codec to use to encode data being written via this output. 848 "a_key": "", # Properties of the object. 849 }, 850 "systemName": "A String", # System-defined name of this output. 851 # Unique across the workflow. 852 "originalName": "A String", # System-defined name for this output in the original workflow graph. 853 # Outputs that do not contribute to an original instruction do not set this. 854 "onlyCountValueBytes": True or False, # For system-generated byte and mean byte metrics, certain instructions 855 # should only report the value size. 856 }, 857 ], 858 "partialGroupByKey": { # An instruction that does a partial group-by-key. # Additional information for PartialGroupByKey instructions. 859 # One input and one output. 860 "sideInputs": [ # Zero or more side inputs. 861 { # Information about a side input of a DoFn or an input of a SeqDoFn. 862 "sources": [ # The source(s) to read element(s) from to get the value of this side input. 863 # If more than one source, then the elements are taken from the 864 # sources, in the specified order if order matters. 865 # At least one source is required. 866 { # A source that records can be read and decoded from. 867 "codec": { # The codec to use to decode data read from the source. 868 "a_key": "", # Properties of the object. 869 }, 870 "baseSpecs": [ # While splitting, sources may specify the produced bundles 871 # as differences against another source, in order to save backend-side 872 # memory and allow bigger jobs. For details, see SourceSplitRequest. 
873 # To support this use case, the full set of parameters of the source 874 # is logically obtained by taking the latest explicitly specified value 875 # of each parameter in the order: 876 # base_specs (later items win), spec (overrides anything in base_specs). 877 { 878 "a_key": "", # Properties of the object. 879 }, 880 ], 881 "spec": { # The source to read from, plus its parameters. 882 "a_key": "", # Properties of the object. 883 }, 884 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 885 # doesn't need splitting, and using SourceSplitRequest on it would 886 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 887 # 888 # E.g. a file splitter may set this to true when splitting a single file 889 # into a set of byte ranges of appropriate size, and set this 890 # to false when splitting a filepattern into individual files. 891 # However, for efficiency, a file splitter may decide to produce 892 # file subranges directly from the filepattern to avoid a splitting 893 # round-trip. 894 # 895 # See SourceSplitRequest for an overview of the splitting process. 896 # 897 # This field is meaningful only in the Source objects populated 898 # by the user (e.g. when filling in a DerivedSource). 899 # Source objects supplied by the framework to the user don't have 900 # this field populated. 901 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 902 # avoiding a SourceGetMetadataOperation roundtrip 903 # (see SourceOperationRequest). 904 # 905 # This field is meaningful only in the Source objects populated 906 # by the user (e.g. when filling in a DerivedSource). 907 # Source objects supplied by the framework to the user don't have 908 # this field populated. 909 # and tuning the pipeline, etc. 910 "infinite": True or False, # Specifies that the size of this source is known to be infinite 911 # (this is a streaming source). 912 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 913 # read from this source. This estimate is in terms of external storage 914 # size, before any decompression or other processing done by the reader. 915 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 916 # the (encoded) keys in lexicographically sorted order. 917 }, 918 }, 919 ], 920 "kind": { # How to interpret the source element(s) as a side input value. 921 "a_key": "", # Properties of the object. 922 }, 923 "tag": "A String", # The id of the tag the user code will access this side input by; 924 # this should correspond to the tag of some MultiOutputInfo. 925 }, 926 ], 927 "inputElementCodec": { # The codec to use for interpreting an element in the input PTable. 928 "a_key": "", # Properties of the object. 929 }, 930 "originalCombineValuesStepName": "A String", # If this instruction includes a combining function, this is the name of the 931 # CombineValues instruction lifted into this instruction. 932 "valueCombiningFn": { # The value combining function to invoke. 933 "a_key": "", # Properties of the object. 934 }, 935 "input": { # An input of an instruction, as a reference to an output of a # Describes the input to the partial group-by-key instruction. 936 # producer instruction. 937 "outputNum": 42, # The output index (origin zero) within the producer. 
938 "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces 939 # the output to be consumed by this input. This index is relative 940 # to the list of instructions in this input's instruction's 941 # containing MapTask. 942 }, 943 "originalCombineValuesInputStoreName": "A String", # If this instruction includes a combining function this is the name of the 944 # intermediate store between the GBK and the CombineValues. 945 }, 946 "write": { # An instruction that writes records. # Additional information for Write instructions. 947 # Takes one input, produces no outputs. 948 "input": { # An input of an instruction, as a reference to an output of a # The input. 949 # producer instruction. 950 "outputNum": 42, # The output index (origin zero) within the producer. 951 "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces 952 # the output to be consumed by this input. This index is relative 953 # to the list of instructions in this input's instruction's 954 # containing MapTask. 955 }, 956 "sink": { # A sink that records can be encoded and written to. # The sink to write to. 957 "codec": { # The codec to use to encode data written to the sink. 958 "a_key": "", # Properties of the object. 959 }, 960 "spec": { # The sink to write to, plus its parameters. 961 "a_key": "", # Properties of the object. 962 }, 963 }, 964 }, 965 "systemName": "A String", # System-defined name of this operation. 966 # Unique across the workflow. 967 "flatten": { # An instruction that copies its inputs (zero or more) to its (single) output. # Additional information for Flatten instructions. 968 "inputs": [ # Describes the inputs to the flatten instruction. 969 { # An input of an instruction, as a reference to an output of a 970 # producer instruction. 971 "outputNum": 42, # The output index (origin zero) within the producer. 972 "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces 973 # the output to be consumed by this input. This index is relative 974 # to the list of instructions in this input's instruction's 975 # containing MapTask. 976 }, 977 ], 978 }, 979 "originalName": "A String", # System-defined name for the operation in the original workflow graph. 980 "parDo": { # An instruction that does a ParDo operation. # Additional information for ParDo instructions. 981 # Takes one main input and zero or more side inputs, and produces 982 # zero or more outputs. 983 # Runs user code. 984 "sideInputs": [ # Zero or more side inputs. 985 { # Information about a side input of a DoFn or an input of a SeqDoFn. 986 "sources": [ # The source(s) to read element(s) from to get the value of this side input. 987 # If more than one source, then the elements are taken from the 988 # sources, in the specified order if order matters. 989 # At least one source is required. 990 { # A source that records can be read and decoded from. 991 "codec": { # The codec to use to decode data read from the source. 992 "a_key": "", # Properties of the object. 993 }, 994 "baseSpecs": [ # While splitting, sources may specify the produced bundles 995 # as differences against another source, in order to save backend-side 996 # memory and allow bigger jobs. For details, see SourceSplitRequest. 
997 # To support this use case, the full set of parameters of the source 998 # is logically obtained by taking the latest explicitly specified value 999 # of each parameter in the order: 1000 # base_specs (later items win), spec (overrides anything in base_specs). 1001 { 1002 "a_key": "", # Properties of the object. 1003 }, 1004 ], 1005 "spec": { # The source to read from, plus its parameters. 1006 "a_key": "", # Properties of the object. 1007 }, 1008 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 1009 # doesn't need splitting, and using SourceSplitRequest on it would 1010 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 1011 # 1012 # E.g. a file splitter may set this to true when splitting a single file 1013 # into a set of byte ranges of appropriate size, and set this 1014 # to false when splitting a filepattern into individual files. 1015 # However, for efficiency, a file splitter may decide to produce 1016 # file subranges directly from the filepattern to avoid a splitting 1017 # round-trip. 1018 # 1019 # See SourceSplitRequest for an overview of the splitting process. 1020 # 1021 # This field is meaningful only in the Source objects populated 1022 # by the user (e.g. when filling in a DerivedSource). 1023 # Source objects supplied by the framework to the user don't have 1024 # this field populated. 1025 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 1026 # avoiding a SourceGetMetadataOperation roundtrip 1027 # (see SourceOperationRequest). 1028 # 1029 # This field is meaningful only in the Source objects populated 1030 # by the user (e.g. when filling in a DerivedSource). 1031 # Source objects supplied by the framework to the user don't have 1032 # this field populated. 1033 # and tuning the pipeline, etc. 1034 "infinite": True or False, # Specifies that the size of this source is known to be infinite 1035 # (this is a streaming source). 1036 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 1037 # read from this source. This estimate is in terms of external storage 1038 # size, before any decompression or other processing done by the reader. 1039 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 1040 # the (encoded) keys in lexicographically sorted order. 1041 }, 1042 }, 1043 ], 1044 "kind": { # How to interpret the source element(s) as a side input value. 1045 "a_key": "", # Properties of the object. 1046 }, 1047 "tag": "A String", # The id of the tag the user code will access this side input by; 1048 # this should correspond to the tag of some MultiOutputInfo. 1049 }, 1050 ], 1051 "input": { # An input of an instruction, as a reference to an output of a # The input. 1052 # producer instruction. 1053 "outputNum": 42, # The output index (origin zero) within the producer. 1054 "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces 1055 # the output to be consumed by this input. This index is relative 1056 # to the list of instructions in this input's instruction's 1057 # containing MapTask. 1058 }, 1059 "multiOutputInfos": [ # Information about each of the outputs, if user_fn is a MultiDoFn. 1060 { # Information about an output of a multi-output DoFn. 
1061 "tag": "A String", # The id of the tag the user code will emit to this output by; this 1062 # should correspond to the tag of some SideInputInfo. 1063 }, 1064 ], 1065 "numOutputs": 42, # The number of outputs. 1066 "userFn": { # The user function to invoke. 1067 "a_key": "", # Properties of the object. 1068 }, 1069 }, 1070 }, 1071 ], 1072 }, 1073 ], 1074 "windmillServiceEndpoint": "A String", # If present, the worker must use this endpoint to communicate with Windmill 1075 # Service dispatchers, otherwise the worker must continue to use whatever 1076 # endpoint it had been using. 1077 }, 1078 "packages": [ # Any required packages that need to be fetched in order to execute 1079 # this WorkItem. 1080 { # The packages that must be installed in order for a worker to run the 1081 # steps of the Cloud Dataflow job that will be assigned to its worker 1082 # pool. 1083 # 1084 # This is the mechanism by which the Cloud Dataflow SDK causes code to 1085 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK 1086 # might use this to install jars containing the user's code and all of the 1087 # various dependencies (libraries, data files, etc.) required in order 1088 # for that code to run. 1089 "location": "A String", # The resource to read the package from. The supported resource type is: 1090 # 1091 # Google Cloud Storage: 1092 # 1093 # storage.googleapis.com/{bucket} 1094 # bucket.storage.googleapis.com/ 1095 "name": "A String", # The name of the package. 1096 }, 1097 ], 1098 "shellTask": { # A task which consists of a shell command for the worker to execute. # Additional information for ShellTask WorkItems. 1099 "command": "A String", # The shell command to run. 1100 "exitCode": 42, # Exit code for the task. 1101 }, 1102 "streamingComputationTask": { # A task which describes what action should be performed for the specified # Additional information for StreamingComputationTask WorkItems. 1103 # streaming computation ranges. 1104 "taskType": "A String", # A type of streaming computation task. 1105 "computationRanges": [ # Contains ranges of a streaming computation this task should apply to. 1106 { # Describes full or partial data disk assignment information of the computation 1107 # ranges. 1108 "computationId": "A String", # The ID of the computation. 1109 "rangeAssignments": [ # Data disk assignments for ranges from this computation. 1110 { # Data disk assignment information for a specific key-range of a sharded 1111 # computation. 1112 # Currently we only support UTF-8 character splits to simplify encoding into 1113 # JSON. 1114 "start": "A String", # The start (inclusive) of the key range. 1115 "end": "A String", # The end (exclusive) of the key range. 1116 "dataDisk": "A String", # The name of the data disk where data for this range is stored. 1117 # This name is local to the Google Cloud Platform project and uniquely 1118 # identifies the disk within that project, for example 1119 # "myproject-1014-104817-4c2-harness-0-disk-1". 1120 }, 1121 ], 1122 }, 1123 ], 1124 "dataDisks": [ # Describes the set of data disks this task should apply to. 1125 { # Describes mounted data disk. 1126 "dataDisk": "A String", # The name of the data disk. 1127 # This name is local to the Google Cloud Platform project and uniquely 1128 # identifies the disk within that project, for example 1129 # "myproject-1014-104817-4c2-harness-0-disk-1". 
1130 }, 1131 ], 1132 }, 1133 }, 1134 ], 1135 }</pre> 1136</div> 1137 1138<div class="method"> 1139 <code class="details" id="reportStatus">reportStatus(projectId, jobId, body, x__xgafv=None)</code> 1140 <pre>Reports the status of dataflow WorkItems leased by a worker. 1141 1142Args: 1143 projectId: string, The project which owns the WorkItem's job. (required) 1144 jobId: string, The job which the WorkItem is part of. (required) 1145 body: object, The request body. (required) 1146 The object takes the form of: 1147 1148{ # Request to report the status of WorkItems. 1149 "workerId": "A String", # The ID of the worker reporting the WorkItem status. If this 1150 # does not match the ID of the worker which the Dataflow service 1151 # believes currently has the lease on the WorkItem, the report 1152 # will be dropped (with an error response). 1153 "currentWorkerTime": "A String", # The current timestamp at the worker. 1154 "workItemStatuses": [ # The order is unimportant, except that the order of the 1155 # WorkItemServiceState messages in the ReportWorkItemStatusResponse 1156 # corresponds to the order of WorkItemStatus messages here. 1157 { # Conveys a worker's progress through the work described by a WorkItem. 1158 "reportIndex": "A String", # The report index. When a WorkItem is leased, the lease will 1159 # contain an initial report index. When a WorkItem's status is 1160 # reported to the system, the report should be sent with 1161 # that report index, and the response will contain the index the 1162 # worker should use for the next report. Reports received with 1163 # unexpected index values will be rejected by the service. 1164 # 1165 # In order to preserve idempotency, the worker should not alter the 1166 # contents of a report, even if the worker must submit the same 1167 # report multiple times before getting back a response. The worker 1168 # should not submit a subsequent report until the response for the 1169 # previous report has been received from the service. 1170 "errors": [ # Specifies errors which occurred during processing. If errors are 1171 # provided, and completed = true, then the WorkItem is considered 1172 # to have failed. 1173 { # The `Status` type defines a logical error model that is suitable for different 1174 # programming environments, including REST APIs and RPC APIs. It is used by 1175 # [gRPC](https://github.com/grpc). The error model is designed to be: 1176 # 1177 # - Simple to use and understand for most users 1178 # - Flexible enough to meet unexpected needs 1179 # 1180 # # Overview 1181 # 1182 # The `Status` message contains three pieces of data: error code, error message, 1183 # and error details. The error code should be an enum value of 1184 # google.rpc.Code, but it may accept additional error codes if needed. The 1185 # error message should be a developer-facing English message that helps 1186 # developers *understand* and *resolve* the error. If a localized user-facing 1187 # error message is needed, put the localized message in the error details or 1188 # localize it in the client. The optional error details may contain arbitrary 1189 # information about the error. There is a predefined set of error detail types 1190 # in the package `google.rpc` that can be used for common error conditions. 1191 # 1192 # # Language mapping 1193 # 1194 # The `Status` message is the logical representation of the error model, but it 1195 # is not necessarily the actual wire format.
When the `Status` message is 1196 # exposed in different client libraries and different wire protocols, it can be 1197 # mapped differently. For example, it will likely be mapped to some exceptions 1198 # in Java, but more likely mapped to some error codes in C. 1199 # 1200 # # Other uses 1201 # 1202 # The error model and the `Status` message can be used in a variety of 1203 # environments, either with or without APIs, to provide a 1204 # consistent developer experience across different environments. 1205 # 1206 # Example uses of this error model include: 1207 # 1208 # - Partial errors. If a service needs to return partial errors to the client, 1209 # it may embed the `Status` in the normal response to indicate the partial 1210 # errors. 1211 # 1212 # - Workflow errors. A typical workflow has multiple steps. Each step may 1213 # have a `Status` message for error reporting. 1214 # 1215 # - Batch operations. If a client uses batch request and batch response, the 1216 # `Status` message should be used directly inside batch response, one for 1217 # each error sub-response. 1218 # 1219 # - Asynchronous operations. If an API call embeds asynchronous operation 1220 # results in its response, the status of those operations should be 1221 # represented directly using the `Status` message. 1222 # 1223 # - Logging. If some API errors are stored in logs, the message `Status` could 1224 # be used directly after any stripping needed for security/privacy reasons. 1225 "message": "A String", # A developer-facing error message, which should be in English. Any 1226 # user-facing error message should be localized and sent in the 1227 # google.rpc.Status.details field, or localized by the client. 1228 "code": 42, # The status code, which should be an enum value of google.rpc.Code. 1229 "details": [ # A list of messages that carry the error details. There will be a 1230 # common set of message types for APIs to use. 1231 { 1232 "a_key": "", # Properties of the object. Contains field @type with type URL. 1233 }, 1234 ], 1235 }, 1236 ], 1237 "sourceOperationResponse": { # The result of a SourceOperationRequest, specified in # If the work item represented a SourceOperationRequest, and the work 1238 # is completed, contains the result of the operation. 1239 # ReportWorkItemStatusRequest.source_operation when the work item 1240 # is completed. 1241 "getMetadata": { # The result of a SourceGetMetadataOperation. # A response to a request to get metadata about a source. 1242 "metadata": { # Metadata about a Source useful for automatically optimizing # The computed metadata. 1243 # and tuning the pipeline, etc. 1244 "infinite": True or False, # Specifies that the size of this source is known to be infinite 1245 # (this is a streaming source). 1246 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 1247 # read from this source. This estimate is in terms of external storage 1248 # size, before any decompression or other processing done by the reader. 1249 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 1250 # the (encoded) keys in lexicographically sorted order. 1251 }, 1252 }, 1253 "split": { # The response to a SourceSplitRequest. # A response to a request to split a source. 1254 "outcome": "A String", # Indicates whether splitting happened and produced a list of bundles. 1255 # If this is USE_CURRENT_SOURCE_AS_IS, the current source should 1256 # be processed "as is" without splitting. 
"bundles" is ignored in this case. 1257 # If this is SPLITTING_HAPPENED, then "bundles" contains a list of 1258 # bundles into which the source was split. 1259 "bundles": [ # If outcome is SPLITTING_HAPPENED, then this is a list of bundles 1260 # into which the source was split. Otherwise this field is ignored. 1261 # This list can be empty, which means the source represents an empty input. 1262 { # Specification of one of the bundles produced as a result of splitting 1263 # a Source (e.g. when executing a SourceSplitRequest, or when 1264 # splitting an active task using WorkItemStatus.dynamic_source_split), 1265 # relative to the source being split. 1266 "derivationMode": "A String", # What source to base the produced source on (if any). 1267 "source": { # A source that records can be read and decoded from. # Specification of the source. 1268 "codec": { # The codec to use to decode data read from the source. 1269 "a_key": "", # Properties of the object. 1270 }, 1271 "baseSpecs": [ # While splitting, sources may specify the produced bundles 1272 # as differences against another source, in order to save backend-side 1273 # memory and allow bigger jobs. For details, see SourceSplitRequest. 1274 # To support this use case, the full set of parameters of the source 1275 # is logically obtained by taking the latest explicitly specified value 1276 # of each parameter in the order: 1277 # base_specs (later items win), spec (overrides anything in base_specs). 1278 { 1279 "a_key": "", # Properties of the object. 1280 }, 1281 ], 1282 "spec": { # The source to read from, plus its parameters. 1283 "a_key": "", # Properties of the object. 1284 }, 1285 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 1286 # doesn't need splitting, and using SourceSplitRequest on it would 1287 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 1288 # 1289 # E.g. a file splitter may set this to true when splitting a single file 1290 # into a set of byte ranges of appropriate size, and set this 1291 # to false when splitting a filepattern into individual files. 1292 # However, for efficiency, a file splitter may decide to produce 1293 # file subranges directly from the filepattern to avoid a splitting 1294 # round-trip. 1295 # 1296 # See SourceSplitRequest for an overview of the splitting process. 1297 # 1298 # This field is meaningful only in the Source objects populated 1299 # by the user (e.g. when filling in a DerivedSource). 1300 # Source objects supplied by the framework to the user don't have 1301 # this field populated. 1302 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 1303 # avoiding a SourceGetMetadataOperation roundtrip 1304 # (see SourceOperationRequest). 1305 # 1306 # This field is meaningful only in the Source objects populated 1307 # by the user (e.g. when filling in a DerivedSource). 1308 # Source objects supplied by the framework to the user don't have 1309 # this field populated. 1310 # and tuning the pipeline, etc. 1311 "infinite": True or False, # Specifies that the size of this source is known to be infinite 1312 # (this is a streaming source). 1313 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 1314 # read from this source. This estimate is in terms of external storage 1315 # size, before any decompression or other processing done by the reader. 
1316 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 1317 # the (encoded) keys in lexicographically sorted order. 1318 }, 1319 }, 1320 }, 1321 ], 1322 "shards": [ # DEPRECATED in favor of bundles. 1323 { # DEPRECATED in favor of DerivedSource. 1324 "derivationMode": "A String", # DEPRECATED 1325 "source": { # A source that records can be read and decoded from. # DEPRECATED 1326 "codec": { # The codec to use to decode data read from the source. 1327 "a_key": "", # Properties of the object. 1328 }, 1329 "baseSpecs": [ # While splitting, sources may specify the produced bundles 1330 # as differences against another source, in order to save backend-side 1331 # memory and allow bigger jobs. For details, see SourceSplitRequest. 1332 # To support this use case, the full set of parameters of the source 1333 # is logically obtained by taking the latest explicitly specified value 1334 # of each parameter in the order: 1335 # base_specs (later items win), spec (overrides anything in base_specs). 1336 { 1337 "a_key": "", # Properties of the object. 1338 }, 1339 ], 1340 "spec": { # The source to read from, plus its parameters. 1341 "a_key": "", # Properties of the object. 1342 }, 1343 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 1344 # doesn't need splitting, and using SourceSplitRequest on it would 1345 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 1346 # 1347 # E.g. a file splitter may set this to true when splitting a single file 1348 # into a set of byte ranges of appropriate size, and set this 1349 # to false when splitting a filepattern into individual files. 1350 # However, for efficiency, a file splitter may decide to produce 1351 # file subranges directly from the filepattern to avoid a splitting 1352 # round-trip. 1353 # 1354 # See SourceSplitRequest for an overview of the splitting process. 1355 # 1356 # This field is meaningful only in the Source objects populated 1357 # by the user (e.g. when filling in a DerivedSource). 1358 # Source objects supplied by the framework to the user don't have 1359 # this field populated. 1360 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 1361 # avoiding a SourceGetMetadataOperation roundtrip 1362 # (see SourceOperationRequest). 1363 # 1364 # This field is meaningful only in the Source objects populated 1365 # by the user (e.g. when filling in a DerivedSource). 1366 # Source objects supplied by the framework to the user don't have 1367 # this field populated. 1368 # and tuning the pipeline, etc. 1369 "infinite": True or False, # Specifies that the size of this source is known to be infinite 1370 # (this is a streaming source). 1371 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 1372 # read from this source. This estimate is in terms of external storage 1373 # size, before any decompression or other processing done by the reader. 1374 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 1375 # the (encoded) keys in lexicographically sorted order. 1376 }, 1377 }, 1378 }, 1379 ], 1380 }, 1381 }, 1382 "stopPosition": { # Position defines a position within a collection of data. 
The value # A worker may split an active map task in two parts, "primary" and 1383 # "residual", continuing to process the primary part and returning the 1384 # residual part into the pool of available work. 1385 # This event is called a "dynamic split" and is critical to the dynamic 1386 # work rebalancing feature. The two obtained sub-tasks are called 1387 # "parts" of the split. 1388 # The parts, if concatenated, must represent the same input as would 1389 # be read by the current task if the split did not happen. 1390 # The exact way in which the original task is decomposed into the two 1391 # parts is specified either as a position demarcating them 1392 # (stop_position), or explicitly as two DerivedSources, if this 1393 # task consumes a user-defined source type (dynamic_source_split). 1394 # 1395 # The "current" task is adjusted as a result of the split: after a task 1396 # with range [A, B) sends a stop_position update at C, its range is 1397 # considered to be [A, C), e.g.: 1398 # * Progress should be interpreted relative to the new range, e.g. 1399 # "75% completed" means "75% of [A, C) completed" 1400 # * The worker should interpret proposed_stop_position relative to the 1401 # new range, e.g. "split at 68%" should be interpreted as 1402 # "split at 68% of [A, C)". 1403 # * If the worker chooses to split again using stop_position, only 1404 # stop_positions in [A, C) will be accepted. 1405 # * Etc. 1406 # dynamic_source_split has similar semantics: e.g., if a task with 1407 # source S splits using dynamic_source_split into {P, R} 1408 # (where P and R must be together equivalent to S), then subsequent 1409 # progress and proposed_stop_position should be interpreted relative 1410 # to P, and in a potential subsequent dynamic_source_split into {P', R'}, 1411 # P' and R' must be together equivalent to P, etc. 1412 # can be either the end position, a key (used with ordered 1413 # collections), a byte offset, or a record index. 1414 "end": True or False, # Position is past all other positions. Also useful for the end 1415 # position of an unbounded range. 1416 "recordIndex": "A String", # Position is a record index. 1417 "byteOffset": "A String", # Position is a byte offset. 1418 "key": "A String", # Position is a string key, ordered lexicographically. 1419 "concatPosition": { # A position that encapsulates an inner position and an index for the inner # CloudPosition is a concat position. 1420 # position. A ConcatPosition can be used by a reader of a source that 1421 # encapsulates a set of other sources. 1422 "position": # Object with schema name: Position # Position within the inner source. 1423 "index": 42, # Index of the inner source. 1424 }, 1425 "shufflePosition": "A String", # CloudPosition is a base64 encoded BatchShufflePosition (with FIXED 1426 # sharding). 1427 }, 1428 "sourceFork": { # DEPRECATED in favor of DynamicSourceSplit. # DEPRECATED in favor of dynamic_source_split. 1429 "residualSource": { # Specification of one of the bundles produced as a result of splitting # DEPRECATED 1430 # a Source (e.g. when executing a SourceSplitRequest, or when 1431 # splitting an active task using WorkItemStatus.dynamic_source_split), 1432 # relative to the source being split. 1433 "derivationMode": "A String", # What source to base the produced source on (if any). 1434 "source": { # A source that records can be read and decoded from. # Specification of the source. 1435 "codec": { # The codec to use to decode data read from the source. 1436 "a_key": "", # Properties of the object. 
1437 }, 1438 "baseSpecs": [ # While splitting, sources may specify the produced bundles 1439 # as differences against another source, in order to save backend-side 1440 # memory and allow bigger jobs. For details, see SourceSplitRequest. 1441 # To support this use case, the full set of parameters of the source 1442 # is logically obtained by taking the latest explicitly specified value 1443 # of each parameter in the order: 1444 # base_specs (later items win), spec (overrides anything in base_specs). 1445 { 1446 "a_key": "", # Properties of the object. 1447 }, 1448 ], 1449 "spec": { # The source to read from, plus its parameters. 1450 "a_key": "", # Properties of the object. 1451 }, 1452 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 1453 # doesn't need splitting, and using SourceSplitRequest on it would 1454 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 1455 # 1456 # E.g. a file splitter may set this to true when splitting a single file 1457 # into a set of byte ranges of appropriate size, and set this 1458 # to false when splitting a filepattern into individual files. 1459 # However, for efficiency, a file splitter may decide to produce 1460 # file subranges directly from the filepattern to avoid a splitting 1461 # round-trip. 1462 # 1463 # See SourceSplitRequest for an overview of the splitting process. 1464 # 1465 # This field is meaningful only in the Source objects populated 1466 # by the user (e.g. when filling in a DerivedSource). 1467 # Source objects supplied by the framework to the user don't have 1468 # this field populated. 1469 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 1470 # avoiding a SourceGetMetadataOperation roundtrip 1471 # (see SourceOperationRequest). 1472 # 1473 # This field is meaningful only in the Source objects populated 1474 # by the user (e.g. when filling in a DerivedSource). 1475 # Source objects supplied by the framework to the user don't have 1476 # this field populated. 1477 # and tuning the pipeline, etc. 1478 "infinite": True or False, # Specifies that the size of this source is known to be infinite 1479 # (this is a streaming source). 1480 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 1481 # read from this source. This estimate is in terms of external storage 1482 # size, before any decompression or other processing done by the reader. 1483 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 1484 # the (encoded) keys in lexicographically sorted order. 1485 }, 1486 }, 1487 }, 1488 "primarySource": { # Specification of one of the bundles produced as a result of splitting # DEPRECATED 1489 # a Source (e.g. when executing a SourceSplitRequest, or when 1490 # splitting an active task using WorkItemStatus.dynamic_source_split), 1491 # relative to the source being split. 1492 "derivationMode": "A String", # What source to base the produced source on (if any). 1493 "source": { # A source that records can be read and decoded from. # Specification of the source. 1494 "codec": { # The codec to use to decode data read from the source. 1495 "a_key": "", # Properties of the object. 1496 }, 1497 "baseSpecs": [ # While splitting, sources may specify the produced bundles 1498 # as differences against another source, in order to save backend-side 1499 # memory and allow bigger jobs. 
For details, see SourceSplitRequest. 1500 # To support this use case, the full set of parameters of the source 1501 # is logically obtained by taking the latest explicitly specified value 1502 # of each parameter in the order: 1503 # base_specs (later items win), spec (overrides anything in base_specs). 1504 { 1505 "a_key": "", # Properties of the object. 1506 }, 1507 ], 1508 "spec": { # The source to read from, plus its parameters. 1509 "a_key": "", # Properties of the object. 1510 }, 1511 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 1512 # doesn't need splitting, and using SourceSplitRequest on it would 1513 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 1514 # 1515 # E.g. a file splitter may set this to true when splitting a single file 1516 # into a set of byte ranges of appropriate size, and set this 1517 # to false when splitting a filepattern into individual files. 1518 # However, for efficiency, a file splitter may decide to produce 1519 # file subranges directly from the filepattern to avoid a splitting 1520 # round-trip. 1521 # 1522 # See SourceSplitRequest for an overview of the splitting process. 1523 # 1524 # This field is meaningful only in the Source objects populated 1525 # by the user (e.g. when filling in a DerivedSource). 1526 # Source objects supplied by the framework to the user don't have 1527 # this field populated. 1528 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 1529 # avoiding a SourceGetMetadataOperation roundtrip 1530 # (see SourceOperationRequest). 1531 # 1532 # This field is meaningful only in the Source objects populated 1533 # by the user (e.g. when filling in a DerivedSource). 1534 # Source objects supplied by the framework to the user don't have 1535 # this field populated. 1536 # and tuning the pipeline, etc. 1537 "infinite": True or False, # Specifies that the size of this source is known to be infinite 1538 # (this is a streaming source). 1539 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 1540 # read from this source. This estimate is in terms of external storage 1541 # size, before any decompression or other processing done by the reader. 1542 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 1543 # the (encoded) keys in lexicographically sorted order. 1544 }, 1545 }, 1546 }, 1547 "residual": { # DEPRECATED in favor of DerivedSource. # DEPRECATED 1548 "derivationMode": "A String", # DEPRECATED 1549 "source": { # A source that records can be read and decoded from. # DEPRECATED 1550 "codec": { # The codec to use to decode data read from the source. 1551 "a_key": "", # Properties of the object. 1552 }, 1553 "baseSpecs": [ # While splitting, sources may specify the produced bundles 1554 # as differences against another source, in order to save backend-side 1555 # memory and allow bigger jobs. For details, see SourceSplitRequest. 1556 # To support this use case, the full set of parameters of the source 1557 # is logically obtained by taking the latest explicitly specified value 1558 # of each parameter in the order: 1559 # base_specs (later items win), spec (overrides anything in base_specs). 1560 { 1561 "a_key": "", # Properties of the object. 1562 }, 1563 ], 1564 "spec": { # The source to read from, plus its parameters. 1565 "a_key": "", # Properties of the object. 
1566 }, 1567 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 1568 # doesn't need splitting, and using SourceSplitRequest on it would 1569 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 1570 # 1571 # E.g. a file splitter may set this to true when splitting a single file 1572 # into a set of byte ranges of appropriate size, and set this 1573 # to false when splitting a filepattern into individual files. 1574 # However, for efficiency, a file splitter may decide to produce 1575 # file subranges directly from the filepattern to avoid a splitting 1576 # round-trip. 1577 # 1578 # See SourceSplitRequest for an overview of the splitting process. 1579 # 1580 # This field is meaningful only in the Source objects populated 1581 # by the user (e.g. when filling in a DerivedSource). 1582 # Source objects supplied by the framework to the user don't have 1583 # this field populated. 1584 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 1585 # avoiding a SourceGetMetadataOperation roundtrip 1586 # (see SourceOperationRequest). 1587 # 1588 # This field is meaningful only in the Source objects populated 1589 # by the user (e.g. when filling in a DerivedSource). 1590 # Source objects supplied by the framework to the user don't have 1591 # this field populated. 1592 # and tuning the pipeline, etc. 1593 "infinite": True or False, # Specifies that the size of this source is known to be infinite 1594 # (this is a streaming source). 1595 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 1596 # read from this source. This estimate is in terms of external storage 1597 # size, before any decompression or other processing done by the reader. 1598 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 1599 # the (encoded) keys in lexicographically sorted order. 1600 }, 1601 }, 1602 }, 1603 "primary": { # DEPRECATED in favor of DerivedSource. # DEPRECATED 1604 "derivationMode": "A String", # DEPRECATED 1605 "source": { # A source that records can be read and decoded from. # DEPRECATED 1606 "codec": { # The codec to use to decode data read from the source. 1607 "a_key": "", # Properties of the object. 1608 }, 1609 "baseSpecs": [ # While splitting, sources may specify the produced bundles 1610 # as differences against another source, in order to save backend-side 1611 # memory and allow bigger jobs. For details, see SourceSplitRequest. 1612 # To support this use case, the full set of parameters of the source 1613 # is logically obtained by taking the latest explicitly specified value 1614 # of each parameter in the order: 1615 # base_specs (later items win), spec (overrides anything in base_specs). 1616 { 1617 "a_key": "", # Properties of the object. 1618 }, 1619 ], 1620 "spec": { # The source to read from, plus its parameters. 1621 "a_key": "", # Properties of the object. 1622 }, 1623 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 1624 # doesn't need splitting, and using SourceSplitRequest on it would 1625 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 1626 # 1627 # E.g. a file splitter may set this to true when splitting a single file 1628 # into a set of byte ranges of appropriate size, and set this 1629 # to false when splitting a filepattern into individual files. 
1630 # However, for efficiency, a file splitter may decide to produce 1631 # file subranges directly from the filepattern to avoid a splitting 1632 # round-trip. 1633 # 1634 # See SourceSplitRequest for an overview of the splitting process. 1635 # 1636 # This field is meaningful only in the Source objects populated 1637 # by the user (e.g. when filling in a DerivedSource). 1638 # Source objects supplied by the framework to the user don't have 1639 # this field populated. 1640 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 1641 # avoiding a SourceGetMetadataOperation roundtrip 1642 # (see SourceOperationRequest). 1643 # 1644 # This field is meaningful only in the Source objects populated 1645 # by the user (e.g. when filling in a DerivedSource). 1646 # Source objects supplied by the framework to the user don't have 1647 # this field populated. 1648 # and tuning the pipeline, etc. 1649 "infinite": True or False, # Specifies that the size of this source is known to be infinite 1650 # (this is a streaming source). 1651 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 1652 # read from this source. This estimate is in terms of external storage 1653 # size, before any decompression or other processing done by the reader. 1654 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 1655 # the (encoded) keys in lexicographically sorted order. 1656 }, 1657 }, 1658 }, 1659 }, 1660 "requestedLeaseDuration": "A String", # Amount of time the worker requests for its lease. 1661 "completed": True or False, # True if the WorkItem was completed (successfully or unsuccessfully). 1662 "workItemId": "A String", # Identifies the WorkItem. 1663 "dynamicSourceSplit": { # When a task splits using WorkItemStatus.dynamic_source_split, this # See documentation of stop_position. 1664 # message describes the two parts of the split relative to the 1665 # description of the current task's input. 1666 "residual": { # Specification of one of the bundles produced as a result of splitting # Residual part (returned to the pool of work). 1667 # Specified relative to the previously-current source. 1668 # a Source (e.g. when executing a SourceSplitRequest, or when 1669 # splitting an active task using WorkItemStatus.dynamic_source_split), 1670 # relative to the source being split. 1671 "derivationMode": "A String", # What source to base the produced source on (if any). 1672 "source": { # A source that records can be read and decoded from. # Specification of the source. 1673 "codec": { # The codec to use to decode data read from the source. 1674 "a_key": "", # Properties of the object. 1675 }, 1676 "baseSpecs": [ # While splitting, sources may specify the produced bundles 1677 # as differences against another source, in order to save backend-side 1678 # memory and allow bigger jobs. For details, see SourceSplitRequest. 1679 # To support this use case, the full set of parameters of the source 1680 # is logically obtained by taking the latest explicitly specified value 1681 # of each parameter in the order: 1682 # base_specs (later items win), spec (overrides anything in base_specs). 1683 { 1684 "a_key": "", # Properties of the object. 1685 }, 1686 ], 1687 "spec": { # The source to read from, plus its parameters. 1688 "a_key": "", # Properties of the object. 
1689 }, 1690 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 1691 # doesn't need splitting, and using SourceSplitRequest on it would 1692 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 1693 # 1694 # E.g. a file splitter may set this to true when splitting a single file 1695 # into a set of byte ranges of appropriate size, and set this 1696 # to false when splitting a filepattern into individual files. 1697 # However, for efficiency, a file splitter may decide to produce 1698 # file subranges directly from the filepattern to avoid a splitting 1699 # round-trip. 1700 # 1701 # See SourceSplitRequest for an overview of the splitting process. 1702 # 1703 # This field is meaningful only in the Source objects populated 1704 # by the user (e.g. when filling in a DerivedSource). 1705 # Source objects supplied by the framework to the user don't have 1706 # this field populated. 1707 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 1708 # avoiding a SourceGetMetadataOperation roundtrip 1709 # (see SourceOperationRequest). 1710 # 1711 # This field is meaningful only in the Source objects populated 1712 # by the user (e.g. when filling in a DerivedSource). 1713 # Source objects supplied by the framework to the user don't have 1714 # this field populated. 1715 # and tuning the pipeline, etc. 1716 "infinite": True or False, # Specifies that the size of this source is known to be infinite 1717 # (this is a streaming source). 1718 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 1719 # read from this source. This estimate is in terms of external storage 1720 # size, before any decompression or other processing done by the reader. 1721 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 1722 # the (encoded) keys in lexicographically sorted order. 1723 }, 1724 }, 1725 }, 1726 "primary": { # Specification of one of the bundles produced as a result of splitting # Primary part (continued to be processed by worker). 1727 # Specified relative to the previously-current source. 1728 # Becomes current. 1729 # a Source (e.g. when executing a SourceSplitRequest, or when 1730 # splitting an active task using WorkItemStatus.dynamic_source_split), 1731 # relative to the source being split. 1732 "derivationMode": "A String", # What source to base the produced source on (if any). 1733 "source": { # A source that records can be read and decoded from. # Specification of the source. 1734 "codec": { # The codec to use to decode data read from the source. 1735 "a_key": "", # Properties of the object. 1736 }, 1737 "baseSpecs": [ # While splitting, sources may specify the produced bundles 1738 # as differences against another source, in order to save backend-side 1739 # memory and allow bigger jobs. For details, see SourceSplitRequest. 1740 # To support this use case, the full set of parameters of the source 1741 # is logically obtained by taking the latest explicitly specified value 1742 # of each parameter in the order: 1743 # base_specs (later items win), spec (overrides anything in base_specs). 1744 { 1745 "a_key": "", # Properties of the object. 1746 }, 1747 ], 1748 "spec": { # The source to read from, plus its parameters. 1749 "a_key": "", # Properties of the object. 
1750 }, 1751 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 1752 # doesn't need splitting, and using SourceSplitRequest on it would 1753 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 1754 # 1755 # E.g. a file splitter may set this to true when splitting a single file 1756 # into a set of byte ranges of appropriate size, and set this 1757 # to false when splitting a filepattern into individual files. 1758 # However, for efficiency, a file splitter may decide to produce 1759 # file subranges directly from the filepattern to avoid a splitting 1760 # round-trip. 1761 # 1762 # See SourceSplitRequest for an overview of the splitting process. 1763 # 1764 # This field is meaningful only in the Source objects populated 1765 # by the user (e.g. when filling in a DerivedSource). 1766 # Source objects supplied by the framework to the user don't have 1767 # this field populated. 1768 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 1769 # avoiding a SourceGetMetadataOperation roundtrip 1770 # (see SourceOperationRequest). 1771 # 1772 # This field is meaningful only in the Source objects populated 1773 # by the user (e.g. when filling in a DerivedSource). 1774 # Source objects supplied by the framework to the user don't have 1775 # this field populated. 1776 # and tuning the pipeline, etc. 1777 "infinite": True or False, # Specifies that the size of this source is known to be infinite 1778 # (this is a streaming source). 1779 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 1780 # read from this source. This estimate is in terms of external storage 1781 # size, before any decompression or other processing done by the reader. 1782 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 1783 # the (encoded) keys in lexicographically sorted order. 1784 }, 1785 }, 1786 }, 1787 }, 1788 "counterUpdates": [ # Worker output counters for this WorkItem. 1789 { # An update to a Counter sent from a worker. 1790 "floatingPointList": { # A metric value representing a list of floating point numbers. # List of floating point numbers, for Set. 1791 "elements": [ # Elements of the list. 1792 3.14, 1793 ], 1794 }, 1795 "floatingPoint": 3.14, # Floating point value for Sum, Max, Min. 1796 "integerMean": { # A representation of an integer mean metric contribution. # Integer mean aggregation value for Mean. 1797 "count": { # A representation of an int64, n, that is immune to precision loss when # The number of values being aggregated. 1798 # encoded in JSON. 1799 "lowBits": 42, # The low order bits: n & 0xffffffff. 1800 "highBits": 42, # The high order bits, including the sign: n >> 32. 1801 }, 1802 "sum": { # A representation of an int64, n, that is immune to precision loss when # The sum of all values being aggregated. 1803 # encoded in JSON. 1804 "lowBits": 42, # The low order bits: n & 0xffffffff. 1805 "highBits": 42, # The high order bits, including the sign: n >> 32. 1806 }, 1807 }, 1808 "boolean": True or False, # Boolean value for And, Or. 1809 "integerList": { # A metric value representing a list of integers. # List of integers, for Set. 1810 "elements": [ # Elements of the list. 1811 { # A representation of an int64, n, that is immune to precision loss when 1812 # encoded in JSON. 1813 "lowBits": 42, # The low order bits: n & 0xffffffff. 
1814 "highBits": 42, # The high order bits, including the sign: n >> 32. 1815 }, 1816 ], 1817 }, 1818 "cumulative": True or False, # True if this counter is reported as the total cumulative aggregate 1819 # value accumulated since the worker started working on this WorkItem. 1820 # By default this is false, indicating that this counter is reported 1821 # as a delta. 1822 "shortId": "A String", # The service-generated short identifier for this counter. 1823 # The short_id -> (name, metadata) mapping is constant for the lifetime of 1824 # a job. 1825 "floatingPointMean": { # A representation of a floating point mean metric contribution. # Floating point mean aggregation value for Mean. 1826 "count": { # A representation of an int64, n, that is immune to precision loss when # The number of values being aggregated. 1827 # encoded in JSON. 1828 "lowBits": 42, # The low order bits: n & 0xffffffff. 1829 "highBits": 42, # The high order bits, including the sign: n >> 32. 1830 }, 1831 "sum": 3.14, # The sum of all values being aggregated. 1832 }, 1833 "internal": "", # Value for internally-defined counters used by the Dataflow service. 1834 "structuredNameAndMetadata": { # A single message which encapsulates structured name and metadata for a given # Counter structured name and metadata. 1835 # counter. 1836 "name": { # Identifies a counter within a per-job namespace. Counters whose structured # Structured name of the counter. 1837 # names are the same get merged into a single value for the job. 1838 "origin": "A String", # One of the standard Origins defined above. 1839 "executionStepName": "A String", # Name of the stage. An execution step contains multiple component steps. 1840 "name": "A String", # Counter name. Not necessarily globally-unique, but unique within the 1841 # context of the other fields. 1842 # Required. 1843 "workerId": "A String", # ID of a particular worker. 1844 "originalStepName": "A String", # System generated name of the original step in the user's graph, before 1845 # optimization. 1846 "originNamespace": "A String", # A string containing a more specific namespace of the counter's origin. 1847 "portion": "A String", # Portion of this counter, either key or value. 1848 "componentStepName": "A String", # Name of the optimized step being executed by the workers. 1849 }, 1850 "metadata": { # CounterMetadata includes all static non-name non-value counter attributes. # Metadata associated with a counter 1851 "standardUnits": "A String", # System defined Units, see above enum. 1852 "kind": "A String", # Counter aggregation kind. 1853 "otherUnits": "A String", # A string referring to the unit type. 1854 "description": "A String", # Human-readable description of the counter semantics. 1855 }, 1856 }, 1857 "nameAndKind": { # Basic metadata about a counter. # Counter name and aggregation type. 1858 "kind": "A String", # Counter aggregation kind. 1859 "name": "A String", # Name of the counter. 1860 }, 1861 "integer": { # A representation of an int64, n, that is immune to precision loss when # Integer value for Sum, Max, Min. 1862 # encoded in JSON. 1863 "lowBits": 42, # The low order bits: n & 0xffffffff. 1864 "highBits": 42, # The high order bits, including the sign: n >> 32. 1865 }, 1866 "distribution": { # A metric value representing a distribution. # Distribution data 1867 "count": { # A representation of an int64, n, that is immune to precision loss when # The count of the number of elements present in the distribution. 1868 # encoded in JSON. 
1869 "lowBits": 42, # The low order bits: n & 0xffffffff. 1870 "highBits": 42, # The high order bits, including the sign: n >> 32. 1871 }, 1872 "min": { # A representation of an int64, n, that is immune to precision loss when # The minimum value present in the distribution. 1873 # encoded in JSON. 1874 "lowBits": 42, # The low order bits: n & 0xffffffff. 1875 "highBits": 42, # The high order bits, including the sign: n >> 32. 1876 }, 1877 "max": { # A representation of an int64, n, that is immune to precision loss when # The maximum value present in the distribution. 1878 # encoded in JSON. 1879 "lowBits": 42, # The low order bits: n & 0xffffffff. 1880 "highBits": 42, # The high order bits, including the sign: n >> 32. 1881 }, 1882 "sum": { # A representation of an int64, n, that is immune to precision loss when # Use an int64 since we'd prefer the added precision. If overflow is a common 1883 # problem we can detect it and use an additional int64 or a double. 1884 # encoded in JSON. 1885 "lowBits": 42, # The low order bits: n & 0xffffffff. 1886 "highBits": 42, # The high order bits, including the sign: n >> 32. 1887 }, 1888 "sumOfSquares": 3.14, # Use a double since the sum of squares is likely to overflow int64. 1889 "logBuckets": [ # (Optional) Logarithmic histogram of values. 1890 # Each log may be in no more than one bucket. Order does not matter. 1891 { # Bucket of values for Distribution's logarithmic histogram. 1892 "count": "A String", # Number of values in this bucket. 1893 "log": 42, # floor(log2(value)); defined to be zero for nonpositive values. 1894 # log(-1) = 0 1895 # log(0) = 0 1896 # log(1) = 0 1897 # log(2) = 1 1898 # log(3) = 1 1899 # log(4) = 2 1900 # log(5) = 2 1901 }, 1902 ], 1903 }, 1904 "stringList": { # A metric value representing a list of strings. # List of strings, for Set. 1905 "elements": [ # Elements of the list. 1906 "A String", 1907 ], 1908 }, 1909 }, 1910 ], 1911 "progress": { # Obsolete in favor of ApproximateReportedProgress and ApproximateSplitRequest. # DEPRECATED in favor of reported_progress. 1912 "position": { # Position defines a position within a collection of data. The value # Obsolete. 1913 # can be either the end position, a key (used with ordered 1914 # collections), a byte offset, or a record index. 1915 "end": True or False, # Position is past all other positions. Also useful for the end 1916 # position of an unbounded range. 1917 "recordIndex": "A String", # Position is a record index. 1918 "byteOffset": "A String", # Position is a byte offset. 1919 "key": "A String", # Position is a string key, ordered lexicographically. 1920 "concatPosition": { # A position that encapsulates an inner position and an index for the inner # CloudPosition is a concat position. 1921 # position. A ConcatPosition can be used by a reader of a source that 1922 # encapsulates a set of other sources. 1923 "position": # Object with schema name: Position # Position within the inner source. 1924 "index": 42, # Index of the inner source. 1925 }, 1926 "shufflePosition": "A String", # CloudPosition is a base64 encoded BatchShufflePosition (with FIXED 1927 # sharding). 1928 }, 1929 "remainingTime": "A String", # Obsolete. 1930 "percentComplete": 3.14, # Obsolete. 1931 }, 1932 "metricUpdates": [ # DEPRECATED in favor of counter_updates. 1933 { # Describes the state of a metric. 1934 "meanCount": "", # Worker-computed aggregate value for the "Mean" aggregation kind. 
1935 # This holds the count of the aggregated values and is used in combination 1936 # with mean_sum above to obtain the actual mean aggregate value. 1937 # The only possible value type is Long. 1938 "updateTime": "A String", # Timestamp associated with the metric value. Optional when workers are 1939 # reporting work progress; it will be filled in responses from the 1940 # metrics API. 1941 "set": "", # Worker-computed aggregate value for the "Set" aggregation kind. The only 1942 # possible value type is a list of Values whose type can be Long, Double, 1943 # or String, according to the metric's type. All Values in the list must 1944 # be of the same type. 1945 "name": { # Identifies a metric, by describing the source which generated the # Name of the metric. 1946 # metric. 1947 "origin": "A String", # Origin (namespace) of metric name. May be blank for user-defined metrics; 1948 # will be "dataflow" for metrics defined by the Dataflow service or SDK. 1949 "name": "A String", # Worker-defined metric name. 1950 "context": { # Zero or more labeled fields which identify the part of the job this 1951 # metric is associated with, such as the name of a step or collection. 1952 # 1953 # For example, built-in counters associated with steps will have 1954 # context['step'] = <step-name>. Counters associated with PCollections 1955 # in the SDK will have context['pcollection'] = <pcollection-name>. 1956 "a_key": "A String", 1957 }, 1958 }, 1959 "cumulative": True or False, # True if this metric is reported as the total cumulative aggregate 1960 # value accumulated since the worker started working on this WorkItem. 1961 # By default this is false, indicating that this metric is reported 1962 # as a delta that is not associated with any WorkItem. 1963 "kind": "A String", # Metric aggregation kind. The possible metric aggregation kinds are 1964 # "Sum", "Max", "Min", "Mean", "Set", "And", "Or", and "Distribution". 1965 # The specified aggregation kind is case-insensitive. 1966 # 1967 # If omitted, this is not an aggregated value but instead 1968 # a single metric sample value. 1969 "scalar": "", # Worker-computed aggregate value for aggregation kinds "Sum", "Max", "Min", 1970 # "And", and "Or". The possible value types are Long, Double, and Boolean. 1971 "meanSum": "", # Worker-computed aggregate value for the "Mean" aggregation kind. 1972 # This holds the sum of the aggregated values and is used in combination 1973 # with mean_count below to obtain the actual mean aggregate value. 1974 # The only possible value types are Long and Double. 1975 "distribution": "", # A struct value describing properties of a distribution of numeric values. 1976 "internal": "", # Worker-computed aggregate value for internal use by the Dataflow 1977 # service. 1978 }, 1979 ], 1980 "reportedProgress": { # A progress measurement of a WorkItem by a worker. # The worker's progress through this WorkItem. 1981 "fractionConsumed": 3.14, # Completion as fraction of the input consumed, from 0.0 (beginning, nothing 1982 # consumed), to 1.0 (end of the input, entire input consumed). 1983 "position": { # Position defines a position within a collection of data. The value # A Position within the work to represent progress. 1984 # can be either the end position, a key (used with ordered 1985 # collections), a byte offset, or a record index. 1986 "end": True or False, # Position is past all other positions. Also useful for the end 1987 # position of an unbounded range. 1988 "recordIndex": "A String", # Position is a record index.
1989 "byteOffset": "A String", # Position is a byte offset. 1990 "key": "A String", # Position is a string key, ordered lexicographically. 1991 "concatPosition": { # A position that encapsulates an inner position and an index for the inner # CloudPosition is a concat position. 1992 # position. A ConcatPosition can be used by a reader of a source that 1993 # encapsulates a set of other sources. 1994 "position": # Object with schema name: Position # Position within the inner source. 1995 "index": 42, # Index of the inner source. 1996 }, 1997 "shufflePosition": "A String", # CloudPosition is a base64 encoded BatchShufflePosition (with FIXED 1998 # sharding). 1999 }, 2000 "remainingParallelism": { # Represents the level of parallelism in a WorkItem's input, # Total amount of parallelism in the input of this task that remains, 2001 # (i.e. can be delegated to this task and any new tasks via dynamic 2002 # splitting). Always at least 1 for non-finished work items and 0 for 2003 # finished. 2004 # 2005 # "Amount of parallelism" refers to how many non-empty parts of the input 2006 # can be read in parallel. This does not necessarily equal number 2007 # of records. An input that can be read in parallel down to the 2008 # individual records is called "perfectly splittable". 2009 # An example of non-perfectly parallelizable input is a block-compressed 2010 # file format where a block of records has to be read as a whole, 2011 # but different blocks can be read in parallel. 2012 # 2013 # Examples: 2014 # * If we are processing record #30 (starting at 1) out of 50 in a perfectly 2015 # splittable 50-record input, this value should be 21 (20 remaining + 1 2016 # current). 2017 # * If we are reading through block 3 in a block-compressed file consisting 2018 # of 5 blocks, this value should be 3 (since blocks 4 and 5 can be 2019 # processed in parallel by new tasks via dynamic splitting and the current 2020 # task remains processing block 3). 2021 # * If we are reading through the last block in a block-compressed file, 2022 # or reading or processing the last record in a perfectly splittable 2023 # input, this value should be 1, because apart from the current task, no 2024 # additional remainder can be split off. 2025 # reported by the worker. 2026 "isInfinite": True or False, # Specifies whether the parallelism is infinite. If true, "value" is 2027 # ignored. 2028 # Infinite parallelism means the service will assume that the work item 2029 # can always be split into more non-empty work items by dynamic splitting. 2030 # This is a work-around for lack of support for infinity by the current 2031 # JSON-based Java RPC stack. 2032 "value": 3.14, # Specifies the level of parallelism in case it is finite. 2033 }, 2034 "consumedParallelism": { # Represents the level of parallelism in a WorkItem's input, # Total amount of parallelism in the portion of input of this task that has 2035 # already been consumed and is no longer active. In the first two examples 2036 # above (see remaining_parallelism), the value should be 29 or 2 2037 # respectively. The sum of remaining_parallelism and consumed_parallelism 2038 # should equal the total amount of parallelism in this work item. If 2039 # specified, must be finite. 2040 # reported by the worker. 2041 "isInfinite": True or False, # Specifies whether the parallelism is infinite. If true, "value" is 2042 # ignored. 2043 # Infinite parallelism means the service will assume that the work item 2044 # can always be split into more non-empty work items by dynamic splitting. 
2045 # This is a work-around for lack of support for infinity by the current 2046 # JSON-based Java RPC stack. 2047 "value": 3.14, # Specifies the level of parallelism in case it is finite. 2048 }, 2049 }, 2050 }, 2051 ], 2052 "location": "A String", # The location which contains the WorkItem's job. 2053 } 2054 2055 x__xgafv: string, V1 error format. 2056 Allowed values 2057 1 - v1 error format 2058 2 - v2 error format 2059 2060Returns: 2061 An object of the form: 2062 2063 { # Response from a request to report the status of WorkItems. 2064 "workItemServiceStates": [ # A set of messages indicating the service-side state for each 2065 # WorkItem whose status was reported, in the same order as the 2066 # WorkItemStatus messages in the ReportWorkItemStatusRequest which 2067 # resulted in this response. 2068 { # The Dataflow service's idea of the current state of a WorkItem 2069 # being processed by a worker. 2070 "reportStatusInterval": "A String", # New recommended reporting interval. 2071 "suggestedStopPosition": { # Position defines a position within a collection of data. The value # Obsolete, always empty. 2072 # can be either the end position, a key (used with ordered 2073 # collections), a byte offset, or a record index. 2074 "end": True or False, # Position is past all other positions. Also useful for the end 2075 # position of an unbounded range. 2076 "recordIndex": "A String", # Position is a record index. 2077 "byteOffset": "A String", # Position is a byte offset. 2078 "key": "A String", # Position is a string key, ordered lexicographically. 2079 "concatPosition": { # A position that encapsulates an inner position and an index for the inner # CloudPosition is a concat position. 2080 # position. A ConcatPosition can be used by a reader of a source that 2081 # encapsulates a set of other sources. 2082 "position": # Object with schema name: Position # Position within the inner source. 2083 "index": 42, # Index of the inner source. 2084 }, 2085 "shufflePosition": "A String", # CloudPosition is a base64 encoded BatchShufflePosition (with FIXED 2086 # sharding). 2087 }, 2088 "harnessData": { # Other data returned by the service, specific to the particular 2089 # worker harness. 2090 "a_key": "", # Properties of the object. 2091 }, 2092 "nextReportIndex": "A String", # The index value to use for the next report sent by the worker. 2093 # Note: If the report call fails for whatever reason, the worker should 2094 # reuse this index for subsequent report attempts. 2095 "leaseExpireTime": "A String", # Time at which the current lease will expire. 2096 "metricShortId": [ # The short ids that workers should use in subsequent metric updates. 2097 # Workers should strive to use short ids whenever possible, but it is ok 2098 # to request the short_id again if a worker lost track of it 2099 # (e.g. if the worker is recovering from a crash). 2100 # NOTE: it is possible that the response may have short ids for a subset 2101 # of the metrics. 2102 { # The metric short id is returned to the user alongside an offset into 2103 # ReportWorkItemStatusRequest. 2104 "shortId": "A String", # The service-generated short identifier for the metric. 2105 "metricIndex": 42, # The index of the corresponding metric in 2106 # the ReportWorkItemStatusRequest. Required. 2107 }, 2108 ], 2109 "splitRequest": { # A suggestion by the service to the worker to dynamically split the WorkItem. # The progress point in the WorkItem where the Dataflow service 2110 # suggests that the worker truncate the task.
2111 "fractionConsumed": 3.14, # A fraction at which to split the work item, from 0.0 (beginning of the 2112 # input) to 1.0 (end of the input). 2113 "position": { # Position defines a position within a collection of data. The value # A Position at which to split the work item. 2114 # can be either the end position, a key (used with ordered 2115 # collections), a byte offset, or a record index. 2116 "end": True or False, # Position is past all other positions. Also useful for the end 2117 # position of an unbounded range. 2118 "recordIndex": "A String", # Position is a record index. 2119 "byteOffset": "A String", # Position is a byte offset. 2120 "key": "A String", # Position is a string key, ordered lexicographically. 2121 "concatPosition": { # A position that encapsulates an inner position and an index for the inner # CloudPosition is a concat position. 2122 # position. A ConcatPosition can be used by a reader of a source that 2123 # encapsulates a set of other sources. 2124 "position": # Object with schema name: Position # Position within the inner source. 2125 "index": 42, # Index of the inner source. 2126 }, 2127 "shufflePosition": "A String", # CloudPosition is a base64 encoded BatchShufflePosition (with FIXED 2128 # sharding). 2129 }, 2130 }, 2131 "suggestedStopPoint": { # Obsolete in favor of ApproximateReportedProgress and ApproximateSplitRequest. # DEPRECATED in favor of split_request. 2132 "position": { # Position defines a position within a collection of data. The value # Obsolete. 2133 # can be either the end position, a key (used with ordered 2134 # collections), a byte offset, or a record index. 2135 "end": True or False, # Position is past all other positions. Also useful for the end 2136 # position of an unbounded range. 2137 "recordIndex": "A String", # Position is a record index. 2138 "byteOffset": "A String", # Position is a byte offset. 2139 "key": "A String", # Position is a string key, ordered lexicographically. 2140 "concatPosition": { # A position that encapsulates an inner position and an index for the inner # CloudPosition is a concat position. 2141 # position. A ConcatPosition can be used by a reader of a source that 2142 # encapsulates a set of other sources. 2143 "position": # Object with schema name: Position # Position within the inner source. 2144 "index": 42, # Index of the inner source. 2145 }, 2146 "shufflePosition": "A String", # CloudPosition is a base64 encoded BatchShufflePosition (with FIXED 2147 # sharding). 2148 }, 2149 "remainingTime": "A String", # Obsolete. 2150 "percentComplete": 3.14, # Obsolete. 2151 }, 2152 }, 2153 ], 2154 }</pre> 2155</div> 2156 2157</body></html>