<html><body>
<style>

body, h1, h2, h3, div, span, p, pre, a {
  margin: 0;
  padding: 0;
  border: 0;
  font-weight: inherit;
  font-style: inherit;
  font-size: 100%;
  font-family: inherit;
  vertical-align: baseline;
}

body {
  font-size: 13px;
  padding: 1em;
}

h1 {
  font-size: 26px;
  margin-bottom: 1em;
}

h2 {
  font-size: 24px;
  margin-bottom: 1em;
}

h3 {
  font-size: 20px;
  margin-bottom: 1em;
  margin-top: 1em;
}

pre, code {
  line-height: 1.5;
  font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
}

pre {
  margin-top: 0.5em;
}

h1, h2, h3, p {
  font-family: Arial, sans-serif;
}

h1, h2, h3 {
  border-bottom: solid #CCC 1px;
}

.toc_element {
  margin-top: 0.5em;
}

.firstline {
  margin-left: 2em;
}

.method {
  margin-top: 1em;
  border: solid 1px #CCC;
  padding: 1em;
  background: #EEE;
}

.details {
  font-weight: bold;
  font-size: 14px;
}

</style>

<h1><a href="dataflow_v1b3.html">Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.jobs.html">jobs</a></h1>
<h2>Instance Methods</h2>
<p class="toc_element">
  <code><a href="dataflow_v1b3.projects.jobs.debug.html">debug()</a></code>
</p>
<p class="firstline">Returns the debug Resource.</p>

<p class="toc_element">
  <code><a href="dataflow_v1b3.projects.jobs.messages.html">messages()</a></code>
</p>
<p class="firstline">Returns the messages Resource.</p>

<p class="toc_element">
  <code><a href="dataflow_v1b3.projects.jobs.workItems.html">workItems()</a></code>
</p>
<p class="firstline">Returns the workItems Resource.</p>

<p class="toc_element">
  <code><a href="#aggregated">aggregated(projectId, pageSize=None, pageToken=None, x__xgafv=None, location=None, filter=None, view=None)</a></code></p>
<p class="firstline">List the jobs of a project across all regions.</p>
<p class="toc_element">
  <code><a href="#aggregated_next">aggregated_next(previous_request, previous_response)</a></code></p>
<p class="firstline">Retrieves the next page of results.</p>
<p class="toc_element">
  <code><a href="#create">create(projectId, body, location=None, x__xgafv=None, replaceJobId=None, view=None)</a></code></p>
<p class="firstline">Creates a Cloud Dataflow job.</p>
<p class="toc_element">
  <code><a href="#get">get(projectId, jobId, location=None, x__xgafv=None, view=None)</a></code></p>
<p class="firstline">Gets the state of the specified Cloud Dataflow job.</p>
<p class="toc_element">
  <code><a href="#getMetrics">getMetrics(projectId, jobId, startTime=None, location=None, x__xgafv=None)</a></code></p>
<p class="firstline">Request the job status.</p>
<p class="toc_element">
  <code><a href="#list">list(projectId, pageSize=None, pageToken=None, x__xgafv=None, location=None, filter=None, view=None)</a></code></p>
<p class="firstline">List the jobs of a project.</p>
<p class="toc_element">
  <code><a href="#list_next">list_next(previous_request, previous_response)</a></code></p>
<p class="firstline">Retrieves the next page of results.</p>
<p class="toc_element">
  <code><a href="#snapshot">snapshot(projectId, jobId, body, x__xgafv=None)</a></code></p>
<p class="firstline">Snapshot the state of a streaming job.</p>
<p class="toc_element">
  <code><a href="#update">update(projectId, jobId, body, location=None, x__xgafv=None)</a></code></p>
<p class="firstline">Updates the state of an existing Cloud Dataflow job.</p>
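
<p>A minimal sketch of reaching this collection with the <code>google-api-python-client</code> library, assuming application-default credentials are available; <code>'my-project'</code> and <code>'my-job-id'</code> are placeholder values.</p>
<pre>
from googleapiclient.discovery import build

# Build a Dataflow API client; discovery uses application-default credentials.
dataflow = build('dataflow', 'v1b3')

# The projects().jobs() collection documented on this page.
jobs = dataflow.projects().jobs()

# Nested resources listed above.
messages = jobs.messages()
work_items = jobs.workItems()

# One of the instance methods below, e.g. fetching a single job's state.
job = jobs.get(projectId='my-project', jobId='my-job-id').execute()
print(job.get('currentState'))
</pre>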
<h3>Method Details</h3>
<div class="method">
    <code class="details" id="aggregated">aggregated(projectId, pageSize=None, pageToken=None, x__xgafv=None, location=None, filter=None, view=None)</code>
  <pre>List the jobs of a project across all regions.

Args:
  projectId: string, The project which owns the jobs. (required)
  pageSize: integer, If there are many jobs, limit response to at most this many.
The actual number of jobs returned will be the lesser of this value
and an unspecified server-defined limit.
  pageToken: string, Set this to the 'next_page_token' field of a previous response
to request additional results in a long list.
  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format
  location: string, The [regional endpoint]
(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
contains this job.
  filter: string, The kind of filter to use.
  view: string, Level of information requested in response. Default is `JOB_VIEW_SUMMARY`.

Returns:
  An object of the form:

144    { # Response to a request to list Cloud Dataflow jobs.  This may be a partial
145      # response, depending on the page size in the ListJobsRequest.
146    "nextPageToken": "A String", # Set if there may be more results than fit in this response.
147    "failedLocation": [ # Zero or more messages describing the [regional endpoints]
148        # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
149        # failed to respond.
150      { # Indicates which [regional endpoint]
151          # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) failed
152          # to respond to a request for data.
153        "name": "A String", # The name of the [regional endpoint]
154            # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
155            # failed to respond.
156      },
157    ],
158    "jobs": [ # A subset of the requested job information.
159      { # Defines a job to be run by the Cloud Dataflow service.
160        "labels": { # User-defined labels for this job.
161            #
162            # The labels map can contain no more than 64 entries.  Entries of the labels
163            # map are UTF8 strings that comply with the following restrictions:
164            #
165            # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
166            # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
167            # * Both keys and values are additionally constrained to be <= 128 bytes in
168            # size.
169          "a_key": "A String",
170        },
171        "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the # This field is populated by the Dataflow service to support filtering jobs
172            # by the metadata values provided here. Populated for ListJobs and all GetJob
173            # views SUMMARY and higher.
174            # ListJob response and Job SUMMARY view.
175          "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
176            "versionDisplayName": "A String", # A readable string describing the version of the SDK.
177            "version": "A String", # The version of the SDK used to run the job.
178            "sdkSupportStatus": "A String", # The support status for this SDK version.
179          },
180          "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
181            { # Metadata for a PubSub connector used by the job.
182              "topic": "A String", # Topic accessed in the connection.
183              "subscription": "A String", # Subscription used in the connection.
184            },
185          ],
186          "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
187            { # Metadata for a Datastore connector used by the job.
188              "projectId": "A String", # ProjectId accessed in the connection.
189              "namespace": "A String", # Namespace used in the connection.
190            },
191          ],
192          "fileDetails": [ # Identification of a File source used in the Dataflow job.
193            { # Metadata for a File connector used by the job.
194              "filePattern": "A String", # File Pattern used to access files by the connector.
195            },
196          ],
197          "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
198            { # Metadata for a Spanner connector used by the job.
199              "instanceId": "A String", # InstanceId accessed in the connection.
200              "projectId": "A String", # ProjectId accessed in the connection.
201              "databaseId": "A String", # DatabaseId accessed in the connection.
202            },
203          ],
204          "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
205            { # Metadata for a BigTable connector used by the job.
206              "instanceId": "A String", # InstanceId accessed in the connection.
207              "projectId": "A String", # ProjectId accessed in the connection.
208              "tableId": "A String", # TableId accessed in the connection.
209            },
210          ],
211          "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
212            { # Metadata for a BigQuery connector used by the job.
213              "projectId": "A String", # Project accessed in the connection.
214              "dataset": "A String", # Dataset accessed in the connection.
215              "table": "A String", # Table accessed in the connection.
216              "query": "A String", # Query used to access data in the connection.
217            },
218          ],
219        },
220        "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
221            # A description of the user pipeline and stages through which it is executed.
222            # Created by Cloud Dataflow service.  Only retrieved with
223            # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
224            # form.  This data is provided by the Dataflow service for ease of visualizing
225            # the pipeline and interpreting Dataflow provided metrics.
226          "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
227            { # Description of the type, names/ids, and input/outputs for a transform.
228              "kind": "A String", # Type of transform.
229              "name": "A String", # User provided name for this transform instance.
230              "inputCollectionName": [ # User names for all collection inputs to this transform.
231                "A String",
232              ],
233              "displayData": [ # Transform-specific display data.
234                { # Data provided with a pipeline or transform to provide descriptive info.
235                  "shortStrValue": "A String", # A possible additional shorter value to display.
236                      # For example a java_class_name_value of com.mypackage.MyDoFn
237                      # will be stored with MyDoFn as the short_str_value and
238                      # com.mypackage.MyDoFn as the java_class_name value.
239                      # short_str_value can be displayed and java_class_name_value
240                      # will be displayed as a tooltip.
241                  "durationValue": "A String", # Contains value if the data is of duration type.
242                  "url": "A String", # An optional full URL.
243                  "floatValue": 3.14, # Contains value if the data is of float type.
244                  "namespace": "A String", # The namespace for the key. This is usually a class name or programming
245                      # language namespace (i.e. python module) which defines the display data.
246                      # This allows a dax monitoring system to specially handle the data
247                      # and perform custom rendering.
248                  "javaClassValue": "A String", # Contains value if the data is of java class type.
249                  "label": "A String", # An optional label to display in a dax UI for the element.
250                  "boolValue": True or False, # Contains value if the data is of a boolean type.
251                  "strValue": "A String", # Contains value if the data is of string type.
252                  "key": "A String", # The key identifying the display data.
253                      # This is intended to be used as a label for the display data
254                      # when viewed in a dax monitoring system.
255                  "int64Value": "A String", # Contains value if the data is of int64 type.
256                  "timestampValue": "A String", # Contains value if the data is of timestamp type.
257                },
258              ],
259              "outputCollectionName": [ # User names for all collection outputs to this transform.
260                "A String",
261              ],
262              "id": "A String", # SDK generated id of this transform instance.
263            },
264          ],
265          "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
266            { # Description of the composing transforms, names/ids, and input/outputs of a
267                # stage of execution.  Some composing transforms and sources may have been
268                # generated by the Dataflow service during execution planning.
269              "componentSource": [ # Collections produced and consumed by component transforms of this stage.
270                { # Description of an interstitial value between transforms in an execution
271                    # stage.
272                  "userName": "A String", # Human-readable name for this transform; may be user or system generated.
273                  "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
274                      # source is most closely associated.
275                  "name": "A String", # Dataflow service generated name for this source.
276                },
277              ],
278              "kind": "A String", # Type of transform this stage is executing.
279              "name": "A String", # Dataflow service generated name for this stage.
280              "outputSource": [ # Output sources for this stage.
281                { # Description of an input or output of an execution stage.
282                  "userName": "A String", # Human-readable name for this source; may be user or system generated.
283                  "sizeBytes": "A String", # Size of the source, if measurable.
284                  "name": "A String", # Dataflow service generated name for this source.
285                  "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
286                      # source is most closely associated.
287                },
288              ],
289              "inputSource": [ # Input sources for this stage.
290                { # Description of an input or output of an execution stage.
291                  "userName": "A String", # Human-readable name for this source; may be user or system generated.
292                  "sizeBytes": "A String", # Size of the source, if measurable.
293                  "name": "A String", # Dataflow service generated name for this source.
294                  "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
295                      # source is most closely associated.
296                },
297              ],
298              "componentTransform": [ # Transforms that comprise this execution stage.
299                { # Description of a transform executed as part of an execution stage.
300                  "userName": "A String", # Human-readable name for this transform; may be user or system generated.
301                  "originalTransform": "A String", # User name for the original user transform with which this transform is
302                      # most closely associated.
303                  "name": "A String", # Dataflow service generated name for this source.
304                },
305              ],
306              "id": "A String", # Dataflow service generated id for this stage.
307            },
308          ],
309          "displayData": [ # Pipeline level display data.
310            { # Data provided with a pipeline or transform to provide descriptive info.
311              "shortStrValue": "A String", # A possible additional shorter value to display.
312                  # For example a java_class_name_value of com.mypackage.MyDoFn
313                  # will be stored with MyDoFn as the short_str_value and
314                  # com.mypackage.MyDoFn as the java_class_name value.
315                  # short_str_value can be displayed and java_class_name_value
316                  # will be displayed as a tooltip.
317              "durationValue": "A String", # Contains value if the data is of duration type.
318              "url": "A String", # An optional full URL.
319              "floatValue": 3.14, # Contains value if the data is of float type.
320              "namespace": "A String", # The namespace for the key. This is usually a class name or programming
321                  # language namespace (i.e. python module) which defines the display data.
322                  # This allows a dax monitoring system to specially handle the data
323                  # and perform custom rendering.
324              "javaClassValue": "A String", # Contains value if the data is of java class type.
325              "label": "A String", # An optional label to display in a dax UI for the element.
326              "boolValue": True or False, # Contains value if the data is of a boolean type.
327              "strValue": "A String", # Contains value if the data is of string type.
328              "key": "A String", # The key identifying the display data.
329                  # This is intended to be used as a label for the display data
330                  # when viewed in a dax monitoring system.
331              "int64Value": "A String", # Contains value if the data is of int64 type.
332              "timestampValue": "A String", # Contains value if the data is of timestamp type.
333            },
334          ],
335        },
336        "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
337            # callers cannot mutate it.
338          { # A message describing the state of a particular execution stage.
339            "executionStageName": "A String", # The name of the execution stage.
340            "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
341            "currentStateTime": "A String", # The time at which the stage transitioned to this state.
342          },
343        ],
344        "id": "A String", # The unique ID of this job.
345            #
346            # This field is set by the Cloud Dataflow service when the Job is
347            # created, and is immutable for the life of the job.
348        "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
349            # `JOB_STATE_UPDATED`), this field contains the ID of that job.
350        "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
351        "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
352            # corresponding name prefixes of the new job.
353          "a_key": "A String",
354        },
355        "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
356          "version": { # A structure describing which components and their versions of the service
357              # are required in order to run the job.
358            "a_key": "", # Properties of the object.
359          },
360          "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
361          "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data
362              # at rest, AKA a Customer Managed Encryption Key (CMEK).
363              #
364              # Format:
365              #   projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
366          "internalExperiments": { # Experimental settings.
367            "a_key": "", # Properties of the object. Contains field @type with type URL.
368          },
369          "dataset": "A String", # The dataset for the current project where various workflow
370              # related tables are stored.
371              #
372              # The supported resource type is:
373              #
374              # Google BigQuery:
375              #   bigquery.googleapis.com/{dataset}
376          "experiments": [ # The list of experiments to enable.
377            "A String",
378          ],
379          "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
380          "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
381              # options are passed through the service and are used to recreate the
382              # SDK pipeline options on the worker in a language agnostic and platform
383              # independent way.
384            "a_key": "", # Properties of the object.
385          },
386          "userAgent": { # A description of the process that generated the request.
387            "a_key": "", # Properties of the object.
388          },
389          "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
390              # unspecified, the service will attempt to choose a reasonable
391              # default.  This should be in the form of the API service name,
392              # e.g. "compute.googleapis.com".
393          "workerPools": [ # The worker pools. At least one "harness" worker pool must be
394              # specified in order for the job to have workers.
395            { # Describes one particular pool of Cloud Dataflow workers to be
396                # instantiated by the Cloud Dataflow service in order to perform the
397                # computations required by a job.  Note that a workflow job may use
398                # multiple pools, in order to match the various computational
399                # requirements of the various stages of the job.
400              "diskSourceImage": "A String", # Fully qualified source image for disks.
401              "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
402                  # using the standard Dataflow task runner.  Users should ignore
403                  # this field.
404                "workflowFileName": "A String", # The file to store the workflow in.
405                "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
406                    # will not be uploaded.
407                    #
408                    # The supported resource type is:
409                    #
410                    # Google Cloud Storage:
411                    #   storage.googleapis.com/{bucket}/{object}
412                    #   bucket.storage.googleapis.com/{object}
413                "commandlinesFileName": "A String", # The file to store preprocessing commands in.
414                "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
415                  "reportingEnabled": True or False, # Whether to send work progress updates to the service.
416                  "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
417                      # "shuffle/v1beta1".
418                  "workerId": "A String", # The ID of the worker running this pipeline.
419                  "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
420                      #
421                      # When workers access Google Cloud APIs, they logically do so via
422                      # relative URLs.  If this field is specified, it supplies the base
423                      # URL to use for resolving these relative URLs.  The normative
424                      # algorithm used is defined by RFC 1808, "Relative Uniform Resource
425                      # Locators".
426                      #
427                      # If not specified, the default value is "http://www.googleapis.com/"
428                  "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
429                      # "dataflow/v1b3/projects".
430                  "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
431                      # storage.
432                      #
433                      # The supported resource type is:
434                      #
435                      # Google Cloud Storage:
436                      #
437                      #   storage.googleapis.com/{bucket}/{object}
438                      #   bucket.storage.googleapis.com/{object}
439                },
440                "vmId": "A String", # The ID string of the VM.
441                "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
442                "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
443                "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
444                    # access the Cloud Dataflow API.
445                  "A String",
446                ],
447                "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
448                    # taskrunner; e.g. "root".
449                "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
450                    #
451                    # When workers access Google Cloud APIs, they logically do so via
452                    # relative URLs.  If this field is specified, it supplies the base
453                    # URL to use for resolving these relative URLs.  The normative
454                    # algorithm used is defined by RFC 1808, "Relative Uniform Resource
455                    # Locators".
456                    #
457                    # If not specified, the default value is "http://www.googleapis.com/"
458                "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
459                    # taskrunner; e.g. "wheel".
460                "languageHint": "A String", # The suggested backend language.
461                "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
462                    # console.
463                "streamingWorkerMainClass": "A String", # The streaming worker main class name.
464                "logDir": "A String", # The directory on the VM to store logs.
465                "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3".
466                "harnessCommand": "A String", # The command to launch the worker harness.
467                "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
468                    # temporary storage.
469                    #
470                    # The supported resource type is:
471                    #
472                    # Google Cloud Storage:
473                    #   storage.googleapis.com/{bucket}/{object}
474                    #   bucket.storage.googleapis.com/{object}
475                "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
476              },
477              "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
478                  # are supported.
479              "packages": [ # Packages to be installed on workers.
480                { # The packages that must be installed in order for a worker to run the
481                    # steps of the Cloud Dataflow job that will be assigned to its worker
482                    # pool.
483                    #
484                    # This is the mechanism by which the Cloud Dataflow SDK causes code to
485                    # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
486                    # might use this to install jars containing the user's code and all of the
487                    # various dependencies (libraries, data files, etc.) required in order
488                    # for that code to run.
489                  "location": "A String", # The resource to read the package from. The supported resource type is:
490                      #
491                      # Google Cloud Storage:
492                      #
493                      #   storage.googleapis.com/{bucket}
494                      #   bucket.storage.googleapis.com/
495                  "name": "A String", # The name of the package.
496                },
497              ],
498              "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
499                  # service will attempt to choose a reasonable default.
500              "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
501                  # the service will use the network "default".
502              "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
503                  # will attempt to choose a reasonable default.
504              "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
505                  # attempt to choose a reasonable default.
506              "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
507                  # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
508                  # `TEARDOWN_NEVER`.
509                  # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
510                  # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
511                  # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
512                  # down.
513                  #
514                  # If the workers are not torn down by the service, they will
515                  # continue to run and use Google Compute Engine VM resources in the
516                  # user's project until they are explicitly terminated by the user.
517                  # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
518                  # policy except for small, manually supervised test jobs.
519                  #
520                  # If unknown or unspecified, the service will attempt to choose a reasonable
521                  # default.
522              "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
523                  # Compute Engine API.
524              "ipConfiguration": "A String", # Configuration for VM IPs.
525              "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
526                  # service will choose a number of threads (according to the number of cores
527                  # on the selected machine type for batch, or 1 by convention for streaming).
528              "poolArgs": { # Extra arguments for this worker pool.
529                "a_key": "", # Properties of the object. Contains field @type with type URL.
530              },
531              "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
532                  # execute the job.  If zero or unspecified, the service will
533                  # attempt to choose a reasonable default.
534              "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
535                  # harness, residing in Google Container Registry.
536              "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
537                  # the form "regions/REGION/subnetworks/SUBNETWORK".
538              "dataDisks": [ # Data disks that are used by a VM in this workflow.
539                { # Describes the data disk used by a workflow job.
540                  "mountPoint": "A String", # Directory in a VM where disk is mounted.
541                  "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
542                      # attempt to choose a reasonable default.
543                  "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
544                      # must be a disk type appropriate to the project and zone in which
545                      # the workers will run.  If unknown or unspecified, the service
546                      # will attempt to choose a reasonable default.
547                      #
548                      # For example, the standard persistent disk type is a resource name
549                      # typically ending in "pd-standard".  If SSD persistent disks are
550                      # available, the resource name typically ends with "pd-ssd".  The
551                      # actual valid values are defined by the Google Compute Engine API,
552                      # not by the Cloud Dataflow API; consult the Google Compute Engine
553                      # documentation for more information about determining the set of
554                      # available disk types for a particular project and zone.
555                      #
556                      # Google Compute Engine Disk types are local to a particular
557                      # project in a particular zone, and so the resource name will
558                      # typically look something like this:
559                      #
560                      # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
561                },
562              ],
563              "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
564                "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
565                "algorithm": "A String", # The algorithm to use for autoscaling.
566              },
567              "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
568                  # select a default set of packages which are useful to worker
569                  # harnesses written in a particular language.
570              "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
571                  # attempt to choose a reasonable default.
572              "metadata": { # Metadata to set on the Google Compute Engine VMs.
573                "a_key": "A String",
574              },
575            },
576          ],
577          "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
578              # storage.  The system will append the suffix "/temp-{JOBNAME}" to
579              # this resource prefix, where {JOBNAME} is the value of the
580              # job_name field.  The resulting bucket and object prefix is used
581              # as the prefix of the resources used to store temporary data
582              # needed during the job execution.  NOTE: This will override the
583              # value in taskrunner_settings.
584              # The supported resource type is:
585              #
586              # Google Cloud Storage:
587              #
588              #   storage.googleapis.com/{bucket}/{object}
589              #   bucket.storage.googleapis.com/{object}
590        },
591        "location": "A String", # The [regional endpoint]
592            # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
593            # contains this job.
594        "tempFiles": [ # A set of files the system should be aware of that are used
595            # for temporary storage. These temporary files will be
596            # removed on job completion.
597            # No duplicates are allowed.
598            # No file patterns are supported.
599            #
600            # The supported files are:
601            #
602            # Google Cloud Storage:
603            #
604            #    storage.googleapis.com/{bucket}/{object}
605            #    bucket.storage.googleapis.com/{object}
606          "A String",
607        ],
608        "type": "A String", # The type of Cloud Dataflow job.
609        "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
610            # If this field is set, the service will ensure its uniqueness.
611            # The request to create a job will fail if the service has knowledge of a
612            # previously submitted job with the same client's ID and job name.
613            # The caller may use this field to ensure idempotence of job
614            # creation across retried attempts to create a job.
615            # By default, the field is empty and, in that case, the service ignores it.
616        "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
617            # snapshot.
618        "stepsLocation": "A String", # The GCS location where the steps are stored.
619        "currentStateTime": "A String", # The timestamp associated with the current state.
620        "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
621            # Flexible resource scheduling jobs are started with some delay after job
622            # creation, so start_time is unset before start and is updated when the
623            # job is started by the Cloud Dataflow service. For other jobs, start_time
624            # always equals create_time and is immutable and set by the Cloud Dataflow
625            # service.
626        "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
627            # Cloud Dataflow service.
628        "requestedState": "A String", # The job's requested state.
629            #
630            # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
631            # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
632            # also be used to directly set a job's requested state to
633            # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
634            # job if it has not already reached a terminal state.
635        "name": "A String", # The user-specified Cloud Dataflow job name.
636            #
637            # Only one Job with a given name may exist in a project at any
638            # given time. If a caller attempts to create a Job with the same
639            # name as an already-existing Job, the attempt returns the
640            # existing Job.
641            #
642            # The name must match the regular expression
643            # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
644        "steps": [ # Exactly one of step or steps_location should be specified.
645            #
646            # The top-level steps that constitute the entire job.
647          { # Defines a particular step within a Cloud Dataflow job.
648              #
649              # A job consists of multiple steps, each of which performs some
650              # specific operation as part of the overall job.  Data is typically
651              # passed from one step to another as part of the job.
652              #
653              # Here's an example of a sequence of steps which together implement a
654              # Map-Reduce job:
655              #
656              #   * Read a collection of data from some source, parsing the
657              #     collection's elements.
658              #
659              #   * Validate the elements.
660              #
661              #   * Apply a user-defined function to map each element to some value
662              #     and extract an element-specific key value.
663              #
664              #   * Group elements with the same key into a single element with
665              #     that key, transforming a multiply-keyed collection into a
666              #     uniquely-keyed collection.
667              #
668              #   * Write the elements out to some data sink.
669              #
670              # Note that the Cloud Dataflow service may be used to run many different
671              # types of jobs, not just Map-Reduce.
672            "kind": "A String", # The kind of step in the Cloud Dataflow job.
673            "properties": { # Named properties associated with the step. Each kind of
674                # predefined step has its own required set of properties.
675                # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
676              "a_key": "", # Properties of the object.
677            },
678            "name": "A String", # The name that identifies the step. This must be unique for each
679                # step with respect to all other steps in the Cloud Dataflow job.
680          },
681        ],
682        "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
683            # of the job it replaced.
684            #
685            # When sending a `CreateJobRequest`, you can update a job by specifying it
686            # here. The job named here is stopped, and its intermediate state is
687            # transferred to this job.
688        "currentState": "A String", # The current state of the job.
689            #
690            # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
691            # specified.
692            #
693            # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
694            # terminal state. After a job has reached a terminal state, no
695            # further state updates may be made.
696            #
697            # This field may be mutated by the Cloud Dataflow service;
698            # callers cannot mutate it.
699        "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
700            # isn't contained in the submitted job.
701          "stages": { # A mapping from each stage to the information about that stage.
702            "a_key": { # Contains information about how a particular
703                # google.dataflow.v1beta3.Step will be executed.
704              "stepName": [ # The steps associated with the execution stage.
705                  # Note that stages may have several steps, and that a given step
706                  # might be run by more than one stage.
707                "A String",
708              ],
709            },
710          },
711        },
712      },
713    ],
714  }</pre>
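<p>A minimal sketch of calling this method with the generated Python client, assuming the <code>dataflow</code> service object built in the earlier example; <code>'my-project'</code> is a placeholder. The response fields read below (<code>jobs</code>, <code>failedLocation</code>, <code>nextPageToken</code>) are the ones documented in the structure above.</p>
<pre>
# Request summary-level information about jobs in all regions.
response = dataflow.projects().jobs().aggregated(
    projectId='my-project',
    view='JOB_VIEW_SUMMARY',
    pageSize=50).execute()

for job in response.get('jobs', []):
    print(job['id'], job.get('name'), job.get('currentState'))

# Regional endpoints that failed to respond, if any.
for loc in response.get('failedLocation', []):
    print('no data from', loc.get('name'))

# A nextPageToken is present when more results are available.
token = response.get('nextPageToken')
</pre>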
715</div>
716
717<div class="method">
718    <code class="details" id="aggregated_next">aggregated_next(previous_request, previous_response)</code>
719  <pre>Retrieves the next page of results.
720
721Args:
722  previous_request: The request for the previous page. (required)
723  previous_response: The response from the request for the previous page. (required)
724
725Returns:
726  A request object that you can call 'execute()' on to request the next
727  page. Returns None if there are no more items in the collection.
728    </pre>
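<p>A minimal sketch of the paging pattern this method supports, again assuming the <code>dataflow</code> service object from the earlier example; it keeps requesting pages until the method returns <code>None</code>.</p>
<pre>
request = dataflow.projects().jobs().aggregated(projectId='my-project')
while request is not None:
    response = request.execute()
    for job in response.get('jobs', []):
        print(job['id'], job.get('currentState'))
    # Returns None once there are no further pages.
    request = dataflow.projects().jobs().aggregated_next(
        previous_request=request, previous_response=response)
</pre>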
729</div>
730
731<div class="method">
732    <code class="details" id="create">create(projectId, body, location=None, x__xgafv=None, replaceJobId=None, view=None)</code>
733  <pre>Creates a Cloud Dataflow job.
734
735To create a job, we recommend using `projects.locations.jobs.create` with a
736[regional endpoint]
737(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using
738`projects.jobs.create` is not recommended, as your job will always start
739in `us-central1`.
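
For example, a minimal sketch with the generated Python client (assuming the
`dataflow` service object built earlier; 'my-project' and the bucket path are
placeholders, and a real request body, in particular `steps`, is normally
produced by a Dataflow SDK rather than written by hand):

  job_body = {
      'name': 'example-wordcount',  # must match [a-z]([-a-z0-9]{0,38}[a-z0-9])?
      'type': 'JOB_TYPE_BATCH',
      'environment': {
          'tempStoragePrefix': 'storage.googleapis.com/my-bucket/temp',
      },
  }
  job = dataflow.projects().jobs().create(
      projectId='my-project', body=job_body).execute()
  print(job['id'], job.get('currentState'))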
740
741Args:
742  projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
743  body: object, The request body. (required)
744    The object takes the form of:
745
746{ # Defines a job to be run by the Cloud Dataflow service.
747  "labels": { # User-defined labels for this job.
748      #
749      # The labels map can contain no more than 64 entries.  Entries of the labels
750      # map are UTF8 strings that comply with the following restrictions:
751      #
752      # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
753      # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
754      # * Both keys and values are additionally constrained to be <= 128 bytes in
755      # size.
756    "a_key": "A String",
757  },
758  "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the # This field is populated by the Dataflow service to support filtering jobs
759      # by the metadata values provided here. Populated for ListJobs and all GetJob
760      # views SUMMARY and higher.
761      # ListJob response and Job SUMMARY view.
762    "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
763      "versionDisplayName": "A String", # A readable string describing the version of the SDK.
764      "version": "A String", # The version of the SDK used to run the job.
765      "sdkSupportStatus": "A String", # The support status for this SDK version.
766    },
767    "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
768      { # Metadata for a PubSub connector used by the job.
769        "topic": "A String", # Topic accessed in the connection.
770        "subscription": "A String", # Subscription used in the connection.
771      },
772    ],
773    "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
774      { # Metadata for a Datastore connector used by the job.
775        "projectId": "A String", # ProjectId accessed in the connection.
776        "namespace": "A String", # Namespace used in the connection.
777      },
778    ],
779    "fileDetails": [ # Identification of a File source used in the Dataflow job.
780      { # Metadata for a File connector used by the job.
781        "filePattern": "A String", # File Pattern used to access files by the connector.
782      },
783    ],
784    "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
785      { # Metadata for a Spanner connector used by the job.
786        "instanceId": "A String", # InstanceId accessed in the connection.
787        "projectId": "A String", # ProjectId accessed in the connection.
788        "databaseId": "A String", # DatabaseId accessed in the connection.
789      },
790    ],
791    "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
792      { # Metadata for a BigTable connector used by the job.
793        "instanceId": "A String", # InstanceId accessed in the connection.
794        "projectId": "A String", # ProjectId accessed in the connection.
795        "tableId": "A String", # TableId accessed in the connection.
796      },
797    ],
798    "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
799      { # Metadata for a BigQuery connector used by the job.
800        "projectId": "A String", # Project accessed in the connection.
801        "dataset": "A String", # Dataset accessed in the connection.
802        "table": "A String", # Table accessed in the connection.
803        "query": "A String", # Query used to access data in the connection.
804      },
805    ],
806  },
807  "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
808      # A description of the user pipeline and stages through which it is executed.
809      # Created by Cloud Dataflow service.  Only retrieved with
810      # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
811      # form.  This data is provided by the Dataflow service for ease of visualizing
812      # the pipeline and interpreting Dataflow provided metrics.
813    "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
814      { # Description of the type, names/ids, and input/outputs for a transform.
815        "kind": "A String", # Type of transform.
816        "name": "A String", # User provided name for this transform instance.
817        "inputCollectionName": [ # User names for all collection inputs to this transform.
818          "A String",
819        ],
820        "displayData": [ # Transform-specific display data.
821          { # Data provided with a pipeline or transform to provide descriptive info.
822            "shortStrValue": "A String", # A possible additional shorter value to display.
823                # For example a java_class_name_value of com.mypackage.MyDoFn
824                # will be stored with MyDoFn as the short_str_value and
825                # com.mypackage.MyDoFn as the java_class_name value.
826                # short_str_value can be displayed and java_class_name_value
827                # will be displayed as a tooltip.
828            "durationValue": "A String", # Contains value if the data is of duration type.
829            "url": "A String", # An optional full URL.
830            "floatValue": 3.14, # Contains value if the data is of float type.
831            "namespace": "A String", # The namespace for the key. This is usually a class name or programming
832                # language namespace (i.e. python module) which defines the display data.
833                # This allows a dax monitoring system to specially handle the data
834                # and perform custom rendering.
835            "javaClassValue": "A String", # Contains value if the data is of java class type.
836            "label": "A String", # An optional label to display in a dax UI for the element.
837            "boolValue": True or False, # Contains value if the data is of a boolean type.
838            "strValue": "A String", # Contains value if the data is of string type.
839            "key": "A String", # The key identifying the display data.
840                # This is intended to be used as a label for the display data
841                # when viewed in a dax monitoring system.
842            "int64Value": "A String", # Contains value if the data is of int64 type.
843            "timestampValue": "A String", # Contains value if the data is of timestamp type.
844          },
845        ],
846        "outputCollectionName": [ # User names for all collection outputs to this transform.
847          "A String",
848        ],
849        "id": "A String", # SDK generated id of this transform instance.
850      },
851    ],
852    "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
853      { # Description of the composing transforms, names/ids, and input/outputs of a
854          # stage of execution.  Some composing transforms and sources may have been
855          # generated by the Dataflow service during execution planning.
856        "componentSource": [ # Collections produced and consumed by component transforms of this stage.
857          { # Description of an interstitial value between transforms in an execution
858              # stage.
859            "userName": "A String", # Human-readable name for this transform; may be user or system generated.
860            "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
861                # source is most closely associated.
862            "name": "A String", # Dataflow service generated name for this source.
863          },
864        ],
865        "kind": "A String", # Type of transform this stage is executing.
866        "name": "A String", # Dataflow service generated name for this stage.
867        "outputSource": [ # Output sources for this stage.
868          { # Description of an input or output of an execution stage.
869            "userName": "A String", # Human-readable name for this source; may be user or system generated.
870            "sizeBytes": "A String", # Size of the source, if measurable.
871            "name": "A String", # Dataflow service generated name for this source.
872            "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
873                # source is most closely associated.
874          },
875        ],
876        "inputSource": [ # Input sources for this stage.
877          { # Description of an input or output of an execution stage.
878            "userName": "A String", # Human-readable name for this source; may be user or system generated.
879            "sizeBytes": "A String", # Size of the source, if measurable.
880            "name": "A String", # Dataflow service generated name for this source.
881            "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
882                # source is most closely associated.
883          },
884        ],
885        "componentTransform": [ # Transforms that comprise this execution stage.
886          { # Description of a transform executed as part of an execution stage.
887            "userName": "A String", # Human-readable name for this transform; may be user or system generated.
888            "originalTransform": "A String", # User name for the original user transform with which this transform is
889                # most closely associated.
890            "name": "A String", # Dataflow service generated name for this source.
891          },
892        ],
893        "id": "A String", # Dataflow service generated id for this stage.
894      },
895    ],
896    "displayData": [ # Pipeline level display data.
897      { # Data provided with a pipeline or transform to provide descriptive info.
898        "shortStrValue": "A String", # A possible additional shorter value to display.
899            # For example a java_class_name_value of com.mypackage.MyDoFn
900            # will be stored with MyDoFn as the short_str_value and
901            # com.mypackage.MyDoFn as the java_class_name value.
902            # short_str_value can be displayed and java_class_name_value
903            # will be displayed as a tooltip.
904        "durationValue": "A String", # Contains value if the data is of duration type.
905        "url": "A String", # An optional full URL.
906        "floatValue": 3.14, # Contains value if the data is of float type.
907        "namespace": "A String", # The namespace for the key. This is usually a class name or programming
908            # language namespace (i.e. python module) which defines the display data.
909            # This allows a dax monitoring system to specially handle the data
910            # and perform custom rendering.
911        "javaClassValue": "A String", # Contains value if the data is of java class type.
912        "label": "A String", # An optional label to display in a dax UI for the element.
913        "boolValue": True or False, # Contains value if the data is of a boolean type.
914        "strValue": "A String", # Contains value if the data is of string type.
915        "key": "A String", # The key identifying the display data.
916            # This is intended to be used as a label for the display data
917            # when viewed in a dax monitoring system.
918        "int64Value": "A String", # Contains value if the data is of int64 type.
919        "timestampValue": "A String", # Contains value if the data is of timestamp type.
920      },
921    ],
922  },
923  "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
924      # callers cannot mutate it.
925    { # A message describing the state of a particular execution stage.
926      "executionStageName": "A String", # The name of the execution stage.
927      "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
928      "currentStateTime": "A String", # The time at which the stage transitioned to this state.
929    },
930  ],
931  "id": "A String", # The unique ID of this job.
932      #
933      # This field is set by the Cloud Dataflow service when the Job is
934      # created, and is immutable for the life of the job.
935  "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
936      # `JOB_STATE_UPDATED`), this field contains the ID of that job.
937  "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
938  "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
939      # corresponding name prefixes of the new job.
940    "a_key": "A String",
941  },
942  "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
943    "version": { # A structure describing which components and their versions of the service
944        # are required in order to run the job.
945      "a_key": "", # Properties of the object.
946    },
947    "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
948    "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data
949        # at rest, AKA a Customer Managed Encryption Key (CMEK).
950        #
951        # Format:
952        #   projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
953    "internalExperiments": { # Experimental settings.
954      "a_key": "", # Properties of the object. Contains field @type with type URL.
955    },
956    "dataset": "A String", # The dataset for the current project where various workflow
957        # related tables are stored.
958        #
959        # The supported resource type is:
960        #
961        # Google BigQuery:
962        #   bigquery.googleapis.com/{dataset}
963    "experiments": [ # The list of experiments to enable.
964      "A String",
965    ],
966    "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
967    "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
968        # options are passed through the service and are used to recreate the
969        # SDK pipeline options on the worker in a language agnostic and platform
970        # independent way.
971      "a_key": "", # Properties of the object.
972    },
973    "userAgent": { # A description of the process that generated the request.
974      "a_key": "", # Properties of the object.
975    },
976    "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
977        # unspecified, the service will attempt to choose a reasonable
978        # default.  This should be in the form of the API service name,
979        # e.g. "compute.googleapis.com".
980    "workerPools": [ # The worker pools. At least one "harness" worker pool must be
981        # specified in order for the job to have workers.
982      { # Describes one particular pool of Cloud Dataflow workers to be
983          # instantiated by the Cloud Dataflow service in order to perform the
984          # computations required by a job.  Note that a workflow job may use
985          # multiple pools, in order to match the various computational
986          # requirements of the various stages of the job.
987        "diskSourceImage": "A String", # Fully qualified source image for disks.
988        "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
989            # using the standard Dataflow task runner.  Users should ignore
990            # this field.
991          "workflowFileName": "A String", # The file to store the workflow in.
992          "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
993              # will not be uploaded.
994              #
995              # The supported resource type is:
996              #
997              # Google Cloud Storage:
998              #   storage.googleapis.com/{bucket}/{object}
999              #   bucket.storage.googleapis.com/{object}
1000          "commandlinesFileName": "A String", # The file to store preprocessing commands in.
1001          "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
1002            "reportingEnabled": True or False, # Whether to send work progress updates to the service.
1003            "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
1004                # "shuffle/v1beta1".
1005            "workerId": "A String", # The ID of the worker running this pipeline.
1006            "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
1007                #
1008                # When workers access Google Cloud APIs, they logically do so via
1009                # relative URLs.  If this field is specified, it supplies the base
1010                # URL to use for resolving these relative URLs.  The normative
1011                # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1012                # Locators".
1013                #
1014                # If not specified, the default value is "http://www.googleapis.com/"
1015            "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
1016                # "dataflow/v1b3/projects".
1017            "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1018                # storage.
1019                #
1020                # The supported resource type is:
1021                #
1022                # Google Cloud Storage:
1023                #
1024                #   storage.googleapis.com/{bucket}/{object}
1025                #   bucket.storage.googleapis.com/{object}
1026          },
1027          "vmId": "A String", # The ID string of the VM.
1028          "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
1029          "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
1030          "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
1031              # access the Cloud Dataflow API.
1032            "A String",
1033          ],
1034          "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
1035              # taskrunner; e.g. "root".
1036          "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
1037              #
1038              # When workers access Google Cloud APIs, they logically do so via
1039              # relative URLs.  If this field is specified, it supplies the base
1040              # URL to use for resolving these relative URLs.  The normative
1041              # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1042              # Locators".
1043              #
1044              # If not specified, the default value is "http://www.googleapis.com/"
1045          "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
1046              # taskrunner; e.g. "wheel".
1047          "languageHint": "A String", # The suggested backend language.
1048          "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
1049              # console.
1050          "streamingWorkerMainClass": "A String", # The streaming worker main class name.
1051          "logDir": "A String", # The directory on the VM to store logs.
1052          "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3".
1053          "harnessCommand": "A String", # The command to launch the worker harness.
1054          "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
1055              # temporary storage.
1056              #
1057              # The supported resource type is:
1058              #
1059              # Google Cloud Storage:
1060              #   storage.googleapis.com/{bucket}/{object}
1061              #   bucket.storage.googleapis.com/{object}
1062          "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
1063        },
1064        "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
1065            # are supported.
1066        "packages": [ # Packages to be installed on workers.
1067          { # The packages that must be installed in order for a worker to run the
1068              # steps of the Cloud Dataflow job that will be assigned to its worker
1069              # pool.
1070              #
1071              # This is the mechanism by which the Cloud Dataflow SDK causes code to
1072              # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
1073              # might use this to install jars containing the user's code and all of the
1074              # various dependencies (libraries, data files, etc.) required in order
1075              # for that code to run.
1076            "location": "A String", # The resource to read the package from. The supported resource type is:
1077                #
1078                # Google Cloud Storage:
1079                #
1080                #   storage.googleapis.com/{bucket}
1081                #   bucket.storage.googleapis.com/
1082            "name": "A String", # The name of the package.
1083          },
1084        ],
1085        "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
1086            # service will attempt to choose a reasonable default.
1087        "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
1088            # the service will use the network "default".
1089        "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
1090            # will attempt to choose a reasonable default.
1091        "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
1092            # attempt to choose a reasonable default.
1093        "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
1094            # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
1095            # `TEARDOWN_NEVER`.
1096            # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
1097            # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
1098            # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
1099            # down.
1100            #
1101            # If the workers are not torn down by the service, they will
1102            # continue to run and use Google Compute Engine VM resources in the
1103            # user's project until they are explicitly terminated by the user.
1104            # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
1105            # policy except for small, manually supervised test jobs.
1106            #
1107            # If unknown or unspecified, the service will attempt to choose a reasonable
1108            # default.
1109        "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
1110            # Compute Engine API.
1111        "ipConfiguration": "A String", # Configuration for VM IPs.
1112        "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
1113            # service will choose a number of threads (according to the number of cores
1114            # on the selected machine type for batch, or 1 by convention for streaming).
1115        "poolArgs": { # Extra arguments for this worker pool.
1116          "a_key": "", # Properties of the object. Contains field @type with type URL.
1117        },
1118        "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
1119            # execute the job.  If zero or unspecified, the service will
1120            # attempt to choose a reasonable default.
1121        "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
1122            # harness, residing in Google Container Registry.
1123        "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
1124            # the form "regions/REGION/subnetworks/SUBNETWORK".
1125        "dataDisks": [ # Data disks that are used by a VM in this workflow.
1126          { # Describes the data disk used by a workflow job.
1127            "mountPoint": "A String", # Directory in a VM where disk is mounted.
1128            "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
1129                # attempt to choose a reasonable default.
1130            "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
1131                # must be a disk type appropriate to the project and zone in which
1132                # the workers will run.  If unknown or unspecified, the service
1133                # will attempt to choose a reasonable default.
1134                #
1135                # For example, the standard persistent disk type is a resource name
1136                # typically ending in "pd-standard".  If SSD persistent disks are
1137                # available, the resource name typically ends with "pd-ssd".  The
1138                # actual valid values are defined by the Google Compute Engine API,
1139                # not by the Cloud Dataflow API; consult the Google Compute Engine
1140                # documentation for more information about determining the set of
1141                # available disk types for a particular project and zone.
1142                #
1143                # Google Compute Engine Disk types are local to a particular
1144                # project in a particular zone, and so the resource name will
1145                # typically look something like this:
1146                #
1147                # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
1148          },
1149        ],
1150        "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
1151          "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
1152          "algorithm": "A String", # The algorithm to use for autoscaling.
1153        },
1154        "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
1155            # select a default set of packages which are useful to worker
1156            # harnesses written in a particular language.
1157        "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
1158            # attempt to choose a reasonable default.
1159        "metadata": { # Metadata to set on the Google Compute Engine VMs.
1160          "a_key": "A String",
1161        },
1162      },
1163    ],
1164    "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1165        # storage.  The system will append the suffix "/temp-{JOBNAME}" to
1166        # this resource prefix, where {JOBNAME} is the value of the
1167        # job_name field.  The resulting bucket and object prefix is used
1168        # as the prefix of the resources used to store temporary data
1169        # needed during the job execution.  NOTE: This will override the
1170        # value in taskrunner_settings.
1171        # The supported resource type is:
1172        #
1173        # Google Cloud Storage:
1174        #
1175        #   storage.googleapis.com/{bucket}/{object}
1176        #   bucket.storage.googleapis.com/{object}
1177  },
1178  "location": "A String", # The [regional endpoint]
1179      # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
1180      # contains this job.
1181  "tempFiles": [ # A set of files the system should be aware of that are used
1182      # for temporary storage. These temporary files will be
1183      # removed on job completion.
1184      # No duplicates are allowed.
1185      # No file patterns are supported.
1186      #
1187      # The supported files are:
1188      #
1189      # Google Cloud Storage:
1190      #
1191      #    storage.googleapis.com/{bucket}/{object}
1192      #    bucket.storage.googleapis.com/{object}
1193    "A String",
1194  ],
1195  "type": "A String", # The type of Cloud Dataflow job.
1196  "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
1197      # If this field is set, the service will ensure its uniqueness.
1198      # The request to create a job will fail if the service has knowledge of a
1199      # previously submitted job with the same client's ID and job name.
1200      # The caller may use this field to ensure idempotence of job
1201      # creation across retried attempts to create a job.
1202      # By default, the field is empty and, in that case, the service ignores it.
1203  "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
1204      # snapshot.
1205  "stepsLocation": "A String", # The GCS location where the steps are stored.
1206  "currentStateTime": "A String", # The timestamp associated with the current state.
1207  "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
1208      # Flexible resource scheduling jobs are started with some delay after job
1209      # creation, so start_time is unset before start and is updated when the
1210      # job is started by the Cloud Dataflow service. For other jobs, start_time
1211      # always equals create_time and is immutable and set by the Cloud Dataflow
1212      # service.
1213  "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
1214      # Cloud Dataflow service.
1215  "requestedState": "A String", # The job's requested state.
1216      #
1217      # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
1218      # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
1219      # also be used to directly set a job's requested state to
1220      # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
1221      # job if it has not already reached a terminal state.
1222  "name": "A String", # The user-specified Cloud Dataflow job name.
1223      #
1224      # Only one Job with a given name may exist in a project at any
1225      # given time. If a caller attempts to create a Job with the same
1226      # name as an already-existing Job, the attempt returns the
1227      # existing Job.
1228      #
1229      # The name must match the regular expression
1230      # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
1231  "steps": [ # Exactly one of step or steps_location should be specified.
1232      #
1233      # The top-level steps that constitute the entire job.
1234    { # Defines a particular step within a Cloud Dataflow job.
1235        #
1236        # A job consists of multiple steps, each of which performs some
1237        # specific operation as part of the overall job.  Data is typically
1238        # passed from one step to another as part of the job.
1239        #
1240        # Here's an example of a sequence of steps which together implement a
1241        # Map-Reduce job:
1242        #
1243        #   * Read a collection of data from some source, parsing the
1244        #     collection's elements.
1245        #
1246        #   * Validate the elements.
1247        #
1248        #   * Apply a user-defined function to map each element to some value
1249        #     and extract an element-specific key value.
1250        #
1251        #   * Group elements with the same key into a single element with
1252        #     that key, transforming a multiply-keyed collection into a
1253        #     uniquely-keyed collection.
1254        #
1255        #   * Write the elements out to some data sink.
1256        #
1257        # Note that the Cloud Dataflow service may be used to run many different
1258        # types of jobs, not just Map-Reduce.
1259      "kind": "A String", # The kind of step in the Cloud Dataflow job.
1260      "properties": { # Named properties associated with the step. Each kind of
1261          # predefined step has its own required set of properties.
1262          # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
1263        "a_key": "", # Properties of the object.
1264      },
1265      "name": "A String", # The name that identifies the step. This must be unique for each
1266          # step with respect to all other steps in the Cloud Dataflow job.
1267    },
1268  ],
1269  "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
1270      # of the job it replaced.
1271      #
1272      # When sending a `CreateJobRequest`, you can update a job by specifying it
1273      # here. The job named here is stopped, and its intermediate state is
1274      # transferred to this job.
1275  "currentState": "A String", # The current state of the job.
1276      #
1277      # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
1278      # specified.
1279      #
1280      # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
1281      # terminal state. After a job has reached a terminal state, no
1282      # further state updates may be made.
1283      #
1284      # This field may be mutated by the Cloud Dataflow service;
1285      # callers cannot mutate it.
1286  "executionInfo": { # Deprecated. Additional information about how a Cloud Dataflow job will be
1287      # executed that isn't contained in the submitted job.
1288    "stages": { # A mapping from each stage to the information about that stage.
1289      "a_key": { # Contains information about how a particular
1290          # google.dataflow.v1beta3.Step will be executed.
1291        "stepName": [ # The steps associated with the execution stage.
1292            # Note that stages may have several steps, and that a given step
1293            # might be run by more than one stage.
1294          "A String",
1295        ],
1296      },
1297    },
1298  },
1299}
1300
1301  location: string, The [regional endpoint]
1302(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
1303contains this job.
1304  x__xgafv: string, V1 error format.
1305    Allowed values
1306      1 - v1 error format
1307      2 - v2 error format
1308  replaceJobId: string, Deprecated. This field is now in the Job message.
1309  view: string, The level of information requested in response.
1310
1311Returns:
1312  An object of the form:
1313
1314    { # Defines a job to be run by the Cloud Dataflow service.
1315    "labels": { # User-defined labels for this job.
1316        #
1317        # The labels map can contain no more than 64 entries.  Entries of the labels
1318        # map are UTF8 strings that comply with the following restrictions:
1319        #
1320        # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
1321        # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
1322        # * Both keys and values are additionally constrained to be <= 128 bytes in
1323        # size.
1324      "a_key": "A String",
1325    },
1326    "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the
1327        # ListJob response and Job SUMMARY view. This field is populated by the Dataflow
1328        # service to support filtering jobs by the metadata values provided here.
1329        # Populated for ListJobs and all GetJob views SUMMARY and higher.
1330      "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
1331        "versionDisplayName": "A String", # A readable string describing the version of the SDK.
1332        "version": "A String", # The version of the SDK used to run the job.
1333        "sdkSupportStatus": "A String", # The support status for this SDK version.
1334      },
1335      "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
1336        { # Metadata for a PubSub connector used by the job.
1337          "topic": "A String", # Topic accessed in the connection.
1338          "subscription": "A String", # Subscription used in the connection.
1339        },
1340      ],
1341      "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
1342        { # Metadata for a Datastore connector used by the job.
1343          "projectId": "A String", # ProjectId accessed in the connection.
1344          "namespace": "A String", # Namespace used in the connection.
1345        },
1346      ],
1347      "fileDetails": [ # Identification of a File source used in the Dataflow job.
1348        { # Metadata for a File connector used by the job.
1349          "filePattern": "A String", # File Pattern used to access files by the connector.
1350        },
1351      ],
1352      "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
1353        { # Metadata for a Spanner connector used by the job.
1354          "instanceId": "A String", # InstanceId accessed in the connection.
1355          "projectId": "A String", # ProjectId accessed in the connection.
1356          "databaseId": "A String", # DatabaseId accessed in the connection.
1357        },
1358      ],
1359      "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
1360        { # Metadata for a BigTable connector used by the job.
1361          "instanceId": "A String", # InstanceId accessed in the connection.
1362          "projectId": "A String", # ProjectId accessed in the connection.
1363          "tableId": "A String", # TableId accessed in the connection.
1364        },
1365      ],
1366      "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
1367        { # Metadata for a BigQuery connector used by the job.
1368          "projectId": "A String", # Project accessed in the connection.
1369          "dataset": "A String", # Dataset accessed in the connection.
1370          "table": "A String", # Table accessed in the connection.
1371          "query": "A String", # Query used to access data in the connection.
1372        },
1373      ],
1374    },
1375    "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed
1376        # form.  This data is provided by the Dataflow service for ease of visualizing
1377        # the pipeline and interpreting Dataflow provided metrics.
1378        # Preliminary field: The format of this data may change at any time.
1379        # A description of the user pipeline and stages through which it is executed.
1380        # Created by Cloud Dataflow service.  Only retrieved with JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
1381      "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
1382        { # Description of the type, names/ids, and input/outputs for a transform.
1383          "kind": "A String", # Type of transform.
1384          "name": "A String", # User provided name for this transform instance.
1385          "inputCollectionName": [ # User names for all collection inputs to this transform.
1386            "A String",
1387          ],
1388          "displayData": [ # Transform-specific display data.
1389            { # Data provided with a pipeline or transform to provide descriptive info.
1390              "shortStrValue": "A String", # A possible additional shorter value to display.
1391                  # For example a java_class_name_value of com.mypackage.MyDoFn
1392                  # will be stored with MyDoFn as the short_str_value and
1393                  # com.mypackage.MyDoFn as the java_class_name value.
1394                  # short_str_value can be displayed and java_class_name_value
1395                  # will be displayed as a tooltip.
1396              "durationValue": "A String", # Contains value if the data is of duration type.
1397              "url": "A String", # An optional full URL.
1398              "floatValue": 3.14, # Contains value if the data is of float type.
1399              "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1400                  # language namespace (i.e. python module) which defines the display data.
1401                  # This allows a dax monitoring system to specially handle the data
1402                  # and perform custom rendering.
1403              "javaClassValue": "A String", # Contains value if the data is of java class type.
1404              "label": "A String", # An optional label to display in a dax UI for the element.
1405              "boolValue": True or False, # Contains value if the data is of a boolean type.
1406              "strValue": "A String", # Contains value if the data is of string type.
1407              "key": "A String", # The key identifying the display data.
1408                  # This is intended to be used as a label for the display data
1409                  # when viewed in a dax monitoring system.
1410              "int64Value": "A String", # Contains value if the data is of int64 type.
1411              "timestampValue": "A String", # Contains value if the data is of timestamp type.
1412            },
1413          ],
1414          "outputCollectionName": [ # User names for all collection outputs to this transform.
1415            "A String",
1416          ],
1417          "id": "A String", # SDK generated id of this transform instance.
1418        },
1419      ],
1420      "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
1421        { # Description of the composing transforms, names/ids, and input/outputs of a
1422            # stage of execution.  Some composing transforms and sources may have been
1423            # generated by the Dataflow service during execution planning.
1424          "componentSource": [ # Collections produced and consumed by component transforms of this stage.
1425            { # Description of an interstitial value between transforms in an execution
1426                # stage.
1427              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1428              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1429                  # source is most closely associated.
1430              "name": "A String", # Dataflow service generated name for this source.
1431            },
1432          ],
1433          "kind": "A String", # Type of transform this stage is executing.
1434          "name": "A String", # Dataflow service generated name for this stage.
1435          "outputSource": [ # Output sources for this stage.
1436            { # Description of an input or output of an execution stage.
1437              "userName": "A String", # Human-readable name for this source; may be user or system generated.
1438              "sizeBytes": "A String", # Size of the source, if measurable.
1439              "name": "A String", # Dataflow service generated name for this source.
1440              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1441                  # source is most closely associated.
1442            },
1443          ],
1444          "inputSource": [ # Input sources for this stage.
1445            { # Description of an input or output of an execution stage.
1446              "userName": "A String", # Human-readable name for this source; may be user or system generated.
1447              "sizeBytes": "A String", # Size of the source, if measurable.
1448              "name": "A String", # Dataflow service generated name for this source.
1449              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1450                  # source is most closely associated.
1451            },
1452          ],
1453          "componentTransform": [ # Transforms that comprise this execution stage.
1454            { # Description of a transform executed as part of an execution stage.
1455              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1456              "originalTransform": "A String", # User name for the original user transform with which this transform is
1457                  # most closely associated.
1458              "name": "A String", # Dataflow service generated name for this transform.
1459            },
1460          ],
1461          "id": "A String", # Dataflow service generated id for this stage.
1462        },
1463      ],
1464      "displayData": [ # Pipeline level display data.
1465        { # Data provided with a pipeline or transform to provide descriptive info.
1466          "shortStrValue": "A String", # A possible additional shorter value to display.
1467              # For example a java_class_name_value of com.mypackage.MyDoFn
1468              # will be stored with MyDoFn as the short_str_value and
1469              # com.mypackage.MyDoFn as the java_class_name value.
1470              # short_str_value can be displayed and java_class_name_value
1471              # will be displayed as a tooltip.
1472          "durationValue": "A String", # Contains value if the data is of duration type.
1473          "url": "A String", # An optional full URL.
1474          "floatValue": 3.14, # Contains value if the data is of float type.
1475          "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1476              # language namespace (i.e. python module) which defines the display data.
1477              # This allows a dax monitoring system to specially handle the data
1478              # and perform custom rendering.
1479          "javaClassValue": "A String", # Contains value if the data is of java class type.
1480          "label": "A String", # An optional label to display in a dax UI for the element.
1481          "boolValue": True or False, # Contains value if the data is of a boolean type.
1482          "strValue": "A String", # Contains value if the data is of string type.
1483          "key": "A String", # The key identifying the display data.
1484              # This is intended to be used as a label for the display data
1485              # when viewed in a dax monitoring system.
1486          "int64Value": "A String", # Contains value if the data is of int64 type.
1487          "timestampValue": "A String", # Contains value if the data is of timestamp type.
1488        },
1489      ],
1490    },
1491    "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
1492        # callers cannot mutate it.
1493      { # A message describing the state of a particular execution stage.
1494        "executionStageName": "A String", # The name of the execution stage.
1495        "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
1496        "currentStateTime": "A String", # The time at which the stage transitioned to this state.
1497      },
1498    ],
1499    "id": "A String", # The unique ID of this job.
1500        #
1501        # This field is set by the Cloud Dataflow service when the Job is
1502        # created, and is immutable for the life of the job.
1503    "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
1504        # `JOB_STATE_UPDATED`), this field contains the ID of that job.
1505    "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
1506    "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
1507        # corresponding name prefixes of the new job.
1508      "a_key": "A String",
1509    },
1510    "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
1511      "version": { # A structure describing which components and their versions of the service
1512          # are required in order to run the job.
1513        "a_key": "", # Properties of the object.
1514      },
1515      "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
1516      "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data
1517          # at rest, AKA a Customer Managed Encryption Key (CMEK).
1518          #
1519          # Format:
1520          #   projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
1521      "internalExperiments": { # Experimental settings.
1522        "a_key": "", # Properties of the object. Contains field @type with type URL.
1523      },
1524      "dataset": "A String", # The dataset for the current project where various workflow
1525          # related tables are stored.
1526          #
1527          # The supported resource type is:
1528          #
1529          # Google BigQuery:
1530          #   bigquery.googleapis.com/{dataset}
1531      "experiments": [ # The list of experiments to enable.
1532        "A String",
1533      ],
1534      "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
1535      "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
1536          # options are passed through the service and are used to recreate the
1537          # SDK pipeline options on the worker in a language agnostic and platform
1538          # independent way.
1539        "a_key": "", # Properties of the object.
1540      },
1541      "userAgent": { # A description of the process that generated the request.
1542        "a_key": "", # Properties of the object.
1543      },
1544      "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
1545          # unspecified, the service will attempt to choose a reasonable
1546          # default.  This should be in the form of the API service name,
1547          # e.g. "compute.googleapis.com".
1548      "workerPools": [ # The worker pools. At least one "harness" worker pool must be
1549          # specified in order for the job to have workers.
1550        { # Describes one particular pool of Cloud Dataflow workers to be
1551            # instantiated by the Cloud Dataflow service in order to perform the
1552            # computations required by a job.  Note that a workflow job may use
1553            # multiple pools, in order to match the various computational
1554            # requirements of the various stages of the job.
1555          "diskSourceImage": "A String", # Fully qualified source image for disks.
1556          "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
1557              # using the standard Dataflow task runner.  Users should ignore
1558              # this field.
1559            "workflowFileName": "A String", # The file to store the workflow in.
1560            "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
1561                # will not be uploaded.
1562                #
1563                # The supported resource type is:
1564                #
1565                # Google Cloud Storage:
1566                #   storage.googleapis.com/{bucket}/{object}
1567                #   bucket.storage.googleapis.com/{object}
1568            "commandlinesFileName": "A String", # The file to store preprocessing commands in.
1569            "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
1570              "reportingEnabled": True or False, # Whether to send work progress updates to the service.
1571              "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
1572                  # "shuffle/v1beta1".
1573              "workerId": "A String", # The ID of the worker running this pipeline.
1574              "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
1575                  #
1576                  # When workers access Google Cloud APIs, they logically do so via
1577                  # relative URLs.  If this field is specified, it supplies the base
1578                  # URL to use for resolving these relative URLs.  The normative
1579                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1580                  # Locators".
1581                  #
1582                  # If not specified, the default value is "http://www.googleapis.com/"
1583              "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
1584                  # "dataflow/v1b3/projects".
1585              "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1586                  # storage.
1587                  #
1588                  # The supported resource type is:
1589                  #
1590                  # Google Cloud Storage:
1591                  #
1592                  #   storage.googleapis.com/{bucket}/{object}
1593                  #   bucket.storage.googleapis.com/{object}
1594            },
1595            "vmId": "A String", # The ID string of the VM.
1596            "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
1597            "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
1598            "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
1599                # access the Cloud Dataflow API.
1600              "A String",
1601            ],
1602            "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
1603                # taskrunner; e.g. "root".
1604            "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
1605                #
1606                # When workers access Google Cloud APIs, they logically do so via
1607                # relative URLs.  If this field is specified, it supplies the base
1608                # URL to use for resolving these relative URLs.  The normative
1609                # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1610                # Locators".
1611                #
1612                # If not specified, the default value is "http://www.googleapis.com/"
1613            "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
1614                # taskrunner; e.g. "wheel".
1615            "languageHint": "A String", # The suggested backend language.
1616            "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
1617                # console.
1618            "streamingWorkerMainClass": "A String", # The streaming worker main class name.
1619            "logDir": "A String", # The directory on the VM to store logs.
1620            "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3".
1621            "harnessCommand": "A String", # The command to launch the worker harness.
1622            "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
1623                # temporary storage.
1624                #
1625                # The supported resource type is:
1626                #
1627                # Google Cloud Storage:
1628                #   storage.googleapis.com/{bucket}/{object}
1629                #   bucket.storage.googleapis.com/{object}
1630            "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
1631          },
1632          "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
1633              # are supported.
1634          "packages": [ # Packages to be installed on workers.
1635            { # The packages that must be installed in order for a worker to run the
1636                # steps of the Cloud Dataflow job that will be assigned to its worker
1637                # pool.
1638                #
1639                # This is the mechanism by which the Cloud Dataflow SDK causes code to
1640                # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
1641                # might use this to install jars containing the user's code and all of the
1642                # various dependencies (libraries, data files, etc.) required in order
1643                # for that code to run.
1644              "location": "A String", # The resource to read the package from. The supported resource type is:
1645                  #
1646                  # Google Cloud Storage:
1647                  #
1648                  #   storage.googleapis.com/{bucket}
1649                  #   bucket.storage.googleapis.com/
1650              "name": "A String", # The name of the package.
1651            },
1652          ],
1653          "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
1654              # service will attempt to choose a reasonable default.
1655          "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
1656              # the service will use the network "default".
1657          "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
1658              # will attempt to choose a reasonable default.
1659          "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
1660              # attempt to choose a reasonable default.
1661          "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
1662              # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
1663              # `TEARDOWN_NEVER`.
1664              # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
1665              # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
1666              # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
1667              # down.
1668              #
1669              # If the workers are not torn down by the service, they will
1670              # continue to run and use Google Compute Engine VM resources in the
1671              # user's project until they are explicitly terminated by the user.
1672              # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
1673              # policy except for small, manually supervised test jobs.
1674              #
1675              # If unknown or unspecified, the service will attempt to choose a reasonable
1676              # default.
1677          "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
1678              # Compute Engine API.
1679          "ipConfiguration": "A String", # Configuration for VM IPs.
1680          "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
1681              # service will choose a number of threads (according to the number of cores
1682              # on the selected machine type for batch, or 1 by convention for streaming).
1683          "poolArgs": { # Extra arguments for this worker pool.
1684            "a_key": "", # Properties of the object. Contains field @type with type URL.
1685          },
1686          "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
1687              # execute the job.  If zero or unspecified, the service will
1688              # attempt to choose a reasonable default.
1689          "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
1690              # harness, residing in Google Container Registry.
1691          "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
1692              # the form "regions/REGION/subnetworks/SUBNETWORK".
1693          "dataDisks": [ # Data disks that are used by a VM in this workflow.
1694            { # Describes the data disk used by a workflow job.
1695              "mountPoint": "A String", # Directory in a VM where disk is mounted.
1696              "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
1697                  # attempt to choose a reasonable default.
1698              "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
1699                  # must be a disk type appropriate to the project and zone in which
1700                  # the workers will run.  If unknown or unspecified, the service
1701                  # will attempt to choose a reasonable default.
1702                  #
1703                  # For example, the standard persistent disk type is a resource name
1704                  # typically ending in "pd-standard".  If SSD persistent disks are
1705                  # available, the resource name typically ends with "pd-ssd".  The
1706                  # actual valid values are defined by the Google Compute Engine API,
1707                  # not by the Cloud Dataflow API; consult the Google Compute Engine
1708                  # documentation for more information about determining the set of
1709                  # available disk types for a particular project and zone.
1710                  #
1711                  # Google Compute Engine Disk types are local to a particular
1712                  # project in a particular zone, and so the resource name will
1713                  # typically look something like this:
1714                  #
1715                  # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
1716            },
1717          ],
1718          "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
1719            "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
1720            "algorithm": "A String", # The algorithm to use for autoscaling.
1721          },
1722          "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
1723              # select a default set of packages which are useful to worker
1724              # harnesses written in a particular language.
1725          "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
1726              # attempt to choose a reasonable default.
1727          "metadata": { # Metadata to set on the Google Compute Engine VMs.
1728            "a_key": "A String",
1729          },
1730        },
1731      ],
1732      "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1733          # storage.  The system will append the suffix "/temp-{JOBNAME}" to
1734          # this resource prefix, where {JOBNAME} is the value of the
1735          # job_name field.  The resulting bucket and object prefix is used
1736          # as the prefix of the resources used to store temporary data
1737          # needed during the job execution.  NOTE: This will override the
1738          # value in taskrunner_settings.
1739          # The supported resource type is:
1740          #
1741          # Google Cloud Storage:
1742          #
1743          #   storage.googleapis.com/{bucket}/{object}
1744          #   bucket.storage.googleapis.com/{object}
1745    },
1746    "location": "A String", # The [regional endpoint]
1747        # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
1748        # contains this job.
1749    "tempFiles": [ # A set of files the system should be aware of that are used
1750        # for temporary storage. These temporary files will be
1751        # removed on job completion.
1752        # No duplicates are allowed.
1753        # No file patterns are supported.
1754        #
1755        # The supported files are:
1756        #
1757        # Google Cloud Storage:
1758        #
1759        #    storage.googleapis.com/{bucket}/{object}
1760        #    bucket.storage.googleapis.com/{object}
1761      "A String",
1762    ],
1763    "type": "A String", # The type of Cloud Dataflow job.
1764    "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
1765        # If this field is set, the service will ensure its uniqueness.
1766        # The request to create a job will fail if the service has knowledge of a
1767        # previously submitted job with the same client's ID and job name.
1768        # The caller may use this field to ensure idempotence of job
1769        # creation across retried attempts to create a job.
1770        # By default, the field is empty and, in that case, the service ignores it.
1771    "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
1772        # snapshot.
1773    "stepsLocation": "A String", # The GCS location where the steps are stored.
1774    "currentStateTime": "A String", # The timestamp associated with the current state.
1775    "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
1776        # Flexible resource scheduling jobs are started with some delay after job
1777        # creation, so start_time is unset before start and is updated when the
1778        # job is started by the Cloud Dataflow service. For other jobs, start_time
1779        # always equals create_time and is immutable and set by the Cloud Dataflow
1780        # service.
1781    "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
1782        # Cloud Dataflow service.
1783    "requestedState": "A String", # The job's requested state.
1784        #
1785        # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
1786        # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
1787        # also be used to directly set a job's requested state to
1788        # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
1789        # job if it has not already reached a terminal state.
1790    "name": "A String", # The user-specified Cloud Dataflow job name.
1791        #
1792        # Only one Job with a given name may exist in a project at any
1793        # given time. If a caller attempts to create a Job with the same
1794        # name as an already-existing Job, the attempt returns the
1795        # existing Job.
1796        #
1797        # The name must match the regular expression
1798        # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
1799    "steps": [ # Exactly one of step or steps_location should be specified.
1800        #
1801        # The top-level steps that constitute the entire job.
1802      { # Defines a particular step within a Cloud Dataflow job.
1803          #
1804          # A job consists of multiple steps, each of which performs some
1805          # specific operation as part of the overall job.  Data is typically
1806          # passed from one step to another as part of the job.
1807          #
1808          # Here's an example of a sequence of steps which together implement a
1809          # Map-Reduce job:
1810          #
1811          #   * Read a collection of data from some source, parsing the
1812          #     collection's elements.
1813          #
1814          #   * Validate the elements.
1815          #
1816          #   * Apply a user-defined function to map each element to some value
1817          #     and extract an element-specific key value.
1818          #
1819          #   * Group elements with the same key into a single element with
1820          #     that key, transforming a multiply-keyed collection into a
1821          #     uniquely-keyed collection.
1822          #
1823          #   * Write the elements out to some data sink.
1824          #
1825          # Note that the Cloud Dataflow service may be used to run many different
1826          # types of jobs, not just Map-Reduce.
1827        "kind": "A String", # The kind of step in the Cloud Dataflow job.
1828        "properties": { # Named properties associated with the step. Each kind of
1829            # predefined step has its own required set of properties.
1830            # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
1831          "a_key": "", # Properties of the object.
1832        },
1833        "name": "A String", # The name that identifies the step. This must be unique for each
1834            # step with respect to all other steps in the Cloud Dataflow job.
1835      },
1836    ],
1837    "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
1838        # of the job it replaced.
1839        #
1840        # When sending a `CreateJobRequest`, you can update a job by specifying it
1841        # here. The job named here is stopped, and its intermediate state is
1842        # transferred to this job.
1843    "currentState": "A String", # The current state of the job.
1844        #
1845        # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
1846        # specified.
1847        #
1848        # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
1849        # terminal state. After a job has reached a terminal state, no
1850        # further state updates may be made.
1851        #
1852        # This field may be mutated by the Cloud Dataflow service;
1853        # callers cannot mutate it.
1854    "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
1855        # isn't contained in the submitted job.
1856      "stages": { # A mapping from each stage to the information about that stage.
1857        "a_key": { # Contains information about how a particular
1858            # google.dataflow.v1beta3.Step will be executed.
1859          "stepName": [ # The steps associated with the execution stage.
1860              # Note that stages may have several steps, and that a given step
1861              # might be run by more than one stage.
1862            "A String",
1863          ],
1864        },
1865      },
1866    },
1867  }</pre>
1868</div>
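<p>A minimal usage sketch for the <code>create</code> call documented above, using the google-api-python-client library and assuming application-default credentials are available; the project ID and job body below are placeholders that only illustrate a few fields of the Job schema, not a complete, launchable job:</p>
<pre>
# Hypothetical example: submit a Job resource via projects.jobs.create.
from googleapiclient.discovery import build

# Builds a client for the Dataflow v1b3 API using default credentials.
service = build('dataflow', 'v1b3')

# Illustrative body only -- a real job also needs steps (or stepsLocation)
# and an environment appropriate to the pipeline being launched.
job_body = {
    'name': 'example-job',
    'type': 'JOB_TYPE_BATCH',
}

job = service.projects().jobs().create(
    projectId='my-project',          # placeholder project ID
    body=job_body,
    location='us-central1',          # regional endpoint that should own the job
).execute()

# The response is a Job resource as described above.
print(job['id'], job['currentState'])
</pre>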
1869
1870<div class="method">
1871    <code class="details" id="get">get(projectId, jobId, location=None, x__xgafv=None, view=None)</code>
1872  <pre>Gets the state of the specified Cloud Dataflow job.
1873
1874To get the state of a job, we recommend using `projects.locations.jobs.get`
1875with a [regional endpoint]
1876(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using
1877`projects.jobs.get` is not recommended, as you can only get the state of
1878jobs that are running in `us-central1`.
1879
1880Args:
1881  projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
1882  jobId: string, The job ID. (required)
1883  location: string, The [regional endpoint]
1884(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
1885contains this job.
1886  x__xgafv: string, V1 error format.
1887    Allowed values
1888      1 - v1 error format
1889      2 - v2 error format
1890  view: string, The level of information requested in response.
1891
1892Returns:
1893  An object of the form:
1894
1895    { # Defines a job to be run by the Cloud Dataflow service.
1896    "labels": { # User-defined labels for this job.
1897        #
1898        # The labels map can contain no more than 64 entries.  Entries of the labels
1899        # map are UTF8 strings that comply with the following restrictions:
1900        #
1901        # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
1902        # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
1903        # * Both keys and values are additionally constrained to be <= 128 bytes in
1904        # size.
1905      "a_key": "A String",
1906    },
1907    "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the # This field is populated by the Dataflow service to support filtering jobs
1908        # by the metadata values provided here. Populated for ListJobs and all GetJob
1909        # views SUMMARY and higher.
1910        # ListJob response and Job SUMMARY view.
1911      "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
1912        "versionDisplayName": "A String", # A readable string describing the version of the SDK.
1913        "version": "A String", # The version of the SDK used to run the job.
1914        "sdkSupportStatus": "A String", # The support status for this SDK version.
1915      },
1916      "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
1917        { # Metadata for a PubSub connector used by the job.
1918          "topic": "A String", # Topic accessed in the connection.
1919          "subscription": "A String", # Subscription used in the connection.
1920        },
1921      ],
1922      "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
1923        { # Metadata for a Datastore connector used by the job.
1924          "projectId": "A String", # ProjectId accessed in the connection.
1925          "namespace": "A String", # Namespace used in the connection.
1926        },
1927      ],
1928      "fileDetails": [ # Identification of a File source used in the Dataflow job.
1929        { # Metadata for a File connector used by the job.
1930          "filePattern": "A String", # File Pattern used to access files by the connector.
1931        },
1932      ],
1933      "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
1934        { # Metadata for a Spanner connector used by the job.
1935          "instanceId": "A String", # InstanceId accessed in the connection.
1936          "projectId": "A String", # ProjectId accessed in the connection.
1937          "databaseId": "A String", # DatabaseId accessed in the connection.
1938        },
1939      ],
1940      "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
1941        { # Metadata for a BigTable connector used by the job.
1942          "instanceId": "A String", # InstanceId accessed in the connection.
1943          "projectId": "A String", # ProjectId accessed in the connection.
1944          "tableId": "A String", # TableId accessed in the connection.
1945        },
1946      ],
1947      "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
1948        { # Metadata for a BigQuery connector used by the job.
1949          "projectId": "A String", # Project accessed in the connection.
1950          "dataset": "A String", # Dataset accessed in the connection.
1951          "table": "A String", # Table accessed in the connection.
1952          "query": "A String", # Query used to access data in the connection.
1953        },
1954      ],
1955    },
1956    "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
1957        # A description of the user pipeline and stages through which it is executed.
1958        # Created by Cloud Dataflow service.  Only retrieved with
1959        # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
1960        # form.  This data is provided by the Dataflow service for ease of visualizing
1961        # the pipeline and interpreting Dataflow provided metrics.
1962      "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
1963        { # Description of the type, names/ids, and input/outputs for a transform.
1964          "kind": "A String", # Type of transform.
1965          "name": "A String", # User provided name for this transform instance.
1966          "inputCollectionName": [ # User names for all collection inputs to this transform.
1967            "A String",
1968          ],
1969          "displayData": [ # Transform-specific display data.
1970            { # Data provided with a pipeline or transform to provide descriptive info.
1971              "shortStrValue": "A String", # A possible additional shorter value to display.
1972                  # For example a java_class_name_value of com.mypackage.MyDoFn
1973                  # will be stored with MyDoFn as the short_str_value and
1974                  # com.mypackage.MyDoFn as the java_class_name value.
1975                  # short_str_value can be displayed and java_class_name_value
1976                  # will be displayed as a tooltip.
1977              "durationValue": "A String", # Contains value if the data is of duration type.
1978              "url": "A String", # An optional full URL.
1979              "floatValue": 3.14, # Contains value if the data is of float type.
1980              "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1981                  # language namespace (e.g. a Python module) which defines the display data.
1982                  # This allows a dax monitoring system to specially handle the data
1983                  # and perform custom rendering.
1984              "javaClassValue": "A String", # Contains value if the data is of java class type.
1985              "label": "A String", # An optional label to display in a dax UI for the element.
1986              "boolValue": True or False, # Contains value if the data is of a boolean type.
1987              "strValue": "A String", # Contains value if the data is of string type.
1988              "key": "A String", # The key identifying the display data.
1989                  # This is intended to be used as a label for the display data
1990                  # when viewed in a dax monitoring system.
1991              "int64Value": "A String", # Contains value if the data is of int64 type.
1992              "timestampValue": "A String", # Contains value if the data is of timestamp type.
1993            },
1994          ],
1995          "outputCollectionName": [ # User  names for all collection outputs to this transform.
1996            "A String",
1997          ],
1998          "id": "A String", # SDK generated id of this transform instance.
1999        },
2000      ],
2001      "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
2002        { # Description of the composing transforms, names/ids, and input/outputs of a
2003            # stage of execution.  Some composing transforms and sources may have been
2004            # generated by the Dataflow service during execution planning.
2005          "componentSource": [ # Collections produced and consumed by component transforms of this stage.
2006            { # Description of an interstitial value between transforms in an execution
2007                # stage.
2008              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
2009              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2010                  # source is most closely associated.
2011              "name": "A String", # Dataflow service generated name for this source.
2012            },
2013          ],
2014          "kind": "A String", # Type of tranform this stage is executing.
2015          "name": "A String", # Dataflow service generated name for this stage.
2016          "outputSource": [ # Output sources for this stage.
2017            { # Description of an input or output of an execution stage.
2018              "userName": "A String", # Human-readable name for this source; may be user or system generated.
2019              "sizeBytes": "A String", # Size of the source, if measurable.
2020              "name": "A String", # Dataflow service generated name for this source.
2021              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2022                  # source is most closely associated.
2023            },
2024          ],
2025          "inputSource": [ # Input sources for this stage.
2026            { # Description of an input or output of an execution stage.
2027              "userName": "A String", # Human-readable name for this source; may be user or system generated.
2028              "sizeBytes": "A String", # Size of the source, if measurable.
2029              "name": "A String", # Dataflow service generated name for this source.
2030              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2031                  # source is most closely associated.
2032            },
2033          ],
2034          "componentTransform": [ # Transforms that comprise this execution stage.
2035            { # Description of a transform executed as part of an execution stage.
2036              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
2037              "originalTransform": "A String", # User name for the original user transform with which this transform is
2038                  # most closely associated.
2039              "name": "A String", # Dataflow service generated name for this source.
2040            },
2041          ],
2042          "id": "A String", # Dataflow service generated id for this stage.
2043        },
2044      ],
2045      "displayData": [ # Pipeline level display data.
2046        { # Data provided with a pipeline or transform to provide descriptive info.
2047          "shortStrValue": "A String", # A possible additional shorter value to display.
2048              # For example a java_class_name_value of com.mypackage.MyDoFn
2049              # will be stored with MyDoFn as the short_str_value and
2050              # com.mypackage.MyDoFn as the java_class_name value.
2051              # short_str_value can be displayed and java_class_name_value
2052              # will be displayed as a tooltip.
2053          "durationValue": "A String", # Contains value if the data is of duration type.
2054          "url": "A String", # An optional full URL.
2055          "floatValue": 3.14, # Contains value if the data is of float type.
2056          "namespace": "A String", # The namespace for the key. This is usually a class name or programming
2057              # language namespace (e.g. a Python module) which defines the display data.
2058              # This allows a dax monitoring system to specially handle the data
2059              # and perform custom rendering.
2060          "javaClassValue": "A String", # Contains value if the data is of java class type.
2061          "label": "A String", # An optional label to display in a dax UI for the element.
2062          "boolValue": True or False, # Contains value if the data is of a boolean type.
2063          "strValue": "A String", # Contains value if the data is of string type.
2064          "key": "A String", # The key identifying the display data.
2065              # This is intended to be used as a label for the display data
2066              # when viewed in a dax monitoring system.
2067          "int64Value": "A String", # Contains value if the data is of int64 type.
2068          "timestampValue": "A String", # Contains value if the data is of timestamp type.
2069        },
2070      ],
2071    },
2072    "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
2073        # callers cannot mutate it.
2074      { # A message describing the state of a particular execution stage.
2075        "executionStageName": "A String", # The name of the execution stage.
2076        "executionStageState": "A String", # Executions stage states allow the same set of values as JobState.
2077        "currentStateTime": "A String", # The time at which the stage transitioned to this state.
2078      },
2079    ],
2080    "id": "A String", # The unique ID of this job.
2081        #
2082        # This field is set by the Cloud Dataflow service when the Job is
2083        # created, and is immutable for the life of the job.
2084    "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
2085        # `JOB_STATE_UPDATED`), this field contains the ID of that job.
2086    "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
2087    "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
2088        # corresponding name prefixes of the new job.
2089      "a_key": "A String",
2090    },
2091    "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
2092      "version": { # A structure describing which components and their versions of the service
2093          # are required in order to run the job.
2094        "a_key": "", # Properties of the object.
2095      },
2096      "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
2097      "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data
2098          # at rest, AKA a Customer Managed Encryption Key (CMEK).
2099          #
2100          # Format:
2101          #   projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
2102      "internalExperiments": { # Experimental settings.
2103        "a_key": "", # Properties of the object. Contains field @type with type URL.
2104      },
2105      "dataset": "A String", # The dataset for the current project where various workflow
2106          # related tables are stored.
2107          #
2108          # The supported resource type is:
2109          #
2110          # Google BigQuery:
2111          #   bigquery.googleapis.com/{dataset}
2112      "experiments": [ # The list of experiments to enable.
2113        "A String",
2114      ],
2115      "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
2116      "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
2117          # options are passed through the service and are used to recreate the
2118          # SDK pipeline options on the worker in a language agnostic and platform
2119          # independent way.
2120        "a_key": "", # Properties of the object.
2121      },
2122      "userAgent": { # A description of the process that generated the request.
2123        "a_key": "", # Properties of the object.
2124      },
2125      "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
2126          # unspecified, the service will attempt to choose a reasonable
2127          # default.  This should be in the form of the API service name,
2128          # e.g. "compute.googleapis.com".
2129      "workerPools": [ # The worker pools. At least one "harness" worker pool must be
2130          # specified in order for the job to have workers.
2131        { # Describes one particular pool of Cloud Dataflow workers to be
2132            # instantiated by the Cloud Dataflow service in order to perform the
2133            # computations required by a job.  Note that a workflow job may use
2134            # multiple pools, in order to match the various computational
2135            # requirements of the various stages of the job.
2136          "diskSourceImage": "A String", # Fully qualified source image for disks.
2137          "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
2138              # using the standard Dataflow task runner.  Users should ignore
2139              # this field.
2140            "workflowFileName": "A String", # The file to store the workflow in.
2141            "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
2142                # will not be uploaded.
2143                #
2144                # The supported resource type is:
2145                #
2146                # Google Cloud Storage:
2147                #   storage.googleapis.com/{bucket}/{object}
2148                #   bucket.storage.googleapis.com/{object}
2149            "commandlinesFileName": "A String", # The file to store preprocessing commands in.
2150            "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
2151              "reportingEnabled": True or False, # Whether to send work progress updates to the service.
2152              "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
2153                  # "shuffle/v1beta1".
2154              "workerId": "A String", # The ID of the worker running this pipeline.
2155              "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
2156                  #
2157                  # When workers access Google Cloud APIs, they logically do so via
2158                  # relative URLs.  If this field is specified, it supplies the base
2159                  # URL to use for resolving these relative URLs.  The normative
2160                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
2161                  # Locators".
2162                  #
2163                  # If not specified, the default value is "http://www.googleapis.com/"
2164              "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
2165                  # "dataflow/v1b3/projects".
2166              "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
2167                  # storage.
2168                  #
2169                  # The supported resource type is:
2170                  #
2171                  # Google Cloud Storage:
2172                  #
2173                  #   storage.googleapis.com/{bucket}/{object}
2174                  #   bucket.storage.googleapis.com/{object}
2175            },
2176            "vmId": "A String", # The ID string of the VM.
2177            "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
2178            "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
2179            "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
2180                # access the Cloud Dataflow API.
2181              "A String",
2182            ],
2183            "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
2184                # taskrunner; e.g. "root".
2185            "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
2186                #
2187                # When workers access Google Cloud APIs, they logically do so via
2188                # relative URLs.  If this field is specified, it supplies the base
2189                # URL to use for resolving these relative URLs.  The normative
2190                # algorithm used is defined by RFC 1808, "Relative Uniform Resource
2191                # Locators".
2192                #
2193                # If not specified, the default value is "http://www.googleapis.com/"
2194            "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
2195                # taskrunner; e.g. "wheel".
2196            "languageHint": "A String", # The suggested backend language.
2197            "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
2198                # console.
2199            "streamingWorkerMainClass": "A String", # The streaming worker main class name.
2200            "logDir": "A String", # The directory on the VM to store logs.
2201            "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
2202            "harnessCommand": "A String", # The command to launch the worker harness.
2203            "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
2204                # temporary storage.
2205                #
2206                # The supported resource type is:
2207                #
2208                # Google Cloud Storage:
2209                #   storage.googleapis.com/{bucket}/{object}
2210                #   bucket.storage.googleapis.com/{object}
2211            "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
2212          },
2213          "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
2214              # are supported.
2215          "packages": [ # Packages to be installed on workers.
2216            { # The packages that must be installed in order for a worker to run the
2217                # steps of the Cloud Dataflow job that will be assigned to its worker
2218                # pool.
2219                #
2220                # This is the mechanism by which the Cloud Dataflow SDK causes code to
2221                # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
2222                # might use this to install jars containing the user's code and all of the
2223                # various dependencies (libraries, data files, etc.) required in order
2224                # for that code to run.
2225              "location": "A String", # The resource to read the package from. The supported resource type is:
2226                  #
2227                  # Google Cloud Storage:
2228                  #
2229                  #   storage.googleapis.com/{bucket}
2230                  #   bucket.storage.googleapis.com/
2231              "name": "A String", # The name of the package.
2232            },
2233          ],
2234          "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
2235              # service will attempt to choose a reasonable default.
2236          "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
2237              # the service will use the network "default".
2238          "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
2239              # will attempt to choose a reasonable default.
2240          "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
2241              # attempt to choose a reasonable default.
2242          "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
2243              # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
2244              # `TEARDOWN_NEVER`.
2245              # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
2246              # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
2247              # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
2248              # down.
2249              #
2250              # If the workers are not torn down by the service, they will
2251              # continue to run and use Google Compute Engine VM resources in the
2252              # user's project until they are explicitly terminated by the user.
2253              # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
2254              # policy except for small, manually supervised test jobs.
2255              #
2256              # If unknown or unspecified, the service will attempt to choose a reasonable
2257              # default.
2258          "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
2259              # Compute Engine API.
2260          "ipConfiguration": "A String", # Configuration for VM IPs.
2261          "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
2262              # service will choose a number of threads (according to the number of cores
2263              # on the selected machine type for batch, or 1 by convention for streaming).
2264          "poolArgs": { # Extra arguments for this worker pool.
2265            "a_key": "", # Properties of the object. Contains field @type with type URL.
2266          },
2267          "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
2268              # execute the job.  If zero or unspecified, the service will
2269              # attempt to choose a reasonable default.
2270          "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
2271              # harness, residing in Google Container Registry.
2272          "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
2273              # the form "regions/REGION/subnetworks/SUBNETWORK".
2274          "dataDisks": [ # Data disks that are used by a VM in this workflow.
2275            { # Describes the data disk used by a workflow job.
2276              "mountPoint": "A String", # Directory in a VM where disk is mounted.
2277              "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
2278                  # attempt to choose a reasonable default.
2279              "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
2280                  # must be a disk type appropriate to the project and zone in which
2281                  # the workers will run.  If unknown or unspecified, the service
2282                  # will attempt to choose a reasonable default.
2283                  #
2284                  # For example, the standard persistent disk type is a resource name
2285                  # typically ending in "pd-standard".  If SSD persistent disks are
2286                  # available, the resource name typically ends with "pd-ssd".  The
2287                  # actual valid values are defined by the Google Compute Engine API,
2288                  # not by the Cloud Dataflow API; consult the Google Compute Engine
2289                  # documentation for more information about determining the set of
2290                  # available disk types for a particular project and zone.
2291                  #
2292                  # Google Compute Engine Disk types are local to a particular
2293                  # project in a particular zone, and so the resource name will
2294                  # typically look something like this:
2295                  #
2296                  # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
2297            },
2298          ],
2299          "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
2300            "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
2301            "algorithm": "A String", # The algorithm to use for autoscaling.
2302          },
2303          "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
2304              # select a default set of packages which are useful to worker
2305              # harnesses written in a particular language.
2306          "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
2307              # attempt to choose a reasonable default.
2308          "metadata": { # Metadata to set on the Google Compute Engine VMs.
2309            "a_key": "A String",
2310          },
2311        },
2312      ],
2313      "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
2314          # storage.  The system will append the suffix "/temp-{JOBNAME}" to
2315          # this resource prefix, where {JOBNAME} is the value of the
2316          # job_name field.  The resulting bucket and object prefix is used
2317          # as the prefix of the resources used to store temporary data
2318          # needed during the job execution.  NOTE: This will override the
2319          # value in taskrunner_settings.
2320          # The supported resource type is:
2321          #
2322          # Google Cloud Storage:
2323          #
2324          #   storage.googleapis.com/{bucket}/{object}
2325          #   bucket.storage.googleapis.com/{object}
2326    },
2327    "location": "A String", # The [regional endpoint]
2328        # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
2329        # contains this job.
2330    "tempFiles": [ # A set of files the system should be aware of that are used
2331        # for temporary storage. These temporary files will be
2332        # removed on job completion.
2333        # No duplicates are allowed.
2334        # No file patterns are supported.
2335        #
2336        # The supported files are:
2337        #
2338        # Google Cloud Storage:
2339        #
2340        #    storage.googleapis.com/{bucket}/{object}
2341        #    bucket.storage.googleapis.com/{object}
2342      "A String",
2343    ],
2344    "type": "A String", # The type of Cloud Dataflow job.
2345    "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
2346        # If this field is set, the service will ensure its uniqueness.
2347        # The request to create a job will fail if the service has knowledge of a
2348        # previously submitted job with the same client's ID and job name.
2349        # The caller may use this field to ensure idempotence of job
2350        # creation across retried attempts to create a job.
2351        # By default, the field is empty and, in that case, the service ignores it.
2352    "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
2353        # snapshot.
2354    "stepsLocation": "A String", # The GCS location where the steps are stored.
2355    "currentStateTime": "A String", # The timestamp associated with the current state.
2356    "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
2357        # Flexible resource scheduling jobs are started with some delay after job
2358        # creation, so start_time is unset before start and is updated when the
2359        # job is started by the Cloud Dataflow service. For other jobs, start_time
2360        # always equals create_time and is immutable and set by the Cloud Dataflow
2361        # service.
2362    "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
2363        # Cloud Dataflow service.
2364    "requestedState": "A String", # The job's requested state.
2365        #
2366        # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
2367        # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
2368        # also be used to directly set a job's requested state to
2369        # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
2370        # job if it has not already reached a terminal state.
2371    "name": "A String", # The user-specified Cloud Dataflow job name.
2372        #
2373        # Only one Job with a given name may exist in a project at any
2374        # given time. If a caller attempts to create a Job with the same
2375        # name as an already-existing Job, the attempt returns the
2376        # existing Job.
2377        #
2378        # The name must match the regular expression
2379        # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
2380    "steps": [ # Exactly one of step or steps_location should be specified.
2381        #
2382        # The top-level steps that constitute the entire job.
2383      { # Defines a particular step within a Cloud Dataflow job.
2384          #
2385          # A job consists of multiple steps, each of which performs some
2386          # specific operation as part of the overall job.  Data is typically
2387          # passed from one step to another as part of the job.
2388          #
2389          # Here's an example of a sequence of steps which together implement a
2390          # Map-Reduce job:
2391          #
2392          #   * Read a collection of data from some source, parsing the
2393          #     collection's elements.
2394          #
2395          #   * Validate the elements.
2396          #
2397          #   * Apply a user-defined function to map each element to some value
2398          #     and extract an element-specific key value.
2399          #
2400          #   * Group elements with the same key into a single element with
2401          #     that key, transforming a multiply-keyed collection into a
2402          #     uniquely-keyed collection.
2403          #
2404          #   * Write the elements out to some data sink.
2405          #
2406          # Note that the Cloud Dataflow service may be used to run many different
2407          # types of jobs, not just Map-Reduce.
2408        "kind": "A String", # The kind of step in the Cloud Dataflow job.
2409        "properties": { # Named properties associated with the step. Each kind of
2410            # predefined step has its own required set of properties.
2411            # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
2412          "a_key": "", # Properties of the object.
2413        },
2414        "name": "A String", # The name that identifies the step. This must be unique for each
2415            # step with respect to all other steps in the Cloud Dataflow job.
2416      },
2417    ],
2418    "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
2419        # of the job it replaced.
2420        #
2421        # When sending a `CreateJobRequest`, you can update a job by specifying it
2422        # here. The job named here is stopped, and its intermediate state is
2423        # transferred to this job.
2424    "currentState": "A String", # The current state of the job.
2425        #
2426        # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
2427        # specified.
2428        #
2429        # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
2430        # terminal state. After a job has reached a terminal state, no
2431        # further state updates may be made.
2432        #
2433        # This field may be mutated by the Cloud Dataflow service;
2434        # callers cannot mutate it.
2435    "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
2436        # isn't contained in the submitted job.
2437      "stages": { # A mapping from each stage to the information about that stage.
2438        "a_key": { # Contains information about how a particular
2439            # google.dataflow.v1beta3.Step will be executed.
2440          "stepName": [ # The steps associated with the execution stage.
2441              # Note that stages may have several steps, and that a given step
2442              # might be run by more than one stage.
2443            "A String",
2444          ],
2445        },
2446      },
2447    },
2448  }</pre>
2449</div>
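<p>A minimal usage sketch for the <code>get</code> call above, using the google-api-python-client library and assuming application-default credentials; the project and job IDs are placeholders:</p>
<pre>
# Hypothetical example: fetch a job's current state via projects.jobs.get.
from googleapiclient.discovery import build

# Builds a client for the Dataflow v1b3 API using default credentials.
service = build('dataflow', 'v1b3')

response = service.projects().jobs().get(
    projectId='my-project',                        # placeholder project ID
    jobId='2019-01-01_00_00_00-1234567890',        # placeholder job ID
    view='JOB_VIEW_SUMMARY',                       # default level of detail
).execute()

# The response is a Job resource; currentState is set by the service.
print(response['currentState'])
</pre>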
2450
2451<div class="method">
2452    <code class="details" id="getMetrics">getMetrics(projectId, jobId, startTime=None, location=None, x__xgafv=None)</code>
2453  <pre>Request the job status.
2454
2455To request the status of a job, we recommend using
2456`projects.locations.jobs.getMetrics` with a [regional endpoint]
2457(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using
2458`projects.jobs.getMetrics` is not recommended, as you can only request the
2459status of jobs that are running in `us-central1`.
2460
2461Args:
2462  projectId: string, A project id. (required)
2463  jobId: string, The job to get metrics for. (required)
2464  startTime: string, Return only metric data that has changed since this time.
2465Default is to return all information about all metrics for the job.
2466  location: string, The [regional endpoint]
2467(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
2468contains the job specified by job_id.
2469  x__xgafv: string, V1 error format.
2470    Allowed values
2471      1 - v1 error format
2472      2 - v2 error format
2473
2474Returns:
2475  An object of the form:
2476
2477    { # JobMetrics contains a collection of metrics describing the detailed progress
2478      # of a Dataflow job. Metrics correspond to user-defined and system-defined
2479      # metrics in the job.
2480      #
2481      # This resource captures only the most recent values of each metric;
2482      # time-series data can be queried for them (under the same metric names)
2483      # from Cloud Monitoring.
2484    "metrics": [ # All metrics for this job.
2485      { # Describes the state of a metric.
2486        "meanCount": "", # Worker-computed aggregate value for the "Mean" aggregation kind.
2487            # This holds the count of the aggregated values and is used in combination
2488            # with mean_sum above to obtain the actual mean aggregate value.
2489            # The only possible value type is Long.
2490        "kind": "A String", # Metric aggregation kind.  The possible metric aggregation kinds are
2491            # "Sum", "Max", "Min", "Mean", "Set", "And", "Or", and "Distribution".
2492            # The specified aggregation kind is case-insensitive.
2493            #
2494            # If omitted, this is not an aggregated value but instead
2495            # a single metric sample value.
2496        "set": "", # Worker-computed aggregate value for the "Set" aggregation kind.  The only
2497            # possible value type is a list of Values whose type can be Long, Double,
2498            # or String, according to the metric's type.  All Values in the list must
2499            # be of the same type.
2500        "name": { # Identifies a metric, by describing the source which generated the # Name of the metric.
2501            # metric.
2502          "origin": "A String", # Origin (namespace) of metric name. May be blank for user-define metrics;
2503              # will be "dataflow" for metrics defined by the Dataflow service or SDK.
2504          "name": "A String", # Worker-defined metric name.
2505          "context": { # Zero or more labeled fields which identify the part of the job this
2506              # metric is associated with, such as the name of a step or collection.
2507              #
2508              # For example, built-in counters associated with steps will have
2509              # context['step'] = &lt;step-name&gt;. Counters associated with PCollections
2510              # in the SDK will have context['pcollection'] = &lt;pcollection-name&gt;.
2511            "a_key": "A String",
2512          },
2513        },
2514        "meanSum": "", # Worker-computed aggregate value for the "Mean" aggregation kind.
2515            # This holds the sum of the aggregated values and is used in combination
2516            # with mean_count below to obtain the actual mean aggregate value.
2517            # The only possible value types are Long and Double.
2518        "cumulative": True or False, # True if this metric is reported as the total cumulative aggregate
2519            # value accumulated since the worker started working on this WorkItem.
2520            # By default this is false, indicating that this metric is reported
2521            # as a delta that is not associated with any WorkItem.
2522        "updateTime": "A String", # Timestamp associated with the metric value. Optional when workers are
2523            # reporting work progress; it will be filled in responses from the
2524            # metrics API.
2525        "scalar": "", # Worker-computed aggregate value for aggregation kinds "Sum", "Max", "Min",
2526            # "And", and "Or".  The possible value types are Long, Double, and Boolean.
2527        "internal": "", # Worker-computed aggregate value for internal use by the Dataflow
2528            # service.
2529        "gauge": "", # A struct value describing properties of a Gauge.
2530            # Metrics of gauge type show the value of a metric across time, and are
2531            # aggregated based on the newest value.
2532        "distribution": "", # A struct value describing properties of a distribution of numeric values.
2533      },
2534    ],
2535    "metricTime": "A String", # Timestamp as of which metric values are current.
2536  }</pre>
2537</div>
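<p>A minimal usage sketch for the <code>getMetrics</code> call above, again using the google-api-python-client library with application-default credentials; the identifiers are placeholders:</p>
<pre>
# Hypothetical example: print the most recent scalar metric values for a job.
from googleapiclient.discovery import build

service = build('dataflow', 'v1b3')

metrics = service.projects().jobs().getMetrics(
    projectId='my-project',                        # placeholder project ID
    jobId='2019-01-01_00_00_00-1234567890',        # placeholder job ID
).execute()

for metric in metrics.get('metrics', []):
    # Each MetricUpdate carries a structured name plus one of several value
    # fields (scalar, distribution, gauge, ...); scalar may be absent.
    print(metric['name']['name'], metric.get('scalar'))
</pre>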
2538
2539<div class="method">
2540    <code class="details" id="list">list(projectId, pageSize=None, pageToken=None, x__xgafv=None, location=None, filter=None, view=None)</code>
2541  <pre>List the jobs of a project.
2542
2543To list the jobs of a project in a region, we recommend using
2544`projects.locations.jobs.list` with a [regional endpoint]
2545(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). To
2546list all jobs across all regions, use `projects.jobs.aggregated`. Using
2547`projects.jobs.list` is not recommended, as you can only get the list of
2548jobs that are running in `us-central1`.
2549
2550Args:
2551  projectId: string, The project which owns the jobs. (required)
2552  pageSize: integer, If there are many jobs, limit response to at most this many.
2553The actual number of jobs returned will be the lesser of page_size
2554and an unspecified server-defined limit.
2555  pageToken: string, Set this to the 'next_page_token' field of a previous response
2556to request additional results in a long list.
2557  x__xgafv: string, V1 error format.
2558    Allowed values
2559      1 - v1 error format
2560      2 - v2 error format
2561  location: string, The [regional endpoint]
2562(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
2563contains this job.
2564  filter: string, The kind of filter to use.
2565  view: string, Level of information requested in response. Default is `JOB_VIEW_SUMMARY`.
2566
2567Returns:
2568  An object of the form:
2569
2570    { # Response to a request to list Cloud Dataflow jobs.  This may be a partial
2571      # response, depending on the page size in the ListJobsRequest.
2572    "nextPageToken": "A String", # Set if there may be more results than fit in this response.
2573    "failedLocation": [ # Zero or more messages describing the [regional endpoints]
2574        # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
2575        # failed to respond.
2576      { # Indicates which [regional endpoint]
2577          # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) failed
2578          # to respond to a request for data.
2579        "name": "A String", # The name of the [regional endpoint]
2580            # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
2581            # failed to respond.
2582      },
2583    ],
2584    "jobs": [ # A subset of the requested job information.
2585      { # Defines a job to be run by the Cloud Dataflow service.
2586        "labels": { # User-defined labels for this job.
2587            #
2588            # The labels map can contain no more than 64 entries.  Entries of the labels
2589            # map are UTF8 strings that comply with the following restrictions:
2590            #
2591            # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
2592            # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
2593            # * Both keys and values are additionally constrained to be <= 128 bytes in
2594            # size.
2595          "a_key": "A String",
2596        },
2597        "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the # This field is populated by the Dataflow service to support filtering jobs
2598            # by the metadata values provided here. Populated for ListJobs and all GetJob
2599            # views SUMMARY and higher.
2600            # ListJob response and Job SUMMARY view.
2601          "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
2602            "versionDisplayName": "A String", # A readable string describing the version of the SDK.
2603            "version": "A String", # The version of the SDK used to run the job.
2604            "sdkSupportStatus": "A String", # The support status for this SDK version.
2605          },
2606          "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
2607            { # Metadata for a PubSub connector used by the job.
2608              "topic": "A String", # Topic accessed in the connection.
2609              "subscription": "A String", # Subscription used in the connection.
2610            },
2611          ],
2612          "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
2613            { # Metadata for a Datastore connector used by the job.
2614              "projectId": "A String", # ProjectId accessed in the connection.
2615              "namespace": "A String", # Namespace used in the connection.
2616            },
2617          ],
2618          "fileDetails": [ # Identification of a File source used in the Dataflow job.
2619            { # Metadata for a File connector used by the job.
2620              "filePattern": "A String", # File Pattern used to access files by the connector.
2621            },
2622          ],
2623          "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
2624            { # Metadata for a Spanner connector used by the job.
2625              "instanceId": "A String", # InstanceId accessed in the connection.
2626              "projectId": "A String", # ProjectId accessed in the connection.
2627              "databaseId": "A String", # DatabaseId accessed in the connection.
2628            },
2629          ],
2630          "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
2631            { # Metadata for a BigTable connector used by the job.
2632              "instanceId": "A String", # InstanceId accessed in the connection.
2633              "projectId": "A String", # ProjectId accessed in the connection.
2634              "tableId": "A String", # TableId accessed in the connection.
2635            },
2636          ],
2637          "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
2638            { # Metadata for a BigQuery connector used by the job.
2639              "projectId": "A String", # Project accessed in the connection.
2640              "dataset": "A String", # Dataset accessed in the connection.
2641              "table": "A String", # Table accessed in the connection.
2642              "query": "A String", # Query used to access data in the connection.
2643            },
2644          ],
2645        },
2646        "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
2647            # A description of the user pipeline and stages through which it is executed.
2648            # Created by Cloud Dataflow service.  Only retrieved with
2649            # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
2650            # form.  This data is provided by the Dataflow service for ease of visualizing
2651            # the pipeline and interpreting Dataflow provided metrics.
2652          "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
2653            { # Description of the type, names/ids, and input/outputs for a transform.
2654              "kind": "A String", # Type of transform.
2655              "name": "A String", # User provided name for this transform instance.
2656              "inputCollectionName": [ # User names for all collection inputs to this transform.
2657                "A String",
2658              ],
2659              "displayData": [ # Transform-specific display data.
2660                { # Data provided with a pipeline or transform to provide descriptive info.
2661                  "shortStrValue": "A String", # A possible additional shorter value to display.
2662                      # For example a java_class_name_value of com.mypackage.MyDoFn
2663                      # will be stored with MyDoFn as the short_str_value and
2664                      # com.mypackage.MyDoFn as the java_class_name value.
2665                      # short_str_value can be displayed and java_class_name_value
2666                      # will be displayed as a tooltip.
2667                  "durationValue": "A String", # Contains value if the data is of duration type.
2668                  "url": "A String", # An optional full URL.
2669                  "floatValue": 3.14, # Contains value if the data is of float type.
2670                  "namespace": "A String", # The namespace for the key. This is usually a class name or programming
2671                      # language namespace (e.g. a Python module) which defines the display data.
2672                      # This allows a dax monitoring system to specially handle the data
2673                      # and perform custom rendering.
2674                  "javaClassValue": "A String", # Contains value if the data is of java class type.
2675                  "label": "A String", # An optional label to display in a dax UI for the element.
2676                  "boolValue": True or False, # Contains value if the data is of a boolean type.
2677                  "strValue": "A String", # Contains value if the data is of string type.
2678                  "key": "A String", # The key identifying the display data.
2679                      # This is intended to be used as a label for the display data
2680                      # when viewed in a dax monitoring system.
2681                  "int64Value": "A String", # Contains value if the data is of int64 type.
2682                  "timestampValue": "A String", # Contains value if the data is of timestamp type.
2683                },
2684              ],
2685              "outputCollectionName": [ # User  names for all collection outputs to this transform.
2686                "A String",
2687              ],
2688              "id": "A String", # SDK generated id of this transform instance.
2689            },
2690          ],
2691          "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
2692            { # Description of the composing transforms, names/ids, and input/outputs of a
2693                # stage of execution.  Some composing transforms and sources may have been
2694                # generated by the Dataflow service during execution planning.
2695              "componentSource": [ # Collections produced and consumed by component transforms of this stage.
2696                { # Description of an interstitial value between transforms in an execution
2697                    # stage.
2698                  "userName": "A String", # Human-readable name for this transform; may be user or system generated.
2699                  "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2700                      # source is most closely associated.
2701                  "name": "A String", # Dataflow service generated name for this source.
2702                },
2703              ],
2704              "kind": "A String", # Type of tranform this stage is executing.
2705              "name": "A String", # Dataflow service generated name for this stage.
2706              "outputSource": [ # Output sources for this stage.
2707                { # Description of an input or output of an execution stage.
2708                  "userName": "A String", # Human-readable name for this source; may be user or system generated.
2709                  "sizeBytes": "A String", # Size of the source, if measurable.
2710                  "name": "A String", # Dataflow service generated name for this source.
2711                  "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2712                      # source is most closely associated.
2713                },
2714              ],
2715              "inputSource": [ # Input sources for this stage.
2716                { # Description of an input or output of an execution stage.
2717                  "userName": "A String", # Human-readable name for this source; may be user or system generated.
2718                  "sizeBytes": "A String", # Size of the source, if measurable.
2719                  "name": "A String", # Dataflow service generated name for this source.
2720                  "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2721                      # source is most closely associated.
2722                },
2723              ],
2724              "componentTransform": [ # Transforms that comprise this execution stage.
2725                { # Description of a transform executed as part of an execution stage.
2726                  "userName": "A String", # Human-readable name for this transform; may be user or system generated.
2727                  "originalTransform": "A String", # User name for the original user transform with which this transform is
2728                      # most closely associated.
2729                  "name": "A String", # Dataflow service generated name for this transform.
2730                },
2731              ],
2732              "id": "A String", # Dataflow service generated id for this stage.
2733            },
2734          ],
2735          "displayData": [ # Pipeline level display data.
2736            { # Data provided with a pipeline or transform to provide descriptive info.
2737              "shortStrValue": "A String", # A possible additional shorter value to display.
2738                  # For example a java_class_name_value of com.mypackage.MyDoFn
2739                  # will be stored with MyDoFn as the short_str_value and
2740                  # com.mypackage.MyDoFn as the java_class_name value.
2741                  # short_str_value can be displayed and java_class_name_value
2742                  # will be displayed as a tooltip.
2743              "durationValue": "A String", # Contains value if the data is of duration type.
2744              "url": "A String", # An optional full URL.
2745              "floatValue": 3.14, # Contains value if the data is of float type.
2746              "namespace": "A String", # The namespace for the key. This is usually a class name or programming
2747                  # language namespace (e.g. a Python module) which defines the display data.
2748                  # This allows a dax monitoring system to specially handle the data
2749                  # and perform custom rendering.
2750              "javaClassValue": "A String", # Contains value if the data is of java class type.
2751              "label": "A String", # An optional label to display in a dax UI for the element.
2752              "boolValue": True or False, # Contains value if the data is of a boolean type.
2753              "strValue": "A String", # Contains value if the data is of string type.
2754              "key": "A String", # The key identifying the display data.
2755                  # This is intended to be used as a label for the display data
2756                  # when viewed in a dax monitoring system.
2757              "int64Value": "A String", # Contains value if the data is of int64 type.
2758              "timestampValue": "A String", # Contains value if the data is of timestamp type.
2759            },
2760          ],
2761        },
2762        "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
2763            # callers cannot mutate it.
2764          { # A message describing the state of a particular execution stage.
2765            "executionStageName": "A String", # The name of the execution stage.
2766            "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
2767            "currentStateTime": "A String", # The time at which the stage transitioned to this state.
2768          },
2769        ],
2770        "id": "A String", # The unique ID of this job.
2771            #
2772            # This field is set by the Cloud Dataflow service when the Job is
2773            # created, and is immutable for the life of the job.
2774        "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
2775            # `JOB_STATE_UPDATED`), this field contains the ID of that job.
2776        "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
2777        "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
2778            # corresponding name prefixes of the new job.
2779          "a_key": "A String",
2780        },
2781        "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
2782          "version": { # A structure describing which components and their versions of the service
2783              # are required in order to run the job.
2784            "a_key": "", # Properties of the object.
2785          },
2786          "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
2787          "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data
2788              # at rest, AKA a Customer Managed Encryption Key (CMEK).
2789              #
2790              # Format:
2791              #   projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
2792          "internalExperiments": { # Experimental settings.
2793            "a_key": "", # Properties of the object. Contains field @type with type URL.
2794          },
2795          "dataset": "A String", # The dataset for the current project where various workflow
2796              # related tables are stored.
2797              #
2798              # The supported resource type is:
2799              #
2800              # Google BigQuery:
2801              #   bigquery.googleapis.com/{dataset}
2802          "experiments": [ # The list of experiments to enable.
2803            "A String",
2804          ],
2805          "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
2806          "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
2807              # options are passed through the service and are used to recreate the
2808              # SDK pipeline options on the worker in a language agnostic and platform
2809              # independent way.
2810            "a_key": "", # Properties of the object.
2811          },
2812          "userAgent": { # A description of the process that generated the request.
2813            "a_key": "", # Properties of the object.
2814          },
2815          "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
2816              # unspecified, the service will attempt to choose a reasonable
2817              # default.  This should be in the form of the API service name,
2818              # e.g. "compute.googleapis.com".
2819          "workerPools": [ # The worker pools. At least one "harness" worker pool must be
2820              # specified in order for the job to have workers.
2821            { # Describes one particular pool of Cloud Dataflow workers to be
2822                # instantiated by the Cloud Dataflow service in order to perform the
2823                # computations required by a job.  Note that a workflow job may use
2824                # multiple pools, in order to match the various computational
2825                # requirements of the various stages of the job.
2826              "diskSourceImage": "A String", # Fully qualified source image for disks.
2827              "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
2828                  # using the standard Dataflow task runner.  Users should ignore
2829                  # this field.
2830                "workflowFileName": "A String", # The file to store the workflow in.
2831                "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
2832                    # will not be uploaded.
2833                    #
2834                    # The supported resource type is:
2835                    #
2836                    # Google Cloud Storage:
2837                    #   storage.googleapis.com/{bucket}/{object}
2838                    #   bucket.storage.googleapis.com/{object}
2839                "commandlinesFileName": "A String", # The file to store preprocessing commands in.
2840                "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
2841                  "reportingEnabled": True or False, # Whether to send work progress updates to the service.
2842                  "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
2843                      # "shuffle/v1beta1".
2844                  "workerId": "A String", # The ID of the worker running this pipeline.
2845                  "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
2846                      #
2847                      # When workers access Google Cloud APIs, they logically do so via
2848                      # relative URLs.  If this field is specified, it supplies the base
2849                      # URL to use for resolving these relative URLs.  The normative
2850                      # algorithm used is defined by RFC 1808, "Relative Uniform Resource
2851                      # Locators".
2852                      #
2853                      # If not specified, the default value is "http://www.googleapis.com/"
2854                  "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
2855                      # "dataflow/v1b3/projects".
2856                  "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
2857                      # storage.
2858                      #
2859                      # The supported resource type is:
2860                      #
2861                      # Google Cloud Storage:
2862                      #
2863                      #   storage.googleapis.com/{bucket}/{object}
2864                      #   bucket.storage.googleapis.com/{object}
2865                },
2866                "vmId": "A String", # The ID string of the VM.
2867                "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
2868                "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
2869                "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
2870                    # access the Cloud Dataflow API.
2871                  "A String",
2872                ],
2873                "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
2874                    # taskrunner; e.g. "root".
2875                "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
2876                    #
2877                    # When workers access Google Cloud APIs, they logically do so via
2878                    # relative URLs.  If this field is specified, it supplies the base
2879                    # URL to use for resolving these relative URLs.  The normative
2880                    # algorithm used is defined by RFC 1808, "Relative Uniform Resource
2881                    # Locators".
2882                    #
2883                    # If not specified, the default value is "http://www.googleapis.com/"
2884                "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
2885                    # taskrunner; e.g. "wheel".
2886                "languageHint": "A String", # The suggested backend language.
2887                "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
2888                    # console.
2889                "streamingWorkerMainClass": "A String", # The streaming worker main class name.
2890                "logDir": "A String", # The directory on the VM to store logs.
2891                "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3".
2892                "harnessCommand": "A String", # The command to launch the worker harness.
2893                "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
2894                    # temporary storage.
2895                    #
2896                    # The supported resource type is:
2897                    #
2898                    # Google Cloud Storage:
2899                    #   storage.googleapis.com/{bucket}/{object}
2900                    #   bucket.storage.googleapis.com/{object}
2901                "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
2902              },
2903              "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
2904                  # are supported.
2905              "packages": [ # Packages to be installed on workers.
2906                { # The packages that must be installed in order for a worker to run the
2907                    # steps of the Cloud Dataflow job that will be assigned to its worker
2908                    # pool.
2909                    #
2910                    # This is the mechanism by which the Cloud Dataflow SDK causes code to
2911                    # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
2912                    # might use this to install jars containing the user's code and all of the
2913                    # various dependencies (libraries, data files, etc.) required in order
2914                    # for that code to run.
2915                  "location": "A String", # The resource to read the package from. The supported resource type is:
2916                      #
2917                      # Google Cloud Storage:
2918                      #
2919                      #   storage.googleapis.com/{bucket}
2920                      #   bucket.storage.googleapis.com/
2921                  "name": "A String", # The name of the package.
2922                },
2923              ],
2924              "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
2925                  # service will attempt to choose a reasonable default.
2926              "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
2927                  # the service will use the network "default".
2928              "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
2929                  # will attempt to choose a reasonable default.
2930              "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
2931                  # attempt to choose a reasonable default.
2932              "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
2933                  # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
2934                  # `TEARDOWN_NEVER`.
2935                  # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
2936                  # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
2937                  # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
2938                  # down.
2939                  #
2940                  # If the workers are not torn down by the service, they will
2941                  # continue to run and use Google Compute Engine VM resources in the
2942                  # user's project until they are explicitly terminated by the user.
2943                  # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
2944                  # policy except for small, manually supervised test jobs.
2945                  #
2946                  # If unknown or unspecified, the service will attempt to choose a reasonable
2947                  # default.
2948              "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
2949                  # Compute Engine API.
2950              "ipConfiguration": "A String", # Configuration for VM IPs.
2951              "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
2952                  # service will choose a number of threads (according to the number of cores
2953                  # on the selected machine type for batch, or 1 by convention for streaming).
2954              "poolArgs": { # Extra arguments for this worker pool.
2955                "a_key": "", # Properties of the object. Contains field @type with type URL.
2956              },
2957              "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
2958                  # execute the job.  If zero or unspecified, the service will
2959                  # attempt to choose a reasonable default.
2960              "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
2961                  # harness, residing in Google Container Registry.
2962              "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
2963                  # the form "regions/REGION/subnetworks/SUBNETWORK".
2964              "dataDisks": [ # Data disks that are used by a VM in this workflow.
2965                { # Describes the data disk used by a workflow job.
2966                  "mountPoint": "A String", # Directory in a VM where disk is mounted.
2967                  "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
2968                      # attempt to choose a reasonable default.
2969                  "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
2970                      # must be a disk type appropriate to the project and zone in which
2971                      # the workers will run.  If unknown or unspecified, the service
2972                      # will attempt to choose a reasonable default.
2973                      #
2974                      # For example, the standard persistent disk type is a resource name
2975                      # typically ending in "pd-standard".  If SSD persistent disks are
2976                      # available, the resource name typically ends with "pd-ssd".  The
2977                      # actual valid values are defined by the Google Compute Engine API,
2978                      # not by the Cloud Dataflow API; consult the Google Compute Engine
2979                      # documentation for more information about determining the set of
2980                      # available disk types for a particular project and zone.
2981                      #
2982                      # Google Compute Engine Disk types are local to a particular
2983                      # project in a particular zone, and so the resource name will
2984                      # typically look something like this:
2985                      #
2986                      # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
2987                },
2988              ],
2989              "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
2990                "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
2991                "algorithm": "A String", # The algorithm to use for autoscaling.
2992              },
2993              "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
2994                  # select a default set of packages which are useful to worker
2995                  # harnesses written in a particular language.
2996              "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
2997                  # attempt to choose a reasonable default.
2998              "metadata": { # Metadata to set on the Google Compute Engine VMs.
2999                "a_key": "A String",
3000              },
3001            },
3002          ],
3003          "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
3004              # storage.  The system will append the suffix "/temp-{JOBNAME}" to
3005              # this resource prefix, where {JOBNAME} is the value of the
3006              # job_name field.  The resulting bucket and object prefix is used
3007              # as the prefix of the resources used to store temporary data
3008              # needed during the job execution.  NOTE: This will override the
3009              # value in taskrunner_settings.
3010              # The supported resource type is:
3011              #
3012              # Google Cloud Storage:
3013              #
3014              #   storage.googleapis.com/{bucket}/{object}
3015              #   bucket.storage.googleapis.com/{object}
3016        },
3017        "location": "A String", # The [regional endpoint]
3018            # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
3019            # contains this job.
3020        "tempFiles": [ # A set of files the system should be aware of that are used
3021            # for temporary storage. These temporary files will be
3022            # removed on job completion.
3023            # No duplicates are allowed.
3024            # No file patterns are supported.
3025            #
3026            # The supported files are:
3027            #
3028            # Google Cloud Storage:
3029            #
3030            #    storage.googleapis.com/{bucket}/{object}
3031            #    bucket.storage.googleapis.com/{object}
3032          "A String",
3033        ],
3034        "type": "A String", # The type of Cloud Dataflow job.
3035        "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
3036            # If this field is set, the service will ensure its uniqueness.
3037            # The request to create a job will fail if the service has knowledge of a
3038            # previously submitted job with the same client's ID and job name.
3039            # The caller may use this field to ensure idempotence of job
3040            # creation across retried attempts to create a job.
3041            # By default, the field is empty and, in that case, the service ignores it.
3042        "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
3043            # snapshot.
3044        "stepsLocation": "A String", # The GCS location where the steps are stored.
3045        "currentStateTime": "A String", # The timestamp associated with the current state.
3046        "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
3047            # Flexible resource scheduling jobs are started with some delay after job
3048            # creation, so start_time is unset before start and is updated when the
3049            # job is started by the Cloud Dataflow service. For other jobs, start_time
3050            # always equals create_time and is immutable and set by the Cloud Dataflow
3051            # service.
3052        "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
3053            # Cloud Dataflow service.
3054        "requestedState": "A String", # The job's requested state.
3055            #
3056            # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
3057            # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
3058            # also be used to directly set a job's requested state to
3059            # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
3060            # job if it has not already reached a terminal state.
3061        "name": "A String", # The user-specified Cloud Dataflow job name.
3062            #
3063            # Only one Job with a given name may exist in a project at any
3064            # given time. If a caller attempts to create a Job with the same
3065            # name as an already-existing Job, the attempt returns the
3066            # existing Job.
3067            #
3068            # The name must match the regular expression
3069            # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
3070        "steps": [ # Exactly one of step or steps_location should be specified.
3071            #
3072            # The top-level steps that constitute the entire job.
3073          { # Defines a particular step within a Cloud Dataflow job.
3074              #
3075              # A job consists of multiple steps, each of which performs some
3076              # specific operation as part of the overall job.  Data is typically
3077              # passed from one step to another as part of the job.
3078              #
3079              # Here's an example of a sequence of steps which together implement a
3080              # Map-Reduce job:
3081              #
3082              #   * Read a collection of data from some source, parsing the
3083              #     collection's elements.
3084              #
3085              #   * Validate the elements.
3086              #
3087              #   * Apply a user-defined function to map each element to some value
3088              #     and extract an element-specific key value.
3089              #
3090              #   * Group elements with the same key into a single element with
3091              #     that key, transforming a multiply-keyed collection into a
3092              #     uniquely-keyed collection.
3093              #
3094              #   * Write the elements out to some data sink.
3095              #
3096              # Note that the Cloud Dataflow service may be used to run many different
3097              # types of jobs, not just Map-Reduce.
3098            "kind": "A String", # The kind of step in the Cloud Dataflow job.
3099            "properties": { # Named properties associated with the step. Each kind of
3100                # predefined step has its own required set of properties.
3101                # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
3102              "a_key": "", # Properties of the object.
3103            },
3104            "name": "A String", # The name that identifies the step. This must be unique for each
3105                # step with respect to all other steps in the Cloud Dataflow job.
3106          },
3107        ],
3108        "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
3109            # of the job it replaced.
3110            #
3111            # When sending a `CreateJobRequest`, you can update a job by specifying it
3112            # here. The job named here is stopped, and its intermediate state is
3113            # transferred to this job.
3114        "currentState": "A String", # The current state of the job.
3115            #
3116            # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
3117            # specified.
3118            #
3119            # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
3120            # terminal state. After a job has reached a terminal state, no
3121            # further state updates may be made.
3122            #
3123            # This field may be mutated by the Cloud Dataflow service;
3124            # callers cannot mutate it.
3125        "executionInfo": { # Additional information about how a Cloud Dataflow job will be
3126            # executed that isn't contained in the submitted job. # Deprecated.
3127          "stages": { # A mapping from each stage to the information about that stage.
3128            "a_key": { # Contains information about how a particular
3129                # google.dataflow.v1beta3.Step will be executed.
3130              "stepName": [ # The steps associated with the execution stage.
3131                  # Note that stages may have several steps, and that a given step
3132                  # might be run by more than one stage.
3133                "A String",
3134              ],
3135            },
3136          },
3137        },
3138      },
3139    ],
3140  }</pre>
3141</div>
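<div class="method">
  <p class="firstline">A minimal usage sketch for <code>list()</code>, assuming the google-api-python-client
  library with Application Default Credentials; the project ID below is a placeholder, not a value from
  this reference.</p>
  <pre>
# Sketch only: build a Dataflow v1b3 client and list jobs in one project.
from googleapiclient.discovery import build

# Credentials are assumed to be picked up from the environment (Application Default Credentials).
dataflow = build('dataflow', 'v1b3')

# 'my-project' is a placeholder project ID.
request = dataflow.projects().jobs().list(projectId='my-project', pageSize=50)
response = request.execute()

# Each entry in the response's job list is shaped like the Job object documented above.
for job in response.get('jobs', []):
    print(job['id'], job.get('name'), job.get('currentState'))
  </pre>
</div>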
3142
3143<div class="method">
3144    <code class="details" id="list_next">list_next(previous_request, previous_response)</code>
3145  <pre>Retrieves the next page of results.
3146
3147Args:
3148  previous_request: The request for the previous page. (required)
3149  previous_response: The response from the request for the previous page. (required)
3150
3151Returns:
3152  A request object that you can call 'execute()' on to request the next
3153  page. Returns None if there are no more items in the collection.
3154    </pre>
3155</div>
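<div class="method">
  <p class="firstline">A minimal pagination sketch combining <code>list()</code> and <code>list_next()</code>;
  it assumes the <code>dataflow</code> client built as in the sketch above and a placeholder project ID.</p>
  <pre>
# Sketch only: walk every page of results until list_next() returns None.
request = dataflow.projects().jobs().list(projectId='my-project')
while request is not None:
    response = request.execute()
    for job in response.get('jobs', []):
        print(job['id'], job.get('currentState'))
    # list_next returns None when the collection has no more pages.
    request = dataflow.projects().jobs().list_next(
        previous_request=request, previous_response=response)
  </pre>
</div>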
3156
3157<div class="method">
3158    <code class="details" id="snapshot">snapshot(projectId, jobId, body, x__xgafv=None)</code>
3159  <pre>Snapshot the state of a streaming job.
3160
3161Args:
3162  projectId: string, The project which owns the job to be snapshotted. (required)
3163  jobId: string, The job to be snapshotted. (required)
3164  body: object, The request body. (required)
3165    The object takes the form of:
3166
3167{ # Request to create a snapshot of a job.
3168    "location": "A String", # The location that contains this job.
3169    "ttl": "A String", # TTL for the snapshot.
3170  }
3171
3172  x__xgafv: string, V1 error format.
3173    Allowed values
3174      1 - v1 error format
3175      2 - v2 error format
3176
3177Returns:
3178  An object of the form:
3179
3180    { # Represents a snapshot of a job.
3181    "sourceJobId": "A String", # The job this snapshot was created from.
3182    "projectId": "A String", # The project this snapshot belongs to.
3183    "creationTime": "A String", # The time this snapshot was created.
3184    "state": "A String", # State of the snapshot.
3185    "ttl": "A String", # The time after which this snapshot will be automatically deleted.
3186    "id": "A String", # The unique ID of this snapshot.
3187  }</pre>
3188</div>
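<div class="method">
  <p class="firstline">A minimal sketch of a <code>snapshot()</code> call; the project ID, job ID, location,
  and TTL below are placeholders and assume the <code>dataflow</code> client built as in the earlier sketch.</p>
  <pre>
# Sketch only: request a snapshot of a running streaming job.
body = {
    'location': 'us-central1',  # regional endpoint that contains the job (placeholder)
    'ttl': '604800s',           # snapshot TTL; '604800s' (7 days) is just an example value
}
snapshot = dataflow.projects().jobs().snapshot(
    projectId='my-project', jobId='2019-01-01_00_00_00-1234567890', body=body).execute()

# The response mirrors the Snapshot object documented above.
print(snapshot.get('id'), snapshot.get('state'), snapshot.get('ttl'))
  </pre>
</div>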
3189
3190<div class="method">
3191    <code class="details" id="update">update(projectId, jobId, body, location=None, x__xgafv=None)</code>
3192  <pre>Updates the state of an existing Cloud Dataflow job.
3193
3194To update the state of an existing job, we recommend using
3195`projects.locations.jobs.update` with a [regional endpoint]
3196(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using
3197`projects.jobs.update` is not recommended, as you can only update the state
3198of jobs that are running in `us-central1`.
3199
3200Args:
3201  projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
3202  jobId: string, The job ID. (required)
3203  body: object, The request body. (required)
3204    The object takes the form of:
3205
3206{ # Defines a job to be run by the Cloud Dataflow service.
3207  "labels": { # User-defined labels for this job.
3208      #
3209      # The labels map can contain no more than 64 entries.  Entries of the labels
3210      # map are UTF8 strings that comply with the following restrictions:
3211      #
3212      # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
3213      # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
3214      # * Both keys and values are additionally constrained to be <= 128 bytes in
3215      # size.
3216    "a_key": "A String",
3217  },
3218  "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the
3219      # ListJob response and Job SUMMARY view. # This field is populated by the Dataflow
3220      # service to support filtering jobs by the metadata values provided here.
3221      # Populated for ListJobs and all GetJob views SUMMARY and higher.
3222    "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
3223      "versionDisplayName": "A String", # A readable string describing the version of the SDK.
3224      "version": "A String", # The version of the SDK used to run the job.
3225      "sdkSupportStatus": "A String", # The support status for this SDK version.
3226    },
3227    "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
3228      { # Metadata for a PubSub connector used by the job.
3229        "topic": "A String", # Topic accessed in the connection.
3230        "subscription": "A String", # Subscription used in the connection.
3231      },
3232    ],
3233    "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
3234      { # Metadata for a Datastore connector used by the job.
3235        "projectId": "A String", # ProjectId accessed in the connection.
3236        "namespace": "A String", # Namespace used in the connection.
3237      },
3238    ],
3239    "fileDetails": [ # Identification of a File source used in the Dataflow job.
3240      { # Metadata for a File connector used by the job.
3241        "filePattern": "A String", # File Pattern used to access files by the connector.
3242      },
3243    ],
3244    "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
3245      { # Metadata for a Spanner connector used by the job.
3246        "instanceId": "A String", # InstanceId accessed in the connection.
3247        "projectId": "A String", # ProjectId accessed in the connection.
3248        "databaseId": "A String", # DatabaseId accessed in the connection.
3249      },
3250    ],
3251    "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
3252      { # Metadata for a BigTable connector used by the job.
3253        "instanceId": "A String", # InstanceId accessed in the connection.
3254        "projectId": "A String", # ProjectId accessed in the connection.
3255        "tableId": "A String", # TableId accessed in the connection.
3256      },
3257    ],
3258    "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
3259      { # Metadata for a BigQuery connector used by the job.
3260        "projectId": "A String", # Project accessed in the connection.
3261        "dataset": "A String", # Dataset accessed in the connection.
3262        "table": "A String", # Table accessed in the connection.
3263        "query": "A String", # Query used to access data in the connection.
3264      },
3265    ],
3266  },
3267  "pipelineDescription": { # A descriptive representation of the submitted pipeline as well as the executed
3268      # form.  This data is provided by the Dataflow service for ease of visualizing
3269      # the pipeline and interpreting Dataflow provided metrics. # Preliminary field:
3270      # The format of this data may change at any time. A description of the user
3271      # pipeline and stages through which it is executed. Created by the Cloud
3272      # Dataflow service.  Only retrieved with JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
3273    "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
3274      { # Description of the type, names/ids, and input/outputs for a transform.
3275        "kind": "A String", # Type of transform.
3276        "name": "A String", # User provided name for this transform instance.
3277        "inputCollectionName": [ # User names for all collection inputs to this transform.
3278          "A String",
3279        ],
3280        "displayData": [ # Transform-specific display data.
3281          { # Data provided with a pipeline or transform to provide descriptive info.
3282            "shortStrValue": "A String", # A possible additional shorter value to display.
3283                # For example a java_class_name_value of com.mypackage.MyDoFn
3284                # will be stored with MyDoFn as the short_str_value and
3285                # com.mypackage.MyDoFn as the java_class_name value.
3286                # short_str_value can be displayed and java_class_name_value
3287                # will be displayed as a tooltip.
3288            "durationValue": "A String", # Contains value if the data is of duration type.
3289            "url": "A String", # An optional full URL.
3290            "floatValue": 3.14, # Contains value if the data is of float type.
3291            "namespace": "A String", # The namespace for the key. This is usually a class name or programming
3292                # language namespace (e.g. a Python module) which defines the display data.
3293                # This allows a dax monitoring system to specially handle the data
3294                # and perform custom rendering.
3295            "javaClassValue": "A String", # Contains value if the data is of java class type.
3296            "label": "A String", # An optional label to display in a dax UI for the element.
3297            "boolValue": True or False, # Contains value if the data is of a boolean type.
3298            "strValue": "A String", # Contains value if the data is of string type.
3299            "key": "A String", # The key identifying the display data.
3300                # This is intended to be used as a label for the display data
3301                # when viewed in a dax monitoring system.
3302            "int64Value": "A String", # Contains value if the data is of int64 type.
3303            "timestampValue": "A String", # Contains value if the data is of timestamp type.
3304          },
3305        ],
3306        "outputCollectionName": [ # User names for all collection outputs to this transform.
3307          "A String",
3308        ],
3309        "id": "A String", # SDK generated id of this transform instance.
3310      },
3311    ],
3312    "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
3313      { # Description of the composing transforms, names/ids, and input/outputs of a
3314          # stage of execution.  Some composing transforms and sources may have been
3315          # generated by the Dataflow service during execution planning.
3316        "componentSource": [ # Collections produced and consumed by component transforms of this stage.
3317          { # Description of an interstitial value between transforms in an execution
3318              # stage.
3319            "userName": "A String", # Human-readable name for this transform; may be user or system generated.
3320            "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
3321                # source is most closely associated.
3322            "name": "A String", # Dataflow service generated name for this source.
3323          },
3324        ],
3325        "kind": "A String", # Type of transform this stage is executing.
3326        "name": "A String", # Dataflow service generated name for this stage.
3327        "outputSource": [ # Output sources for this stage.
3328          { # Description of an input or output of an execution stage.
3329            "userName": "A String", # Human-readable name for this source; may be user or system generated.
3330            "sizeBytes": "A String", # Size of the source, if measurable.
3331            "name": "A String", # Dataflow service generated name for this source.
3332            "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
3333                # source is most closely associated.
3334          },
3335        ],
3336        "inputSource": [ # Input sources for this stage.
3337          { # Description of an input or output of an execution stage.
3338            "userName": "A String", # Human-readable name for this source; may be user or system generated.
3339            "sizeBytes": "A String", # Size of the source, if measurable.
3340            "name": "A String", # Dataflow service generated name for this source.
3341            "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
3342                # source is most closely associated.
3343          },
3344        ],
3345        "componentTransform": [ # Transforms that comprise this execution stage.
3346          { # Description of a transform executed as part of an execution stage.
3347            "userName": "A String", # Human-readable name for this transform; may be user or system generated.
3348            "originalTransform": "A String", # User name for the original user transform with which this transform is
3349                # most closely associated.
3350            "name": "A String", # Dataflow service generated name for this transform.
3351          },
3352        ],
3353        "id": "A String", # Dataflow service generated id for this stage.
3354      },
3355    ],
3356    "displayData": [ # Pipeline level display data.
3357      { # Data provided with a pipeline or transform to provide descriptive info.
3358        "shortStrValue": "A String", # A possible additional shorter value to display.
3359            # For example a java_class_name_value of com.mypackage.MyDoFn
3360            # will be stored with MyDoFn as the short_str_value and
3361            # com.mypackage.MyDoFn as the java_class_name value.
3362            # short_str_value can be displayed and java_class_name_value
3363            # will be displayed as a tooltip.
3364        "durationValue": "A String", # Contains value if the data is of duration type.
3365        "url": "A String", # An optional full URL.
3366        "floatValue": 3.14, # Contains value if the data is of float type.
3367        "namespace": "A String", # The namespace for the key. This is usually a class name or programming
3368            # language namespace (e.g. a Python module) which defines the display data.
3369            # This allows a dax monitoring system to specially handle the data
3370            # and perform custom rendering.
3371        "javaClassValue": "A String", # Contains value if the data is of java class type.
3372        "label": "A String", # An optional label to display in a dax UI for the element.
3373        "boolValue": True or False, # Contains value if the data is of a boolean type.
3374        "strValue": "A String", # Contains value if the data is of string type.
3375        "key": "A String", # The key identifying the display data.
3376            # This is intended to be used as a label for the display data
3377            # when viewed in a dax monitoring system.
3378        "int64Value": "A String", # Contains value if the data is of int64 type.
3379        "timestampValue": "A String", # Contains value if the data is of timestamp type.
3380      },
3381    ],
3382  },
3383  "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
3384      # callers cannot mutate it.
3385    { # A message describing the state of a particular execution stage.
3386      "executionStageName": "A String", # The name of the execution stage.
3387      "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
3388      "currentStateTime": "A String", # The time at which the stage transitioned to this state.
3389    },
3390  ],
3391  "id": "A String", # The unique ID of this job.
3392      #
3393      # This field is set by the Cloud Dataflow service when the Job is
3394      # created, and is immutable for the life of the job.
3395  "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
3396      # `JOB_STATE_UPDATED`), this field contains the ID of that job.
3397  "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
3398  "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
3399      # corresponding name prefixes of the new job.
3400    "a_key": "A String",
3401  },
3402  "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
3403    "version": { # A structure describing which components and their versions of the service
3404        # are required in order to run the job.
3405      "a_key": "", # Properties of the object.
3406    },
3407    "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
3408    "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data
3409        # at rest, AKA a Customer Managed Encryption Key (CMEK).
3410        #
3411        # Format:
3412        #   projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
3413    "internalExperiments": { # Experimental settings.
3414      "a_key": "", # Properties of the object. Contains field @type with type URL.
3415    },
3416    "dataset": "A String", # The dataset for the current project where various workflow
3417        # related tables are stored.
3418        #
3419        # The supported resource type is:
3420        #
3421        # Google BigQuery:
3422        #   bigquery.googleapis.com/{dataset}
3423    "experiments": [ # The list of experiments to enable.
3424      "A String",
3425    ],
3426    "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
3427    "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
3428        # options are passed through the service and are used to recreate the
3429        # SDK pipeline options on the worker in a language agnostic and platform
3430        # independent way.
3431      "a_key": "", # Properties of the object.
3432    },
3433    "userAgent": { # A description of the process that generated the request.
3434      "a_key": "", # Properties of the object.
3435    },
3436    "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
3437        # unspecified, the service will attempt to choose a reasonable
3438        # default.  This should be in the form of the API service name,
3439        # e.g. "compute.googleapis.com".
3440    "workerPools": [ # The worker pools. At least one "harness" worker pool must be
3441        # specified in order for the job to have workers.
3442      { # Describes one particular pool of Cloud Dataflow workers to be
3443          # instantiated by the Cloud Dataflow service in order to perform the
3444          # computations required by a job.  Note that a workflow job may use
3445          # multiple pools, in order to match the various computational
3446          # requirements of the various stages of the job.
3447        "diskSourceImage": "A String", # Fully qualified source image for disks.
3448        "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
3449            # using the standard Dataflow task runner.  Users should ignore
3450            # this field.
3451          "workflowFileName": "A String", # The file to store the workflow in.
3452          "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
3453              # will not be uploaded.
3454              #
3455              # The supported resource type is:
3456              #
3457              # Google Cloud Storage:
3458              #   storage.googleapis.com/{bucket}/{object}
3459              #   bucket.storage.googleapis.com/{object}
3460          "commandlinesFileName": "A String", # The file to store preprocessing commands in.
3461          "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
3462            "reportingEnabled": True or False, # Whether to send work progress updates to the service.
3463            "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
3464                # "shuffle/v1beta1".
3465            "workerId": "A String", # The ID of the worker running this pipeline.
3466            "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
3467                #
3468                # When workers access Google Cloud APIs, they logically do so via
3469                # relative URLs.  If this field is specified, it supplies the base
3470                # URL to use for resolving these relative URLs.  The normative
3471                # algorithm used is defined by RFC 1808, "Relative Uniform Resource
3472                # Locators".
3473                #
3474                # If not specified, the default value is "http://www.googleapis.com/"
3475            "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
3476                # "dataflow/v1b3/projects".
3477            "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
3478                # storage.
3479                #
3480                # The supported resource type is:
3481                #
3482                # Google Cloud Storage:
3483                #
3484                #   storage.googleapis.com/{bucket}/{object}
3485                #   bucket.storage.googleapis.com/{object}
3486          },
3487          "vmId": "A String", # The ID string of the VM.
3488          "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
3489          "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
3490          "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
3491              # access the Cloud Dataflow API.
3492            "A String",
3493          ],
3494          "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
3495              # taskrunner; e.g. "root".
3496          "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
3497              #
3498              # When workers access Google Cloud APIs, they logically do so via
3499              # relative URLs.  If this field is specified, it supplies the base
3500              # URL to use for resolving these relative URLs.  The normative
3501              # algorithm used is defined by RFC 1808, "Relative Uniform Resource
3502              # Locators".
3503              #
3504              # If not specified, the default value is "http://www.googleapis.com/"
3505          "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
3506              # taskrunner; e.g. "wheel".
3507          "languageHint": "A String", # The suggested backend language.
3508          "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
3509              # console.
3510          "streamingWorkerMainClass": "A String", # The streaming worker main class name.
3511          "logDir": "A String", # The directory on the VM to store logs.
3512          "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3".
3513          "harnessCommand": "A String", # The command to launch the worker harness.
3514          "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
3515              # temporary storage.
3516              #
3517              # The supported resource type is:
3518              #
3519              # Google Cloud Storage:
3520              #   storage.googleapis.com/{bucket}/{object}
3521              #   bucket.storage.googleapis.com/{object}
3522          "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
3523        },
3524        "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
3525            # are supported.
3526        "packages": [ # Packages to be installed on workers.
3527          { # The packages that must be installed in order for a worker to run the
3528              # steps of the Cloud Dataflow job that will be assigned to its worker
3529              # pool.
3530              #
3531              # This is the mechanism by which the Cloud Dataflow SDK causes code to
3532              # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
3533              # might use this to install jars containing the user's code and all of the
3534              # various dependencies (libraries, data files, etc.) required in order
3535              # for that code to run.
3536            "location": "A String", # The resource to read the package from. The supported resource type is:
3537                #
3538                # Google Cloud Storage:
3539                #
3540                #   storage.googleapis.com/{bucket}
3541                #   bucket.storage.googleapis.com/
3542            "name": "A String", # The name of the package.
3543          },
3544        ],
3545        "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
3546            # service will attempt to choose a reasonable default.
3547        "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
3548            # the service will use the network "default".
3549        "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
3550            # will attempt to choose a reasonable default.
3551        "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
3552            # attempt to choose a reasonable default.
3553        "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
3554            # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
3555            # `TEARDOWN_NEVER`.
3556            # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
3557            # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
3558            # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
3559            # down.
3560            #
3561            # If the workers are not torn down by the service, they will
3562            # continue to run and use Google Compute Engine VM resources in the
3563            # user's project until they are explicitly terminated by the user.
3564            # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
3565            # policy except for small, manually supervised test jobs.
3566            #
3567            # If unknown or unspecified, the service will attempt to choose a reasonable
3568            # default.
3569        "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
3570            # Compute Engine API.
3571        "ipConfiguration": "A String", # Configuration for VM IPs.
3572        "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
3573            # service will choose a number of threads (according to the number of cores
3574            # on the selected machine type for batch, or 1 by convention for streaming).
3575        "poolArgs": { # Extra arguments for this worker pool.
3576          "a_key": "", # Properties of the object. Contains field @type with type URL.
3577        },
3578        "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
3579            # execute the job.  If zero or unspecified, the service will
3580            # attempt to choose a reasonable default.
3581        "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
3582            # harness, residing in Google Container Registry.
3583        "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
3584            # the form "regions/REGION/subnetworks/SUBNETWORK".
3585        "dataDisks": [ # Data disks that are used by a VM in this workflow.
3586          { # Describes the data disk used by a workflow job.
3587            "mountPoint": "A String", # Directory in a VM where disk is mounted.
3588            "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
3589                # attempt to choose a reasonable default.
3590            "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
3591                # must be a disk type appropriate to the project and zone in which
3592                # the workers will run.  If unknown or unspecified, the service
3593                # will attempt to choose a reasonable default.
3594                #
3595                # For example, the standard persistent disk type is a resource name
3596                # typically ending in "pd-standard".  If SSD persistent disks are
3597                # available, the resource name typically ends with "pd-ssd".  The
3598                # actual valid values are defined by the Google Compute Engine API,
3599                # not by the Cloud Dataflow API; consult the Google Compute Engine
3600                # documentation for more information about determining the set of
3601                # available disk types for a particular project and zone.
3602                #
3603                # Google Compute Engine Disk types are local to a particular
3604                # project in a particular zone, and so the resource name will
3605                # typically look something like this:
3606                #
3607                # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
3608          },
3609        ],
3610        "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
3611          "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
3612          "algorithm": "A String", # The algorithm to use for autoscaling.
3613        },
3614        "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
3615            # select a default set of packages which are useful to worker
3616            # harnesses written in a particular language.
3617        "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
3618            # attempt to choose a reasonable default.
3619        "metadata": { # Metadata to set on the Google Compute Engine VMs.
3620          "a_key": "A String",
3621        },
3622      },
3623    ],
3624    "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
3625        # storage.  The system will append the suffix "/temp-{JOBNAME}" to
3626        # this resource prefix, where {JOBNAME} is the value of the
3627        # job_name field.  The resulting bucket and object prefix is used
3628        # as the prefix of the resources used to store temporary data
3629        # needed during the job execution.  NOTE: This will override the
3630        # value in taskrunner_settings.
3631        # The supported resource type is:
3632        #
3633        # Google Cloud Storage:
3634        #
3635        #   storage.googleapis.com/{bucket}/{object}
3636        #   bucket.storage.googleapis.com/{object}
3637  },
3638  "location": "A String", # The [regional endpoint]
3639      # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
3640      # contains this job.
3641  "tempFiles": [ # A set of files the system should be aware of that are used
3642      # for temporary storage. These temporary files will be
3643      # removed on job completion.
3644      # No duplicates are allowed.
3645      # No file patterns are supported.
3646      #
3647      # The supported files are:
3648      #
3649      # Google Cloud Storage:
3650      #
3651      #    storage.googleapis.com/{bucket}/{object}
3652      #    bucket.storage.googleapis.com/{object}
3653    "A String",
3654  ],
3655  "type": "A String", # The type of Cloud Dataflow job.
3656  "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
3657      # If this field is set, the service will ensure its uniqueness.
3658      # The request to create a job will fail if the service has knowledge of a
3659      # previously submitted job with the same client's ID and job name.
3660      # The caller may use this field to ensure idempotence of job
3661      # creation across retried attempts to create a job.
3662      # By default, the field is empty and, in that case, the service ignores it.
3663  "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
3664      # snapshot.
3665  "stepsLocation": "A String", # The GCS location where the steps are stored.
3666  "currentStateTime": "A String", # The timestamp associated with the current state.
3667  "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
3668      # Flexible resource scheduling jobs are started with some delay after job
3669      # creation, so start_time is unset before start and is updated when the
3670      # job is started by the Cloud Dataflow service. For other jobs, start_time
3671      # always equals create_time and is immutable and set by the Cloud Dataflow
3672      # service.
3673  "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
3674      # Cloud Dataflow service.
3675  "requestedState": "A String", # The job's requested state.
3676      #
3677      # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
3678      # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
3679      # also be used to directly set a job's requested state to
3680      # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
3681      # job if it has not already reached a terminal state.
3682  "name": "A String", # The user-specified Cloud Dataflow job name.
3683      #
3684      # Only one Job with a given name may exist in a project at any
3685      # given time. If a caller attempts to create a Job with the same
3686      # name as an already-existing Job, the attempt returns the
3687      # existing Job.
3688      #
3689      # The name must match the regular expression
3690      # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
3691  "steps": [ # Exactly one of step or steps_location should be specified.
3692      #
3693      # The top-level steps that constitute the entire job.
3694    { # Defines a particular step within a Cloud Dataflow job.
3695        #
3696        # A job consists of multiple steps, each of which performs some
3697        # specific operation as part of the overall job.  Data is typically
3698        # passed from one step to another as part of the job.
3699        #
3700        # Here's an example of a sequence of steps which together implement a
3701        # Map-Reduce job:
3702        #
3703        #   * Read a collection of data from some source, parsing the
3704        #     collection's elements.
3705        #
3706        #   * Validate the elements.
3707        #
3708        #   * Apply a user-defined function to map each element to some value
3709        #     and extract an element-specific key value.
3710        #
3711        #   * Group elements with the same key into a single element with
3712        #     that key, transforming a multiply-keyed collection into a
3713        #     uniquely-keyed collection.
3714        #
3715        #   * Write the elements out to some data sink.
3716        #
3717        # Note that the Cloud Dataflow service may be used to run many different
3718        # types of jobs, not just Map-Reduce.
3719      "kind": "A String", # The kind of step in the Cloud Dataflow job.
3720      "properties": { # Named properties associated with the step. Each kind of
3721          # predefined step has its own required set of properties.
3722          # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
3723        "a_key": "", # Properties of the object.
3724      },
3725      "name": "A String", # The name that identifies the step. This must be unique for each
3726          # step with respect to all other steps in the Cloud Dataflow job.
3727    },
3728  ],
3729  "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
3730      # of the job it replaced.
3731      #
3732      # When sending a `CreateJobRequest`, you can update a job by specifying it
3733      # here. The job named here is stopped, and its intermediate state is
3734      # transferred to this job.
3735  "currentState": "A String", # The current state of the job.
3736      #
3737      # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
3738      # specified.
3739      #
3740      # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
3741      # terminal state. After a job has reached a terminal state, no
3742      # further state updates may be made.
3743      #
3744      # This field may be mutated by the Cloud Dataflow service;
3745      # callers cannot mutate it.
3746  "executionInfo": { # Deprecated. Additional information about how a Cloud Dataflow
3747      # job will be executed that isn't contained in the submitted job.
3748    "stages": { # A mapping from each stage to the information about that stage.
3749      "a_key": { # Contains information about how a particular
3750          # google.dataflow.v1beta3.Step will be executed.
3751        "stepName": [ # The steps associated with the execution stage.
3752            # Note that stages may have several steps, and that a given step
3753            # might be run by more than one stage.
3754          "A String",
3755        ],
3756      },
3757    },
3758  },
3759}
3760
3761  location: string, The [regional endpoint]
3762(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
3763contains this job.
3764  x__xgafv: string, V1 error format.
3765    Allowed values
3766      1 - v1 error format
3767      2 - v2 error format
3768
3769Returns:
3770  An object of the form:
3771
3772    { # Defines a job to be run by the Cloud Dataflow service.
3773    "labels": { # User-defined labels for this job.
3774        #
3775        # The labels map can contain no more than 64 entries.  Entries of the labels
3776        # map are UTF8 strings that comply with the following restrictions:
3777        #
3778        # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
3779        # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
3780        # * Both keys and values are additionally constrained to be <= 128 bytes in
3781        # size.
3782      "a_key": "A String",
3783    },
3784    "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the
3785        # ListJob response and Job SUMMARY view. This field is populated by the
3786        # Dataflow service to support filtering jobs by the metadata values provided
3787        # here. Populated for ListJobs and all GetJob views SUMMARY and higher.
3788      "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
3789        "versionDisplayName": "A String", # A readable string describing the version of the SDK.
3790        "version": "A String", # The version of the SDK used to run the job.
3791        "sdkSupportStatus": "A String", # The support status for this SDK version.
3792      },
3793      "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
3794        { # Metadata for a PubSub connector used by the job.
3795          "topic": "A String", # Topic accessed in the connection.
3796          "subscription": "A String", # Subscription used in the connection.
3797        },
3798      ],
3799      "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
3800        { # Metadata for a Datastore connector used by the job.
3801          "projectId": "A String", # ProjectId accessed in the connection.
3802          "namespace": "A String", # Namespace used in the connection.
3803        },
3804      ],
3805      "fileDetails": [ # Identification of a File source used in the Dataflow job.
3806        { # Metadata for a File connector used by the job.
3807          "filePattern": "A String", # File Pattern used to access files by the connector.
3808        },
3809      ],
3810      "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
3811        { # Metadata for a Spanner connector used by the job.
3812          "instanceId": "A String", # InstanceId accessed in the connection.
3813          "projectId": "A String", # ProjectId accessed in the connection.
3814          "databaseId": "A String", # DatabaseId accessed in the connection.
3815        },
3816      ],
3817      "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
3818        { # Metadata for a BigTable connector used by the job.
3819          "instanceId": "A String", # InstanceId accessed in the connection.
3820          "projectId": "A String", # ProjectId accessed in the connection.
3821          "tableId": "A String", # TableId accessed in the connection.
3822        },
3823      ],
3824      "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
3825        { # Metadata for a BigQuery connector used by the job.
3826          "projectId": "A String", # Project accessed in the connection.
3827          "dataset": "A String", # Dataset accessed in the connection.
3828          "table": "A String", # Table accessed in the connection.
3829          "query": "A String", # Query used to access data in the connection.
3830        },
3831      ],
3832    },
3833    "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed
3834        # form.  This data is provided by the Dataflow service for ease of visualizing
3835        # the pipeline and interpreting Dataflow provided metrics. Preliminary field:
3836        # The format of this data may change at any time. A description of the user
3837        # pipeline and stages through which it is executed. Created by Cloud Dataflow
3838        # service.  Only retrieved with JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
3839      "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
3840        { # Description of the type, names/ids, and input/outputs for a transform.
3841          "kind": "A String", # Type of transform.
3842          "name": "A String", # User provided name for this transform instance.
3843          "inputCollectionName": [ # User names for all collection inputs to this transform.
3844            "A String",
3845          ],
3846          "displayData": [ # Transform-specific display data.
3847            { # Data provided with a pipeline or transform to provide descriptive info.
3848              "shortStrValue": "A String", # A possible additional shorter value to display.
3849                  # For example a java_class_name_value of com.mypackage.MyDoFn
3850                  # will be stored with MyDoFn as the short_str_value and
3851                  # com.mypackage.MyDoFn as the java_class_name value.
3852                  # short_str_value can be displayed and java_class_name_value
3853                  # will be displayed as a tooltip.
3854              "durationValue": "A String", # Contains value if the data is of duration type.
3855              "url": "A String", # An optional full URL.
3856              "floatValue": 3.14, # Contains value if the data is of float type.
3857              "namespace": "A String", # The namespace for the key. This is usually a class name or programming
3858                  # language namespace (e.g. a Python module) which defines the display data.
3859                  # This allows a dax monitoring system to specially handle the data
3860                  # and perform custom rendering.
3861              "javaClassValue": "A String", # Contains value if the data is of java class type.
3862              "label": "A String", # An optional label to display in a dax UI for the element.
3863              "boolValue": True or False, # Contains value if the data is of a boolean type.
3864              "strValue": "A String", # Contains value if the data is of string type.
3865              "key": "A String", # The key identifying the display data.
3866                  # This is intended to be used as a label for the display data
3867                  # when viewed in a dax monitoring system.
3868              "int64Value": "A String", # Contains value if the data is of int64 type.
3869              "timestampValue": "A String", # Contains value if the data is of timestamp type.
3870            },
3871          ],
3872          "outputCollectionName": [ # User names for all collection outputs to this transform.
3873            "A String",
3874          ],
3875          "id": "A String", # SDK generated id of this transform instance.
3876        },
3877      ],
3878      "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
3879        { # Description of the composing transforms, names/ids, and input/outputs of a
3880            # stage of execution.  Some composing transforms and sources may have been
3881            # generated by the Dataflow service during execution planning.
3882          "componentSource": [ # Collections produced and consumed by component transforms of this stage.
3883            { # Description of an interstitial value between transforms in an execution
3884                # stage.
3885              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
3886              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
3887                  # source is most closely associated.
3888              "name": "A String", # Dataflow service generated name for this source.
3889            },
3890          ],
3891          "kind": "A String", # Type of transform this stage is executing.
3892          "name": "A String", # Dataflow service generated name for this stage.
3893          "outputSource": [ # Output sources for this stage.
3894            { # Description of an input or output of an execution stage.
3895              "userName": "A String", # Human-readable name for this source; may be user or system generated.
3896              "sizeBytes": "A String", # Size of the source, if measurable.
3897              "name": "A String", # Dataflow service generated name for this source.
3898              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
3899                  # source is most closely associated.
3900            },
3901          ],
3902          "inputSource": [ # Input sources for this stage.
3903            { # Description of an input or output of an execution stage.
3904              "userName": "A String", # Human-readable name for this source; may be user or system generated.
3905              "sizeBytes": "A String", # Size of the source, if measurable.
3906              "name": "A String", # Dataflow service generated name for this source.
3907              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
3908                  # source is most closely associated.
3909            },
3910          ],
3911          "componentTransform": [ # Transforms that comprise this execution stage.
3912            { # Description of a transform executed as part of an execution stage.
3913              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
3914              "originalTransform": "A String", # User name for the original user transform with which this transform is
3915                  # most closely associated.
3916              "name": "A String", # Dataflow service generated name for this source.
3917            },
3918          ],
3919          "id": "A String", # Dataflow service generated id for this stage.
3920        },
3921      ],
3922      "displayData": [ # Pipeline level display data.
3923        { # Data provided with a pipeline or transform to provide descriptive info.
3924          "shortStrValue": "A String", # A possible additional shorter value to display.
3925              # For example a java_class_name_value of com.mypackage.MyDoFn
3926              # will be stored with MyDoFn as the short_str_value and
3927              # com.mypackage.MyDoFn as the java_class_name value.
3928              # short_str_value can be displayed and java_class_name_value
3929              # will be displayed as a tooltip.
3930          "durationValue": "A String", # Contains value if the data is of duration type.
3931          "url": "A String", # An optional full URL.
3932          "floatValue": 3.14, # Contains value if the data is of float type.
3933          "namespace": "A String", # The namespace for the key. This is usually a class name or programming
3934              # language namespace (e.g. a Python module) which defines the display data.
3935              # This allows a dax monitoring system to specially handle the data
3936              # and perform custom rendering.
3937          "javaClassValue": "A String", # Contains value if the data is of java class type.
3938          "label": "A String", # An optional label to display in a dax UI for the element.
3939          "boolValue": True or False, # Contains value if the data is of a boolean type.
3940          "strValue": "A String", # Contains value if the data is of string type.
3941          "key": "A String", # The key identifying the display data.
3942              # This is intended to be used as a label for the display data
3943              # when viewed in a dax monitoring system.
3944          "int64Value": "A String", # Contains value if the data is of int64 type.
3945          "timestampValue": "A String", # Contains value if the data is of timestamp type.
3946        },
3947      ],
3948    },
3949    "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
3950        # callers cannot mutate it.
3951      { # A message describing the state of a particular execution stage.
3952        "executionStageName": "A String", # The name of the execution stage.
3953        "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
3954        "currentStateTime": "A String", # The time at which the stage transitioned to this state.
3955      },
3956    ],
3957    "id": "A String", # The unique ID of this job.
3958        #
3959        # This field is set by the Cloud Dataflow service when the Job is
3960        # created, and is immutable for the life of the job.
3961    "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
3962        # `JOB_STATE_UPDATED`), this field contains the ID of that job.
3963    "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
3964    "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
3965        # corresponding name prefixes of the new job.
3966      "a_key": "A String",
3967    },
3968    "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
3969      "version": { # A structure describing which components and their versions of the service
3970          # are required in order to run the job.
3971        "a_key": "", # Properties of the object.
3972      },
3973      "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
3974      "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data
3975          # at rest, AKA a Customer Managed Encryption Key (CMEK).
3976          #
3977          # Format:
3978          #   projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
3979      "internalExperiments": { # Experimental settings.
3980        "a_key": "", # Properties of the object. Contains field @type with type URL.
3981      },
3982      "dataset": "A String", # The dataset for the current project where various workflow
3983          # related tables are stored.
3984          #
3985          # The supported resource type is:
3986          #
3987          # Google BigQuery:
3988          #   bigquery.googleapis.com/{dataset}
3989      "experiments": [ # The list of experiments to enable.
3990        "A String",
3991      ],
3992      "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
3993      "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
3994          # options are passed through the service and are used to recreate the
3995          # SDK pipeline options on the worker in a language agnostic and platform
3996          # independent way.
3997        "a_key": "", # Properties of the object.
3998      },
3999      "userAgent": { # A description of the process that generated the request.
4000        "a_key": "", # Properties of the object.
4001      },
4002      "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
4003          # unspecified, the service will attempt to choose a reasonable
4004          # default.  This should be in the form of the API service name,
4005          # e.g. "compute.googleapis.com".
4006      "workerPools": [ # The worker pools. At least one "harness" worker pool must be
4007          # specified in order for the job to have workers.
4008        { # Describes one particular pool of Cloud Dataflow workers to be
4009            # instantiated by the Cloud Dataflow service in order to perform the
4010            # computations required by a job.  Note that a workflow job may use
4011            # multiple pools, in order to match the various computational
4012            # requirements of the various stages of the job.
4013          "diskSourceImage": "A String", # Fully qualified source image for disks.
4014          "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
4015              # using the standard Dataflow task runner.  Users should ignore
4016              # this field.
4017            "workflowFileName": "A String", # The file to store the workflow in.
4018            "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
4019                # will not be uploaded.
4020                #
4021                # The supported resource type is:
4022                #
4023                # Google Cloud Storage:
4024                #   storage.googleapis.com/{bucket}/{object}
4025                #   bucket.storage.googleapis.com/{object}
4026            "commandlinesFileName": "A String", # The file to store preprocessing commands in.
4027            "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
4028              "reportingEnabled": True or False, # Whether to send work progress updates to the service.
4029              "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
4030                  # "shuffle/v1beta1".
4031              "workerId": "A String", # The ID of the worker running this pipeline.
4032              "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
4033                  #
4034                  # When workers access Google Cloud APIs, they logically do so via
4035                  # relative URLs.  If this field is specified, it supplies the base
4036                  # URL to use for resolving these relative URLs.  The normative
4037                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
4038                  # Locators".
4039                  #
4040                  # If not specified, the default value is "http://www.googleapis.com/"
4041              "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
4042                  # "dataflow/v1b3/projects".
4043              "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
4044                  # storage.
4045                  #
4046                  # The supported resource type is:
4047                  #
4048                  # Google Cloud Storage:
4049                  #
4050                  #   storage.googleapis.com/{bucket}/{object}
4051                  #   bucket.storage.googleapis.com/{object}
4052            },
4053            "vmId": "A String", # The ID string of the VM.
4054            "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
4055            "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
4056            "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
4057                # access the Cloud Dataflow API.
4058              "A String",
4059            ],
4060            "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
4061                # taskrunner; e.g. "root".
4062            "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
4063                #
4064                # When workers access Google Cloud APIs, they logically do so via
4065                # relative URLs.  If this field is specified, it supplies the base
4066                # URL to use for resolving these relative URLs.  The normative
4067                # algorithm used is defined by RFC 1808, "Relative Uniform Resource
4068                # Locators".
4069                #
4070                # If not specified, the default value is "http://www.googleapis.com/"
4071            "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
4072                # taskrunner; e.g. "wheel".
4073            "languageHint": "A String", # The suggested backend language.
4074            "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
4075                # console.
4076            "streamingWorkerMainClass": "A String", # The streaming worker main class name.
4077            "logDir": "A String", # The directory on the VM to store logs.
4078            "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3".
4079            "harnessCommand": "A String", # The command to launch the worker harness.
4080            "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
4081                # temporary storage.
4082                #
4083                # The supported resource type is:
4084                #
4085                # Google Cloud Storage:
4086                #   storage.googleapis.com/{bucket}/{object}
4087                #   bucket.storage.googleapis.com/{object}
4088            "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
4089          },
4090          "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
4091              # are supported.
4092          "packages": [ # Packages to be installed on workers.
4093            { # The packages that must be installed in order for a worker to run the
4094                # steps of the Cloud Dataflow job that will be assigned to its worker
4095                # pool.
4096                #
4097                # This is the mechanism by which the Cloud Dataflow SDK causes code to
4098                # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
4099                # might use this to install jars containing the user's code and all of the
4100                # various dependencies (libraries, data files, etc.) required in order
4101                # for that code to run.
4102              "location": "A String", # The resource to read the package from. The supported resource type is:
4103                  #
4104                  # Google Cloud Storage:
4105                  #
4106                  #   storage.googleapis.com/{bucket}
4107                  #   bucket.storage.googleapis.com/
4108              "name": "A String", # The name of the package.
4109            },
4110          ],
4111          "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
4112              # service will attempt to choose a reasonable default.
4113          "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
4114              # the service will use the network "default".
4115          "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
4116              # will attempt to choose a reasonable default.
4117          "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
4118              # attempt to choose a reasonable default.
4119          "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
4120              # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
4121              # `TEARDOWN_NEVER`.
4122              # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
4123              # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
4124              # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
4125              # down.
4126              #
4127              # If the workers are not torn down by the service, they will
4128              # continue to run and use Google Compute Engine VM resources in the
4129              # user's project until they are explicitly terminated by the user.
4130              # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
4131              # policy except for small, manually supervised test jobs.
4132              #
4133              # If unknown or unspecified, the service will attempt to choose a reasonable
4134              # default.
4135          "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
4136              # Compute Engine API.
4137          "ipConfiguration": "A String", # Configuration for VM IPs.
4138          "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
4139              # service will choose a number of threads (according to the number of cores
4140              # on the selected machine type for batch, or 1 by convention for streaming).
4141          "poolArgs": { # Extra arguments for this worker pool.
4142            "a_key": "", # Properties of the object. Contains field @type with type URL.
4143          },
4144          "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
4145              # execute the job.  If zero or unspecified, the service will
4146              # attempt to choose a reasonable default.
4147          "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
4148              # harness, residing in Google Container Registry.
4149          "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
4150              # the form "regions/REGION/subnetworks/SUBNETWORK".
4151          "dataDisks": [ # Data disks that are used by a VM in this workflow.
4152            { # Describes the data disk used by a workflow job.
4153              "mountPoint": "A String", # Directory in a VM where disk is mounted.
4154              "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
4155                  # attempt to choose a reasonable default.
4156              "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
4157                  # must be a disk type appropriate to the project and zone in which
4158                  # the workers will run.  If unknown or unspecified, the service
4159                  # will attempt to choose a reasonable default.
4160                  #
4161                  # For example, the standard persistent disk type is a resource name
4162                  # typically ending in "pd-standard".  If SSD persistent disks are
4163                  # available, the resource name typically ends with "pd-ssd".  The
4164                  # actual valid values are defined by the Google Compute Engine API,
4165                  # not by the Cloud Dataflow API; consult the Google Compute Engine
4166                  # documentation for more information about determining the set of
4167                  # available disk types for a particular project and zone.
4168                  #
4169                  # Google Compute Engine Disk types are local to a particular
4170                  # project in a particular zone, and so the resource name will
4171                  # typically look something like this:
4172                  #
4173                  # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
4174            },
4175          ],
4176          "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
4177            "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
4178            "algorithm": "A String", # The algorithm to use for autoscaling.
4179          },
4180          "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
4181              # select a default set of packages which are useful to worker
4182              # harnesses written in a particular language.
4183          "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
4184              # attempt to choose a reasonable default.
4185          "metadata": { # Metadata to set on the Google Compute Engine VMs.
4186            "a_key": "A String",
4187          },
4188        },
4189      ],
4190      "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
4191          # storage.  The system will append the suffix "/temp-{JOBNAME}" to
4192          # this resource prefix, where {JOBNAME} is the value of the
4193          # job_name field.  The resulting bucket and object prefix is used
4194          # as the prefix of the resources used to store temporary data
4195          # needed during the job execution.  NOTE: This will override the
4196          # value in taskrunner_settings.
4197          # The supported resource type is:
4198          #
4199          # Google Cloud Storage:
4200          #
4201          #   storage.googleapis.com/{bucket}/{object}
4202          #   bucket.storage.googleapis.com/{object}
4203    },
4204    "location": "A String", # The [regional endpoint]
4205        # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
4206        # contains this job.
4207    "tempFiles": [ # A set of files the system should be aware of that are used
4208        # for temporary storage. These temporary files will be
4209        # removed on job completion.
4210        # No duplicates are allowed.
4211        # No file patterns are supported.
4212        #
4213        # The supported files are:
4214        #
4215        # Google Cloud Storage:
4216        #
4217        #    storage.googleapis.com/{bucket}/{object}
4218        #    bucket.storage.googleapis.com/{object}
4219      "A String",
4220    ],
4221    "type": "A String", # The type of Cloud Dataflow job.
4222    "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
4223        # If this field is set, the service will ensure its uniqueness.
4224        # The request to create a job will fail if the service has knowledge of a
4225        # previously submitted job with the same client's ID and job name.
4226        # The caller may use this field to ensure idempotence of job
4227        # creation across retried attempts to create a job.
4228        # By default, the field is empty and, in that case, the service ignores it.
4229    "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
4230        # snapshot.
4231    "stepsLocation": "A String", # The GCS location where the steps are stored.
4232    "currentStateTime": "A String", # The timestamp associated with the current state.
4233    "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
4234        # Flexible resource scheduling jobs are started with some delay after job
4235        # creation, so start_time is unset before start and is updated when the
4236        # job is started by the Cloud Dataflow service. For other jobs, start_time
4237        # always equals create_time and is immutable and set by the Cloud Dataflow
4238        # service.
4239    "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
4240        # Cloud Dataflow service.
4241    "requestedState": "A String", # The job's requested state.
4242        #
4243        # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
4244        # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
4245        # also be used to directly set a job's requested state to
4246        # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
4247        # job if it has not already reached a terminal state.
4248    "name": "A String", # The user-specified Cloud Dataflow job name.
4249        #
4250        # Only one Job with a given name may exist in a project at any
4251        # given time. If a caller attempts to create a Job with the same
4252        # name as an already-existing Job, the attempt returns the
4253        # existing Job.
4254        #
4255        # The name must match the regular expression
4256        # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
4257    "steps": [ # Exactly one of step or steps_location should be specified.
4258        #
4259        # The top-level steps that constitute the entire job.
4260      { # Defines a particular step within a Cloud Dataflow job.
4261          #
4262          # A job consists of multiple steps, each of which performs some
4263          # specific operation as part of the overall job.  Data is typically
4264          # passed from one step to another as part of the job.
4265          #
4266          # Here's an example of a sequence of steps which together implement a
4267          # Map-Reduce job:
4268          #
4269          #   * Read a collection of data from some source, parsing the
4270          #     collection's elements.
4271          #
4272          #   * Validate the elements.
4273          #
4274          #   * Apply a user-defined function to map each element to some value
4275          #     and extract an element-specific key value.
4276          #
4277          #   * Group elements with the same key into a single element with
4278          #     that key, transforming a multiply-keyed collection into a
4279          #     uniquely-keyed collection.
4280          #
4281          #   * Write the elements out to some data sink.
4282          #
4283          # Note that the Cloud Dataflow service may be used to run many different
4284          # types of jobs, not just Map-Reduce.
4285        "kind": "A String", # The kind of step in the Cloud Dataflow job.
4286        "properties": { # Named properties associated with the step. Each kind of
4287            # predefined step has its own required set of properties.
4288            # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
4289          "a_key": "", # Properties of the object.
4290        },
4291        "name": "A String", # The name that identifies the step. This must be unique for each
4292            # step with respect to all other steps in the Cloud Dataflow job.
4293      },
4294    ],
4295    "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
4296        # of the job it replaced.
4297        #
4298        # When sending a `CreateJobRequest`, you can update a job by specifying it
4299        # here. The job named here is stopped, and its intermediate state is
4300        # transferred to this job.
4301    "currentState": "A String", # The current state of the job.
4302        #
4303        # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
4304        # specified.
4305        #
4306        # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
4307        # terminal state. After a job has reached a terminal state, no
4308        # further state updates may be made.
4309        #
4310        # This field may be mutated by the Cloud Dataflow service;
4311        # callers cannot mutate it.
4312    "executionInfo": { # Deprecated. Additional information about how a Cloud Dataflow
4313        # job will be executed that isn't contained in the submitted job.
4314      "stages": { # A mapping from each stage to the information about that stage.
4315        "a_key": { # Contains information about how a particular
4316            # google.dataflow.v1beta3.Step will be executed.
4317          "stepName": [ # The steps associated with the execution stage.
4318              # Note that stages may have several steps, and that a given step
4319              # might be run by more than one stage.
4320            "A String",
4321          ],
4322        },
4323      },
4324    },
4325  }</pre>
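<p>Below is a minimal, illustrative sketch (not part of the generated reference above) of how a Job body like the one documented here could be submitted with the google-api-python-client. It uses the <code>create</code> method of the same <code>jobs</code> resource; the project ID, region, job name, and Cloud Storage bucket are placeholders, and a real pipeline would also supply fields such as <code>steps</code>.</p>
<pre>
# Sketch only: assumes Application Default Credentials and placeholder resource names.
import re
import uuid

from googleapiclient.discovery import build

PROJECT_ID = "my-project"        # placeholder project
REGION = "us-central1"           # placeholder regional endpoint
JOB_NAME = "wordcount-example"   # placeholder job name

# The job name constraint quoted in the schema above.
NAME_RE = re.compile(r"^[a-z]([-a-z0-9]{0,38}[a-z0-9])?$")
assert NAME_RE.match(JOB_NAME), "job name does not satisfy the documented pattern"

dataflow = build("dataflow", "v1b3")

body = {
    "name": JOB_NAME,
    "type": "JOB_TYPE_BATCH",
    # Re-used across retried create attempts so the service can de-duplicate them.
    "clientRequestId": str(uuid.uuid4()),
    "environment": {
        # The service appends "/temp-{JOBNAME}" to this prefix for temporary data.
        "tempStoragePrefix": "storage.googleapis.com/my-bucket/tmp",
    },
}

request = dataflow.projects().jobs().create(
    projectId=PROJECT_ID,
    location=REGION,
    body=body,
)
response = request.execute()
print(response["id"], response.get("currentState"))
</pre>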
4326</div>
4327
4328</body></html>