1<html><body>
2<style>
3
4body, h1, h2, h3, div, span, p, pre, a {
5  margin: 0;
6  padding: 0;
7  border: 0;
8  font-weight: inherit;
9  font-style: inherit;
10  font-size: 100%;
11  font-family: inherit;
12  vertical-align: baseline;
13}
14
15body {
16  font-size: 13px;
17  padding: 1em;
18}
19
20h1 {
21  font-size: 26px;
22  margin-bottom: 1em;
23}
24
25h2 {
26  font-size: 24px;
27  margin-bottom: 1em;
28}
29
30h3 {
31  font-size: 20px;
32  margin-bottom: 1em;
33  margin-top: 1em;
34}
35
36pre, code {
37  line-height: 1.5;
38  font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
39}
40
41pre {
42  margin-top: 0.5em;
43}
44
45h1, h2, h3, p {
46  font-family: Arial, sans-serif;
47}
48
49h1, h2, h3 {
50  border-bottom: solid #CCC 1px;
51}
52
53.toc_element {
54  margin-top: 0.5em;
55}
56
57.firstline {
58  margin-left: 2em;
59}
60
61.method  {
62  margin-top: 1em;
63  border: solid 1px #CCC;
64  padding: 1em;
65  background: #EEE;
66}
67
68.details {
69  font-weight: bold;
70  font-size: 14px;
71}
72
73</style>
74
75<h1><a href="dataflow_v1b3.html">Google Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.locations.html">locations</a> . <a href="dataflow_v1b3.projects.locations.templates.html">templates</a></h1>
76<h2>Instance Methods</h2>
77<p class="toc_element">
78  <code><a href="#create">create(projectId, location, body, x__xgafv=None)</a></code></p>
79<p class="firstline">Creates a Cloud Dataflow job from a template.</p>
80<p class="toc_element">
81  <code><a href="#get">get(projectId, location, gcsPath=None, x__xgafv=None, view=None)</a></code></p>
82<p class="firstline">Gets the metadata for a template stored in Cloud Storage.</p>
83<p class="toc_element">
84  <code><a href="#launch">launch(projectId, location, body, gcsPath=None, validateOnly=None, x__xgafv=None)</a></code></p>
85<p class="firstline">Launches a template.</p>
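<p>As a brief usage sketch (not part of the generated reference): the <code>templates()</code> resource documented on this page is normally obtained from a Dataflow service object built with the google-api-python-client library. The snippet below assumes that library is installed and that application default credentials are available; it is illustrative only.</p>
<pre># Hypothetical setup sketch -- assumes google-api-python-client is installed
# and application default credentials are configured.
from googleapiclient.discovery import build

# Build a client for the Dataflow v1b3 API.
dataflow = build('dataflow', 'v1b3')

# Navigate to the projects.locations.templates resource described below.
templates = dataflow.projects().locations().templates()
</pre>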
86<h3>Method Details</h3>
87<div class="method">
88    <code class="details" id="create">create(projectId, location, body, x__xgafv=None)</code>
89  <pre>Creates a Cloud Dataflow job from a template.
90
91Args:
92  projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
93  location: string, The location to which to direct the request. (required)
94  body: object, The request body. (required)
95    The object takes the form of:
96
97{ # A request to create a Cloud Dataflow job from a template.
98    "environment": { # The environment values to set at runtime. # The runtime environment for the job.
99      "machineType": "A String", # The machine type to use for the job. Defaults to the value from the
100          # template if not specified.
101      "zone": "A String", # The Compute Engine [availability
102          # zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)
103          # for launching worker instances to run your pipeline.
104      "bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.
105          # Use with caution.
106      "tempLocation": "A String", # The Cloud Storage path to use for temporary files.
107          # Must be a valid Cloud Storage URL, beginning with `gs://`.
108      "serviceAccountEmail": "A String", # The email address of the service account to run the job as.
109      "maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made
110          # available to your pipeline during execution, from 1 to 1000.
111    },
112    "gcsPath": "A String", # Required. A Cloud Storage path to the template from which to
113        # create the job.
114        # Must be a valid Cloud Storage URL, beginning with `gs://`.
115    "location": "A String", # The location to which to direct the request.
116    "parameters": { # The runtime parameters to pass to the job.
117      "a_key": "A String",
118    },
119    "jobName": "A String", # Required. The job name to use for the created job.
120  }
121
122  x__xgafv: string, V1 error format.
123    Allowed values
124      1 - v1 error format
125      2 - v2 error format
126
127Returns:
128  An object of the form:
129
130    { # Defines a job to be run by the Cloud Dataflow service.
131      "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
132          # If this field is set, the service will ensure its uniqueness.
133          # The request to create a job will fail if the service has knowledge of a
134          # previously submitted job with the same client's ID and job name.
135          # The caller may use this field to ensure idempotence of job
136          # creation across retried attempts to create a job.
137          # By default, the field is empty and, in that case, the service ignores it.
138      "requestedState": "A String", # The job's requested state.
139          #
140          # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
141          # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
142          # also be used to directly set a job's requested state to
143          # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
144          # job if it has not already reached a terminal state.
145      "name": "A String", # The user-specified Cloud Dataflow job name.
146          #
147          # Only one Job with a given name may exist in a project at any
148          # given time. If a caller attempts to create a Job with the same
149          # name as an already-existing Job, the attempt returns the
150          # existing Job.
151          #
152          # The name must match the regular expression
153          # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
154      "location": "A String", # The location that contains this job.
155      "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
156          # `JOB_STATE_UPDATED`), this field contains the ID of that job.
157      "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
158      "currentState": "A String", # The current state of the job.
159          #
160          # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
161          # specified.
162          #
163          # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
164          # terminal state. After a job has reached a terminal state, no
165          # further state updates may be made.
166          #
167          # This field may be mutated by the Cloud Dataflow service;
168          # callers cannot mutate it.
169      "labels": { # User-defined labels for this job.
170          #
171          # The labels map can contain no more than 64 entries.  Entries of the labels
172          # map are UTF8 strings that comply with the following restrictions:
173          #
174          # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
175          # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
176          # * Both keys and values are additionally constrained to be <= 128 bytes in
177          # size.
178        "a_key": "A String",
179      },
180      "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
181          # corresponding name prefixes of the new job.
182        "a_key": "A String",
183      },
184      "id": "A String", # The unique ID of this job.
185          #
186          # This field is set by the Cloud Dataflow service when the Job is
187          # created, and is immutable for the life of the job.
188      "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
189        "version": { # A structure describing which components and their versions of the service
190            # are required in order to run the job.
191          "a_key": "", # Properties of the object.
192        },
193        "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
194            # storage.  The system will append the suffix "/temp-{JOBNAME}" to
195            # this resource prefix, where {JOBNAME} is the value of the
196            # job_name field.  The resulting bucket and object prefix is used
197            # as the prefix of the resources used to store temporary data
198            # needed during the job execution.  NOTE: This will override the
199            # value in taskrunner_settings.
200            # The supported resource type is:
201            #
202            # Google Cloud Storage:
203            #
204            #   storage.googleapis.com/{bucket}/{object}
205            #   bucket.storage.googleapis.com/{object}
206        "internalExperiments": { # Experimental settings.
207          "a_key": "", # Properties of the object. Contains field @type with type URL.
208        },
209        "dataset": "A String", # The dataset for the current project where various workflow
210            # related tables are stored.
211            #
212            # The supported resource type is:
213            #
214            # Google BigQuery:
215            #   bigquery.googleapis.com/{dataset}
216        "experiments": [ # The list of experiments to enable.
217          "A String",
218        ],
219        "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
220        "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
221            # options are passed through the service and are used to recreate the
222            # SDK pipeline options on the worker in a language agnostic and platform
223            # independent way.
224          "a_key": "", # Properties of the object.
225        },
226        "userAgent": { # A description of the process that generated the request.
227          "a_key": "", # Properties of the object.
228        },
229        "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
230            # unspecified, the service will attempt to choose a reasonable
231            # default.  This should be in the form of the API service name,
232            # e.g. "compute.googleapis.com".
233        "workerPools": [ # The worker pools. At least one "harness" worker pool must be
234            # specified in order for the job to have workers.
235          { # Describes one particular pool of Cloud Dataflow workers to be
236              # instantiated by the Cloud Dataflow service in order to perform the
237              # computations required by a job.  Note that a workflow job may use
238              # multiple pools, in order to match the various computational
239              # requirements of the various stages of the job.
240            "diskSourceImage": "A String", # Fully qualified source image for disks.
241            "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
242                # using the standard Dataflow task runner.  Users should ignore
243                # this field.
244              "workflowFileName": "A String", # The file to store the workflow in.
245              "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
246                  # will not be uploaded.
247                  #
248                  # The supported resource type is:
249                  #
250                  # Google Cloud Storage:
251                  #   storage.googleapis.com/{bucket}/{object}
252                  #   bucket.storage.googleapis.com/{object}
253              "commandlinesFileName": "A String", # The file to store preprocessing commands in.
254              "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
255                "reportingEnabled": True or False, # Whether to send work progress updates to the service.
256                "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
257                    # "shuffle/v1beta1".
258                "workerId": "A String", # The ID of the worker running this pipeline.
259                "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
260                    #
261                    # When workers access Google Cloud APIs, they logically do so via
262                    # relative URLs.  If this field is specified, it supplies the base
263                    # URL to use for resolving these relative URLs.  The normative
264                    # algorithm used is defined by RFC 1808, "Relative Uniform Resource
265                    # Locators".
266                    #
267                    # If not specified, the default value is "http://www.googleapis.com/"
268                "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
269                    # "dataflow/v1b3/projects".
270                "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
271                    # storage.
272                    #
273                    # The supported resource type is:
274                    #
275                    # Google Cloud Storage:
276                    #
277                    #   storage.googleapis.com/{bucket}/{object}
278                    #   bucket.storage.googleapis.com/{object}
279              },
280              "vmId": "A String", # The ID string of the VM.
281              "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
282              "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
283              "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
284                  # access the Cloud Dataflow API.
285                "A String",
286              ],
287              "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
288                  # taskrunner; e.g. "root".
289              "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
290                  #
291                  # When workers access Google Cloud APIs, they logically do so via
292                  # relative URLs.  If this field is specified, it supplies the base
293                  # URL to use for resolving these relative URLs.  The normative
294                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
295                  # Locators".
296                  #
297                  # If not specified, the default value is "http://www.googleapis.com/"
298              "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
299                  # taskrunner; e.g. "wheel".
300              "languageHint": "A String", # The suggested backend language.
301              "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
302                  # console.
303              "streamingWorkerMainClass": "A String", # The streaming worker main class name.
304              "logDir": "A String", # The directory on the VM to store logs.
305              "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3"
306              "harnessCommand": "A String", # The command to launch the worker harness.
307              "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
308                  # temporary storage.
309                  #
310                  # The supported resource type is:
311                  #
312                  # Google Cloud Storage:
313                  #   storage.googleapis.com/{bucket}/{object}
314                  #   bucket.storage.googleapis.com/{object}
315              "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
316            },
317            "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
318                # are supported.
319            "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
320                # service will attempt to choose a reasonable default.
321            "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
322                # the service will use the network "default".
323            "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
324                # will attempt to choose a reasonable default.
325            "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
326                # attempt to choose a reasonable default.
327            "dataDisks": [ # Data disks that are used by a VM in this workflow.
328              { # Describes the data disk used by a workflow job.
329                "mountPoint": "A String", # Directory in a VM where disk is mounted.
330                "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
331                    # attempt to choose a reasonable default.
332                "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
333                    # must be a disk type appropriate to the project and zone in which
334                    # the workers will run.  If unknown or unspecified, the service
335                    # will attempt to choose a reasonable default.
336                    #
337                    # For example, the standard persistent disk type is a resource name
338                    # typically ending in "pd-standard".  If SSD persistent disks are
339                    # available, the resource name typically ends with "pd-ssd".  The
340                    # actual valid values are defined by the Google Compute Engine API,
341                    # not by the Cloud Dataflow API; consult the Google Compute Engine
342                    # documentation for more information about determining the set of
343                    # available disk types for a particular project and zone.
344                    #
345                    # Google Compute Engine Disk types are local to a particular
346                    # project in a particular zone, and so the resource name will
347                    # typically look something like this:
348                    #
349                    # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
350              },
351            ],
352            "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
353                # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
354                # `TEARDOWN_NEVER`.
355                # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
356                # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
357                # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
358                # down.
359                #
360                # If the workers are not torn down by the service, they will
361                # continue to run and use Google Compute Engine VM resources in the
362                # user's project until they are explicitly terminated by the user.
363                # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
364                # policy except for small, manually supervised test jobs.
365                #
366                # If unknown or unspecified, the service will attempt to choose a reasonable
367                # default.
368            "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
369                # Compute Engine API.
370            "ipConfiguration": "A String", # Configuration for VM IPs.
371            "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
372                # service will choose a number of threads (according to the number of cores
373                # on the selected machine type for batch, or 1 by convention for streaming).
374            "poolArgs": { # Extra arguments for this worker pool.
375              "a_key": "", # Properties of the object. Contains field @type with type URL.
376            },
377            "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
378                # execute the job.  If zero or unspecified, the service will
379                # attempt to choose a reasonable default.
380            "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
381                # harness, residing in Google Container Registry.
382            "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
383                # the form "regions/REGION/subnetworks/SUBNETWORK".
384            "packages": [ # Packages to be installed on workers.
385              { # The packages that must be installed in order for a worker to run the
386                  # steps of the Cloud Dataflow job that will be assigned to its worker
387                  # pool.
388                  #
389                  # This is the mechanism by which the Cloud Dataflow SDK causes code to
390                  # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
391                  # might use this to install jars containing the user's code and all of the
392                  # various dependencies (libraries, data files, etc.) required in order
393                  # for that code to run.
394                "location": "A String", # The resource to read the package from. The supported resource type is:
395                    #
396                    # Google Cloud Storage:
397                    #
398                    #   storage.googleapis.com/{bucket}
399                    #   bucket.storage.googleapis.com/
400                "name": "A String", # The name of the package.
401              },
402            ],
403            "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
404              "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
405              "algorithm": "A String", # The algorithm to use for autoscaling.
406            },
407            "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
408                # select a default set of packages which are useful to worker
409                # harnesses written in a particular language.
410            "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
411                # attempt to choose a reasonable default.
412            "metadata": { # Metadata to set on the Google Compute Engine VMs.
413              "a_key": "A String",
414            },
415          },
416        ],
417      },
418      "pipelineDescription": { # A descriptive representation of the submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
419          # A description of the user pipeline and stages through which it is executed.
420          # Created by the Cloud Dataflow service.  Only retrieved with
421          # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
422          # form.  This data is provided by the Dataflow service for ease of visualizing
423          # the pipeline and interpreting Dataflow provided metrics.
424        "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
425          { # Description of the type, names/ids, and input/outputs for a transform.
426            "kind": "A String", # Type of transform.
427            "name": "A String", # User provided name for this transform instance.
428            "inputCollectionName": [ # User names for all collection inputs to this transform.
429              "A String",
430            ],
431            "displayData": [ # Transform-specific display data.
432              { # Data provided with a pipeline or transform to provide descriptive info.
433                "shortStrValue": "A String", # A possible additional shorter value to display.
434                    # For example a java_class_name_value of com.mypackage.MyDoFn
435                    # will be stored with MyDoFn as the short_str_value and
436                    # com.mypackage.MyDoFn as the java_class_name value.
437                    # short_str_value can be displayed and java_class_name_value
438                    # will be displayed as a tooltip.
439                "durationValue": "A String", # Contains value if the data is of duration type.
440                "url": "A String", # An optional full URL.
441                "floatValue": 3.14, # Contains value if the data is of float type.
442                "namespace": "A String", # The namespace for the key. This is usually a class name or programming
443                    # language namespace (e.g. a Python module) which defines the display data.
444                    # This allows a dax monitoring system to specially handle the data
445                    # and perform custom rendering.
446                "javaClassValue": "A String", # Contains value if the data is of java class type.
447                "label": "A String", # An optional label to display in a dax UI for the element.
448                "boolValue": True or False, # Contains value if the data is of a boolean type.
449                "strValue": "A String", # Contains value if the data is of string type.
450                "key": "A String", # The key identifying the display data.
451                    # This is intended to be used as a label for the display data
452                    # when viewed in a dax monitoring system.
453                "int64Value": "A String", # Contains value if the data is of int64 type.
454                "timestampValue": "A String", # Contains value if the data is of timestamp type.
455              },
456            ],
457            "outputCollectionName": [ # User names for all collection outputs to this transform.
458              "A String",
459            ],
460            "id": "A String", # SDK generated id of this transform instance.
461          },
462        ],
463        "displayData": [ # Pipeline level display data.
464          { # Data provided with a pipeline or transform to provide descriptive info.
465            "shortStrValue": "A String", # A possible additional shorter value to display.
466                # For example a java_class_name_value of com.mypackage.MyDoFn
467                # will be stored with MyDoFn as the short_str_value and
468                # com.mypackage.MyDoFn as the java_class_name value.
469                # short_str_value can be displayed and java_class_name_value
470                # will be displayed as a tooltip.
471            "durationValue": "A String", # Contains value if the data is of duration type.
472            "url": "A String", # An optional full URL.
473            "floatValue": 3.14, # Contains value if the data is of float type.
474            "namespace": "A String", # The namespace for the key. This is usually a class name or programming
475                # language namespace (e.g. a Python module) which defines the display data.
476                # This allows a dax monitoring system to specially handle the data
477                # and perform custom rendering.
478            "javaClassValue": "A String", # Contains value if the data is of java class type.
479            "label": "A String", # An optional label to display in a dax UI for the element.
480            "boolValue": True or False, # Contains value if the data is of a boolean type.
481            "strValue": "A String", # Contains value if the data is of string type.
482            "key": "A String", # The key identifying the display data.
483                # This is intended to be used as a label for the display data
484                # when viewed in a dax monitoring system.
485            "int64Value": "A String", # Contains value if the data is of int64 type.
486            "timestampValue": "A String", # Contains value if the data is of timestamp type.
487          },
488        ],
489        "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
490          { # Description of the composing transforms, names/ids, and input/outputs of a
491              # stage of execution.  Some composing transforms and sources may have been
492              # generated by the Dataflow service during execution planning.
493            "componentSource": [ # Collections produced and consumed by component transforms of this stage.
494              { # Description of an interstitial value between transforms in an execution
495                  # stage.
496                "userName": "A String", # Human-readable name for this transform; may be user or system generated.
497                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
498                    # source is most closely associated.
499                "name": "A String", # Dataflow service generated name for this source.
500              },
501            ],
502            "kind": "A String", # Type of transform this stage is executing.
503            "name": "A String", # Dataflow service generated name for this stage.
504            "outputSource": [ # Output sources for this stage.
505              { # Description of an input or output of an execution stage.
506                "userName": "A String", # Human-readable name for this source; may be user or system generated.
507                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
508                    # source is most closely associated.
509                "name": "A String", # Dataflow service generated name for this source.
510                "sizeBytes": "A String", # Size of the source, if measurable.
511              },
512            ],
513            "inputSource": [ # Input sources for this stage.
514              { # Description of an input or output of an execution stage.
515                "userName": "A String", # Human-readable name for this source; may be user or system generated.
516                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
517                    # source is most closely associated.
518                "name": "A String", # Dataflow service generated name for this source.
519                "sizeBytes": "A String", # Size of the source, if measurable.
520              },
521            ],
522            "componentTransform": [ # Transforms that comprise this execution stage.
523              { # Description of a transform executed as part of an execution stage.
524                "userName": "A String", # Human-readable name for this transform; may be user or system generated.
525                "originalTransform": "A String", # User name for the original user transform with which this transform is
526                    # most closely associated.
527                "name": "A String", # Dataflow service generated name for this source.
528              },
529            ],
530            "id": "A String", # Dataflow service generated id for this stage.
531          },
532        ],
533      },
534      "steps": [ # The top-level steps that constitute the entire job.
535        { # Defines a particular step within a Cloud Dataflow job.
536            #
537            # A job consists of multiple steps, each of which performs some
538            # specific operation as part of the overall job.  Data is typically
539            # passed from one step to another as part of the job.
540            #
541            # Here's an example of a sequence of steps which together implement a
542            # Map-Reduce job:
543            #
544            #   * Read a collection of data from some source, parsing the
545            #     collection's elements.
546            #
547            #   * Validate the elements.
548            #
549            #   * Apply a user-defined function to map each element to some value
550            #     and extract an element-specific key value.
551            #
552            #   * Group elements with the same key into a single element with
553            #     that key, transforming a multiply-keyed collection into a
554            #     uniquely-keyed collection.
555            #
556            #   * Write the elements out to some data sink.
557            #
558            # Note that the Cloud Dataflow service may be used to run many different
559            # types of jobs, not just Map-Reduce.
560          "kind": "A String", # The kind of step in the Cloud Dataflow job.
561          "properties": { # Named properties associated with the step. Each kind of
562              # predefined step has its own required set of properties.
563              # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
564            "a_key": "", # Properties of the object.
565          },
566          "name": "A String", # The name that identifies the step. This must be unique for each
567              # step with respect to all other steps in the Cloud Dataflow job.
568        },
569      ],
570      "currentStateTime": "A String", # The timestamp associated with the current state.
571      "tempFiles": [ # A set of files the system should be aware of that are used
572          # for temporary storage. These temporary files will be
573          # removed on job completion.
574          # No duplicates are allowed.
575          # No file patterns are supported.
576          #
577          # The supported files are:
578          #
579          # Google Cloud Storage:
580          #
581          #    storage.googleapis.com/{bucket}/{object}
582          #    bucket.storage.googleapis.com/{object}
583        "A String",
584      ],
585      "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
586          # callers cannot mutate it.
587        { # A message describing the state of a particular execution stage.
588          "executionStageName": "A String", # The name of the execution stage.
589          "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
590          "currentStateTime": "A String", # The time at which the stage transitioned to this state.
591        },
592      ],
593      "type": "A String", # The type of Cloud Dataflow job.
594      "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
595          # Cloud Dataflow service.
596      "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
597          # of the job it replaced.
598          #
599          # When sending a `CreateJobRequest`, you can update a job by specifying it
600          # here. The job named here is stopped, and its intermediate state is
601          # transferred to this job.
602      "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
603          # isn't contained in the submitted job.
604        "stages": { # A mapping from each stage to the information about that stage.
605          "a_key": { # Contains information about how a particular
606              # google.dataflow.v1beta3.Step will be executed.
607            "stepName": [ # The steps associated with the execution stage.
608                # Note that stages may have several steps, and that a given step
609                # might be run by more than one stage.
610              "A String",
611            ],
612          },
613        },
614      },
615    }</pre>
616</div>
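<p>A minimal, hedged sketch of calling <code>create</code> with the request body described above. The project ID, location, bucket, template path, and parameter names are placeholder values, and <code>templates</code> is assumed to be the resource object from the setup sketch earlier on this page.</p>
<pre># Hypothetical example values -- replace with a real project, location,
# and gs:// paths before running.
body = {
    'jobName': 'example-template-job',
    'gcsPath': 'gs://example-bucket/templates/example-template',
    'parameters': {'inputFile': 'gs://example-bucket/input.txt'},
    'environment': {'tempLocation': 'gs://example-bucket/temp'},
}

job = templates.create(projectId='example-project',
                       location='us-central1',
                       body=body).execute()

# The response is the Job object documented above.
print(job.get('id'), job.get('currentState'))
</pre>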
617
618<div class="method">
619    <code class="details" id="get">get(projectId, location, gcsPath=None, x__xgafv=None, view=None)</code>
620  <pre>Gets the metadata for a template stored in Cloud Storage.
621
622Args:
623  projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
624  location: string, The location to which to direct the request. (required)
625  gcsPath: string, Required. A Cloud Storage path to the template from which to
626create the job.
627Must be a valid Cloud Storage URL, beginning with `gs://`.
628  x__xgafv: string, V1 error format.
629    Allowed values
630      1 - v1 error format
631      2 - v2 error format
632  view: string, The view to retrieve. Defaults to METADATA_ONLY.
633
634Returns:
635  An object of the form:
636
637    { # The response to a GetTemplate request.
638    "status": { # The `Status` type defines a logical error model that is suitable for different # The status of the get template request. Any problems with the
639        # request will be indicated in the error_details.
640        # programming environments, including REST APIs and RPC APIs. It is used by
641        # [gRPC](https://github.com/grpc). The error model is designed to be:
642        #
643        # - Simple to use and understand for most users
644        # - Flexible enough to meet unexpected needs
645        #
646        # # Overview
647        #
648        # The `Status` message contains three pieces of data: error code, error message,
649        # and error details. The error code should be an enum value of
650        # google.rpc.Code, but it may accept additional error codes if needed.  The
651        # error message should be a developer-facing English message that helps
652        # developers *understand* and *resolve* the error. If a localized user-facing
653        # error message is needed, put the localized message in the error details or
654        # localize it in the client. The optional error details may contain arbitrary
655        # information about the error. There is a predefined set of error detail types
656        # in the package `google.rpc` that can be used for common error conditions.
657        #
658        # # Language mapping
659        #
660        # The `Status` message is the logical representation of the error model, but it
661        # is not necessarily the actual wire format. When the `Status` message is
662        # exposed in different client libraries and different wire protocols, it can be
663        # mapped differently. For example, it will likely be mapped to some exceptions
664        # in Java, but more likely mapped to some error codes in C.
665        #
666        # # Other uses
667        #
668        # The error model and the `Status` message can be used in a variety of
669        # environments, either with or without APIs, to provide a
670        # consistent developer experience across different environments.
671        #
672        # Example uses of this error model include:
673        #
674        # - Partial errors. If a service needs to return partial errors to the client,
675        #     it may embed the `Status` in the normal response to indicate the partial
676        #     errors.
677        #
678        # - Workflow errors. A typical workflow has multiple steps. Each step may
679        #     have a `Status` message for error reporting.
680        #
681        # - Batch operations. If a client uses batch request and batch response, the
682        #     `Status` message should be used directly inside batch response, one for
683        #     each error sub-response.
684        #
685        # - Asynchronous operations. If an API call embeds asynchronous operation
686        #     results in its response, the status of those operations should be
687        #     represented directly using the `Status` message.
688        #
689        # - Logging. If some API errors are stored in logs, the message `Status` could
690        #     be used directly after any stripping needed for security/privacy reasons.
691      "message": "A String", # A developer-facing error message, which should be in English. Any
692          # user-facing error message should be localized and sent in the
693          # google.rpc.Status.details field, or localized by the client.
694      "code": 42, # The status code, which should be an enum value of google.rpc.Code.
695      "details": [ # A list of messages that carry the error details.  There will be a
696          # common set of message types for APIs to use.
697        {
698          "a_key": "", # Properties of the object. Contains field @type with type URL.
699        },
700      ],
701    },
702    "metadata": { # Metadata describing a template. # The template metadata describing the template name, available
703        # parameters, etc.
704      "name": "A String", # Required. The name of the template.
705      "parameters": [ # The parameters for the template.
706        { # Metadata for a specific parameter.
707          "regexes": [ # Optional. Regexes that the parameter must match.
708            "A String",
709          ],
710          "helpText": "A String", # Required. The help text to display for the parameter.
711          "name": "A String", # Required. The name of the parameter.
712          "isOptional": True or False, # Optional. Whether the parameter is optional. Defaults to false.
713          "label": "A String", # Required. The label to display for the parameter.
714        },
715      ],
716      "description": "A String", # Optional. A description of the template.
717    },
718  }</pre>
719</div>
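<p>A similar hedged sketch for <code>get</code>, which returns template metadata rather than creating a job; the Cloud Storage path is a placeholder and <code>templates</code> comes from the earlier setup sketch.</p>
<pre># Hypothetical example: inspect a template's parameters before launching it.
response = templates.get(projectId='example-project',
                         location='us-central1',
                         gcsPath='gs://example-bucket/templates/example-template',
                         view='METADATA_ONLY').execute()

# List the declared runtime parameters from the returned metadata.
for param in response.get('metadata', {}).get('parameters', []):
    print(param['name'], '-', param.get('helpText', ''))
</pre>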
720
721<div class="method">
722    <code class="details" id="launch">launch(projectId, location, body, gcsPath=None, validateOnly=None, x__xgafv=None)</code>
723  <pre>Launches a template.
724
725Args:
726  projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
727  location: string, The location to which to direct the request. (required)
728  body: object, The request body. (required)
729    The object takes the form of:
730
731{ # Parameters to provide to the template being launched.
732    "environment": { # The environment values to set at runtime. # The runtime environment for the job.
733      "machineType": "A String", # The machine type to use for the job. Defaults to the value from the
734          # template if not specified.
735      "zone": "A String", # The Compute Engine [availability
736          # zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)
737          # for launching worker instances to run your pipeline.
738      "bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.
739          # Use with caution.
740      "tempLocation": "A String", # The Cloud Storage path to use for temporary files.
741          # Must be a valid Cloud Storage URL, beginning with `gs://`.
742      "serviceAccountEmail": "A String", # The email address of the service account to run the job as.
743      "maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made
744          # available to your pipeline during execution, from 1 to 1000.
745    },
746    "parameters": { # The runtime parameters to pass to the job.
747      "a_key": "A String",
748    },
749    "jobName": "A String", # Required. The job name to use for the created job.
750  }
751
752  gcsPath: string, Required. A Cloud Storage path to the template from which to create
753the job.
754Must be a valid Cloud Storage URL, beginning with `gs://`.
755  validateOnly: boolean, If true, the request is validated but not actually executed.
756Defaults to false.
757  x__xgafv: string, V1 error format.
758    Allowed values
759      1 - v1 error format
760      2 - v2 error format
761
762Returns:
763  An object of the form:
764
765    { # Response to the request to launch a template.
766    "job": { # Defines a job to be run by the Cloud Dataflow service. # The job that was launched, if the request was not a dry run and
767        # the job was successfully launched.
768        "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
769            # If this field is set, the service will ensure its uniqueness.
770            # The request to create a job will fail if the service has knowledge of a
771            # previously submitted job with the same client's ID and job name.
772            # The caller may use this field to ensure idempotence of job
773            # creation across retried attempts to create a job.
774            # By default, the field is empty and, in that case, the service ignores it.
775        "requestedState": "A String", # The job's requested state.
776            #
777            # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
778            # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
779            # also be used to directly set a job's requested state to
780            # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
781            # job if it has not already reached a terminal state.
782        "name": "A String", # The user-specified Cloud Dataflow job name.
783            #
784            # Only one Job with a given name may exist in a project at any
785            # given time. If a caller attempts to create a Job with the same
786            # name as an already-existing Job, the attempt returns the
787            # existing Job.
788            #
789            # The name must match the regular expression
790            # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
791        "location": "A String", # The location that contains this job.
792        "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
793            # `JOB_STATE_UPDATED`), this field contains the ID of that job.
794        "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
795        "currentState": "A String", # The current state of the job.
796            #
797            # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
798            # specified.
799            #
800            # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
801            # terminal state. After a job has reached a terminal state, no
802            # further state updates may be made.
803            #
804            # This field may be mutated by the Cloud Dataflow service;
805            # callers cannot mutate it.
806        "labels": { # User-defined labels for this job.
807            #
808            # The labels map can contain no more than 64 entries.  Entries of the labels
809            # map are UTF8 strings that comply with the following restrictions:
810            #
811            # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
812            # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
813            # * Both keys and values are additionally constrained to be <= 128 bytes in
814            # size.
815          "a_key": "A String",
816        },
817        "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
818            # corresponding name prefixes of the new job.
819          "a_key": "A String",
820        },
821        "id": "A String", # The unique ID of this job.
822            #
823            # This field is set by the Cloud Dataflow service when the Job is
824            # created, and is immutable for the life of the job.
825        "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
826          "version": { # A structure describing which components and their versions of the service
827              # are required in order to run the job.
828            "a_key": "", # Properties of the object.
829          },
830          "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
831              # storage.  The system will append the suffix "/temp-{JOBNAME}" to
832              # this resource prefix, where {JOBNAME} is the value of the
833              # job_name field.  The resulting bucket and object prefix is used
834              # as the prefix of the resources used to store temporary data
835              # needed during the job execution.  NOTE: This will override the
836              # value in taskrunner_settings.
837              # The supported resource type is:
838              #
839              # Google Cloud Storage:
840              #
841              #   storage.googleapis.com/{bucket}/{object}
842              #   bucket.storage.googleapis.com/{object}
843          "internalExperiments": { # Experimental settings.
844            "a_key": "", # Properties of the object. Contains field @type with type URL.
845          },
846          "dataset": "A String", # The dataset for the current project where various workflow
847              # related tables are stored.
848              #
849              # The supported resource type is:
850              #
851              # Google BigQuery:
852              #   bigquery.googleapis.com/{dataset}
853          "experiments": [ # The list of experiments to enable.
854            "A String",
855          ],
856          "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
857          "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
858              # options are passed through the service and are used to recreate the
859              # SDK pipeline options on the worker in a language agnostic and platform
860              # independent way.
861            "a_key": "", # Properties of the object.
862          },
863          "userAgent": { # A description of the process that generated the request.
864            "a_key": "", # Properties of the object.
865          },
866          "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
867              # unspecified, the service will attempt to choose a reasonable
868              # default.  This should be in the form of the API service name,
869              # e.g. "compute.googleapis.com".
870          "workerPools": [ # The worker pools. At least one "harness" worker pool must be
871              # specified in order for the job to have workers.
872            { # Describes one particular pool of Cloud Dataflow workers to be
873                # instantiated by the Cloud Dataflow service in order to perform the
874                # computations required by a job.  Note that a workflow job may use
875                # multiple pools, in order to match the various computational
876                # requirements of the various stages of the job.
877              "diskSourceImage": "A String", # Fully qualified source image for disks.
878              "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
879                  # using the standard Dataflow task runner.  Users should ignore
880                  # this field.
881                "workflowFileName": "A String", # The file to store the workflow in.
882                "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
883                    # will not be uploaded.
884                    #
885                    # The supported resource type is:
886                    #
887                    # Google Cloud Storage:
888                    #   storage.googleapis.com/{bucket}/{object}
889                    #   bucket.storage.googleapis.com/{object}
890                "commandlinesFileName": "A String", # The file to store preprocessing commands in.
891                "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
892                  "reportingEnabled": True or False, # Whether to send work progress updates to the service.
893                  "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
894                      # "shuffle/v1beta1".
895                  "workerId": "A String", # The ID of the worker running this pipeline.
896                  "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
897                      #
898                      # When workers access Google Cloud APIs, they logically do so via
899                      # relative URLs.  If this field is specified, it supplies the base
900                      # URL to use for resolving these relative URLs.  The normative
901                      # algorithm used is defined by RFC 1808, "Relative Uniform Resource
902                      # Locators".
903                      #
904                      # If not specified, the default value is "http://www.googleapis.com/"
905                  "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
906                      # "dataflow/v1b3/projects".
907                  "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
908                      # storage.
909                      #
910                      # The supported resource type is:
911                      #
912                      # Google Cloud Storage:
913                      #
914                      #   storage.googleapis.com/{bucket}/{object}
915                      #   bucket.storage.googleapis.com/{object}
916                },
917                "vmId": "A String", # The ID string of the VM.
918                "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
919                "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
920                "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
921                    # access the Cloud Dataflow API.
922                  "A String",
923                ],
924                "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
925                    # taskrunner; e.g. "root".
926                "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
927                    #
928                    # When workers access Google Cloud APIs, they logically do so via
929                    # relative URLs.  If this field is specified, it supplies the base
930                    # URL to use for resolving these relative URLs.  The normative
931                    # algorithm used is defined by RFC 1808, "Relative Uniform Resource
932                    # Locators".
933                    #
934                    # If not specified, the default value is "http://www.googleapis.com/"
935                "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
936                    # taskrunner; e.g. "wheel".
937                "languageHint": "A String", # The suggested backend language.
938                "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
939                    # console.
940                "streamingWorkerMainClass": "A String", # The streaming worker main class name.
941                "logDir": "A String", # The directory on the VM to store logs.
942                "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3"
943                "harnessCommand": "A String", # The command to launch the worker harness.
944                "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
945                    # temporary storage.
946                    #
947                    # The supported resource type is:
948                    #
949                    # Google Cloud Storage:
950                    #   storage.googleapis.com/{bucket}/{object}
951                    #   bucket.storage.googleapis.com/{object}
952                "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
953              },
954              "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
955                  # are supported.
956              "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
957                  # service will attempt to choose a reasonable default.
958              "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
959                  # the service will use the network "default".
960              "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
961                  # will attempt to choose a reasonable default.
962              "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
963                  # attempt to choose a reasonable default.
964              "dataDisks": [ # Data disks that are used by a VM in this workflow.
965                { # Describes the data disk used by a workflow job.
966                  "mountPoint": "A String", # Directory in a VM where disk is mounted.
967                  "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
968                      # attempt to choose a reasonable default.
969                  "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
970                      # must be a disk type appropriate to the project and zone in which
971                      # the workers will run.  If unknown or unspecified, the service
972                      # will attempt to choose a reasonable default.
973                      #
974                      # For example, the standard persistent disk type is a resource name
975                      # typically ending in "pd-standard".  If SSD persistent disks are
976                      # available, the resource name typically ends with "pd-ssd".  The
977                      # actual valid values are defined by the Google Compute Engine API,
978                      # not by the Cloud Dataflow API; consult the Google Compute Engine
979                      # documentation for more information about determining the set of
980                      # available disk types for a particular project and zone.
981                      #
982                      # Google Compute Engine Disk types are local to a particular
983                      # project in a particular zone, and so the resource name will
984                      # typically look something like this:
985                      #
986                      # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
987                },
988              ],
989              "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
990                  # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
991                  # `TEARDOWN_NEVER`.
992                  # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
993                  # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
994                  # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
995                  # down.
996                  #
997                  # If the workers are not torn down by the service, they will
998                  # continue to run and use Google Compute Engine VM resources in the
999                  # user's project until they are explicitly terminated by the user.
1000                  # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
1001                  # policy except for small, manually supervised test jobs.
1002                  #
1003                  # If unknown or unspecified, the service will attempt to choose a reasonable
1004                  # default.
1005              "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
1006                  # Compute Engine API.
1007              "ipConfiguration": "A String", # Configuration for VM IPs.
1008              "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
1009                  # service will choose a number of threads (according to the number of cores
1010                  # on the selected machine type for batch, or 1 by convention for streaming).
1011              "poolArgs": { # Extra arguments for this worker pool.
1012                "a_key": "", # Properties of the object. Contains field @type with type URL.
1013              },
1014              "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
1015                  # execute the job.  If zero or unspecified, the service will
1016                  # attempt to choose a reasonable default.
1017              "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
1018                  # harness, residing in Google Container Registry.
1019              "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
1020                  # the form "regions/REGION/subnetworks/SUBNETWORK".
1021              "packages": [ # Packages to be installed on workers.
1022                { # The packages that must be installed in order for a worker to run the
1023                    # steps of the Cloud Dataflow job that will be assigned to its worker
1024                    # pool.
1025                    #
1026                    # This is the mechanism by which the Cloud Dataflow SDK causes code to
1027                    # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
1028                    # might use this to install jars containing the user's code and all of the
1029                    # various dependencies (libraries, data files, etc.) required in order
1030                    # for that code to run.
1031                  "location": "A String", # The resource to read the package from. The supported resource type is:
1032                      #
1033                      # Google Cloud Storage:
1034                      #
1035                      #   storage.googleapis.com/{bucket}
1036                      #   bucket.storage.googleapis.com/
1037                  "name": "A String", # The name of the package.
1038                },
1039              ],
1040              "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
1041                "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
1042                "algorithm": "A String", # The algorithm to use for autoscaling.
1043              },
1044              "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
1045                  # select a default set of packages which are useful to worker
1046                  # harnesses written in a particular language.
1047              "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
1048                  # attempt to choose a reasonable default.
1049              "metadata": { # Metadata to set on the Google Compute Engine VMs.
1050                "a_key": "A String",
1051              },
1052            },
1053          ],
1054        },
1055        "pipelineDescription": { # A descriptive representation of the submitted pipeline and its executed form. # Preliminary field: The format of this data may change at any time.
1056            # A description of the user pipeline and the stages through which it is
1057            # executed.  Created by the Cloud Dataflow service.  Only retrieved with
1058            # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
1059            # This data is provided by the Dataflow service for ease of visualizing
1060            # the pipeline and interpreting Dataflow-provided metrics.
1061          "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
1062            { # Description of the type, names/ids, and input/outputs for a transform.
1063              "kind": "A String", # Type of transform.
1064              "name": "A String", # User provided name for this transform instance.
1065              "inputCollectionName": [ # User names for all collection inputs to this transform.
1066                "A String",
1067              ],
1068              "displayData": [ # Transform-specific display data.
1069                { # Data provided with a pipeline or transform to provide descriptive info.
1070                  "shortStrValue": "A String", # A possible additional shorter value to display.
1071                      # For example a java_class_name_value of com.mypackage.MyDoFn
1072                      # will be stored with MyDoFn as the short_str_value and
1073                      # com.mypackage.MyDoFn as the java_class_name value.
1074                      # short_str_value can be displayed and java_class_name_value
1075                      # will be displayed as a tooltip.
1076                  "durationValue": "A String", # Contains value if the data is of duration type.
1077                  "url": "A String", # An optional full URL.
1078                  "floatValue": 3.14, # Contains value if the data is of float type.
1079                  "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1080                      # language namespace (e.g. a Python module) which defines the display data.
1081                      # This allows a dax monitoring system to specially handle the data
1082                      # and perform custom rendering.
1083                  "javaClassValue": "A String", # Contains value if the data is of java class type.
1084                  "label": "A String", # An optional label to display in a dax UI for the element.
1085                  "boolValue": True or False, # Contains value if the data is of a boolean type.
1086                  "strValue": "A String", # Contains value if the data is of string type.
1087                  "key": "A String", # The key identifying the display data.
1088                      # This is intended to be used as a label for the display data
1089                      # when viewed in a dax monitoring system.
1090                  "int64Value": "A String", # Contains value if the data is of int64 type.
1091                  "timestampValue": "A String", # Contains value if the data is of timestamp type.
1092                },
1093              ],
1094              "outputCollectionName": [ # User names for all collection outputs to this transform.
1095                "A String",
1096              ],
1097              "id": "A String", # SDK generated id of this transform instance.
1098            },
1099          ],
1100          "displayData": [ # Pipeline level display data.
1101            { # Data provided with a pipeline or transform to provide descriptive info.
1102              "shortStrValue": "A String", # A possible additional shorter value to display.
1103                  # For example a java_class_name_value of com.mypackage.MyDoFn
1104                  # will be stored with MyDoFn as the short_str_value and
1105                  # com.mypackage.MyDoFn as the java_class_name value.
1106                  # short_str_value can be displayed and java_class_name_value
1107                  # will be displayed as a tooltip.
1108              "durationValue": "A String", # Contains value if the data is of duration type.
1109              "url": "A String", # An optional full URL.
1110              "floatValue": 3.14, # Contains value if the data is of float type.
1111              "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1112                  # language namespace (e.g. a Python module) which defines the display data.
1113                  # This allows a dax monitoring system to specially handle the data
1114                  # and perform custom rendering.
1115              "javaClassValue": "A String", # Contains value if the data is of java class type.
1116              "label": "A String", # An optional label to display in a dax UI for the element.
1117              "boolValue": True or False, # Contains value if the data is of a boolean type.
1118              "strValue": "A String", # Contains value if the data is of string type.
1119              "key": "A String", # The key identifying the display data.
1120                  # This is intended to be used as a label for the display data
1121                  # when viewed in a dax monitoring system.
1122              "int64Value": "A String", # Contains value if the data is of int64 type.
1123              "timestampValue": "A String", # Contains value if the data is of timestamp type.
1124            },
1125          ],
1126          "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
1127            { # Description of the composing transforms, names/ids, and input/outputs of a
1128                # stage of execution.  Some composing transforms and sources may have been
1129                # generated by the Dataflow service during execution planning.
1130              "componentSource": [ # Collections produced and consumed by component transforms of this stage.
1131                { # Description of an interstitial value between transforms in an execution
1132                    # stage.
1133                  "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1134                  "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1135                      # source is most closely associated.
1136                  "name": "A String", # Dataflow service generated name for this source.
1137                },
1138              ],
1139              "kind": "A String", # Type of transform this stage is executing.
1140              "name": "A String", # Dataflow service generated name for this stage.
1141              "outputSource": [ # Output sources for this stage.
1142                { # Description of an input or output of an execution stage.
1143                  "userName": "A String", # Human-readable name for this source; may be user or system generated.
1144                  "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1145                      # source is most closely associated.
1146                  "name": "A String", # Dataflow service generated name for this source.
1147                  "sizeBytes": "A String", # Size of the source, if measurable.
1148                },
1149              ],
1150              "inputSource": [ # Input sources for this stage.
1151                { # Description of an input or output of an execution stage.
1152                  "userName": "A String", # Human-readable name for this source; may be user or system generated.
1153                  "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1154                      # source is most closely associated.
1155                  "name": "A String", # Dataflow service generated name for this source.
1156                  "sizeBytes": "A String", # Size of the source, if measurable.
1157                },
1158              ],
1159              "componentTransform": [ # Transforms that comprise this execution stage.
1160                { # Description of a transform executed as part of an execution stage.
1161                  "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1162                  "originalTransform": "A String", # User name for the original user transform with which this transform is
1163                      # most closely associated.
1164                  "name": "A String", # Dataflow service generated name for this transform.
1165                },
1166              ],
1167              "id": "A String", # Dataflow service generated id for this stage.
1168            },
1169          ],
1170        },
1171        "steps": [ # The top-level steps that constitute the entire job.
1172          { # Defines a particular step within a Cloud Dataflow job.
1173              #
1174              # A job consists of multiple steps, each of which performs some
1175              # specific operation as part of the overall job.  Data is typically
1176              # passed from one step to another as part of the job.
1177              #
1178              # Here's an example of a sequence of steps which together implement a
1179              # Map-Reduce job:
1180              #
1181              #   * Read a collection of data from some source, parsing the
1182              #     collection's elements.
1183              #
1184              #   * Validate the elements.
1185              #
1186              #   * Apply a user-defined function to map each element to some value
1187              #     and extract an element-specific key value.
1188              #
1189              #   * Group elements with the same key into a single element with
1190              #     that key, transforming a multiply-keyed collection into a
1191              #     uniquely-keyed collection.
1192              #
1193              #   * Write the elements out to some data sink.
1194              #
1195              # Note that the Cloud Dataflow service may be used to run many different
1196              # types of jobs, not just Map-Reduce.
1197            "kind": "A String", # The kind of step in the Cloud Dataflow job.
1198            "properties": { # Named properties associated with the step. Each kind of
1199                # predefined step has its own required set of properties.
1200                # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
1201              "a_key": "", # Properties of the object.
1202            },
1203            "name": "A String", # The name that identifies the step. This must be unique for each
1204                # step with respect to all other steps in the Cloud Dataflow job.
1205          },
1206        ],
1207        "currentStateTime": "A String", # The timestamp associated with the current state.
1208        "tempFiles": [ # A set of files the system should be aware of that are used
1209            # for temporary storage. These temporary files will be
1210            # removed on job completion.
1211            # No duplicates are allowed.
1212            # No file patterns are supported.
1213            #
1214            # The supported files are:
1215            #
1216            # Google Cloud Storage:
1217            #
1218            #    storage.googleapis.com/{bucket}/{object}
1219            #    bucket.storage.googleapis.com/{object}
1220          "A String",
1221        ],
1222        "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
1223            # callers cannot mutate it.
1224          { # A message describing the state of a particular execution stage.
1225            "executionStageName": "A String", # The name of the execution stage.
1226            "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
1227            "currentStateTime": "A String", # The time at which the stage transitioned to this state.
1228          },
1229        ],
1230        "type": "A String", # The type of Cloud Dataflow job.
1231        "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
1232            # Cloud Dataflow service.
1233        "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
1234            # of the job it replaced.
1235            #
1236            # When sending a `CreateJobRequest`, you can update a job by specifying it
1237            # here. The job named here is stopped, and its intermediate state is
1238            # transferred to this job.
1239        "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
1240            # isn't contained in the submitted job.
1241          "stages": { # A mapping from each stage to the information about that stage.
1242            "a_key": { # Contains information about how a particular
1243                # google.dataflow.v1beta3.Step will be executed.
1244              "stepName": [ # The steps associated with the execution stage.
1245                  # Note that stages may have several steps, and that a given step
1246                  # might be run by more than one stage.
1247                "A String",
1248              ],
1249            },
1250          },
1251        },
1252      },
1253  }</pre>
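<p>The following is a minimal, illustrative sketch of calling this method with the generated Python client. It is not a complete program: it assumes Application Default Credentials and a template already staged in Cloud Storage, and every concrete value (project ID, bucket, zone, parameter names) is a placeholder. The <code>jobName</code>, <code>gcsPath</code>, and <code>parameters</code> fields are assumptions drawn from the CreateJobFromTemplateRequest body; consult the body schema above for the authoritative field list.</p>
<pre>
# Hedged sketch: create a Dataflow job from a staged template.
# All gs:// paths, the project ID, and the parameter names are hypothetical.
from googleapiclient.discovery import build

service = build('dataflow', 'v1b3')

body = {
    'jobName': 'wordcount-example',                    # hypothetical job name
    'gcsPath': 'gs://my-bucket/templates/wordcount',   # hypothetical template path
    'parameters': {                                    # template-specific parameters
        'inputFile': 'gs://my-bucket/input.txt',
        'output': 'gs://my-bucket/output',
    },
    'environment': {                                   # runtime environment (see body schema above)
        'tempLocation': 'gs://my-bucket/temp',         # must be a gs:// URL
        'zone': 'us-central1-f',
    },
}

job = service.projects().locations().templates().create(
    projectId='my-project-id',   # hypothetical project
    location='us-central1',      # regional endpoint to direct the request to
    body=body,
).execute()

# The response is a Job resource shaped like the schema above.
print(job.get('id'), job.get('currentState'))
</pre>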
1254</div>
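<p>The <code>pipelineDescription</code> structure documented above is only populated when a job is retrieved with JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL (for example via the jobs.get method of this API). The sketch below is a hedged example of walking that structure; it relies only on the field names shown in the schema above and assumes <code>job</code> is a Job resource returned by this library.</p>
<pre>
# Hedged sketch: summarize the pipeline description of a Job dict shaped like
# the response schema above. Fields that are absent are simply skipped.
def summarize_pipeline(job):
    description = job.get('pipelineDescription', {})

    # User-level transforms and their display data.
    for transform in description.get('originalPipelineTransform', []):
        print('%s (%s)' % (transform.get('name'), transform.get('kind')))
        for item in transform.get('displayData', []):
            # Each display-data entry carries one typed *Value alongside its key.
            print('  %s.%s' % (item.get('namespace'), item.get('key')))

    # Execution stages produced by the Dataflow service during planning.
    for stage in description.get('executionPipelineStage', []):
        print('stage %s: %s (%s)' % (stage.get('id'), stage.get('name'), stage.get('kind')))
</pre>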
1255
1256</body></html>