<html><body>
<style>

body, h1, h2, h3, div, span, p, pre, a {
  margin: 0;
  padding: 0;
  border: 0;
  font-weight: inherit;
  font-style: inherit;
  font-size: 100%;
  font-family: inherit;
  vertical-align: baseline;
}

body {
  font-size: 13px;
  padding: 1em;
}

h1 {
  font-size: 26px;
  margin-bottom: 1em;
}

h2 {
  font-size: 24px;
  margin-bottom: 1em;
}

h3 {
  font-size: 20px;
  margin-bottom: 1em;
  margin-top: 1em;
}

pre, code {
  line-height: 1.5;
  font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
}

pre {
  margin-top: 0.5em;
}

h1, h2, h3, p {
  font-family: Arial, sans-serif;
}

h1, h2, h3 {
  border-bottom: solid #CCC 1px;
}

.toc_element {
  margin-top: 0.5em;
}

.firstline {
  margin-left: 2em;
}

.method {
  margin-top: 1em;
  border: solid 1px #CCC;
  padding: 1em;
  background: #EEE;
}

.details {
  font-weight: bold;
  font-size: 14px;
}

</style>

<h1><a href="dataflow_v1b3.html">Google Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.templates.html">templates</a></h1>
<h2>Instance Methods</h2>
<p class="toc_element">
  <code><a href="#create">create(projectId, body, x__xgafv=None)</a></code></p>
<p class="firstline">Creates a Cloud Dataflow job from a template.</p>
<p class="toc_element">
  <code><a href="#get">get(projectId, gcsPath=None, location=None, x__xgafv=None, view=None)</a></code></p>
82<p class="firstline">Get the template associated with a template.</p>
83<p class="toc_element">
84  <code><a href="#launch">launch(projectId, body, gcsPath=None, location=None, validateOnly=None, x__xgafv=None)</a></code></p>
85<p class="firstline">Launch a template.</p>
<h3>Method Details</h3>
<div class="method">
    <code class="details" id="create">create(projectId, body, x__xgafv=None)</code>
  <pre>Creates a Cloud Dataflow job from a template.

Args:
  projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
  body: object, The request body. (required)
    The object takes the form of:

{ # A request to create a Cloud Dataflow job from a template.
    "environment": { # The environment values to set at runtime. # The runtime environment for the job.
      "machineType": "A String", # The machine type to use for the job. Defaults to the value from the
          # template if not specified.
      "zone": "A String", # The Compute Engine [availability
          # zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)
          # for launching worker instances to run your pipeline.
      "bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.
          # Use with caution.
      "tempLocation": "A String", # The Cloud Storage path to use for temporary files.
          # Must be a valid Cloud Storage URL, beginning with `gs://`.
      "serviceAccountEmail": "A String", # The email address of the service account to run the job as.
      "maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made
          # available to your pipeline during execution, from 1 to 1000.
    },
    "gcsPath": "A String", # Required. A Cloud Storage path to the template from which to
        # create the job.
        # Must be a valid Cloud Storage URL, beginning with `gs://`.
    "location": "A String", # The location to which to direct the request.
    "parameters": { # The runtime parameters to pass to the job.
      "a_key": "A String",
    },
    "jobName": "A String", # Required. The job name to use for the created job.
  }

  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format

Returns:
  An object of the form:

    { # Defines a job to be run by the Cloud Dataflow service.
      "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
          # If this field is set, the service will ensure its uniqueness.
          # The request to create a job will fail if the service has knowledge of a
          # previously submitted job with the same client's ID and job name.
          # The caller may use this field to ensure idempotence of job
          # creation across retried attempts to create a job.
          # By default, the field is empty and, in that case, the service ignores it.
      "requestedState": "A String", # The job's requested state.
          #
          # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
          # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
          # also be used to directly set a job's requested state to
          # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
          # job if it has not already reached a terminal state.
      "name": "A String", # The user-specified Cloud Dataflow job name.
          #
          # Only one Job with a given name may exist in a project at any
          # given time. If a caller attempts to create a Job with the same
          # name as an already-existing Job, the attempt returns the
          # existing Job.
          #
          # The name must match the regular expression
          # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
      "location": "A String", # The location that contains this job.
      "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
          # `JOB_STATE_UPDATED`), this field contains the ID of that job.
      "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
      "currentState": "A String", # The current state of the job.
          #
          # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
          # specified.
          #
          # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
          # terminal state. After a job has reached a terminal state, no
          # further state updates may be made.
          #
          # This field may be mutated by the Cloud Dataflow service;
          # callers cannot mutate it.
      "labels": { # User-defined labels for this job.
          #
          # The labels map can contain no more than 64 entries.  Entries of the labels
          # map are UTF8 strings that comply with the following restrictions:
          #
          # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
          # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
          # * Both keys and values are additionally constrained to be <= 128 bytes in
          # size.
        "a_key": "A String",
      },
      "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
          # corresponding name prefixes of the new job.
        "a_key": "A String",
      },
      "id": "A String", # The unique ID of this job.
          #
          # This field is set by the Cloud Dataflow service when the Job is
          # created, and is immutable for the life of the job.
      "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
        "version": { # A structure describing which components and their versions of the service
            # are required in order to run the job.
          "a_key": "", # Properties of the object.
        },
        "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
            # storage.  The system will append the suffix "/temp-{JOBNAME}" to
            # this resource prefix, where {JOBNAME} is the value of the
            # job_name field.  The resulting bucket and object prefix is used
            # as the prefix of the resources used to store temporary data
            # needed during the job execution.  NOTE: This will override the
            # value in taskrunner_settings.
            # The supported resource type is:
            #
            # Google Cloud Storage:
            #
            #   storage.googleapis.com/{bucket}/{object}
            #   bucket.storage.googleapis.com/{object}
        "internalExperiments": { # Experimental settings.
          "a_key": "", # Properties of the object. Contains field @type with type URL.
        },
        "dataset": "A String", # The dataset for the current project where various workflow
            # related tables are stored.
            #
            # The supported resource type is:
            #
            # Google BigQuery:
            #   bigquery.googleapis.com/{dataset}
        "experiments": [ # The list of experiments to enable.
          "A String",
        ],
        "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
        "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
            # options are passed through the service and are used to recreate the
            # SDK pipeline options on the worker in a language agnostic and platform
            # independent way.
          "a_key": "", # Properties of the object.
        },
        "userAgent": { # A description of the process that generated the request.
          "a_key": "", # Properties of the object.
        },
        "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
            # unspecified, the service will attempt to choose a reasonable
            # default.  This should be in the form of the API service name,
            # e.g. "compute.googleapis.com".
        "workerPools": [ # The worker pools. At least one "harness" worker pool must be
            # specified in order for the job to have workers.
          { # Describes one particular pool of Cloud Dataflow workers to be
              # instantiated by the Cloud Dataflow service in order to perform the
              # computations required by a job.  Note that a workflow job may use
              # multiple pools, in order to match the various computational
              # requirements of the various stages of the job.
            "diskSourceImage": "A String", # Fully qualified source image for disks.
            "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
                # using the standard Dataflow task runner.  Users should ignore
                # this field.
              "workflowFileName": "A String", # The file to store the workflow in.
              "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
                  # will not be uploaded.
                  #
                  # The supported resource type is:
                  #
                  # Google Cloud Storage:
                  #   storage.googleapis.com/{bucket}/{object}
                  #   bucket.storage.googleapis.com/{object}
              "commandlinesFileName": "A String", # The file to store preprocessing commands in.
              "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
                "reportingEnabled": True or False, # Whether to send work progress updates to the service.
                "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
                    # "shuffle/v1beta1".
                "workerId": "A String", # The ID of the worker running this pipeline.
                "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
                    #
                    # When workers access Google Cloud APIs, they logically do so via
                    # relative URLs.  If this field is specified, it supplies the base
                    # URL to use for resolving these relative URLs.  The normative
                    # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                    # Locators".
                    #
                    # If not specified, the default value is "http://www.googleapis.com/"
                "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
                    # "dataflow/v1b3/projects".
                "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
                    # storage.
                    #
                    # The supported resource type is:
                    #
                    # Google Cloud Storage:
                    #
                    #   storage.googleapis.com/{bucket}/{object}
                    #   bucket.storage.googleapis.com/{object}
              },
              "vmId": "A String", # The ID string of the VM.
              "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
              "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
              "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
                  # access the Cloud Dataflow API.
                "A String",
              ],
              "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
                  # taskrunner; e.g. "root".
              "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
                  #
                  # When workers access Google Cloud APIs, they logically do so via
                  # relative URLs.  If this field is specified, it supplies the base
                  # URL to use for resolving these relative URLs.  The normative
                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                  # Locators".
                  #
                  # If not specified, the default value is "http://www.googleapis.com/"
              "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
                  # taskrunner; e.g. "wheel".
              "languageHint": "A String", # The suggested backend language.
              "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
                  # console.
              "streamingWorkerMainClass": "A String", # The streaming worker main class name.
              "logDir": "A String", # The directory on the VM to store logs.
              "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
              "harnessCommand": "A String", # The command to launch the worker harness.
              "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
                  # temporary storage.
                  #
                  # The supported resource type is:
                  #
                  # Google Cloud Storage:
                  #   storage.googleapis.com/{bucket}/{object}
                  #   bucket.storage.googleapis.com/{object}
              "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
            },
            "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
                # are supported.
            "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
                # service will attempt to choose a reasonable default.
            "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
                # the service will use the network "default".
            "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
                # will attempt to choose a reasonable default.
            "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
                # attempt to choose a reasonable default.
            "dataDisks": [ # Data disks that are used by a VM in this workflow.
              { # Describes the data disk used by a workflow job.
                "mountPoint": "A String", # Directory in a VM where disk is mounted.
                "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
                    # attempt to choose a reasonable default.
                "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
                    # must be a disk type appropriate to the project and zone in which
                    # the workers will run.  If unknown or unspecified, the service
                    # will attempt to choose a reasonable default.
                    #
                    # For example, the standard persistent disk type is a resource name
                    # typically ending in "pd-standard".  If SSD persistent disks are
                    # available, the resource name typically ends with "pd-ssd".  The
339                    # actual valid values are defined the Google Compute Engine API,
340                    # not by the Cloud Dataflow API; consult the Google Compute Engine
341                    # documentation for more information about determining the set of
342                    # available disk types for a particular project and zone.
343                    #
344                    # Google Compute Engine Disk types are local to a particular
345                    # project in a particular zone, and so the resource name will
346                    # typically look something like this:
347                    #
348                    # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
349              },
350            ],
            "teardownPolicy": "A String", # Sets the policy for determining when to turn down worker pools.
                # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
                # `TEARDOWN_NEVER`.
                # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
                # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
                # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
                # down.
                #
                # If the workers are not torn down by the service, they will
                # continue to run and use Google Compute Engine VM resources in the
                # user's project until they are explicitly terminated by the user.
                # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
                # policy except for small, manually supervised test jobs.
                #
                # If unknown or unspecified, the service will attempt to choose a reasonable
                # default.
            "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
                # Compute Engine API.
            "ipConfiguration": "A String", # Configuration for VM IPs.
            "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
                # service will choose a number of threads (according to the number of cores
                # on the selected machine type for batch, or 1 by convention for streaming).
            "poolArgs": { # Extra arguments for this worker pool.
              "a_key": "", # Properties of the object. Contains field @type with type URL.
            },
            "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
                # execute the job.  If zero or unspecified, the service will
                # attempt to choose a reasonable default.
            "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
                # harness, residing in Google Container Registry.
            "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
                # the form "regions/REGION/subnetworks/SUBNETWORK".
            "packages": [ # Packages to be installed on workers.
              { # The packages that must be installed in order for a worker to run the
                  # steps of the Cloud Dataflow job that will be assigned to its worker
                  # pool.
                  #
                  # This is the mechanism by which the Cloud Dataflow SDK causes code to
                  # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
                  # might use this to install jars containing the user's code and all of the
                  # various dependencies (libraries, data files, etc.) required in order
                  # for that code to run.
                "location": "A String", # The resource to read the package from. The supported resource type is:
                    #
                    # Google Cloud Storage:
                    #
                    #   storage.googleapis.com/{bucket}
                    #   bucket.storage.googleapis.com/
                "name": "A String", # The name of the package.
              },
            ],
            "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
              "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
              "algorithm": "A String", # The algorithm to use for autoscaling.
            },
            "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
                # select a default set of packages which are useful to worker
                # harnesses written in a particular language.
            "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
                # attempt to choose a reasonable default.
            "metadata": { # Metadata to set on the Google Compute Engine VMs.
              "a_key": "A String",
            },
          },
        ],
      },
      "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed
          # form.  This data is provided by the Dataflow service for ease of visualizing
          # the pipeline and interpreting Dataflow provided metrics.
          # Preliminary field: The format of this data may change at any time.
          # A description of the user pipeline and stages through which it is executed.
          # Created by Cloud Dataflow service.  Only retrieved with
          # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
        "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
          { # Description of the type, names/ids, and input/outputs for a transform.
            "kind": "A String", # Type of transform.
            "name": "A String", # User provided name for this transform instance.
            "inputCollectionName": [ # User names for all collection inputs to this transform.
              "A String",
            ],
            "displayData": [ # Transform-specific display data.
              { # Data provided with a pipeline or transform to provide descriptive info.
                "shortStrValue": "A String", # A possible additional shorter value to display.
                    # For example a java_class_name_value of com.mypackage.MyDoFn
                    # will be stored with MyDoFn as the short_str_value and
                    # com.mypackage.MyDoFn as the java_class_name value.
                    # short_str_value can be displayed and java_class_name_value
                    # will be displayed as a tooltip.
                "durationValue": "A String", # Contains value if the data is of duration type.
                "url": "A String", # An optional full URL.
                "floatValue": 3.14, # Contains value if the data is of float type.
                "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                    # language namespace (e.g. a Python module) which defines the display data.
                    # This allows a dax monitoring system to specially handle the data
                    # and perform custom rendering.
                "javaClassValue": "A String", # Contains value if the data is of java class type.
                "label": "A String", # An optional label to display in a dax UI for the element.
                "boolValue": True or False, # Contains value if the data is of a boolean type.
                "strValue": "A String", # Contains value if the data is of string type.
                "key": "A String", # The key identifying the display data.
                    # This is intended to be used as a label for the display data
                    # when viewed in a dax monitoring system.
                "int64Value": "A String", # Contains value if the data is of int64 type.
                "timestampValue": "A String", # Contains value if the data is of timestamp type.
              },
            ],
            "outputCollectionName": [ # User names for all collection outputs to this transform.
              "A String",
            ],
            "id": "A String", # SDK generated id of this transform instance.
          },
        ],
        "displayData": [ # Pipeline level display data.
          { # Data provided with a pipeline or transform to provide descriptive info.
            "shortStrValue": "A String", # A possible additional shorter value to display.
                # For example a java_class_name_value of com.mypackage.MyDoFn
                # will be stored with MyDoFn as the short_str_value and
                # com.mypackage.MyDoFn as the java_class_name value.
                # short_str_value can be displayed and java_class_name_value
                # will be displayed as a tooltip.
            "durationValue": "A String", # Contains value if the data is of duration type.
            "url": "A String", # An optional full URL.
            "floatValue": 3.14, # Contains value if the data is of float type.
            "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                # language namespace (e.g. a Python module) which defines the display data.
                # This allows a dax monitoring system to specially handle the data
                # and perform custom rendering.
            "javaClassValue": "A String", # Contains value if the data is of java class type.
            "label": "A String", # An optional label to display in a dax UI for the element.
            "boolValue": True or False, # Contains value if the data is of a boolean type.
            "strValue": "A String", # Contains value if the data is of string type.
            "key": "A String", # The key identifying the display data.
                # This is intended to be used as a label for the display data
                # when viewed in a dax monitoring system.
            "int64Value": "A String", # Contains value if the data is of int64 type.
            "timestampValue": "A String", # Contains value if the data is of timestamp type.
          },
        ],
        "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
          { # Description of the composing transforms, names/ids, and input/outputs of a
              # stage of execution.  Some composing transforms and sources may have been
              # generated by the Dataflow service during execution planning.
            "componentSource": [ # Collections produced and consumed by component transforms of this stage.
              { # Description of an interstitial value between transforms in an execution
                  # stage.
                "userName": "A String", # Human-readable name for this transform; may be user or system generated.
                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                    # source is most closely associated.
                "name": "A String", # Dataflow service generated name for this source.
              },
            ],
            "kind": "A String", # Type of transform this stage is executing.
            "name": "A String", # Dataflow service generated name for this stage.
            "outputSource": [ # Output sources for this stage.
              { # Description of an input or output of an execution stage.
                "userName": "A String", # Human-readable name for this source; may be user or system generated.
                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                    # source is most closely associated.
                "name": "A String", # Dataflow service generated name for this source.
                "sizeBytes": "A String", # Size of the source, if measurable.
              },
            ],
            "inputSource": [ # Input sources for this stage.
              { # Description of an input or output of an execution stage.
                "userName": "A String", # Human-readable name for this source; may be user or system generated.
                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                    # source is most closely associated.
                "name": "A String", # Dataflow service generated name for this source.
                "sizeBytes": "A String", # Size of the source, if measurable.
              },
            ],
            "componentTransform": [ # Transforms that comprise this execution stage.
              { # Description of a transform executed as part of an execution stage.
                "userName": "A String", # Human-readable name for this transform; may be user or system generated.
                "originalTransform": "A String", # User name for the original user transform with which this transform is
                    # most closely associated.
                "name": "A String", # Dataflow service generated name for this source.
              },
            ],
            "id": "A String", # Dataflow service generated id for this stage.
          },
        ],
      },
      "steps": [ # The top-level steps that constitute the entire job.
        { # Defines a particular step within a Cloud Dataflow job.
            #
            # A job consists of multiple steps, each of which performs some
            # specific operation as part of the overall job.  Data is typically
            # passed from one step to another as part of the job.
            #
            # Here's an example of a sequence of steps which together implement a
            # Map-Reduce job:
            #
            #   * Read a collection of data from some source, parsing the
            #     collection's elements.
            #
            #   * Validate the elements.
            #
            #   * Apply a user-defined function to map each element to some value
            #     and extract an element-specific key value.
            #
            #   * Group elements with the same key into a single element with
            #     that key, transforming a multiply-keyed collection into a
            #     uniquely-keyed collection.
            #
            #   * Write the elements out to some data sink.
            #
            # Note that the Cloud Dataflow service may be used to run many different
            # types of jobs, not just Map-Reduce.
          "kind": "A String", # The kind of step in the Cloud Dataflow job.
          "properties": { # Named properties associated with the step. Each kind of
              # predefined step has its own required set of properties.
              # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
            "a_key": "", # Properties of the object.
          },
          "name": "A String", # The name that identifies the step. This must be unique for each
              # step with respect to all other steps in the Cloud Dataflow job.
        },
      ],
      "currentStateTime": "A String", # The timestamp associated with the current state.
      "tempFiles": [ # A set of files the system should be aware of that are used
          # for temporary storage. These temporary files will be
          # removed on job completion.
          # No duplicates are allowed.
          # No file patterns are supported.
          #
          # The supported files are:
          #
          # Google Cloud Storage:
          #
          #    storage.googleapis.com/{bucket}/{object}
          #    bucket.storage.googleapis.com/{object}
        "A String",
      ],
      "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
          # callers cannot mutate it.
        { # A message describing the state of a particular execution stage.
          "executionStageName": "A String", # The name of the execution stage.
          "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
          "currentStateTime": "A String", # The time at which the stage transitioned to this state.
        },
      ],
      "type": "A String", # The type of Cloud Dataflow job.
      "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
          # Cloud Dataflow service.
      "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
          # of the job it replaced.
          #
          # When sending a `CreateJobRequest`, you can update a job by specifying it
          # here. The job named here is stopped, and its intermediate state is
          # transferred to this job.
      "executionInfo": { # Deprecated. Additional information about how a Cloud Dataflow job
          # will be executed that isn't contained in the submitted job.
        "stages": { # A mapping from each stage to the information about that stage.
          "a_key": { # Contains information about how a particular
              # google.dataflow.v1beta3.Step will be executed.
            "stepName": [ # The steps associated with the execution stage.
                # Note that stages may have several steps, and that a given step
                # might be run by more than one stage.
              "A String",
            ],
          },
        },
      },
    }</pre>
</div>
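<p>A short sketch of calling <code>create</code>; every literal value here (project, bucket, template path, parameter names) is a hypothetical placeholder, and the request body mirrors the documented fields above:</p>
<pre>
# Assumes `service` was built as in the earlier snippet.
body = {
    'jobName': 'example-from-template',                     # Required.
    'gcsPath': 'gs://example-bucket/templates/my-template', # Required.
    'parameters': {'inputFile': 'gs://example-bucket/input.txt'},
    'environment': {
        'tempLocation': 'gs://example-bucket/temp',
        'maxWorkers': 3,
    },
}
job = service.projects().templates().create(
    projectId='example-project', body=body).execute()
# The response is the Job object documented above.
print(job['id'], job.get('currentState'))
</pre>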

<div class="method">
    <code class="details" id="get">get(projectId, gcsPath=None, location=None, x__xgafv=None, view=None)</code>
  <pre>Get the template associated with a template file.

Args:
  projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
  gcsPath: string, Required. A Cloud Storage path to the template from which to
create the job.
Must be a valid Cloud Storage URL, beginning with `gs://`.
  location: string, The location to which to direct the request.
  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format
  view: string, The view to retrieve. Defaults to METADATA_ONLY.

Returns:
  An object of the form:

    { # The response to a GetTemplate request.
    "status": { # The status of the get template request. Any problems with the
        # request will be indicated in the error_details.
        # The `Status` type defines a logical error model that is suitable for different
        # programming environments, including REST APIs and RPC APIs. It is used by
        # [gRPC](https://github.com/grpc). The error model is designed to be:
        #
        # - Simple to use and understand for most users
        # - Flexible enough to meet unexpected needs
        #
        # # Overview
        #
        # The `Status` message contains three pieces of data: error code, error message,
        # and error details. The error code should be an enum value of
        # google.rpc.Code, but it may accept additional error codes if needed.  The
        # error message should be a developer-facing English message that helps
        # developers *understand* and *resolve* the error. If a localized user-facing
        # error message is needed, put the localized message in the error details or
        # localize it in the client. The optional error details may contain arbitrary
        # information about the error. There is a predefined set of error detail types
        # in the package `google.rpc` that can be used for common error conditions.
        #
        # # Language mapping
        #
        # The `Status` message is the logical representation of the error model, but it
        # is not necessarily the actual wire format. When the `Status` message is
        # exposed in different client libraries and different wire protocols, it can be
        # mapped differently. For example, it will likely be mapped to some exceptions
        # in Java, but more likely mapped to some error codes in C.
        #
        # # Other uses
        #
        # The error model and the `Status` message can be used in a variety of
        # environments, either with or without APIs, to provide a
        # consistent developer experience across different environments.
        #
        # Example uses of this error model include:
        #
        # - Partial errors. If a service needs to return partial errors to the client,
        #     it may embed the `Status` in the normal response to indicate the partial
        #     errors.
        #
        # - Workflow errors. A typical workflow has multiple steps. Each step may
        #     have a `Status` message for error reporting.
        #
        # - Batch operations. If a client uses batch request and batch response, the
        #     `Status` message should be used directly inside batch response, one for
        #     each error sub-response.
        #
        # - Asynchronous operations. If an API call embeds asynchronous operation
        #     results in its response, the status of those operations should be
        #     represented directly using the `Status` message.
        #
        # - Logging. If some API errors are stored in logs, the message `Status` could
        #     be used directly after any stripping needed for security/privacy reasons.
      "message": "A String", # A developer-facing error message, which should be in English. Any
          # user-facing error message should be localized and sent in the
          # google.rpc.Status.details field, or localized by the client.
      "code": 42, # The status code, which should be an enum value of google.rpc.Code.
      "details": [ # A list of messages that carry the error details.  There will be a
          # common set of message types for APIs to use.
        {
          "a_key": "", # Properties of the object. Contains field @type with type URL.
        },
      ],
    },
    "metadata": { # Metadata describing a template. # The template metadata describing the template name, available
        # parameters, etc.
      "name": "A String", # Required. The name of the template.
      "parameters": [ # The parameters for the template.
        { # Metadata for a specific parameter.
          "regexes": [ # Optional. Regexes that the parameter must match.
            "A String",
          ],
          "helpText": "A String", # Required. The help text to display for the parameter.
          "name": "A String", # Required. The name of the parameter.
          "isOptional": True or False, # Optional. Whether the parameter is optional. Defaults to false.
          "label": "A String", # Required. The label to display for the parameter.
        },
      ],
      "description": "A String", # Optional. A description of the template.
    },
  }</pre>
</div>
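<p>A companion sketch for <code>get</code>, which retrieves template metadata without creating a job; the Cloud Storage path is again a hypothetical placeholder:</p>
<pre>
# Fetch the metadata (parameter names, labels, help text) for a staged
# template. view defaults to METADATA_ONLY, shown explicitly here.
response = service.projects().templates().get(
    projectId='example-project',
    gcsPath='gs://example-bucket/templates/my-template',
    view='METADATA_ONLY').execute()

for param in response.get('metadata', {}).get('parameters', []):
    print(param['name'], '-', param.get('helpText', ''))
</pre>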

<div class="method">
    <code class="details" id="launch">launch(projectId, body, gcsPath=None, location=None, validateOnly=None, x__xgafv=None)</code>
  <pre>Launch a template.

Args:
  projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
  body: object, The request body. (required)
    The object takes the form of:

{ # Parameters to provide to the template being launched.
    "environment": { # The environment values to set at runtime. # The runtime environment for the job.
      "machineType": "A String", # The machine type to use for the job. Defaults to the value from the
          # template if not specified.
      "zone": "A String", # The Compute Engine [availability
          # zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)
          # for launching worker instances to run your pipeline.
      "bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.
          # Use with caution.
      "tempLocation": "A String", # The Cloud Storage path to use for temporary files.
          # Must be a valid Cloud Storage URL, beginning with `gs://`.
      "serviceAccountEmail": "A String", # The email address of the service account to run the job as.
      "maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made
          # available to your pipeline during execution, from 1 to 1000.
    },
    "parameters": { # The runtime parameters to pass to the job.
      "a_key": "A String",
    },
    "jobName": "A String", # Required. The job name to use for the created job.
  }

  gcsPath: string, Required. A Cloud Storage path to the template from which to create
the job.
Must be a valid Cloud Storage URL, beginning with `gs://`.
  location: string, The location to which to direct the request.
  validateOnly: boolean, If true, the request is validated but not actually executed.
Defaults to false.
  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format

Returns:
  An object of the form:

    { # Response to the request to launch a template.
    "job": { # Defines a job to be run by the Cloud Dataflow service. # The job that was launched, if the request was not a dry run and
        # the job was successfully launched.
        "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
            # If this field is set, the service will ensure its uniqueness.
            # The request to create a job will fail if the service has knowledge of a
            # previously submitted job with the same client's ID and job name.
            # The caller may use this field to ensure idempotence of job
            # creation across retried attempts to create a job.
            # By default, the field is empty and, in that case, the service ignores it.
        "requestedState": "A String", # The job's requested state.
            #
            # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
            # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
            # also be used to directly set a job's requested state to
            # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
            # job if it has not already reached a terminal state.
        "name": "A String", # The user-specified Cloud Dataflow job name.
            #
            # Only one Job with a given name may exist in a project at any
            # given time. If a caller attempts to create a Job with the same
            # name as an already-existing Job, the attempt returns the
            # existing Job.
            #
            # The name must match the regular expression
            # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
        "location": "A String", # The location that contains this job.
        "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
            # `JOB_STATE_UPDATED`), this field contains the ID of that job.
        "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
        "currentState": "A String", # The current state of the job.
            #
            # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
            # specified.
            #
            # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
            # terminal state. After a job has reached a terminal state, no
            # further state updates may be made.
            #
            # This field may be mutated by the Cloud Dataflow service;
            # callers cannot mutate it.
        "labels": { # User-defined labels for this job.
            #
            # The labels map can contain no more than 64 entries.  Entries of the labels
            # map are UTF8 strings that comply with the following restrictions:
            #
            # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
            # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
            # * Both keys and values are additionally constrained to be <= 128 bytes in
            # size.
          "a_key": "A String",
        },
        "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
            # corresponding name prefixes of the new job.
          "a_key": "A String",
        },
        "id": "A String", # The unique ID of this job.
            #
            # This field is set by the Cloud Dataflow service when the Job is
            # created, and is immutable for the life of the job.
        "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
          "version": { # A structure describing which components and their versions of the service
              # are required in order to run the job.
            "a_key": "", # Properties of the object.
          },
          "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
              # storage.  The system will append the suffix "/temp-{JOBNAME}" to
831              # this resource prefix, where {JOBNAME} is the value of the
832              # job_name field.  The resulting bucket and object prefix is used
833              # as the prefix of the resources used to store temporary data
834              # needed during the job execution.  NOTE: This will override the
835              # value in taskrunner_settings.
836              # The supported resource type is:
837              #
838              # Google Cloud Storage:
839              #
840              #   storage.googleapis.com/{bucket}/{object}
841              #   bucket.storage.googleapis.com/{object}
842          "internalExperiments": { # Experimental settings.
843            "a_key": "", # Properties of the object. Contains field @type with type URL.
844          },
845          "dataset": "A String", # The dataset for the current project where various workflow
846              # related tables are stored.
847              #
848              # The supported resource type is:
849              #
850              # Google BigQuery:
851              #   bigquery.googleapis.com/{dataset}
852          "experiments": [ # The list of experiments to enable.
853            "A String",
854          ],
855          "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
856          "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
857              # options are passed through the service and are used to recreate the
858              # SDK pipeline options on the worker in a language agnostic and platform
859              # independent way.
860            "a_key": "", # Properties of the object.
861          },
862          "userAgent": { # A description of the process that generated the request.
863            "a_key": "", # Properties of the object.
864          },
865          "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
866              # unspecified, the service will attempt to choose a reasonable
867              # default.  This should be in the form of the API service name,
868              # e.g. "compute.googleapis.com".
869          "workerPools": [ # The worker pools. At least one "harness" worker pool must be
870              # specified in order for the job to have workers.
871            { # Describes one particular pool of Cloud Dataflow workers to be
872                # instantiated by the Cloud Dataflow service in order to perform the
873                # computations required by a job.  Note that a workflow job may use
874                # multiple pools, in order to match the various computational
875                # requirements of the various stages of the job.
876              "diskSourceImage": "A String", # Fully qualified source image for disks.
877              "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
878                  # using the standard Dataflow task runner.  Users should ignore
879                  # this field.
880                "workflowFileName": "A String", # The file to store the workflow in.
881                "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
882                    # will not be uploaded.
883                    #
884                    # The supported resource type is:
885                    #
886                    # Google Cloud Storage:
887                    #   storage.googleapis.com/{bucket}/{object}
888                    #   bucket.storage.googleapis.com/{object}
889                "commandlinesFileName": "A String", # The file to store preprocessing commands in.
890                "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
891                  "reportingEnabled": True or False, # Whether to send work progress updates to the service.
892                  "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
893                      # "shuffle/v1beta1".
894                  "workerId": "A String", # The ID of the worker running this pipeline.
895                  "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
896                      #
897                      # When workers access Google Cloud APIs, they logically do so via
898                      # relative URLs.  If this field is specified, it supplies the base
899                      # URL to use for resolving these relative URLs.  The normative
900                      # algorithm used is defined by RFC 1808, "Relative Uniform Resource
901                      # Locators".
902                      #
903                      # If not specified, the default value is "http://www.googleapis.com/"
904                  "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
905                      # "dataflow/v1b3/projects".
906                  "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
907                      # storage.
908                      #
909                      # The supported resource type is:
910                      #
911                      # Google Cloud Storage:
912                      #
913                      #   storage.googleapis.com/{bucket}/{object}
914                      #   bucket.storage.googleapis.com/{object}
915                },
916                "vmId": "A String", # The ID string of the VM.
917                "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
918                "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
919                "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
920                    # access the Cloud Dataflow API.
921                  "A String",
922                ],
                "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
                    # taskrunner; e.g. "root".
                "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
                    #
                    # When workers access Google Cloud APIs, they logically do so via
                    # relative URLs.  If this field is specified, it supplies the base
                    # URL to use for resolving these relative URLs.  The normative
                    # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                    # Locators".
                    #
                    # If not specified, the default value is "http://www.googleapis.com/"
                "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
                    # taskrunner; e.g. "wheel".
                "languageHint": "A String", # The suggested backend language.
                "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
                    # console.
                "streamingWorkerMainClass": "A String", # The streaming worker main class name.
                "logDir": "A String", # The directory on the VM to store logs.
                "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3".
                "harnessCommand": "A String", # The command to launch the worker harness.
                "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
                    # temporary storage.
                    #
                    # The supported resource type is:
                    #
                    # Google Cloud Storage:
                    #   storage.googleapis.com/{bucket}/{object}
                    #   bucket.storage.googleapis.com/{object}
                "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
              },
              "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
                  # are supported.
              "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
                  # service will attempt to choose a reasonable default.
              "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
                  # the service will use the network "default".
              "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
                  # will attempt to choose a reasonable default.
              "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
                  # attempt to choose a reasonable default.
              "dataDisks": [ # Data disks that are used by a VM in this workflow.
                { # Describes the data disk used by a workflow job.
                  "mountPoint": "A String", # Directory in a VM where disk is mounted.
                  "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
                      # attempt to choose a reasonable default.
                  "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
                      # must be a disk type appropriate to the project and zone in which
                      # the workers will run.  If unknown or unspecified, the service
                      # will attempt to choose a reasonable default.
                      #
                      # For example, the standard persistent disk type is a resource name
                      # typically ending in "pd-standard".  If SSD persistent disks are
                      # available, the resource name typically ends with "pd-ssd".  The
                      # actual valid values are defined by the Google Compute Engine API,
                      # not by the Cloud Dataflow API; consult the Google Compute Engine
                      # documentation for more information about determining the set of
                      # available disk types for a particular project and zone.
                      #
                      # Google Compute Engine Disk types are local to a particular
                      # project in a particular zone, and so the resource name will
                      # typically look something like this:
                      #
                      # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
                },
              ],
              "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
                  # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
                  # `TEARDOWN_NEVER`.
                  # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
                  # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
                  # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
                  # down.
                  #
                  # If the workers are not torn down by the service, they will
                  # continue to run and use Google Compute Engine VM resources in the
                  # user's project until they are explicitly terminated by the user.
                  # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
                  # policy except for small, manually supervised test jobs.
                  #
                  # If unknown or unspecified, the service will attempt to choose a reasonable
                  # default.
              "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
                  # Compute Engine API.
              "ipConfiguration": "A String", # Configuration for VM IPs.
              "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
                  # service will choose a number of threads (according to the number of cores
                  # on the selected machine type for batch, or 1 by convention for streaming).
              "poolArgs": { # Extra arguments for this worker pool.
                "a_key": "", # Properties of the object. Contains field @type with type URL.
              },
              "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
                  # execute the job.  If zero or unspecified, the service will
                  # attempt to choose a reasonable default.
              "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
                  # harness, residing in Google Container Registry.
              "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
                  # the form "regions/REGION/subnetworks/SUBNETWORK".
              "packages": [ # Packages to be installed on workers.
                { # The packages that must be installed in order for a worker to run the
                    # steps of the Cloud Dataflow job that will be assigned to its worker
                    # pool.
                    #
                    # This is the mechanism by which the Cloud Dataflow SDK causes code to
                    # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
                    # might use this to install jars containing the user's code and all of the
                    # various dependencies (libraries, data files, etc.) required in order
                    # for that code to run.
                  "location": "A String", # The resource to read the package from. The supported resource type is:
                      #
                      # Google Cloud Storage:
                      #
                      #   storage.googleapis.com/{bucket}
                      #   bucket.storage.googleapis.com/
                  "name": "A String", # The name of the package.
                },
              ],
              "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
                "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
                "algorithm": "A String", # The algorithm to use for autoscaling.
              },
              "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
                  # select a default set of packages which are useful to worker
                  # harnesses written in a particular language.
              "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
                  # attempt to choose a reasonable default.
              "metadata": { # Metadata to set on the Google Compute Engine VMs.
                "a_key": "A String",
              },
            },
          ],
        },
        "pipelineDescription": { # A descriptive representation of the submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
            # A description of the user pipeline and stages through which it is executed.
            # Created by Cloud Dataflow service.  Only retrieved with
            # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
            # form.  This data is provided by the Dataflow service for ease of visualizing
            # the pipeline and interpreting Dataflow provided metrics.
          "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
            { # Description of the type, names/ids, and input/outputs for a transform.
              "kind": "A String", # Type of transform.
              "name": "A String", # User provided name for this transform instance.
              "inputCollectionName": [ # User names for all collection inputs to this transform.
                "A String",
              ],
              "displayData": [ # Transform-specific display data.
                { # Data provided with a pipeline or transform to provide descriptive info.
                  "shortStrValue": "A String", # A possible additional shorter value to display.
                      # For example, a java_class_name_value of com.mypackage.MyDoFn
                      # will be stored with MyDoFn as the short_str_value and
                      # com.mypackage.MyDoFn as the java_class_name value.
                      # short_str_value can be displayed and java_class_name_value
                      # will be displayed as a tooltip.
                  "durationValue": "A String", # Contains value if the data is of duration type.
                  "url": "A String", # An optional full URL.
                  "floatValue": 3.14, # Contains value if the data is of float type.
                  "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                      # language namespace (e.g. a Python module) which defines the display data.
                      # This allows a dax monitoring system to specially handle the data
                      # and perform custom rendering.
                  "javaClassValue": "A String", # Contains value if the data is of java class type.
                  "label": "A String", # An optional label to display in a dax UI for the element.
                  "boolValue": True or False, # Contains value if the data is of a boolean type.
                  "strValue": "A String", # Contains value if the data is of string type.
                  "key": "A String", # The key identifying the display data.
                      # This is intended to be used as a label for the display data
                      # when viewed in a dax monitoring system.
                  "int64Value": "A String", # Contains value if the data is of int64 type.
                  "timestampValue": "A String", # Contains value if the data is of timestamp type.
                },
              ],
              "outputCollectionName": [ # User names for all collection outputs to this transform.
                "A String",
              ],
              "id": "A String", # SDK generated id of this transform instance.
            },
          ],
          "displayData": [ # Pipeline level display data.
            { # Data provided with a pipeline or transform to provide descriptive info.
              "shortStrValue": "A String", # A possible additional shorter value to display.
                  # For example, a java_class_name_value of com.mypackage.MyDoFn
                  # will be stored with MyDoFn as the short_str_value and
                  # com.mypackage.MyDoFn as the java_class_name value.
                  # short_str_value can be displayed and java_class_name_value
                  # will be displayed as a tooltip.
              "durationValue": "A String", # Contains value if the data is of duration type.
              "url": "A String", # An optional full URL.
              "floatValue": 3.14, # Contains value if the data is of float type.
              "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                  # language namespace (e.g. a Python module) which defines the display data.
                  # This allows a dax monitoring system to specially handle the data
                  # and perform custom rendering.
              "javaClassValue": "A String", # Contains value if the data is of java class type.
              "label": "A String", # An optional label to display in a dax UI for the element.
              "boolValue": True or False, # Contains value if the data is of a boolean type.
              "strValue": "A String", # Contains value if the data is of string type.
              "key": "A String", # The key identifying the display data.
                  # This is intended to be used as a label for the display data
                  # when viewed in a dax monitoring system.
              "int64Value": "A String", # Contains value if the data is of int64 type.
              "timestampValue": "A String", # Contains value if the data is of timestamp type.
            },
          ],
          "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
            { # Description of the composing transforms, names/ids, and input/outputs of a
                # stage of execution.  Some composing transforms and sources may have been
                # generated by the Dataflow service during execution planning.
              "componentSource": [ # Collections produced and consumed by component transforms of this stage.
                { # Description of an interstitial value between transforms in an execution
                    # stage.
                  "userName": "A String", # Human-readable name for this transform; may be user or system generated.
                  "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                      # source is most closely associated.
                  "name": "A String", # Dataflow service generated name for this source.
                },
              ],
              "kind": "A String", # Type of transform this stage is executing.
              "name": "A String", # Dataflow service generated name for this stage.
              "outputSource": [ # Output sources for this stage.
                { # Description of an input or output of an execution stage.
                  "userName": "A String", # Human-readable name for this source; may be user or system generated.
                  "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                      # source is most closely associated.
                  "name": "A String", # Dataflow service generated name for this source.
                  "sizeBytes": "A String", # Size of the source, if measurable.
                },
              ],
              "inputSource": [ # Input sources for this stage.
                { # Description of an input or output of an execution stage.
                  "userName": "A String", # Human-readable name for this source; may be user or system generated.
                  "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                      # source is most closely associated.
                  "name": "A String", # Dataflow service generated name for this source.
                  "sizeBytes": "A String", # Size of the source, if measurable.
                },
              ],
              "componentTransform": [ # Transforms that comprise this execution stage.
                { # Description of a transform executed as part of an execution stage.
                  "userName": "A String", # Human-readable name for this transform; may be user or system generated.
                  "originalTransform": "A String", # User name for the original user transform with which this transform is
                      # most closely associated.
                  "name": "A String", # Dataflow service generated name for this source.
                },
              ],
              "id": "A String", # Dataflow service generated id for this stage.
            },
          ],
        },
        "steps": [ # The top-level steps that constitute the entire job.
          { # Defines a particular step within a Cloud Dataflow job.
              #
              # A job consists of multiple steps, each of which performs some
              # specific operation as part of the overall job.  Data is typically
              # passed from one step to another as part of the job.
              #
              # Here's an example of a sequence of steps which together implement a
              # Map-Reduce job:
              #
              #   * Read a collection of data from some source, parsing the
              #     collection's elements.
              #
              #   * Validate the elements.
              #
              #   * Apply a user-defined function to map each element to some value
              #     and extract an element-specific key value.
              #
              #   * Group elements with the same key into a single element with
              #     that key, transforming a multiply-keyed collection into a
              #     uniquely-keyed collection.
              #
              #   * Write the elements out to some data sink.
              #
              # Note that the Cloud Dataflow service may be used to run many different
              # types of jobs, not just Map-Reduce.
            "kind": "A String", # The kind of step in the Cloud Dataflow job.
            "properties": { # Named properties associated with the step. Each kind of
                # predefined step has its own required set of properties.
                # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
              "a_key": "", # Properties of the object.
            },
            "name": "A String", # The name that identifies the step. This must be unique for each
                # step with respect to all other steps in the Cloud Dataflow job.
          },
        ],
        "currentStateTime": "A String", # The timestamp associated with the current state.
        "tempFiles": [ # A set of files the system should be aware of that are used
            # for temporary storage. These temporary files will be
            # removed on job completion.
            # No duplicates are allowed.
            # No file patterns are supported.
            #
            # The supported files are:
            #
            # Google Cloud Storage:
            #
            #    storage.googleapis.com/{bucket}/{object}
            #    bucket.storage.googleapis.com/{object}
          "A String",
        ],
        "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
            # callers cannot mutate it.
          { # A message describing the state of a particular execution stage.
            "executionStageName": "A String", # The name of the execution stage.
            "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
            "currentStateTime": "A String", # The time at which the stage transitioned to this state.
          },
        ],
        "type": "A String", # The type of Cloud Dataflow job.
        "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
            # Cloud Dataflow service.
        "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
            # of the job it replaced.
            #
            # When sending a `CreateJobRequest`, you can update a job by specifying it
            # here. The job named here is stopped, and its intermediate state is
            # transferred to this job.
        "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
            # isn't contained in the submitted job.
          "stages": { # A mapping from each stage to the information about that stage.
            "a_key": { # Contains information about how a particular
                # google.dataflow.v1beta3.Step will be executed.
              "stepName": [ # The steps associated with the execution stage.
                  # Note that stages may have several steps, and that a given step
                  # might be run by more than one stage.
                "A String",
              ],
            },
          },
        },
      },
  }</pre>
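<p>As a usage illustration (editorially added, not part of the generated reference): a minimal sketch of launching a job from a template with the google-api-python-client. The project ID, Cloud Storage paths, and job name below are hypothetical placeholders; application-default credentials and an already-staged template are assumed, and the request-body field names follow the LaunchTemplateParameters schema documented earlier in this section.</p>
<pre>
# Minimal sketch, assuming application-default credentials and a template
# already staged at the (hypothetical) gs:// path below.
from googleapiclient.discovery import build

service = build('dataflow', 'v1b3')

request = service.projects().templates().launch(
    projectId='my-project-id',                       # hypothetical project
    gcsPath='gs://my-bucket/templates/my-template',  # hypothetical template
    body={
        'jobName': 'example-template-job',           # hypothetical job name
        'parameters': {},        # template-specific parameters, if any
        'environment': {         # runtime environment values (assumed fields)
            'tempLocation': 'gs://my-bucket/temp',   # hypothetical temp path
            'maxWorkers': 3,
        },
    },
)
response = request.execute()

# The response wraps the launched job under the "job" key, as shown in the
# schema above; the Job fields documented there apply to this object.
job = response.get('job', {})
print(job.get('id'), job.get('currentStateTime'))
</pre>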
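<p>Each displayData entry above carries one populated typed value field (strValue, int64Value, floatValue, and so on). A small illustrative helper, sketched on that reading of the schema, extracts whichever value is present:</p>
<pre>
# Field names follow the DisplayData schema documented above; the helper
# itself is a hypothetical convenience, not part of the client library.
_TYPED_VALUE_FIELDS = (
    'strValue', 'int64Value', 'floatValue', 'boolValue',
    'javaClassValue', 'timestampValue', 'durationValue',
)

def display_value(entry):
    # Prefer shortStrValue when present: per the schema notes, it is the
    # display form (e.g. MyDoFn for com.mypackage.MyDoFn).
    if 'shortStrValue' in entry:
        return entry['shortStrValue']
    for field in _TYPED_VALUE_FIELDS:
        if field in entry:
            return entry[field]
    return None

# pipelineDescription is only returned with JOB_VIEW_DESCRIPTION or
# JOB_VIEW_ALL, as noted in the schema above.
for item in job.get('pipelineDescription', {}).get('displayData', []):
    print(item.get('key'), '=', display_value(item))
</pre>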
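<p>Because each executionPipelineStage records its componentTransform list, the same description can map service-generated execution stages back to the user transforms they execute; a sketch under the same JOB_VIEW_DESCRIPTION/JOB_VIEW_ALL assumption:</p>
<pre>
def stages_to_transforms(job):
    # Returns {stage name: [original user transform names]}, using the
    # pipelineDescription fields documented above.
    description = job.get('pipelineDescription', {})
    return {
        stage.get('name'): [
            t.get('originalTransform')
            for t in stage.get('componentTransform', [])
        ]
        for stage in description.get('executionPipelineStage', [])
    }

print(stages_to_transforms(job))
</pre>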
</div>

</body></html>