1<html><body>
2<style>
3
4body, h1, h2, h3, div, span, p, pre, a {
5  margin: 0;
6  padding: 0;
7  border: 0;
8  font-weight: inherit;
9  font-style: inherit;
10  font-size: 100%;
11  font-family: inherit;
12  vertical-align: baseline;
13}
14
15body {
16  font-size: 13px;
17  padding: 1em;
18}
19
20h1 {
21  font-size: 26px;
22  margin-bottom: 1em;
23}
24
25h2 {
26  font-size: 24px;
27  margin-bottom: 1em;
28}
29
30h3 {
31  font-size: 20px;
32  margin-bottom: 1em;
33  margin-top: 1em;
34}
35
36pre, code {
37  line-height: 1.5;
38  font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
39}
40
41pre {
42  margin-top: 0.5em;
43}
44
45h1, h2, h3, p {
46  font-family: Arial, sans-serif;
47}
48
49h1, h2, h3 {
50  border-bottom: solid #CCC 1px;
51}
52
53.toc_element {
54  margin-top: 0.5em;
55}
56
57.firstline {
58  margin-left: 2em;
59}
60
61.method  {
62  margin-top: 1em;
63  border: solid 1px #CCC;
64  padding: 1em;
65  background: #EEE;
66}
67
68.details {
69  font-weight: bold;
70  font-size: 14px;
71}
72
73</style>
74
75<h1><a href="dataflow_v1b3.html">Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.templates.html">templates</a></h1>
76<h2>Instance Methods</h2>
77<p class="toc_element">
78  <code><a href="#create">create(projectId, body, x__xgafv=None)</a></code></p>
79<p class="firstline">Creates a Cloud Dataflow job from a template.</p>
80<p class="toc_element">
81  <code><a href="#get">get(projectId, gcsPath=None, location=None, x__xgafv=None, view=None)</a></code></p>
82<p class="firstline">Get the template associated with a template.</p>
83<p class="toc_element">
84  <code><a href="#launch">launch(projectId, body, dynamicTemplate_gcsPath=None, x__xgafv=None, dynamicTemplate_stagingLocation=None, location=None, gcsPath=None, validateOnly=None)</a></code></p>
85<p class="firstline">Launch a template.</p>
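<p>All three methods are reached through a Dataflow service object built with the google-api-python-client library. Below is a minimal sketch, assuming the library is installed and Application Default Credentials are available in the environment; project and bucket names used here and in the later examples are placeholders, not values defined by this API.</p>
<pre>
# Sketch only: build a client for the Dataflow v1b3 API.
# Assumes google-api-python-client is installed and Application Default
# Credentials are configured in the environment.
from googleapiclient.discovery import build

service = build('dataflow', 'v1b3')

# The methods documented below are then available as:
#   service.projects().templates().create(...)
#   service.projects().templates().get(...)
#   service.projects().templates().launch(...)
</pre>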
86<h3>Method Details</h3>
87<div class="method">
88    <code class="details" id="create">create(projectId, body, x__xgafv=None)</code>
89  <pre>Creates a Cloud Dataflow job from a template.
90
91Args:
92  projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
93  body: object, The request body. (required)
94    The object takes the form of:
95
96{ # A request to create a Cloud Dataflow job from a template.
97    "environment": { # The environment values to set at runtime. # The runtime environment for the job.
98      "machineType": "A String", # The machine type to use for the job. Defaults to the value from the
99          # template if not specified.
100      "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
101          # the service will use the network "default".
102      "zone": "A String", # The Compute Engine [availability
103          # zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)
104          # for launching worker instances to run your pipeline.
105      "additionalUserLabels": { # Additional user labels to be specified for the job.
106          # Keys and values should follow the restrictions specified in the [labeling
107          # restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions)
108          # page.
109        "a_key": "A String",
110      },
111      "additionalExperiments": [ # Additional experiment flags for the job.
112        "A String",
113      ],
114      "bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.
115          # Use with caution.
116      "tempLocation": "A String", # The Cloud Storage path to use for temporary files.
117          # Must be a valid Cloud Storage URL, beginning with `gs://`.
118      "serviceAccountEmail": "A String", # The email address of the service account to run the job as.
119      "numWorkers": 42, # The initial number of Google Compute Engine instnaces for the job.
120      "maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made
121          # available to your pipeline during execution, from 1 to 1000.
122      "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
123          # the form "regions/REGION/subnetworks/SUBNETWORK".
124    },
125    "gcsPath": "A String", # Required. A Cloud Storage path to the template from which to
126        # create the job.
127        # Must be a valid Cloud Storage URL, beginning with `gs://`.
128    "location": "A String", # The [regional endpoint]
129        # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to
130        # which to direct the request.
131    "parameters": { # The runtime parameters to pass to the job.
132      "a_key": "A String",
133    },
134    "jobName": "A String", # Required. The job name to use for the created job.
135  }
136
137  x__xgafv: string, V1 error format.
138    Allowed values
139      1 - v1 error format
140      2 - v2 error format
141
142Returns:
143  An object of the form:
144
145    { # Defines a job to be run by the Cloud Dataflow service.
146    "labels": { # User-defined labels for this job.
147        #
148        # The labels map can contain no more than 64 entries.  Entries of the labels
149        # map are UTF8 strings that comply with the following restrictions:
150        #
151        # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
152        # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
153        # * Both keys and values are additionally constrained to be <= 128 bytes in
154        # size.
155      "a_key": "A String",
156    },
157    "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the # This field is populated by the Dataflow service to support filtering jobs
158        # by the metadata values provided here. Populated for ListJobs and all GetJob
159        # views SUMMARY and higher.
160        # ListJob response and Job SUMMARY view.
161      "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
162        "versionDisplayName": "A String", # A readable string describing the version of the SDK.
163        "version": "A String", # The version of the SDK used to run the job.
164        "sdkSupportStatus": "A String", # The support status for this SDK version.
165      },
166      "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
167        { # Metadata for a PubSub connector used by the job.
168          "topic": "A String", # Topic accessed in the connection.
169          "subscription": "A String", # Subscription used in the connection.
170        },
171      ],
172      "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
173        { # Metadata for a Datastore connector used by the job.
174          "projectId": "A String", # ProjectId accessed in the connection.
175          "namespace": "A String", # Namespace used in the connection.
176        },
177      ],
178      "fileDetails": [ # Identification of a File source used in the Dataflow job.
179        { # Metadata for a File connector used by the job.
180          "filePattern": "A String", # File Pattern used to access files by the connector.
181        },
182      ],
183      "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
184        { # Metadata for a Spanner connector used by the job.
185          "instanceId": "A String", # InstanceId accessed in the connection.
186          "projectId": "A String", # ProjectId accessed in the connection.
187          "databaseId": "A String", # DatabaseId accessed in the connection.
188        },
189      ],
190      "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
191        { # Metadata for a BigTable connector used by the job.
192          "instanceId": "A String", # InstanceId accessed in the connection.
193          "projectId": "A String", # ProjectId accessed in the connection.
194          "tableId": "A String", # TableId accessed in the connection.
195        },
196      ],
197      "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
198        { # Metadata for a BigQuery connector used by the job.
199          "projectId": "A String", # Project accessed in the connection.
200          "dataset": "A String", # Dataset accessed in the connection.
201          "table": "A String", # Table accessed in the connection.
202          "query": "A String", # Query used to access data in the connection.
203        },
204      ],
205    },
206    "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
207        # A description of the user pipeline and stages through which it is executed.
208        # Created by Cloud Dataflow service.  Only retrieved with
209        # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
210        # form.  This data is provided by the Dataflow service for ease of visualizing
211        # the pipeline and interpreting Dataflow provided metrics.
212      "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
213        { # Description of the type, names/ids, and input/outputs for a transform.
214          "kind": "A String", # Type of transform.
215          "name": "A String", # User provided name for this transform instance.
216          "inputCollectionName": [ # User names for all collection inputs to this transform.
217            "A String",
218          ],
219          "displayData": [ # Transform-specific display data.
220            { # Data provided with a pipeline or transform to provide descriptive info.
221              "shortStrValue": "A String", # A possible additional shorter value to display.
222                  # For example a java_class_name_value of com.mypackage.MyDoFn
223                  # will be stored with MyDoFn as the short_str_value and
224                  # com.mypackage.MyDoFn as the java_class_name value.
225                  # short_str_value can be displayed and java_class_name_value
226                  # will be displayed as a tooltip.
227              "durationValue": "A String", # Contains value if the data is of duration type.
228              "url": "A String", # An optional full URL.
229              "floatValue": 3.14, # Contains value if the data is of float type.
230              "namespace": "A String", # The namespace for the key. This is usually a class name or programming
231                  # language namespace (i.e. python module) which defines the display data.
232                  # This allows a dax monitoring system to specially handle the data
233                  # and perform custom rendering.
234              "javaClassValue": "A String", # Contains value if the data is of java class type.
235              "label": "A String", # An optional label to display in a dax UI for the element.
236              "boolValue": True or False, # Contains value if the data is of a boolean type.
237              "strValue": "A String", # Contains value if the data is of string type.
238              "key": "A String", # The key identifying the display data.
239                  # This is intended to be used as a label for the display data
240                  # when viewed in a dax monitoring system.
241              "int64Value": "A String", # Contains value if the data is of int64 type.
242              "timestampValue": "A String", # Contains value if the data is of timestamp type.
243            },
244          ],
245          "outputCollectionName": [ # User  names for all collection outputs to this transform.
246            "A String",
247          ],
248          "id": "A String", # SDK generated id of this transform instance.
249        },
250      ],
251      "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
252        { # Description of the composing transforms, names/ids, and input/outputs of a
253            # stage of execution.  Some composing transforms and sources may have been
254            # generated by the Dataflow service during execution planning.
255          "componentSource": [ # Collections produced and consumed by component transforms of this stage.
256            { # Description of an interstitial value between transforms in an execution
257                # stage.
258              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
259              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
260                  # source is most closely associated.
261              "name": "A String", # Dataflow service generated name for this source.
262            },
263          ],
264          "kind": "A String", # Type of tranform this stage is executing.
265          "name": "A String", # Dataflow service generated name for this stage.
266          "outputSource": [ # Output sources for this stage.
267            { # Description of an input or output of an execution stage.
268              "userName": "A String", # Human-readable name for this source; may be user or system generated.
269              "sizeBytes": "A String", # Size of the source, if measurable.
270              "name": "A String", # Dataflow service generated name for this source.
271              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
272                  # source is most closely associated.
273            },
274          ],
275          "inputSource": [ # Input sources for this stage.
276            { # Description of an input or output of an execution stage.
277              "userName": "A String", # Human-readable name for this source; may be user or system generated.
278              "sizeBytes": "A String", # Size of the source, if measurable.
279              "name": "A String", # Dataflow service generated name for this source.
280              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
281                  # source is most closely associated.
282            },
283          ],
284          "componentTransform": [ # Transforms that comprise this execution stage.
285            { # Description of a transform executed as part of an execution stage.
286              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
287              "originalTransform": "A String", # User name for the original user transform with which this transform is
288                  # most closely associated.
289              "name": "A String", # Dataflow service generated name for this source.
290            },
291          ],
292          "id": "A String", # Dataflow service generated id for this stage.
293        },
294      ],
295      "displayData": [ # Pipeline level display data.
296        { # Data provided with a pipeline or transform to provide descriptive info.
297          "shortStrValue": "A String", # A possible additional shorter value to display.
298              # For example a java_class_name_value of com.mypackage.MyDoFn
299              # will be stored with MyDoFn as the short_str_value and
300              # com.mypackage.MyDoFn as the java_class_name value.
301              # short_str_value can be displayed and java_class_name_value
302              # will be displayed as a tooltip.
303          "durationValue": "A String", # Contains value if the data is of duration type.
304          "url": "A String", # An optional full URL.
305          "floatValue": 3.14, # Contains value if the data is of float type.
306          "namespace": "A String", # The namespace for the key. This is usually a class name or programming
307              # language namespace (i.e. python module) which defines the display data.
308              # This allows a dax monitoring system to specially handle the data
309              # and perform custom rendering.
310          "javaClassValue": "A String", # Contains value if the data is of java class type.
311          "label": "A String", # An optional label to display in a dax UI for the element.
312          "boolValue": True or False, # Contains value if the data is of a boolean type.
313          "strValue": "A String", # Contains value if the data is of string type.
314          "key": "A String", # The key identifying the display data.
315              # This is intended to be used as a label for the display data
316              # when viewed in a dax monitoring system.
317          "int64Value": "A String", # Contains value if the data is of int64 type.
318          "timestampValue": "A String", # Contains value if the data is of timestamp type.
319        },
320      ],
321    },
322    "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
323        # callers cannot mutate it.
324      { # A message describing the state of a particular execution stage.
325        "executionStageName": "A String", # The name of the execution stage.
326        "executionStageState": "A String", # Executions stage states allow the same set of values as JobState.
327        "currentStateTime": "A String", # The time at which the stage transitioned to this state.
328      },
329    ],
330    "id": "A String", # The unique ID of this job.
331        #
332        # This field is set by the Cloud Dataflow service when the Job is
333        # created, and is immutable for the life of the job.
334    "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
335        # `JOB_STATE_UPDATED`), this field contains the ID of that job.
336    "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
337    "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
338        # corresponding name prefixes of the new job.
339      "a_key": "A String",
340    },
341    "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
342      "version": { # A structure describing which components and their versions of the service
343          # are required in order to run the job.
344        "a_key": "", # Properties of the object.
345      },
346      "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
347      "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data
348          # at rest, AKA a Customer Managed Encryption Key (CMEK).
349          #
350          # Format:
351          #   projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
352      "internalExperiments": { # Experimental settings.
353        "a_key": "", # Properties of the object. Contains field @type with type URL.
354      },
355      "dataset": "A String", # The dataset for the current project where various workflow
356          # related tables are stored.
357          #
358          # The supported resource type is:
359          #
360          # Google BigQuery:
361          #   bigquery.googleapis.com/{dataset}
362      "experiments": [ # The list of experiments to enable.
363        "A String",
364      ],
365      "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
366      "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
367          # options are passed through the service and are used to recreate the
368          # SDK pipeline options on the worker in a language agnostic and platform
369          # independent way.
370        "a_key": "", # Properties of the object.
371      },
372      "userAgent": { # A description of the process that generated the request.
373        "a_key": "", # Properties of the object.
374      },
375      "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
376          # unspecified, the service will attempt to choose a reasonable
377          # default.  This should be in the form of the API service name,
378          # e.g. "compute.googleapis.com".
379      "workerPools": [ # The worker pools. At least one "harness" worker pool must be
380          # specified in order for the job to have workers.
381        { # Describes one particular pool of Cloud Dataflow workers to be
382            # instantiated by the Cloud Dataflow service in order to perform the
383            # computations required by a job.  Note that a workflow job may use
384            # multiple pools, in order to match the various computational
385            # requirements of the various stages of the job.
386          "diskSourceImage": "A String", # Fully qualified source image for disks.
387          "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
388              # using the standard Dataflow task runner.  Users should ignore
389              # this field.
390            "workflowFileName": "A String", # The file to store the workflow in.
391            "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
392                # will not be uploaded.
393                #
394                # The supported resource type is:
395                #
396                # Google Cloud Storage:
397                #   storage.googleapis.com/{bucket}/{object}
398                #   bucket.storage.googleapis.com/{object}
399            "commandlinesFileName": "A String", # The file to store preprocessing commands in.
400            "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
401              "reportingEnabled": True or False, # Whether to send work progress updates to the service.
402              "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
403                  # "shuffle/v1beta1".
404              "workerId": "A String", # The ID of the worker running this pipeline.
405              "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
406                  #
407                  # When workers access Google Cloud APIs, they logically do so via
408                  # relative URLs.  If this field is specified, it supplies the base
409                  # URL to use for resolving these relative URLs.  The normative
410                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
411                  # Locators".
412                  #
413                  # If not specified, the default value is "http://www.googleapis.com/"
414              "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
415                  # "dataflow/v1b3/projects".
416              "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
417                  # storage.
418                  #
419                  # The supported resource type is:
420                  #
421                  # Google Cloud Storage:
422                  #
423                  #   storage.googleapis.com/{bucket}/{object}
424                  #   bucket.storage.googleapis.com/{object}
425            },
426            "vmId": "A String", # The ID string of the VM.
427            "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
428            "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
429            "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
430                # access the Cloud Dataflow API.
431              "A String",
432            ],
433            "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
434                # taskrunner; e.g. "root".
435            "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
436                #
437                # When workers access Google Cloud APIs, they logically do so via
438                # relative URLs.  If this field is specified, it supplies the base
439                # URL to use for resolving these relative URLs.  The normative
440                # algorithm used is defined by RFC 1808, "Relative Uniform Resource
441                # Locators".
442                #
443                # If not specified, the default value is "http://www.googleapis.com/"
444            "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
445                # taskrunner; e.g. "wheel".
446            "languageHint": "A String", # The suggested backend language.
447            "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
448                # console.
449            "streamingWorkerMainClass": "A String", # The streaming worker main class name.
450            "logDir": "A String", # The directory on the VM to store logs.
451            "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
452            "harnessCommand": "A String", # The command to launch the worker harness.
453            "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
454                # temporary storage.
455                #
456                # The supported resource type is:
457                #
458                # Google Cloud Storage:
459                #   storage.googleapis.com/{bucket}/{object}
460                #   bucket.storage.googleapis.com/{object}
461            "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
462          },
463          "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
464              # are supported.
465          "packages": [ # Packages to be installed on workers.
466            { # The packages that must be installed in order for a worker to run the
467                # steps of the Cloud Dataflow job that will be assigned to its worker
468                # pool.
469                #
470                # This is the mechanism by which the Cloud Dataflow SDK causes code to
471                # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
472                # might use this to install jars containing the user's code and all of the
473                # various dependencies (libraries, data files, etc.) required in order
474                # for that code to run.
475              "location": "A String", # The resource to read the package from. The supported resource type is:
476                  #
477                  # Google Cloud Storage:
478                  #
479                  #   storage.googleapis.com/{bucket}
480                  #   bucket.storage.googleapis.com/
481              "name": "A String", # The name of the package.
482            },
483          ],
484          "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
485              # service will attempt to choose a reasonable default.
486          "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
487              # the service will use the network "default".
488          "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
489              # will attempt to choose a reasonable default.
490          "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
491              # attempt to choose a reasonable default.
492          "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
493              # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
494              # `TEARDOWN_NEVER`.
495              # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
496              # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
497              # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
498              # down.
499              #
500              # If the workers are not torn down by the service, they will
501              # continue to run and use Google Compute Engine VM resources in the
502              # user's project until they are explicitly terminated by the user.
503              # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
504              # policy except for small, manually supervised test jobs.
505              #
506              # If unknown or unspecified, the service will attempt to choose a reasonable
507              # default.
508          "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
509              # Compute Engine API.
510          "ipConfiguration": "A String", # Configuration for VM IPs.
511          "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
512              # service will choose a number of threads (according to the number of cores
513              # on the selected machine type for batch, or 1 by convention for streaming).
514          "poolArgs": { # Extra arguments for this worker pool.
515            "a_key": "", # Properties of the object. Contains field @type with type URL.
516          },
517          "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
518              # execute the job.  If zero or unspecified, the service will
519              # attempt to choose a reasonable default.
520          "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
521              # harness, residing in Google Container Registry.
522          "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
523              # the form "regions/REGION/subnetworks/SUBNETWORK".
524          "dataDisks": [ # Data disks that are used by a VM in this workflow.
525            { # Describes the data disk used by a workflow job.
526              "mountPoint": "A String", # Directory in a VM where disk is mounted.
527              "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
528                  # attempt to choose a reasonable default.
529              "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
530                  # must be a disk type appropriate to the project and zone in which
531                  # the workers will run.  If unknown or unspecified, the service
532                  # will attempt to choose a reasonable default.
533                  #
534                  # For example, the standard persistent disk type is a resource name
535                  # typically ending in "pd-standard".  If SSD persistent disks are
536                  # available, the resource name typically ends with "pd-ssd".  The
537                  # actual valid values are defined the Google Compute Engine API,
538                  # not by the Cloud Dataflow API; consult the Google Compute Engine
539                  # documentation for more information about determining the set of
540                  # available disk types for a particular project and zone.
541                  #
542                  # Google Compute Engine Disk types are local to a particular
543                  # project in a particular zone, and so the resource name will
544                  # typically look something like this:
545                  #
546                  # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
547            },
548          ],
549          "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
550            "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
551            "algorithm": "A String", # The algorithm to use for autoscaling.
552          },
553          "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
554              # select a default set of packages which are useful to worker
555              # harnesses written in a particular language.
556          "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
557              # attempt to choose a reasonable default.
558          "metadata": { # Metadata to set on the Google Compute Engine VMs.
559            "a_key": "A String",
560          },
561        },
562      ],
563      "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
564          # storage.  The system will append the suffix "/temp-{JOBNAME}" to
565          # this resource prefix, where {JOBNAME} is the value of the
566          # job_name field.  The resulting bucket and object prefix is used
567          # as the prefix of the resources used to store temporary data
568          # needed during the job execution.  NOTE: This will override the
569          # value in taskrunner_settings.
570          # The supported resource type is:
571          #
572          # Google Cloud Storage:
573          #
574          #   storage.googleapis.com/{bucket}/{object}
575          #   bucket.storage.googleapis.com/{object}
576    },
577    "location": "A String", # The [regional endpoint]
578        # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
579        # contains this job.
580    "tempFiles": [ # A set of files the system should be aware of that are used
581        # for temporary storage. These temporary files will be
582        # removed on job completion.
583        # No duplicates are allowed.
584        # No file patterns are supported.
585        #
586        # The supported files are:
587        #
588        # Google Cloud Storage:
589        #
590        #    storage.googleapis.com/{bucket}/{object}
591        #    bucket.storage.googleapis.com/{object}
592      "A String",
593    ],
594    "type": "A String", # The type of Cloud Dataflow job.
595    "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
596        # If this field is set, the service will ensure its uniqueness.
597        # The request to create a job will fail if the service has knowledge of a
598        # previously submitted job with the same client's ID and job name.
599        # The caller may use this field to ensure idempotence of job
600        # creation across retried attempts to create a job.
601        # By default, the field is empty and, in that case, the service ignores it.
602    "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
603        # snapshot.
604    "stepsLocation": "A String", # The GCS location where the steps are stored.
605    "currentStateTime": "A String", # The timestamp associated with the current state.
606    "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
607        # Flexible resource scheduling jobs are started with some delay after job
608        # creation, so start_time is unset before start and is updated when the
609        # job is started by the Cloud Dataflow service. For other jobs, start_time
610        # always equals to create_time and is immutable and set by the Cloud Dataflow
611        # service.
612    "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
613        # Cloud Dataflow service.
614    "requestedState": "A String", # The job's requested state.
615        #
616        # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
617        # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
618        # also be used to directly set a job's requested state to
619        # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
620        # job if it has not already reached a terminal state.
621    "name": "A String", # The user-specified Cloud Dataflow job name.
622        #
623        # Only one Job with a given name may exist in a project at any
624        # given time. If a caller attempts to create a Job with the same
625        # name as an already-existing Job, the attempt returns the
626        # existing Job.
627        #
628        # The name must match the regular expression
629        # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
630    "steps": [ # Exactly one of step or steps_location should be specified.
631        #
632        # The top-level steps that constitute the entire job.
633      { # Defines a particular step within a Cloud Dataflow job.
634          #
635          # A job consists of multiple steps, each of which performs some
636          # specific operation as part of the overall job.  Data is typically
637          # passed from one step to another as part of the job.
638          #
639          # Here's an example of a sequence of steps which together implement a
640          # Map-Reduce job:
641          #
642          #   * Read a collection of data from some source, parsing the
643          #     collection's elements.
644          #
645          #   * Validate the elements.
646          #
647          #   * Apply a user-defined function to map each element to some value
648          #     and extract an element-specific key value.
649          #
650          #   * Group elements with the same key into a single element with
651          #     that key, transforming a multiply-keyed collection into a
652          #     uniquely-keyed collection.
653          #
654          #   * Write the elements out to some data sink.
655          #
656          # Note that the Cloud Dataflow service may be used to run many different
657          # types of jobs, not just Map-Reduce.
658        "kind": "A String", # The kind of step in the Cloud Dataflow job.
659        "properties": { # Named properties associated with the step. Each kind of
660            # predefined step has its own required set of properties.
661            # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
662          "a_key": "", # Properties of the object.
663        },
664        "name": "A String", # The name that identifies the step. This must be unique for each
665            # step with respect to all other steps in the Cloud Dataflow job.
666      },
667    ],
668    "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
669        # of the job it replaced.
670        #
671        # When sending a `CreateJobRequest`, you can update a job by specifying it
672        # here. The job named here is stopped, and its intermediate state is
673        # transferred to this job.
674    "currentState": "A String", # The current state of the job.
675        #
676        # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
677        # specified.
678        #
679        # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
680        # terminal state. After a job has reached a terminal state, no
681        # further state updates may be made.
682        #
683        # This field may be mutated by the Cloud Dataflow service;
684        # callers cannot mutate it.
685    "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
686        # isn't contained in the submitted job.
687      "stages": { # A mapping from each stage to the information about that stage.
688        "a_key": { # Contains information about how a particular
689            # google.dataflow.v1beta3.Step will be executed.
690          "stepName": [ # The steps associated with the execution stage.
691              # Note that stages may have several steps, and that a given step
692              # might be run by more than one stage.
693            "A String",
694          ],
695        },
696      },
697    },
698  }</pre>
699</div>
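<p>A usage sketch for <code>create</code>, reusing the <code>service</code> object built above. The job name, bucket, template path, and parameter keys are placeholders; the request-body fields themselves come from the schema documented here.</p>
<pre>
# Sketch only: create a Dataflow job from a template stored in Cloud Storage.
body = {
    'jobName': 'example-job',                                # placeholder job name
    'gcsPath': 'gs://example-bucket/templates/my-template',  # placeholder template path
    'parameters': {'inputFile': 'gs://example-bucket/input.txt'},  # placeholder runtime parameters
    'environment': {
        'tempLocation': 'gs://example-bucket/temp',          # placeholder temp location
        'zone': 'us-central1-f',                             # placeholder Compute Engine zone
    },
}
job = service.projects().templates().create(projectId='example-project', body=body).execute()
print(job['id'], job.get('currentState'))
</pre>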
700
701<div class="method">
702    <code class="details" id="get">get(projectId, gcsPath=None, location=None, x__xgafv=None, view=None)</code>
703  <pre>Get the template associated with a template.
704
705Args:
706  projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
707  gcsPath: string, Required. A Cloud Storage path to the template from which to
708create the job.
709Must be a valid Cloud Storage URL, beginning with 'gs://'.
710  location: string, The [regional endpoint]
711(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to
712which to direct the request.
713  x__xgafv: string, V1 error format.
714    Allowed values
715      1 - v1 error format
716      2 - v2 error format
717  view: string, The view to retrieve. Defaults to METADATA_ONLY.
718
719Returns:
720  An object of the form:
721
722    { # The response to a GetTemplate request.
723    "status": { # The `Status` type defines a logical error model that is suitable for # The status of the get template request. Any problems with the
724        # request will be indicated in the error_details.
725        # different programming environments, including REST APIs and RPC APIs. It is
726        # used by [gRPC](https://github.com/grpc). The error model is designed to be:
727        #
728        # - Simple to use and understand for most users
729        # - Flexible enough to meet unexpected needs
730        #
731        # # Overview
732        #
733        # The `Status` message contains three pieces of data: error code, error
734        # message, and error details. The error code should be an enum value of
735        # google.rpc.Code, but it may accept additional error codes if needed.  The
736        # error message should be a developer-facing English message that helps
737        # developers *understand* and *resolve* the error. If a localized user-facing
738        # error message is needed, put the localized message in the error details or
739        # localize it in the client. The optional error details may contain arbitrary
740        # information about the error. There is a predefined set of error detail types
741        # in the package `google.rpc` that can be used for common error conditions.
742        #
743        # # Language mapping
744        #
745        # The `Status` message is the logical representation of the error model, but it
746        # is not necessarily the actual wire format. When the `Status` message is
747        # exposed in different client libraries and different wire protocols, it can be
748        # mapped differently. For example, it will likely be mapped to some exceptions
749        # in Java, but more likely mapped to some error codes in C.
750        #
751        # # Other uses
752        #
753        # The error model and the `Status` message can be used in a variety of
754        # environments, either with or without APIs, to provide a
755        # consistent developer experience across different environments.
756        #
757        # Example uses of this error model include:
758        #
759        # - Partial errors. If a service needs to return partial errors to the client,
760        #     it may embed the `Status` in the normal response to indicate the partial
761        #     errors.
762        #
763        # - Workflow errors. A typical workflow has multiple steps. Each step may
764        #     have a `Status` message for error reporting.
765        #
766        # - Batch operations. If a client uses batch request and batch response, the
767        #     `Status` message should be used directly inside batch response, one for
768        #     each error sub-response.
769        #
770        # - Asynchronous operations. If an API call embeds asynchronous operation
771        #     results in its response, the status of those operations should be
772        #     represented directly using the `Status` message.
773        #
774        # - Logging. If some API errors are stored in logs, the message `Status` could
775        #     be used directly after any stripping needed for security/privacy reasons.
776      "message": "A String", # A developer-facing error message, which should be in English. Any
777          # user-facing error message should be localized and sent in the
778          # google.rpc.Status.details field, or localized by the client.
779      "code": 42, # The status code, which should be an enum value of google.rpc.Code.
780      "details": [ # A list of messages that carry the error details.  There is a common set of
781          # message types for APIs to use.
782        {
783          "a_key": "", # Properties of the object. Contains field @type with type URL.
784        },
785      ],
786    },
787    "metadata": { # Metadata describing a template. # The template metadata describing the template name, available
788        # parameters, etc.
789      "name": "A String", # Required. The name of the template.
790      "parameters": [ # The parameters for the template.
791        { # Metadata for a specific parameter.
792          "regexes": [ # Optional. Regexes that the parameter must match.
793            "A String",
794          ],
795          "helpText": "A String", # Required. The help text to display for the parameter.
796          "name": "A String", # Required. The name of the parameter.
797          "isOptional": True or False, # Optional. Whether the parameter is optional. Defaults to false.
798          "label": "A String", # Required. The label to display for the parameter.
799        },
800      ],
801      "description": "A String", # Optional. A description of the template.
802    },
803  }</pre>
804</div>
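<p>A usage sketch for <code>get</code>, again reusing the <code>service</code> object and the placeholder paths from the earlier examples; it fetches template metadata so the expected runtime parameters can be listed before launching.</p>
<pre>
# Sketch only: inspect a template's declared parameters.
template = service.projects().templates().get(
    projectId='example-project',                              # placeholder project
    gcsPath='gs://example-bucket/templates/my-template',      # placeholder template path
    view='METADATA_ONLY',
).execute()
for param in template.get('metadata', {}).get('parameters', []):
    print(param['name'], '(optional)' if param.get('isOptional') else '(required)')
</pre>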
805
806<div class="method">
807    <code class="details" id="launch">launch(projectId, body, dynamicTemplate_gcsPath=None, x__xgafv=None, dynamicTemplate_stagingLocation=None, location=None, gcsPath=None, validateOnly=None)</code>
808  <pre>Launch a template.
809
810Args:
811  projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
812  body: object, The request body. (required)
813    The object takes the form of:
814
815{ # Parameters to provide to the template being launched.
816    "environment": { # The environment values to set at runtime. # The runtime environment for the job.
817      "machineType": "A String", # The machine type to use for the job. Defaults to the value from the
818          # template if not specified.
819      "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
820          # the service will use the network "default".
821      "zone": "A String", # The Compute Engine [availability
822          # zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)
823          # for launching worker instances to run your pipeline.
824      "additionalUserLabels": { # Additional user labels to be specified for the job.
825          # Keys and values should follow the restrictions specified in the [labeling
826          # restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions)
827          # page.
828        "a_key": "A String",
829      },
830      "additionalExperiments": [ # Additional experiment flags for the job.
831        "A String",
832      ],
833      "bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.
834          # Use with caution.
835      "tempLocation": "A String", # The Cloud Storage path to use for temporary files.
836          # Must be a valid Cloud Storage URL, beginning with `gs://`.
837      "serviceAccountEmail": "A String", # The email address of the service account to run the job as.
838      "numWorkers": 42, # The initial number of Google Compute Engine instnaces for the job.
839      "maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made
840          # available to your pipeline during execution, from 1 to 1000.
841      "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
842          # the form "regions/REGION/subnetworks/SUBNETWORK".
843    },
844    "parameters": { # The runtime parameters to pass to the job.
845      "a_key": "A String",
846    },
847    "jobName": "A String", # Required. The job name to use for the created job.
848  }
849
850  dynamicTemplate_gcsPath: string, Path to dynamic template spec file on GCS.
851The file must be a JSON-serialized DynamicTemplateFieSpec object.
852  x__xgafv: string, V1 error format.
853    Allowed values
854      1 - v1 error format
855      2 - v2 error format
856  dynamicTemplate_stagingLocation: string, Cloud Storage path for staging dependencies.
857Must be a valid Cloud Storage URL, beginning with `gs://`.
858  location: string, The [regional endpoint]
859(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to
860which to direct the request.
861  gcsPath: string, A Cloud Storage path to the template from which to create
862the job.
863Must be a valid Cloud Storage URL, beginning with 'gs://'.
864  validateOnly: boolean, If true, the request is validated but not actually executed.
865Defaults to false.
866
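  # Usage sketch (placeholder values; reuses the `service` object from the
  # examples above). Setting validateOnly=True performs a dry run of the launch:
  #
  #   response = service.projects().templates().launch(
  #       projectId='example-project',
  #       gcsPath='gs://example-bucket/templates/my-template',
  #       body={'jobName': 'example-launch', 'parameters': {}},
  #       validateOnly=True,
  #   ).execute()
  #   job = response.get('job')
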
867Returns:
868  An object of the form:
869
870    { # Response to the request to launch a template.
871    "job": { # Defines a job to be run by the Cloud Dataflow service. # The job that was launched, if the request was not a dry run and
872        # the job was successfully launched.
873      "labels": { # User-defined labels for this job.
874          #
875          # The labels map can contain no more than 64 entries.  Entries of the labels
876          # map are UTF8 strings that comply with the following restrictions:
877          #
878          # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
879          # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
880          # * Both keys and values are additionally constrained to be <= 128 bytes in
881          # size.
882        "a_key": "A String",
883      },
884      "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the # This field is populated by the Dataflow service to support filtering jobs
885          # by the metadata values provided here. Populated for ListJobs and all GetJob
886          # views SUMMARY and higher.
887          # ListJob response and Job SUMMARY view.
888        "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
889          "versionDisplayName": "A String", # A readable string describing the version of the SDK.
890          "version": "A String", # The version of the SDK used to run the job.
891          "sdkSupportStatus": "A String", # The support status for this SDK version.
892        },
893        "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
894          { # Metadata for a PubSub connector used by the job.
895            "topic": "A String", # Topic accessed in the connection.
896            "subscription": "A String", # Subscription used in the connection.
897          },
898        ],
899        "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
900          { # Metadata for a Datastore connector used by the job.
901            "projectId": "A String", # ProjectId accessed in the connection.
902            "namespace": "A String", # Namespace used in the connection.
903          },
904        ],
905        "fileDetails": [ # Identification of a File source used in the Dataflow job.
906          { # Metadata for a File connector used by the job.
907            "filePattern": "A String", # File Pattern used to access files by the connector.
908          },
909        ],
910        "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
911          { # Metadata for a Spanner connector used by the job.
912            "instanceId": "A String", # InstanceId accessed in the connection.
913            "projectId": "A String", # ProjectId accessed in the connection.
914            "databaseId": "A String", # DatabaseId accessed in the connection.
915          },
916        ],
917        "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
918          { # Metadata for a BigTable connector used by the job.
919            "instanceId": "A String", # InstanceId accessed in the connection.
920            "projectId": "A String", # ProjectId accessed in the connection.
921            "tableId": "A String", # TableId accessed in the connection.
922          },
923        ],
924        "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
925          { # Metadata for a BigQuery connector used by the job.
926            "projectId": "A String", # Project accessed in the connection.
927            "dataset": "A String", # Dataset accessed in the connection.
928            "table": "A String", # Table accessed in the connection.
929            "query": "A String", # Query used to access data in the connection.
930          },
931        ],
932      },
933      "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
934          # A description of the user pipeline and stages through which it is executed.
935          # Created by Cloud Dataflow service.  Only retrieved with
936          # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
937          # form.  This data is provided by the Dataflow service for ease of visualizing
938          # the pipeline and interpreting Dataflow provided metrics.
939        "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
940          { # Description of the type, names/ids, and input/outputs for a transform.
941            "kind": "A String", # Type of transform.
942            "name": "A String", # User provided name for this transform instance.
943            "inputCollectionName": [ # User names for all collection inputs to this transform.
944              "A String",
945            ],
946            "displayData": [ # Transform-specific display data.
947              { # Data provided with a pipeline or transform to provide descriptive info.
948                "shortStrValue": "A String", # A possible additional shorter value to display.
949                    # For example a java_class_name_value of com.mypackage.MyDoFn
950                    # will be stored with MyDoFn as the short_str_value and
951                    # com.mypackage.MyDoFn as the java_class_name value.
952                    # short_str_value can be displayed and java_class_name_value
953                    # will be displayed as a tooltip.
954                "durationValue": "A String", # Contains value if the data is of duration type.
955                "url": "A String", # An optional full URL.
956                "floatValue": 3.14, # Contains value if the data is of float type.
957                "namespace": "A String", # The namespace for the key. This is usually a class name or programming
958                    # language namespace (i.e. python module) which defines the display data.
959                    # This allows a dax monitoring system to specially handle the data
960                    # and perform custom rendering.
961                "javaClassValue": "A String", # Contains value if the data is of java class type.
962                "label": "A String", # An optional label to display in a dax UI for the element.
963                "boolValue": True or False, # Contains value if the data is of a boolean type.
964                "strValue": "A String", # Contains value if the data is of string type.
965                "key": "A String", # The key identifying the display data.
966                    # This is intended to be used as a label for the display data
967                    # when viewed in a dax monitoring system.
968                "int64Value": "A String", # Contains value if the data is of int64 type.
969                "timestampValue": "A String", # Contains value if the data is of timestamp type.
970              },
971            ],
972            "outputCollectionName": [ # User names for all collection outputs to this transform.
973              "A String",
974            ],
975            "id": "A String", # SDK generated id of this transform instance.
976          },
977        ],
978        "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
979          { # Description of the composing transforms, names/ids, and input/outputs of a
980              # stage of execution.  Some composing transforms and sources may have been
981              # generated by the Dataflow service during execution planning.
982            "componentSource": [ # Collections produced and consumed by component transforms of this stage.
983              { # Description of an interstitial value between transforms in an execution
984                  # stage.
985                "userName": "A String", # Human-readable name for this transform; may be user or system generated.
986                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
987                    # source is most closely associated.
988                "name": "A String", # Dataflow service generated name for this source.
989              },
990            ],
991            "kind": "A String", # Type of transform this stage is executing.
992            "name": "A String", # Dataflow service generated name for this stage.
993            "outputSource": [ # Output sources for this stage.
994              { # Description of an input or output of an execution stage.
995                "userName": "A String", # Human-readable name for this source; may be user or system generated.
996                "sizeBytes": "A String", # Size of the source, if measurable.
997                "name": "A String", # Dataflow service generated name for this source.
998                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
999                    # source is most closely associated.
1000              },
1001            ],
1002            "inputSource": [ # Input sources for this stage.
1003              { # Description of an input or output of an execution stage.
1004                "userName": "A String", # Human-readable name for this source; may be user or system generated.
1005                "sizeBytes": "A String", # Size of the source, if measurable.
1006                "name": "A String", # Dataflow service generated name for this source.
1007                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1008                    # source is most closely associated.
1009              },
1010            ],
1011            "componentTransform": [ # Transforms that comprise this execution stage.
1012              { # Description of a transform executed as part of an execution stage.
1013                "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1014                "originalTransform": "A String", # User name for the original user transform with which this transform is
1015                    # most closely associated.
1016                "name": "A String", # Dataflow service generated name for this source.
1017              },
1018            ],
1019            "id": "A String", # Dataflow service generated id for this stage.
1020          },
1021        ],
1022        "displayData": [ # Pipeline level display data.
1023          { # Data provided with a pipeline or transform to provide descriptive info.
1024            "shortStrValue": "A String", # A possible additional shorter value to display.
1025                # For example a java_class_name_value of com.mypackage.MyDoFn
1026                # will be stored with MyDoFn as the short_str_value and
1027                # com.mypackage.MyDoFn as the java_class_name value.
1028                # short_str_value can be displayed and java_class_name_value
1029                # will be displayed as a tooltip.
1030            "durationValue": "A String", # Contains value if the data is of duration type.
1031            "url": "A String", # An optional full URL.
1032            "floatValue": 3.14, # Contains value if the data is of float type.
1033            "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1034                # language namespace (i.e. python module) which defines the display data.
1035                # This allows a dax monitoring system to specially handle the data
1036                # and perform custom rendering.
1037            "javaClassValue": "A String", # Contains value if the data is of java class type.
1038            "label": "A String", # An optional label to display in a dax UI for the element.
1039            "boolValue": True or False, # Contains value if the data is of a boolean type.
1040            "strValue": "A String", # Contains value if the data is of string type.
1041            "key": "A String", # The key identifying the display data.
1042                # This is intended to be used as a label for the display data
1043                # when viewed in a dax monitoring system.
1044            "int64Value": "A String", # Contains value if the data is of int64 type.
1045            "timestampValue": "A String", # Contains value if the data is of timestamp type.
1046          },
1047        ],
1048      },
1049      "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
1050          # callers cannot mutate it.
1051        { # A message describing the state of a particular execution stage.
1052          "executionStageName": "A String", # The name of the execution stage.
1053          "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
1054          "currentStateTime": "A String", # The time at which the stage transitioned to this state.
1055        },
1056      ],
1057      "id": "A String", # The unique ID of this job.
1058          #
1059          # This field is set by the Cloud Dataflow service when the Job is
1060          # created, and is immutable for the life of the job.
1061      "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
1062          # `JOB_STATE_UPDATED`), this field contains the ID of that job.
1063      "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
1064      "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
1065          # corresponding name prefixes of the new job.
1066        "a_key": "A String",
1067      },
1068      "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
1069        "version": { # A structure describing which components and their versions of the service
1070            # are required in order to run the job.
1071          "a_key": "", # Properties of the object.
1072        },
1073        "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
1074        "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data
1075            # at rest, AKA a Customer Managed Encryption Key (CMEK).
1076            #
1077            # Format:
1078            #   projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
1079        "internalExperiments": { # Experimental settings.
1080          "a_key": "", # Properties of the object. Contains field @type with type URL.
1081        },
1082        "dataset": "A String", # The dataset for the current project where various workflow
1083            # related tables are stored.
1084            #
1085            # The supported resource type is:
1086            #
1087            # Google BigQuery:
1088            #   bigquery.googleapis.com/{dataset}
1089        "experiments": [ # The list of experiments to enable.
1090          "A String",
1091        ],
1092        "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
1093        "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
1094            # options are passed through the service and are used to recreate the
1095            # SDK pipeline options on the worker in a language agnostic and platform
1096            # independent way.
1097          "a_key": "", # Properties of the object.
1098        },
1099        "userAgent": { # A description of the process that generated the request.
1100          "a_key": "", # Properties of the object.
1101        },
1102        "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
1103            # unspecified, the service will attempt to choose a reasonable
1104            # default.  This should be in the form of the API service name,
1105            # e.g. "compute.googleapis.com".
1106        "workerPools": [ # The worker pools. At least one "harness" worker pool must be
1107            # specified in order for the job to have workers.
1108          { # Describes one particular pool of Cloud Dataflow workers to be
1109              # instantiated by the Cloud Dataflow service in order to perform the
1110              # computations required by a job.  Note that a workflow job may use
1111              # multiple pools, in order to match the various computational
1112              # requirements of the various stages of the job.
1113            "diskSourceImage": "A String", # Fully qualified source image for disks.
1114            "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
1115                # using the standard Dataflow task runner.  Users should ignore
1116                # this field.
1117              "workflowFileName": "A String", # The file to store the workflow in.
1118              "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
1119                  # will not be uploaded.
1120                  #
1121                  # The supported resource type is:
1122                  #
1123                  # Google Cloud Storage:
1124                  #   storage.googleapis.com/{bucket}/{object}
1125                  #   bucket.storage.googleapis.com/{object}
1126              "commandlinesFileName": "A String", # The file to store preprocessing commands in.
1127              "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
1128                "reportingEnabled": True or False, # Whether to send work progress updates to the service.
1129                "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
1130                    # "shuffle/v1beta1".
1131                "workerId": "A String", # The ID of the worker running this pipeline.
1132                "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
1133                    #
1134                    # When workers access Google Cloud APIs, they logically do so via
1135                    # relative URLs.  If this field is specified, it supplies the base
1136                    # URL to use for resolving these relative URLs.  The normative
1137                    # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1138                    # Locators".
1139                    #
1140                    # If not specified, the default value is "http://www.googleapis.com/"
1141                "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
1142                    # "dataflow/v1b3/projects".
1143                "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1144                    # storage.
1145                    #
1146                    # The supported resource type is:
1147                    #
1148                    # Google Cloud Storage:
1149                    #
1150                    #   storage.googleapis.com/{bucket}/{object}
1151                    #   bucket.storage.googleapis.com/{object}
1152              },
1153              "vmId": "A String", # The ID string of the VM.
1154              "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
1155              "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
1156              "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
1157                  # access the Cloud Dataflow API.
1158                "A String",
1159              ],
1160              "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
1161                  # taskrunner; e.g. "root".
1162              "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
1163                  #
1164                  # When workers access Google Cloud APIs, they logically do so via
1165                  # relative URLs.  If this field is specified, it supplies the base
1166                  # URL to use for resolving these relative URLs.  The normative
1167                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1168                  # Locators".
1169                  #
1170                  # If not specified, the default value is "http://www.googleapis.com/"
1171              "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
1172                  # taskrunner; e.g. "wheel".
1173              "languageHint": "A String", # The suggested backend language.
1174              "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
1175                  # console.
1176              "streamingWorkerMainClass": "A String", # The streaming worker main class name.
1177              "logDir": "A String", # The directory on the VM to store logs.
1178              "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3".
1179              "harnessCommand": "A String", # The command to launch the worker harness.
1180              "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
1181                  # temporary storage.
1182                  #
1183                  # The supported resource type is:
1184                  #
1185                  # Google Cloud Storage:
1186                  #   storage.googleapis.com/{bucket}/{object}
1187                  #   bucket.storage.googleapis.com/{object}
1188              "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
1189            },
1190            "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
1191                # are supported.
1192            "packages": [ # Packages to be installed on workers.
1193              { # The packages that must be installed in order for a worker to run the
1194                  # steps of the Cloud Dataflow job that will be assigned to its worker
1195                  # pool.
1196                  #
1197                  # This is the mechanism by which the Cloud Dataflow SDK causes code to
1198                  # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
1199                  # might use this to install jars containing the user's code and all of the
1200                  # various dependencies (libraries, data files, etc.) required in order
1201                  # for that code to run.
1202                "location": "A String", # The resource to read the package from. The supported resource type is:
1203                    #
1204                    # Google Cloud Storage:
1205                    #
1206                    #   storage.googleapis.com/{bucket}
1207                    #   bucket.storage.googleapis.com/
1208                "name": "A String", # The name of the package.
1209              },
1210            ],
1211            "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
1212                # service will attempt to choose a reasonable default.
1213            "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
1214                # the service will use the network "default".
1215            "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
1216                # will attempt to choose a reasonable default.
1217            "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
1218                # attempt to choose a reasonable default.
1219            "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
1220                # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
1221                # `TEARDOWN_NEVER`.
1222                # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
1223                # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
1224                # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
1225                # down.
1226                #
1227                # If the workers are not torn down by the service, they will
1228                # continue to run and use Google Compute Engine VM resources in the
1229                # user's project until they are explicitly terminated by the user.
1230                # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
1231                # policy except for small, manually supervised test jobs.
1232                #
1233                # If unknown or unspecified, the service will attempt to choose a reasonable
1234                # default.
1235            "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
1236                # Compute Engine API.
1237            "ipConfiguration": "A String", # Configuration for VM IPs.
1238            "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
1239                # service will choose a number of threads (according to the number of cores
1240                # on the selected machine type for batch, or 1 by convention for streaming).
1241            "poolArgs": { # Extra arguments for this worker pool.
1242              "a_key": "", # Properties of the object. Contains field @type with type URL.
1243            },
1244            "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
1245                # execute the job.  If zero or unspecified, the service will
1246                # attempt to choose a reasonable default.
1247            "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
1248                # harness, residing in Google Container Registry.
1249            "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
1250                # the form "regions/REGION/subnetworks/SUBNETWORK".
1251            "dataDisks": [ # Data disks that are used by a VM in this workflow.
1252              { # Describes the data disk used by a workflow job.
1253                "mountPoint": "A String", # Directory in a VM where disk is mounted.
1254                "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
1255                    # attempt to choose a reasonable default.
1256                "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
1257                    # must be a disk type appropriate to the project and zone in which
1258                    # the workers will run.  If unknown or unspecified, the service
1259                    # will attempt to choose a reasonable default.
1260                    #
1261                    # For example, the standard persistent disk type is a resource name
1262                    # typically ending in "pd-standard".  If SSD persistent disks are
1263                    # available, the resource name typically ends with "pd-ssd".  The
1264                    # actual valid values are defined by the Google Compute Engine API,
1265                    # not by the Cloud Dataflow API; consult the Google Compute Engine
1266                    # documentation for more information about determining the set of
1267                    # available disk types for a particular project and zone.
1268                    #
1269                    # Google Compute Engine Disk types are local to a particular
1270                    # project in a particular zone, and so the resource name will
1271                    # typically look something like this:
1272                    #
1273                    # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
1274              },
1275            ],
1276            "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
1277              "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
1278              "algorithm": "A String", # The algorithm to use for autoscaling.
1279            },
1280            "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
1281                # select a default set of packages which are useful to worker
1282                # harnesses written in a particular language.
1283            "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
1284                # attempt to choose a reasonable default.
1285            "metadata": { # Metadata to set on the Google Compute Engine VMs.
1286              "a_key": "A String",
1287            },
1288          },
1289        ],
1290        "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1291            # storage.  The system will append the suffix "/temp-{JOBNAME}" to
1292            # this resource prefix, where {JOBNAME} is the value of the
1293            # job_name field.  The resulting bucket and object prefix is used
1294            # as the prefix of the resources used to store temporary data
1295            # needed during the job execution.  NOTE: This will override the
1296            # value in taskrunner_settings.
1297            # The supported resource type is:
1298            #
1299            # Google Cloud Storage:
1300            #
1301            #   storage.googleapis.com/{bucket}/{object}
1302            #   bucket.storage.googleapis.com/{object}
1303      },
1304      "location": "A String", # The [regional endpoint]
1305          # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
1306          # contains this job.
1307      "tempFiles": [ # A set of files the system should be aware of that are used
1308          # for temporary storage. These temporary files will be
1309          # removed on job completion.
1310          # No duplicates are allowed.
1311          # No file patterns are supported.
1312          #
1313          # The supported files are:
1314          #
1315          # Google Cloud Storage:
1316          #
1317          #    storage.googleapis.com/{bucket}/{object}
1318          #    bucket.storage.googleapis.com/{object}
1319        "A String",
1320      ],
1321      "type": "A String", # The type of Cloud Dataflow job.
1322      "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
1323          # If this field is set, the service will ensure its uniqueness.
1324          # The request to create a job will fail if the service has knowledge of a
1325          # previously submitted job with the same client's ID and job name.
1326          # The caller may use this field to ensure idempotence of job
1327          # creation across retried attempts to create a job.
1328          # By default, the field is empty and, in that case, the service ignores it.
1329      "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
1330          # snapshot.
1331      "stepsLocation": "A String", # The GCS location where the steps are stored.
1332      "currentStateTime": "A String", # The timestamp associated with the current state.
1333      "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
1334          # Flexible resource scheduling jobs are started with some delay after job
1335          # creation, so start_time is unset before start and is updated when the
1336          # job is started by the Cloud Dataflow service. For other jobs, start_time
1337          # always equals create_time and is immutable and set by the Cloud Dataflow
1338          # service.
1339      "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
1340          # Cloud Dataflow service.
1341      "requestedState": "A String", # The job's requested state.
1342          #
1343          # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
1344          # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
1345          # also be used to directly set a job's requested state to
1346          # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
1347          # job if it has not already reached a terminal state.
1348      "name": "A String", # The user-specified Cloud Dataflow job name.
1349          #
1350          # Only one Job with a given name may exist in a project at any
1351          # given time. If a caller attempts to create a Job with the same
1352          # name as an already-existing Job, the attempt returns the
1353          # existing Job.
1354          #
1355          # The name must match the regular expression
1356          # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
1357      "steps": [ # Exactly one of step or steps_location should be specified.
1358          #
1359          # The top-level steps that constitute the entire job.
1360        { # Defines a particular step within a Cloud Dataflow job.
1361            #
1362            # A job consists of multiple steps, each of which performs some
1363            # specific operation as part of the overall job.  Data is typically
1364            # passed from one step to another as part of the job.
1365            #
1366            # Here's an example of a sequence of steps which together implement a
1367            # Map-Reduce job:
1368            #
1369            #   * Read a collection of data from some source, parsing the
1370            #     collection's elements.
1371            #
1372            #   * Validate the elements.
1373            #
1374            #   * Apply a user-defined function to map each element to some value
1375            #     and extract an element-specific key value.
1376            #
1377            #   * Group elements with the same key into a single element with
1378            #     that key, transforming a multiply-keyed collection into a
1379            #     uniquely-keyed collection.
1380            #
1381            #   * Write the elements out to some data sink.
1382            #
1383            # Note that the Cloud Dataflow service may be used to run many different
1384            # types of jobs, not just Map-Reduce.
1385          "kind": "A String", # The kind of step in the Cloud Dataflow job.
1386          "properties": { # Named properties associated with the step. Each kind of
1387              # predefined step has its own required set of properties.
1388              # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
1389            "a_key": "", # Properties of the object.
1390          },
1391          "name": "A String", # The name that identifies the step. This must be unique for each
1392              # step with respect to all other steps in the Cloud Dataflow job.
1393        },
1394      ],
1395      "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
1396          # of the job it replaced.
1397          #
1398          # When sending a `CreateJobRequest`, you can update a job by specifying it
1399          # here. The job named here is stopped, and its intermediate state is
1400          # transferred to this job.
1401      "currentState": "A String", # The current state of the job.
1402          #
1403          # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
1404          # specified.
1405          #
1406          # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
1407          # terminal state. After a job has reached a terminal state, no
1408          # further state updates may be made.
1409          #
1410          # This field may be mutated by the Cloud Dataflow service;
1411          # callers cannot mutate it.
1412      "executionInfo": { # Deprecated.  Additional information about how a Cloud Dataflow job will be
1413          # executed that isn't contained in the submitted job.
1414        "stages": { # A mapping from each stage to the information about that stage.
1415          "a_key": { # Contains information about how a particular
1416              # google.dataflow.v1beta3.Step will be executed.
1417            "stepName": [ # The steps associated with the execution stage.
1418                # Note that stages may have several steps, and that a given step
1419                # might be run by more than one stage.
1420              "A String",
1421            ],
1422          },
1423        },
1424      },
1425    },
1426  }</pre>
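<p>A minimal, hypothetical sketch of calling this launch method with the Google API Python client follows. The project ID, bucket, template path, and template parameter names are placeholders rather than values defined by this API, and Application Default Credentials are assumed to be configured.</p>
<pre>
# Minimal sketch: launch a Dataflow job from a template stored in Cloud Storage.
# All identifiers below (project, bucket, template path, parameter names) are
# illustrative placeholders, not values prescribed by this API.
from googleapiclient.discovery import build

dataflow = build('dataflow', 'v1b3')

response = dataflow.projects().templates().launch(
    projectId='my-project',                          # placeholder project ID
    gcsPath='gs://my-bucket/templates/my-template',  # placeholder template path
    validateOnly=False,                              # set True to validate without launching
    body={
        'jobName': 'example-job',                    # name for the launched job
        'parameters': {                              # template-specific parameters (placeholders)
            'inputFile': 'gs://my-bucket/input.txt',
        },
        'environment': {                             # optional runtime environment
            'zone': 'us-central1-f',
        },
    },
).execute()

# The response wraps the launched Job described above.
job = response.get('job', {})
print(job.get('id'), job.get('currentState'))
</pre>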
1427</div>
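<p>Once a job has been launched, fields of the returned Job documented above (for example <code>id</code>, <code>location</code>, and <code>currentState</code>) can be used to poll for progress. The sketch below continues from the previous example and assumes the <code>projects.locations.jobs.get</code> method of this API, which is documented on its own page; all identifiers remain placeholders.</p>
<pre>
import time

# Hypothetical polling loop over the Job returned by the launch sketch above;
# `dataflow` and `response` are reused from that sketch.
job = response.get('job', {})
job_id = job.get('id')
region = job.get('location') or 'us-central1'         # placeholder fallback region

# JOB_STATE_DONE, JOB_STATE_FAILED, and JOB_STATE_CANCELLED are terminal JobState values.
terminal_states = {'JOB_STATE_DONE', 'JOB_STATE_FAILED', 'JOB_STATE_CANCELLED'}
while True:
    current = dataflow.projects().locations().jobs().get(
        projectId='my-project',                        # placeholder project ID
        location=region,
        jobId=job_id,
    ).execute()
    state = current.get('currentState')
    print(state)
    if state in terminal_states:
        break
    time.sleep(30)
</pre>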
1428
1429</body></html>