create(projectId, location, body, x__xgafv=None, replaceJobId=None, view=None)

Creates a Cloud Dataflow job.

To create a job, we recommend using `projects.locations.jobs.create` with a
[regional endpoint]
(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using
`projects.jobs.create` is not recommended, as your job will always start
in `us-central1`.

Args:
  projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
  location: string, The [regional endpoint]
(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
contains this job. (required)
  body: object, The request body. (required)
    The object takes the form of:

{ # Defines a job to be run by the Cloud Dataflow service.
  "labels": { # User-defined labels for this job.
      # 
      # The labels map can contain no more than 64 entries.  Entries of the labels
      # map are UTF8 strings that comply with the following restrictions:
      # 
      # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
      # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
      # * Both keys and values are additionally constrained to be <= 128 bytes in
      # size.
    "a_key": "A String",
  },
  "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the
      # ListJob response and Job SUMMARY view.
      #
      # This field is populated by the Dataflow service to support filtering jobs
      # by the metadata values provided here. Populated for ListJobs and all GetJob
      # views SUMMARY and higher.
    "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
      "versionDisplayName": "A String", # A readable string describing the version of the SDK.
      "version": "A String", # The version of the SDK used to run the job.
      "sdkSupportStatus": "A String", # The support status for this SDK version.
    },
    "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
      { # Metadata for a PubSub connector used by the job.
        "topic": "A String", # Topic accessed in the connection.
        "subscription": "A String", # Subscription used in the connection.
      },
    ],
    "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
      { # Metadata for a Datastore connector used by the job.
        "projectId": "A String", # ProjectId accessed in the connection.
        "namespace": "A String", # Namespace used in the connection.
      },
    ],
    "fileDetails": [ # Identification of a File source used in the Dataflow job.
      { # Metadata for a File connector used by the job.
        "filePattern": "A String", # File Pattern used to access files by the connector.
      },
    ],
    "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
      { # Metadata for a Spanner connector used by the job.
        "instanceId": "A String", # InstanceId accessed in the connection.
        "projectId": "A String", # ProjectId accessed in the connection.
        "databaseId": "A String", # DatabaseId accessed in the connection.
      },
    ],
    "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
      { # Metadata for a BigTable connector used by the job.
        "instanceId": "A String", # InstanceId accessed in the connection.
        "projectId": "A String", # ProjectId accessed in the connection.
        "tableId": "A String", # TableId accessed in the connection.
      },
    ],
    "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
      { # Metadata for a BigQuery connector used by the job.
        "projectId": "A String", # Project accessed in the connection.
        "dataset": "A String", # Dataset accessed in the connection.
        "table": "A String", # Table accessed in the connection.
        "query": "A String", # Query used to access data in the connection.
      },
    ],
  },
  "pipelineDescription": { # A descriptive representation of the submitted pipeline as well as the executed
      # form.  This data is provided by the Dataflow service for ease of visualizing
      # the pipeline and interpreting Dataflow provided metrics.
      #
      # Preliminary field: The format of this data may change at any time.
      # A description of the user pipeline and stages through which it is executed.
      # Created by Cloud Dataflow service.  Only retrieved with
      # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
    "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
      { # Description of the type, names/ids, and input/outputs for a transform.
        "kind": "A String", # Type of transform.
        "name": "A String", # User provided name for this transform instance.
        "inputCollectionName": [ # User names for all collection inputs to this transform.
          "A String",
        ],
        "displayData": [ # Transform-specific display data.
          { # Data provided with a pipeline or transform to provide descriptive info.
            "shortStrValue": "A String", # A possible additional shorter value to display.
                # For example a java_class_name_value of com.mypackage.MyDoFn
                # will be stored with MyDoFn as the short_str_value and
                # com.mypackage.MyDoFn as the java_class_name value.
                # short_str_value can be displayed and java_class_name_value
                # will be displayed as a tooltip.
            "durationValue": "A String", # Contains value if the data is of duration type.
            "url": "A String", # An optional full URL.
            "floatValue": 3.14, # Contains value if the data is of float type.
            "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                # language namespace (e.g. a Python module) which defines the display data.
                # This allows a dax monitoring system to specially handle the data
                # and perform custom rendering.
            "javaClassValue": "A String", # Contains value if the data is of java class type.
            "label": "A String", # An optional label to display in a dax UI for the element.
            "boolValue": True or False, # Contains value if the data is of a boolean type.
            "strValue": "A String", # Contains value if the data is of string type.
            "key": "A String", # The key identifying the display data.
                # This is intended to be used as a label for the display data
                # when viewed in a dax monitoring system.
            "int64Value": "A String", # Contains value if the data is of int64 type.
            "timestampValue": "A String", # Contains value if the data is of timestamp type.
          },
        ],
        "outputCollectionName": [ # User names for all collection outputs to this transform.
          "A String",
        ],
        "id": "A String", # SDK generated id of this transform instance.
      },
    ],
    "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
      { # Description of the composing transforms, names/ids, and input/outputs of a
          # stage of execution.  Some composing transforms and sources may have been
          # generated by the Dataflow service during execution planning.
        "componentSource": [ # Collections produced and consumed by component transforms of this stage.
          { # Description of an interstitial value between transforms in an execution
              # stage.
            "userName": "A String", # Human-readable name for this source; may be user or system generated.
            "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                # source is most closely associated.
            "name": "A String", # Dataflow service generated name for this source.
          },
        ],
        "kind": "A String", # Type of transform this stage is executing.
        "name": "A String", # Dataflow service generated name for this stage.
        "outputSource": [ # Output sources for this stage.
          { # Description of an input or output of an execution stage.
            "userName": "A String", # Human-readable name for this source; may be user or system generated.
            "sizeBytes": "A String", # Size of the source, if measurable.
            "name": "A String", # Dataflow service generated name for this source.
            "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                # source is most closely associated.
          },
        ],
        "inputSource": [ # Input sources for this stage.
          { # Description of an input or output of an execution stage.
            "userName": "A String", # Human-readable name for this source; may be user or system generated.
            "sizeBytes": "A String", # Size of the source, if measurable.
            "name": "A String", # Dataflow service generated name for this source.
            "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                # source is most closely associated.
          },
        ],
        "componentTransform": [ # Transforms that comprise this execution stage.
          { # Description of a transform executed as part of an execution stage.
            "userName": "A String", # Human-readable name for this transform; may be user or system generated.
            "originalTransform": "A String", # User name for the original user transform with which this transform is
                # most closely associated.
            "name": "A String", # Dataflow service generated name for this transform.
          },
        ],
        "id": "A String", # Dataflow service generated id for this stage.
      },
    ],
    "displayData": [ # Pipeline level display data.
      { # Data provided with a pipeline or transform to provide descriptive info.
        "shortStrValue": "A String", # A possible additional shorter value to display.
            # For example a java_class_name_value of com.mypackage.MyDoFn
            # will be stored with MyDoFn as the short_str_value and
            # com.mypackage.MyDoFn as the java_class_name value.
            # short_str_value can be displayed and java_class_name_value
            # will be displayed as a tooltip.
        "durationValue": "A String", # Contains value if the data is of duration type.
        "url": "A String", # An optional full URL.
        "floatValue": 3.14, # Contains value if the data is of float type.
        "namespace": "A String", # The namespace for the key. This is usually a class name or programming
            # language namespace (e.g. a Python module) which defines the display data.
            # This allows a dax monitoring system to specially handle the data
            # and perform custom rendering.
        "javaClassValue": "A String", # Contains value if the data is of java class type.
        "label": "A String", # An optional label to display in a dax UI for the element.
        "boolValue": True or False, # Contains value if the data is of a boolean type.
        "strValue": "A String", # Contains value if the data is of string type.
        "key": "A String", # The key identifying the display data.
            # This is intended to be used as a label for the display data
            # when viewed in a dax monitoring system.
        "int64Value": "A String", # Contains value if the data is of int64 type.
        "timestampValue": "A String", # Contains value if the data is of timestamp type.
      },
    ],
  },
  "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
      # callers cannot mutate it.
    { # A message describing the state of a particular execution stage.
      "executionStageName": "A String", # The name of the execution stage.
      "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
      "currentStateTime": "A String", # The time at which the stage transitioned to this state.
    },
  ],
  "id": "A String", # The unique ID of this job.
      # 
      # This field is set by the Cloud Dataflow service when the Job is
      # created, and is immutable for the life of the job.
  "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
      # `JOB_STATE_UPDATED`), this field contains the ID of that job.
  "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
  "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
      # corresponding name prefixes of the new job.
    "a_key": "A String",
  },
  "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
    "version": { # A structure describing which components and their versions of the service
        # are required in order to run the job.
      "a_key": "", # Properties of the object.
    },
    "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
    "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data
        # at rest, AKA a Customer Managed Encryption Key (CMEK).
        #
        # Format:
        #   projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
    "internalExperiments": { # Experimental settings.
      "a_key": "", # Properties of the object. Contains field @type with type URL.
    },
    "dataset": "A String", # The dataset for the current project where various workflow
        # related tables are stored.
        #
        # The supported resource type is:
        #
        # Google BigQuery:
        #   bigquery.googleapis.com/{dataset}
    "experiments": [ # The list of experiments to enable.
      "A String",
    ],
    "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
    "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
        # options are passed through the service and are used to recreate the
        # SDK pipeline options on the worker in a language agnostic and platform
        # independent way.
      "a_key": "", # Properties of the object.
    },
    "userAgent": { # A description of the process that generated the request.
      "a_key": "", # Properties of the object.
    },
    "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
        # unspecified, the service will attempt to choose a reasonable
        # default.  This should be in the form of the API service name,
        # e.g. "compute.googleapis.com".
    "workerPools": [ # The worker pools. At least one "harness" worker pool must be
        # specified in order for the job to have workers.
      { # Describes one particular pool of Cloud Dataflow workers to be
          # instantiated by the Cloud Dataflow service in order to perform the
          # computations required by a job.  Note that a workflow job may use
          # multiple pools, in order to match the various computational
          # requirements of the various stages of the job.
        "diskSourceImage": "A String", # Fully qualified source image for disks.
        "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
            # using the standard Dataflow task runner.  Users should ignore
            # this field.
          "workflowFileName": "A String", # The file to store the workflow in.
          "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
              # will not be uploaded.
              #
              # The supported resource type is:
              #
              # Google Cloud Storage:
              #   storage.googleapis.com/{bucket}/{object}
              #   bucket.storage.googleapis.com/{object}
          "commandlinesFileName": "A String", # The file to store preprocessing commands in.
          "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
            "reportingEnabled": True or False, # Whether to send work progress updates to the service.
            "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
                # "shuffle/v1beta1".
            "workerId": "A String", # The ID of the worker running this pipeline.
            "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
                #
                # When workers access Google Cloud APIs, they logically do so via
                # relative URLs.  If this field is specified, it supplies the base
                # URL to use for resolving these relative URLs.  The normative
                # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                # Locators".
                #
                # If not specified, the default value is "http://www.googleapis.com/"
            "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
                # "dataflow/v1b3/projects".
            "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
                # storage.
                #
                # The supported resource type is:
                #
                # Google Cloud Storage:
                #
                #   storage.googleapis.com/{bucket}/{object}
                #   bucket.storage.googleapis.com/{object}
          },
          "vmId": "A String", # The ID string of the VM.
          "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
          "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
          "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
              # access the Cloud Dataflow API.
            "A String",
          ],
          "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
              # taskrunner; e.g. "root".
          "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
              #
              # When workers access Google Cloud APIs, they logically do so via
              # relative URLs.  If this field is specified, it supplies the base
              # URL to use for resolving these relative URLs.  The normative
              # algorithm used is defined by RFC 1808, "Relative Uniform Resource
              # Locators".
              #
              # If not specified, the default value is "http://www.googleapis.com/"
          "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
              # taskrunner; e.g. "wheel".
          "languageHint": "A String", # The suggested backend language.
          "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
              # console.
          "streamingWorkerMainClass": "A String", # The streaming worker main class name.
          "logDir": "A String", # The directory on the VM to store logs.
          "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
          "harnessCommand": "A String", # The command to launch the worker harness.
          "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
              # temporary storage.
              #
              # The supported resource type is:
              #
              # Google Cloud Storage:
              #   storage.googleapis.com/{bucket}/{object}
              #   bucket.storage.googleapis.com/{object}
          "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
        },
        "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
            # are supported.
        "packages": [ # Packages to be installed on workers.
          { # The packages that must be installed in order for a worker to run the
              # steps of the Cloud Dataflow job that will be assigned to its worker
              # pool.
              #
              # This is the mechanism by which the Cloud Dataflow SDK causes code to
              # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
              # might use this to install jars containing the user's code and all of the
              # various dependencies (libraries, data files, etc.) required in order
              # for that code to run.
            "location": "A String", # The resource to read the package from. The supported resource type is:
                #
                # Google Cloud Storage:
                #
                #   storage.googleapis.com/{bucket}
                #   bucket.storage.googleapis.com/
            "name": "A String", # The name of the package.
          },
        ],
        "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
            # service will attempt to choose a reasonable default.
        "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
            # the service will use the network "default".
        "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
            # will attempt to choose a reasonable default.
        "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
            # attempt to choose a reasonable default.
        "teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.
            # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
            # `TEARDOWN_NEVER`.
            # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
            # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
            # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
            # down.
            #
            # If the workers are not torn down by the service, they will
            # continue to run and use Google Compute Engine VM resources in the
            # user's project until they are explicitly terminated by the user.
            # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
            # policy except for small, manually supervised test jobs.
            #
            # If unknown or unspecified, the service will attempt to choose a reasonable
            # default.
        "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
            # Compute Engine API.
        "ipConfiguration": "A String", # Configuration for VM IPs.
        "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
            # service will choose a number of threads (according to the number of cores
            # on the selected machine type for batch, or 1 by convention for streaming).
        "poolArgs": { # Extra arguments for this worker pool.
          "a_key": "", # Properties of the object. Contains field @type with type URL.
        },
        "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
            # execute the job.  If zero or unspecified, the service will
            # attempt to choose a reasonable default.
        "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
            # harness, residing in Google Container Registry.
        "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
            # the form "regions/REGION/subnetworks/SUBNETWORK".
        "dataDisks": [ # Data disks that are used by a VM in this workflow.
          { # Describes the data disk used by a workflow job.
            "mountPoint": "A String", # Directory in a VM where disk is mounted.
            "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
                # attempt to choose a reasonable default.
            "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
                # must be a disk type appropriate to the project and zone in which
                # the workers will run.  If unknown or unspecified, the service
                # will attempt to choose a reasonable default.
                #
                # For example, the standard persistent disk type is a resource name
                # typically ending in "pd-standard".  If SSD persistent disks are
                # available, the resource name typically ends with "pd-ssd".  The
                # actual valid values are defined by the Google Compute Engine API,
                # not by the Cloud Dataflow API; consult the Google Compute Engine
                # documentation for more information about determining the set of
                # available disk types for a particular project and zone.
                #
                # Google Compute Engine Disk types are local to a particular
                # project in a particular zone, and so the resource name will
                # typically look something like this:
                #
                # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
          },
        ],
        "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
          "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
          "algorithm": "A String", # The algorithm to use for autoscaling.
        },
        "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
            # select a default set of packages which are useful to worker
            # harnesses written in a particular language.
        "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
            # attempt to choose a reasonable default.
        "metadata": { # Metadata to set on the Google Compute Engine VMs.
          "a_key": "A String",
        },
      },
    ],
    "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
        # storage.  The system will append the suffix "/temp-{JOBNAME}" to
        # this resource prefix, where {JOBNAME} is the value of the
        # job_name field.  The resulting bucket and object prefix is used
        # as the prefix of the resources used to store temporary data
        # needed during the job execution.  NOTE: This will override the
        # value in taskrunner_settings.
        # The supported resource type is:
        #
        # Google Cloud Storage:
        #
        #   storage.googleapis.com/{bucket}/{object}
        #   bucket.storage.googleapis.com/{object}
  },
  "location": "A String", # The [regional endpoint]
      # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
      # contains this job.
  "tempFiles": [ # A set of files the system should be aware of that are used
      # for temporary storage. These temporary files will be
      # removed on job completion.
      # No duplicates are allowed.
      # No file patterns are supported.
      # 
      # The supported files are:
      # 
      # Google Cloud Storage:
      # 
      #    storage.googleapis.com/{bucket}/{object}
      #    bucket.storage.googleapis.com/{object}
    "A String",
  ],
  "type": "A String", # The type of Cloud Dataflow job.
  "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
      # If this field is set, the service will ensure its uniqueness.
      # The request to create a job will fail if the service has knowledge of a
      # previously submitted job with the same client's ID and job name.
      # The caller may use this field to ensure idempotence of job
      # creation across retried attempts to create a job.
      # By default, the field is empty and, in that case, the service ignores it.
  "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
      # snapshot.
  "stepsLocation": "A String", # The GCS location where the steps are stored.
  "currentStateTime": "A String", # The timestamp associated with the current state.
  "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
      # Flexible resource scheduling jobs are started with some delay after job
      # creation, so start_time is unset before start and is updated when the
      # job is started by the Cloud Dataflow service. For other jobs, start_time
      # always equals create_time and is immutable and set by the Cloud Dataflow
      # service.
  "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
      # Cloud Dataflow service.
  "requestedState": "A String", # The job's requested state.
      # 
      # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
      # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
      # also be used to directly set a job's requested state to
      # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
      # job if it has not already reached a terminal state.
  "name": "A String", # The user-specified Cloud Dataflow job name.
      # 
      # Only one Job with a given name may exist in a project at any
      # given time. If a caller attempts to create a Job with the same
      # name as an already-existing Job, the attempt returns the
      # existing Job.
      # 
      # The name must match the regular expression
      # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
  "steps": [ # Exactly one of steps or steps_location should be specified.
      # 
      # The top-level steps that constitute the entire job.
    { # Defines a particular step within a Cloud Dataflow job.
        #
        # A job consists of multiple steps, each of which performs some
        # specific operation as part of the overall job.  Data is typically
        # passed from one step to another as part of the job.
        #
        # Here's an example of a sequence of steps which together implement a
        # Map-Reduce job:
        #
        #   * Read a collection of data from some source, parsing the
        #     collection's elements.
        #
        #   * Validate the elements.
        #
        #   * Apply a user-defined function to map each element to some value
        #     and extract an element-specific key value.
        #
        #   * Group elements with the same key into a single element with
        #     that key, transforming a multiply-keyed collection into a
        #     uniquely-keyed collection.
        #
        #   * Write the elements out to some data sink.
        #
        # Note that the Cloud Dataflow service may be used to run many different
        # types of jobs, not just Map-Reduce.
      "kind": "A String", # The kind of step in the Cloud Dataflow job.
      "properties": { # Named properties associated with the step. Each kind of
          # predefined step has its own required set of properties.
          # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
        "a_key": "", # Properties of the object.
      },
      "name": "A String", # The name that identifies the step. This must be unique for each
          # step with respect to all other steps in the Cloud Dataflow job.
    },
  ],
  "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
      # of the job it replaced.
      # 
      # When sending a `CreateJobRequest`, you can update a job by specifying it
      # here. The job named here is stopped, and its intermediate state is
      # transferred to this job.
  "currentState": "A String", # The current state of the job.
      # 
      # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
      # specified.
      # 
      # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
      # terminal state. After a job has reached a terminal state, no
      # further state updates may be made.
      # 
      # This field may be mutated by the Cloud Dataflow service;
      # callers cannot mutate it.
  "executionInfo": { # Deprecated. Additional information about how a Cloud Dataflow job will be
      # executed that isn't contained in the submitted job.
    "stages": { # A mapping from each stage to the information about that stage.
      "a_key": { # Contains information about how a particular
          # google.dataflow.v1beta3.Step will be executed.
        "stepName": [ # The steps associated with the execution stage.
            # Note that stages may have several steps, and that a given step
            # might be run by more than one stage.
          "A String",
        ],
      },
    },
  },
}

  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format
  replaceJobId: string, Deprecated. This field is now in the Job message.
  view: string, The level of information requested in response.

Returns:
  An object of the form:

    { # Defines a job to be run by the Cloud Dataflow service.
    "labels": { # User-defined labels for this job.
        #
        # The labels map can contain no more than 64 entries.  Entries of the labels
        # map are UTF8 strings that comply with the following restrictions:
        #
        # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
        # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
        # * Both keys and values are additionally constrained to be <= 128 bytes in
        # size.
      "a_key": "A String",
    },
    "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the
        # ListJob response and Job SUMMARY view.
        # This field is populated by the Dataflow service to support filtering jobs
        # by the metadata values provided here. Populated for ListJobs and all GetJob
        # views SUMMARY and higher.
      "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
        "versionDisplayName": "A String", # A readable string describing the version of the SDK.
        "version": "A String", # The version of the SDK used to run the job.
        "sdkSupportStatus": "A String", # The support status for this SDK version.
      },
      "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
        { # Metadata for a PubSub connector used by the job.
          "topic": "A String", # Topic accessed in the connection.
          "subscription": "A String", # Subscription used in the connection.
        },
      ],
      "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
        { # Metadata for a Datastore connector used by the job.
          "projectId": "A String", # ProjectId accessed in the connection.
          "namespace": "A String", # Namespace used in the connection.
        },
      ],
      "fileDetails": [ # Identification of a File source used in the Dataflow job.
        { # Metadata for a File connector used by the job.
          "filePattern": "A String", # File Pattern used to access files by the connector.
        },
      ],
      "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
        { # Metadata for a Spanner connector used by the job.
          "instanceId": "A String", # InstanceId accessed in the connection.
          "projectId": "A String", # ProjectId accessed in the connection.
          "databaseId": "A String", # DatabaseId accessed in the connection.
        },
      ],
      "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
        { # Metadata for a BigTable connector used by the job.
          "instanceId": "A String", # InstanceId accessed in the connection.
          "projectId": "A String", # ProjectId accessed in the connection.
          "tableId": "A String", # TableId accessed in the connection.
        },
      ],
      "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
        { # Metadata for a BigQuery connector used by the job.
          "projectId": "A String", # Project accessed in the connection.
          "dataset": "A String", # Dataset accessed in the connection.
          "table": "A String", # Table accessed in the connection.
          "query": "A String", # Query used to access data in the connection.
        },
      ],
    },
    "pipelineDescription": { # A descriptive representation of the submitted pipeline as well as the executed
        # form.  This data is provided by the Dataflow service for ease of visualizing
        # the pipeline and interpreting Dataflow provided metrics.
        # Preliminary field: The format of this data may change at any time.
        # A description of the user pipeline and stages through which it is executed.
        # Created by Cloud Dataflow service.  Only retrieved with
        # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
      "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
        { # Description of the type, names/ids, and input/outputs for a transform.
          "kind": "A String", # Type of transform.
          "name": "A String", # User provided name for this transform instance.
          "inputCollectionName": [ # User names for all collection inputs to this transform.
            "A String",
          ],
          "displayData": [ # Transform-specific display data.
            { # Data provided with a pipeline or transform to provide descriptive info.
              "shortStrValue": "A String", # A possible additional shorter value to display.
                  # For example a java_class_name_value of com.mypackage.MyDoFn
                  # will be stored with MyDoFn as the short_str_value and
                  # com.mypackage.MyDoFn as the java_class_name value.
                  # short_str_value can be displayed and java_class_name_value
                  # will be displayed as a tooltip.
              "durationValue": "A String", # Contains value if the data is of duration type.
              "url": "A String", # An optional full URL.
              "floatValue": 3.14, # Contains value if the data is of float type.
              "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                  # language namespace (i.e. python module) which defines the display data.
                  # This allows a dax monitoring system to specially handle the data
                  # and perform custom rendering.
              "javaClassValue": "A String", # Contains value if the data is of java class type.
              "label": "A String", # An optional label to display in a dax UI for the element.
              "boolValue": True or False, # Contains value if the data is of a boolean type.
              "strValue": "A String", # Contains value if the data is of string type.
              "key": "A String", # The key identifying the display data.
                  # This is intended to be used as a label for the display data
                  # when viewed in a dax monitoring system.
              "int64Value": "A String", # Contains value if the data is of int64 type.
              "timestampValue": "A String", # Contains value if the data is of timestamp type.
            },
          ],
          "outputCollectionName": [ # User names for all collection outputs to this transform.
            "A String",
          ],
          "id": "A String", # SDK generated id of this transform instance.
        },
      ],
      "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
        { # Description of the composing transforms, names/ids, and input/outputs of a
            # stage of execution.  Some composing transforms and sources may have been
            # generated by the Dataflow service during execution planning.
          "componentSource": [ # Collections produced and consumed by component transforms of this stage.
            { # Description of an interstitial value between transforms in an execution
                # stage.
              "userName": "A String", # Human-readable name for this source; may be user or system generated.
              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                  # source is most closely associated.
              "name": "A String", # Dataflow service generated name for this source.
            },
          ],
          "kind": "A String", # Type of transform this stage is executing.
          "name": "A String", # Dataflow service generated name for this stage.
          "outputSource": [ # Output sources for this stage.
            { # Description of an input or output of an execution stage.
              "userName": "A String", # Human-readable name for this source; may be user or system generated.
              "sizeBytes": "A String", # Size of the source, if measurable.
              "name": "A String", # Dataflow service generated name for this source.
              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                  # source is most closely associated.
            },
          ],
          "inputSource": [ # Input sources for this stage.
            { # Description of an input or output of an execution stage.
              "userName": "A String", # Human-readable name for this source; may be user or system generated.
              "sizeBytes": "A String", # Size of the source, if measurable.
              "name": "A String", # Dataflow service generated name for this source.
              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                  # source is most closely associated.
            },
          ],
          "componentTransform": [ # Transforms that comprise this execution stage.
            { # Description of a transform executed as part of an execution stage.
              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
              "originalTransform": "A String", # User name for the original user transform with which this transform is
                  # most closely associated.
              "name": "A String", # Dataflow service generated name for this transform.
            },
          ],
          "id": "A String", # Dataflow service generated id for this stage.
        },
      ],
      "displayData": [ # Pipeline level display data.
        { # Data provided with a pipeline or transform to provide descriptive info.
          "shortStrValue": "A String", # A possible additional shorter value to display.
              # For example a java_class_name_value of com.mypackage.MyDoFn
              # will be stored with MyDoFn as the short_str_value and
              # com.mypackage.MyDoFn as the java_class_name value.
              # short_str_value can be displayed and java_class_name_value
              # will be displayed as a tooltip.
          "durationValue": "A String", # Contains value if the data is of duration type.
          "url": "A String", # An optional full URL.
          "floatValue": 3.14, # Contains value if the data is of float type.
          "namespace": "A String", # The namespace for the key. This is usually a class name or programming
              # language namespace (i.e. python module) which defines the display data.
              # This allows a dax monitoring system to specially handle the data
              # and perform custom rendering.
          "javaClassValue": "A String", # Contains value if the data is of java class type.
          "label": "A String", # An optional label to display in a dax UI for the element.
          "boolValue": True or False, # Contains value if the data is of a boolean type.
          "strValue": "A String", # Contains value if the data is of string type.
          "key": "A String", # The key identifying the display data.
              # This is intended to be used as a label for the display data
              # when viewed in a dax monitoring system.
          "int64Value": "A String", # Contains value if the data is of int64 type.
          "timestampValue": "A String", # Contains value if the data is of timestamp type.
        },
      ],
    },
    "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
        # callers cannot mutate it.
      { # A message describing the state of a particular execution stage.
        "executionStageName": "A String", # The name of the execution stage.
        "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
        "currentStateTime": "A String", # The time at which the stage transitioned to this state.
      },
    ],
    "id": "A String", # The unique ID of this job.
        #
        # This field is set by the Cloud Dataflow service when the Job is
        # created, and is immutable for the life of the job.
    "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
        # `JOB_STATE_UPDATED`), this field contains the ID of that job.
    "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
    "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
        # corresponding name prefixes of the new job.
      "a_key": "A String",
    },
    "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
      "version": { # A structure describing which components and their versions of the service
          # are required in order to run the job.
        "a_key": "", # Properties of the object.
      },
      "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
      "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data
          # at rest, AKA a Customer Managed Encryption Key (CMEK).
          #
          # Format:
          #   projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
      "internalExperiments": { # Experimental settings.
        "a_key": "", # Properties of the object. Contains field @type with type URL.
      },
      "dataset": "A String", # The dataset for the current project where various workflow
          # related tables are stored.
          #
          # The supported resource type is:
          #
          # Google BigQuery:
          #   bigquery.googleapis.com/{dataset}
      "experiments": [ # The list of experiments to enable.
        "A String",
      ],
      "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
      "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
          # options are passed through the service and are used to recreate the
          # SDK pipeline options on the worker in a language agnostic and platform
          # independent way.
        "a_key": "", # Properties of the object.
      },
      "userAgent": { # A description of the process that generated the request.
        "a_key": "", # Properties of the object.
      },
      "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
          # unspecified, the service will attempt to choose a reasonable
          # default.  This should be in the form of the API service name,
          # e.g. "compute.googleapis.com".
      "workerPools": [ # The worker pools. At least one "harness" worker pool must be
          # specified in order for the job to have workers.
        { # Describes one particular pool of Cloud Dataflow workers to be
            # instantiated by the Cloud Dataflow service in order to perform the
            # computations required by a job.  Note that a workflow job may use
            # multiple pools, in order to match the various computational
            # requirements of the various stages of the job.
          "diskSourceImage": "A String", # Fully qualified source image for disks.
          "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
              # using the standard Dataflow task runner.  Users should ignore
              # this field.
            "workflowFileName": "A String", # The file to store the workflow in.
            "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
                # will not be uploaded.
                #
                # The supported resource type is:
                #
                # Google Cloud Storage:
                #   storage.googleapis.com/{bucket}/{object}
                #   bucket.storage.googleapis.com/{object}
            "commandlinesFileName": "A String", # The file to store preprocessing commands in.
            "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
              "reportingEnabled": True or False, # Whether to send work progress updates to the service.
              "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
                  # "shuffle/v1beta1".
              "workerId": "A String", # The ID of the worker running this pipeline.
              "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
                  #
                  # When workers access Google Cloud APIs, they logically do so via
                  # relative URLs.  If this field is specified, it supplies the base
                  # URL to use for resolving these relative URLs.  The normative
                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                  # Locators".
                  #
                  # If not specified, the default value is "http://www.googleapis.com/"
              "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
                  # "dataflow/v1b3/projects".
              "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
                  # storage.
                  #
                  # The supported resource type is:
                  #
                  # Google Cloud Storage:
                  #
                  #   storage.googleapis.com/{bucket}/{object}
                  #   bucket.storage.googleapis.com/{object}
            },
            "vmId": "A String", # The ID string of the VM.
            "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
            "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
            "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
                # access the Cloud Dataflow API.
              "A String",
            ],
            "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
                # taskrunner; e.g. "root".
            "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
                #
                # When workers access Google Cloud APIs, they logically do so via
                # relative URLs.  If this field is specified, it supplies the base
                # URL to use for resolving these relative URLs.  The normative
                # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                # Locators".
                #
                # If not specified, the default value is "http://www.googleapis.com/"
            "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
                # taskrunner; e.g. "wheel".
            "languageHint": "A String", # The suggested backend language.
            "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
                # console.
            "streamingWorkerMainClass": "A String", # The streaming worker main class name.
            "logDir": "A String", # The directory on the VM to store logs.
            "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
            "harnessCommand": "A String", # The command to launch the worker harness.
            "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
                # temporary storage.
                #
                # The supported resource type is:
                #
                # Google Cloud Storage:
                #   storage.googleapis.com/{bucket}/{object}
                #   bucket.storage.googleapis.com/{object}
            "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
          },
          "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
              # are supported.
          "packages": [ # Packages to be installed on workers.
            { # The packages that must be installed in order for a worker to run the
                # steps of the Cloud Dataflow job that will be assigned to its worker
                # pool.
                #
                # This is the mechanism by which the Cloud Dataflow SDK causes code to
                # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
                # might use this to install jars containing the user's code and all of the
                # various dependencies (libraries, data files, etc.) required in order
                # for that code to run.
              "location": "A String", # The resource to read the package from. The supported resource type is:
                  #
                  # Google Cloud Storage:
                  #
                  #   storage.googleapis.com/{bucket}
                  #   bucket.storage.googleapis.com/
              "name": "A String", # The name of the package.
            },
          ],
          "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
              # service will attempt to choose a reasonable default.
          "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
              # the service will use the network "default".
          "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
              # will attempt to choose a reasonable default.
          "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
              # attempt to choose a reasonable default.
          "teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.
              # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
              # `TEARDOWN_NEVER`.
              # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
              # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
              # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
              # down.
              #
              # If the workers are not torn down by the service, they will
              # continue to run and use Google Compute Engine VM resources in the
              # user's project until they are explicitly terminated by the user.
              # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
              # policy except for small, manually supervised test jobs.
              #
              # If unknown or unspecified, the service will attempt to choose a reasonable
              # default.
          "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
              # Compute Engine API.
          "ipConfiguration": "A String", # Configuration for VM IPs.
          "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
              # service will choose a number of threads (according to the number of cores
              # on the selected machine type for batch, or 1 by convention for streaming).
          "poolArgs": { # Extra arguments for this worker pool.
            "a_key": "", # Properties of the object. Contains field @type with type URL.
          },
          "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
              # execute the job.  If zero or unspecified, the service will
              # attempt to choose a reasonable default.
          "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
              # harness, residing in Google Container Registry.
          "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
              # the form "regions/REGION/subnetworks/SUBNETWORK".
          "dataDisks": [ # Data disks that are used by a VM in this workflow.
            { # Describes the data disk used by a workflow job.
              "mountPoint": "A String", # Directory in a VM where disk is mounted.
              "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
                  # attempt to choose a reasonable default.
              "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
                  # must be a disk type appropriate to the project and zone in which
                  # the workers will run.  If unknown or unspecified, the service
                  # will attempt to choose a reasonable default.
                  #
                  # For example, the standard persistent disk type is a resource name
                  # typically ending in "pd-standard".  If SSD persistent disks are
                  # available, the resource name typically ends with "pd-ssd".  The
                  # actual valid values are defined by the Google Compute Engine API,
                  # not by the Cloud Dataflow API; consult the Google Compute Engine
                  # documentation for more information about determining the set of
                  # available disk types for a particular project and zone.
                  #
                  # Google Compute Engine Disk types are local to a particular
                  # project in a particular zone, and so the resource name will
                  # typically look something like this:
                  #
                  # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
            },
          ],
          "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
            "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
            "algorithm": "A String", # The algorithm to use for autoscaling.
          },
          "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
              # select a default set of packages which are useful to worker
              # harnesses written in a particular language.
          "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
              # attempt to choose a reasonable default.
          "metadata": { # Metadata to set on the Google Compute Engine VMs.
            "a_key": "A String",
          },
        },
      ],
      "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
          # storage.  The system will append the suffix "/temp-{JOBNAME}" to
          # this resource prefix, where {JOBNAME} is the value of the
          # job_name field.  The resulting bucket and object prefix is used
          # as the prefix of the resources used to store temporary data
          # needed during the job execution.  NOTE: This will override the
          # value in taskrunner_settings.
          # The supported resource type is:
          #
          # Google Cloud Storage:
          #
          #   storage.googleapis.com/{bucket}/{object}
          #   bucket.storage.googleapis.com/{object}
    },
    "location": "A String", # The [regional endpoint]
        # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
        # contains this job.
    "tempFiles": [ # A set of files the system should be aware of that are used
        # for temporary storage. These temporary files will be
        # removed on job completion.
        # No duplicates are allowed.
        # No file patterns are supported.
        #
        # The supported files are:
        #
        # Google Cloud Storage:
        #
        #    storage.googleapis.com/{bucket}/{object}
        #    bucket.storage.googleapis.com/{object}
      "A String",
    ],
    "type": "A String", # The type of Cloud Dataflow job.
    "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
        # If this field is set, the service will ensure its uniqueness.
        # The request to create a job will fail if the service has knowledge of a
        # previously submitted job with the same client's ID and job name.
        # The caller may use this field to ensure idempotence of job
        # creation across retried attempts to create a job.
        # By default, the field is empty and, in that case, the service ignores it.
    "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
        # snapshot.
    "stepsLocation": "A String", # The GCS location where the steps are stored.
    "currentStateTime": "A String", # The timestamp associated with the current state.
    "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
        # Flexible resource scheduling jobs are started with some delay after job
        # creation, so start_time is unset before start and is updated when the
        # job is started by the Cloud Dataflow service. For other jobs, start_time
        # always equals create_time and is immutable and set by the Cloud Dataflow
        # service.
    "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
        # Cloud Dataflow service.
    "requestedState": "A String", # The job's requested state.
        #
        # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
        # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
        # also be used to directly set a job's requested state to
        # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
        # job if it has not already reached a terminal state.
    "name": "A String", # The user-specified Cloud Dataflow job name.
        #
        # Only one Job with a given name may exist in a project at any
        # given time. If a caller attempts to create a Job with the same
        # name as an already-existing Job, the attempt returns the
        # existing Job.
        #
        # The name must match the regular expression
        # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
    "steps": [ # Exactly one of step or steps_location should be specified.
        #
        # The top-level steps that constitute the entire job.
      { # Defines a particular step within a Cloud Dataflow job.
          #
          # A job consists of multiple steps, each of which performs some
          # specific operation as part of the overall job.  Data is typically
          # passed from one step to another as part of the job.
          #
          # Here's an example of a sequence of steps which together implement a
          # Map-Reduce job:
          #
          #   * Read a collection of data from some source, parsing the
          #     collection's elements.
          #
          #   * Validate the elements.
          #
          #   * Apply a user-defined function to map each element to some value
          #     and extract an element-specific key value.
          #
          #   * Group elements with the same key into a single element with
          #     that key, transforming a multiply-keyed collection into a
          #     uniquely-keyed collection.
          #
          #   * Write the elements out to some data sink.
          #
          # Note that the Cloud Dataflow service may be used to run many different
          # types of jobs, not just Map-Reduce.
        "kind": "A String", # The kind of step in the Cloud Dataflow job.
        "properties": { # Named properties associated with the step. Each kind of
            # predefined step has its own required set of properties.
            # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
          "a_key": "", # Properties of the object.
        },
        "name": "A String", # The name that identifies the step. This must be unique for each
            # step with respect to all other steps in the Cloud Dataflow job.
      },
    ],
    "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
        # of the job it replaced.
        #
        # When sending a `CreateJobRequest`, you can update a job by specifying it
        # here. The job named here is stopped, and its intermediate state is
        # transferred to this job.
    "currentState": "A String", # The current state of the job.
        #
        # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
        # specified.
        #
        # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
        # terminal state. After a job has reached a terminal state, no
        # further state updates may be made.
        #
        # This field may be mutated by the Cloud Dataflow service;
        # callers cannot mutate it.
    "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
        # isn't contained in the submitted job.
      "stages": { # A mapping from each stage to the information about that stage.
        "a_key": { # Contains information about how a particular
            # google.dataflow.v1beta3.Step will be executed.
          "stepName": [ # The steps associated with the execution stage.
              # Note that stages may have several steps, and that a given step
              # might be run by more than one stage.
            "A String",
          ],
        },
      },
    },
  }

get(projectId, location, jobId, x__xgafv=None, view=None)

Gets the state of the specified Cloud Dataflow job.

To get the state of a job, we recommend using `projects.locations.jobs.get`
with a [regional endpoint]
(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using
`projects.jobs.get` is not recommended, as you can only get the state of
jobs that are running in `us-central1`.

Args:
  projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
  location: string, The [regional endpoint]
(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
contains this job. (required)
  jobId: string, The job ID. (required)
  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format
  view: string, The level of information requested in response.

Returns:
  An object of the form:

    { # Defines a job to be run by the Cloud Dataflow service.
    "labels": { # User-defined labels for this job.
        #
        # The labels map can contain no more than 64 entries.  Entries of the labels
        # map are UTF8 strings that comply with the following restrictions:
        #
        # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
        # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
        # * Both keys and values are additionally constrained to be <= 128 bytes in
        # size.
      "a_key": "A String",
    },
    "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the # This field is populated by the Dataflow service to support filtering jobs
        # by the metadata values provided here. Populated for ListJobs and all GetJob
        # views SUMMARY and higher.
        # ListJob response and Job SUMMARY view.
      "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
        "versionDisplayName": "A String", # A readable string describing the version of the SDK.
        "version": "A String", # The version of the SDK used to run the job.
        "sdkSupportStatus": "A String", # The support status for this SDK version.
      },
      "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
        { # Metadata for a PubSub connector used by the job.
          "topic": "A String", # Topic accessed in the connection.
          "subscription": "A String", # Subscription used in the connection.
        },
      ],
      "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
        { # Metadata for a Datastore connector used by the job.
          "projectId": "A String", # ProjectId accessed in the connection.
          "namespace": "A String", # Namespace used in the connection.
        },
      ],
      "fileDetails": [ # Identification of a File source used in the Dataflow job.
        { # Metadata for a File connector used by the job.
          "filePattern": "A String", # File Pattern used to access files by the connector.
        },
      ],
      "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
        { # Metadata for a Spanner connector used by the job.
          "instanceId": "A String", # InstanceId accessed in the connection.
          "projectId": "A String", # ProjectId accessed in the connection.
          "databaseId": "A String", # DatabaseId accessed in the connection.
        },
      ],
      "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
        { # Metadata for a BigTable connector used by the job.
          "instanceId": "A String", # InstanceId accessed in the connection.
          "projectId": "A String", # ProjectId accessed in the connection.
          "tableId": "A String", # TableId accessed in the connection.
        },
      ],
      "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
        { # Metadata for a BigQuery connector used by the job.
          "projectId": "A String", # Project accessed in the connection.
          "dataset": "A String", # Dataset accessed in the connection.
          "table": "A String", # Table accessed in the connection.
          "query": "A String", # Query used to access data in the connection.
        },
      ],
    },
    "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
        # A description of the user pipeline and stages through which it is executed.
        # Created by Cloud Dataflow service.  Only retrieved with
        # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
        # form.  This data is provided by the Dataflow service for ease of visualizing
        # the pipeline and interpreting Dataflow provided metrics.
      "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
        { # Description of the type, names/ids, and input/outputs for a transform.
          "kind": "A String", # Type of transform.
          "name": "A String", # User provided name for this transform instance.
          "inputCollectionName": [ # User names for all collection inputs to this transform.
            "A String",
          ],
          "displayData": [ # Transform-specific display data.
            { # Data provided with a pipeline or transform to provide descriptive info.
              "shortStrValue": "A String", # A possible additional shorter value to display.
                  # For example a java_class_name_value of com.mypackage.MyDoFn
                  # will be stored with MyDoFn as the short_str_value and
                  # com.mypackage.MyDoFn as the java_class_name value.
                  # short_str_value can be displayed and java_class_name_value
                  # will be displayed as a tooltip.
              "durationValue": "A String", # Contains value if the data is of duration type.
              "url": "A String", # An optional full URL.
              "floatValue": 3.14, # Contains value if the data is of float type.
              "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                  # language namespace (e.g. Python module) which defines the display data.
                  # This allows a dax monitoring system to specially handle the data
                  # and perform custom rendering.
              "javaClassValue": "A String", # Contains value if the data is of java class type.
              "label": "A String", # An optional label to display in a dax UI for the element.
              "boolValue": True or False, # Contains value if the data is of a boolean type.
              "strValue": "A String", # Contains value if the data is of string type.
              "key": "A String", # The key identifying the display data.
                  # This is intended to be used as a label for the display data
                  # when viewed in a dax monitoring system.
              "int64Value": "A String", # Contains value if the data is of int64 type.
              "timestampValue": "A String", # Contains value if the data is of timestamp type.
            },
          ],
          "outputCollectionName": [ # User  names for all collection outputs to this transform.
            "A String",
          ],
          "id": "A String", # SDK generated id of this transform instance.
        },
      ],
      "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
        { # Description of the composing transforms, names/ids, and input/outputs of a
            # stage of execution.  Some composing transforms and sources may have been
            # generated by the Dataflow service during execution planning.
          "componentSource": [ # Collections produced and consumed by component transforms of this stage.
            { # Description of an interstitial value between transforms in an execution
                # stage.
              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                  # source is most closely associated.
              "name": "A String", # Dataflow service generated name for this source.
            },
          ],
          "kind": "A String", # Type of tranform this stage is executing.
          "name": "A String", # Dataflow service generated name for this stage.
          "outputSource": [ # Output sources for this stage.
            { # Description of an input or output of an execution stage.
              "userName": "A String", # Human-readable name for this source; may be user or system generated.
              "sizeBytes": "A String", # Size of the source, if measurable.
              "name": "A String", # Dataflow service generated name for this source.
              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                  # source is most closely associated.
            },
          ],
          "inputSource": [ # Input sources for this stage.
            { # Description of an input or output of an execution stage.
              "userName": "A String", # Human-readable name for this source; may be user or system generated.
              "sizeBytes": "A String", # Size of the source, if measurable.
              "name": "A String", # Dataflow service generated name for this source.
              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                  # source is most closely associated.
            },
          ],
          "componentTransform": [ # Transforms that comprise this execution stage.
            { # Description of a transform executed as part of an execution stage.
              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
              "originalTransform": "A String", # User name for the original user transform with which this transform is
                  # most closely associated.
              "name": "A String", # Dataflow service generated name for this source.
            },
          ],
          "id": "A String", # Dataflow service generated id for this stage.
        },
      ],
      "displayData": [ # Pipeline level display data.
        { # Data provided with a pipeline or transform to provide descriptive info.
          "shortStrValue": "A String", # A possible additional shorter value to display.
              # For example a java_class_name_value of com.mypackage.MyDoFn
              # will be stored with MyDoFn as the short_str_value and
              # com.mypackage.MyDoFn as the java_class_name value.
              # short_str_value can be displayed and java_class_name_value
              # will be displayed as a tooltip.
          "durationValue": "A String", # Contains value if the data is of duration type.
          "url": "A String", # An optional full URL.
          "floatValue": 3.14, # Contains value if the data is of float type.
          "namespace": "A String", # The namespace for the key. This is usually a class name or programming
              # language namespace (e.g. Python module) which defines the display data.
              # This allows a dax monitoring system to specially handle the data
              # and perform custom rendering.
          "javaClassValue": "A String", # Contains value if the data is of java class type.
          "label": "A String", # An optional label to display in a dax UI for the element.
          "boolValue": True or False, # Contains value if the data is of a boolean type.
          "strValue": "A String", # Contains value if the data is of string type.
          "key": "A String", # The key identifying the display data.
              # This is intended to be used as a label for the display data
              # when viewed in a dax monitoring system.
          "int64Value": "A String", # Contains value if the data is of int64 type.
          "timestampValue": "A String", # Contains value if the data is of timestamp type.
        },
      ],
    },
    "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
        # callers cannot mutate it.
      { # A message describing the state of a particular execution stage.
        "executionStageName": "A String", # The name of the execution stage.
        "executionStageState": "A String", # Executions stage states allow the same set of values as JobState.
        "currentStateTime": "A String", # The time at which the stage transitioned to this state.
      },
    ],
    "id": "A String", # The unique ID of this job.
        #
        # This field is set by the Cloud Dataflow service when the Job is
        # created, and is immutable for the life of the job.
    "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
        # `JOB_STATE_UPDATED`), this field contains the ID of that job.
    "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
    "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
        # corresponding name prefixes of the new job.
      "a_key": "A String",
    },
    "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
      "version": { # A structure describing which components and their versions of the service
          # are required in order to run the job.
        "a_key": "", # Properties of the object.
      },
      "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
      "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data
          # at rest, AKA a Customer Managed Encryption Key (CMEK).
          #
          # Format:
          #   projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
      "internalExperiments": { # Experimental settings.
        "a_key": "", # Properties of the object. Contains field @type with type URL.
      },
      "dataset": "A String", # The dataset for the current project where various workflow
          # related tables are stored.
          #
          # The supported resource type is:
          #
          # Google BigQuery:
          #   bigquery.googleapis.com/{dataset}
      "experiments": [ # The list of experiments to enable.
        "A String",
      ],
      "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
      "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
          # options are passed through the service and are used to recreate the
          # SDK pipeline options on the worker in a language agnostic and platform
          # independent way.
        "a_key": "", # Properties of the object.
      },
      "userAgent": { # A description of the process that generated the request.
        "a_key": "", # Properties of the object.
      },
      "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
          # unspecified, the service will attempt to choose a reasonable
          # default.  This should be in the form of the API service name,
          # e.g. "compute.googleapis.com".
      "workerPools": [ # The worker pools. At least one "harness" worker pool must be
          # specified in order for the job to have workers.
        { # Describes one particular pool of Cloud Dataflow workers to be
            # instantiated by the Cloud Dataflow service in order to perform the
            # computations required by a job.  Note that a workflow job may use
            # multiple pools, in order to match the various computational
            # requirements of the various stages of the job.
          "diskSourceImage": "A String", # Fully qualified source image for disks.
          "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
              # using the standard Dataflow task runner.  Users should ignore
              # this field.
            "workflowFileName": "A String", # The file to store the workflow in.
            "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
                # will not be uploaded.
                #
                # The supported resource type is:
                #
                # Google Cloud Storage:
                #   storage.googleapis.com/{bucket}/{object}
                #   bucket.storage.googleapis.com/{object}
            "commandlinesFileName": "A String", # The file to store preprocessing commands in.
            "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
              "reportingEnabled": True or False, # Whether to send work progress updates to the service.
              "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
                  # "shuffle/v1beta1".
              "workerId": "A String", # The ID of the worker running this pipeline.
              "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
                  #
                  # When workers access Google Cloud APIs, they logically do so via
                  # relative URLs.  If this field is specified, it supplies the base
                  # URL to use for resolving these relative URLs.  The normative
                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                  # Locators".
                  #
                  # If not specified, the default value is "http://www.googleapis.com/"
              "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
                  # "dataflow/v1b3/projects".
              "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
                  # storage.
                  #
                  # The supported resource type is:
                  #
                  # Google Cloud Storage:
                  #
                  #   storage.googleapis.com/{bucket}/{object}
                  #   bucket.storage.googleapis.com/{object}
            },
            "vmId": "A String", # The ID string of the VM.
            "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
            "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
            "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
                # access the Cloud Dataflow API.
              "A String",
            ],
            "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
                # taskrunner; e.g. "root".
            "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
                #
                # When workers access Google Cloud APIs, they logically do so via
                # relative URLs.  If this field is specified, it supplies the base
                # URL to use for resolving these relative URLs.  The normative
                # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                # Locators".
                #
                # If not specified, the default value is "http://www.googleapis.com/"
            "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
                # taskrunner; e.g. "wheel".
            "languageHint": "A String", # The suggested backend language.
            "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
                # console.
            "streamingWorkerMainClass": "A String", # The streaming worker main class name.
            "logDir": "A String", # The directory on the VM to store logs.
            "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
            "harnessCommand": "A String", # The command to launch the worker harness.
            "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
                # temporary storage.
                #
                # The supported resource type is:
                #
                # Google Cloud Storage:
                #   storage.googleapis.com/{bucket}/{object}
                #   bucket.storage.googleapis.com/{object}
            "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
          },
          "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
              # are supported.
          "packages": [ # Packages to be installed on workers.
            { # The packages that must be installed in order for a worker to run the
                # steps of the Cloud Dataflow job that will be assigned to its worker
                # pool.
                #
                # This is the mechanism by which the Cloud Dataflow SDK causes code to
                # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
                # might use this to install jars containing the user's code and all of the
                # various dependencies (libraries, data files, etc.) required in order
                # for that code to run.
              "location": "A String", # The resource to read the package from. The supported resource type is:
                  #
                  # Google Cloud Storage:
                  #
                  #   storage.googleapis.com/{bucket}
                  #   bucket.storage.googleapis.com/
              "name": "A String", # The name of the package.
            },
          ],
          "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
              # service will attempt to choose a reasonable default.
          "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
              # the service will use the network "default".
          "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
              # will attempt to choose a reasonable default.
          "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
              # attempt to choose a reasonable default.
          "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
              # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
              # `TEARDOWN_NEVER`.
              # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
              # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
              # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
              # down.
              #
              # If the workers are not torn down by the service, they will
              # continue to run and use Google Compute Engine VM resources in the
              # user's project until they are explicitly terminated by the user.
              # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
              # policy except for small, manually supervised test jobs.
              #
              # If unknown or unspecified, the service will attempt to choose a reasonable
              # default.
          "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
              # Compute Engine API.
          "ipConfiguration": "A String", # Configuration for VM IPs.
          "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
              # service will choose a number of threads (according to the number of cores
              # on the selected machine type for batch, or 1 by convention for streaming).
          "poolArgs": { # Extra arguments for this worker pool.
            "a_key": "", # Properties of the object. Contains field @type with type URL.
          },
          "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
              # execute the job.  If zero or unspecified, the service will
              # attempt to choose a reasonable default.
          "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
              # harness, residing in Google Container Registry.
          "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
              # the form "regions/REGION/subnetworks/SUBNETWORK".
          "dataDisks": [ # Data disks that are used by a VM in this workflow.
            { # Describes the data disk used by a workflow job.
              "mountPoint": "A String", # Directory in a VM where disk is mounted.
              "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
                  # attempt to choose a reasonable default.
              "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
                  # must be a disk type appropriate to the project and zone in which
                  # the workers will run.  If unknown or unspecified, the service
                  # will attempt to choose a reasonable default.
                  #
                  # For example, the standard persistent disk type is a resource name
                  # typically ending in "pd-standard".  If SSD persistent disks are
                  # available, the resource name typically ends with "pd-ssd".  The
                  # actual valid values are defined by the Google Compute Engine API,
                  # not by the Cloud Dataflow API; consult the Google Compute Engine
                  # documentation for more information about determining the set of
                  # available disk types for a particular project and zone.
                  #
                  # Google Compute Engine Disk types are local to a particular
                  # project in a particular zone, and so the resource name will
                  # typically look something like this:
                  #
                  # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
            },
          ],
          "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
            "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
            "algorithm": "A String", # The algorithm to use for autoscaling.
          },
          "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
              # select a default set of packages which are useful to worker
              # harnesses written in a particular language.
          "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
              # attempt to choose a reasonable default.
          "metadata": { # Metadata to set on the Google Compute Engine VMs.
            "a_key": "A String",
          },
        },
      ],
      "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
          # storage.  The system will append the suffix "/temp-{JOBNAME}" to
          # this resource prefix, where {JOBNAME} is the value of the
          # job_name field.  The resulting bucket and object prefix is used
          # as the prefix of the resources used to store temporary data
          # needed during the job execution.  NOTE: This will override the
          # value in taskrunner_settings.
          # The supported resource type is:
          #
          # Google Cloud Storage:
          #
          #   storage.googleapis.com/{bucket}/{object}
          #   bucket.storage.googleapis.com/{object}
    },
    "location": "A String", # The [regional endpoint]
        # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
        # contains this job.
    "tempFiles": [ # A set of files the system should be aware of that are used
        # for temporary storage. These temporary files will be
        # removed on job completion.
        # No duplicates are allowed.
        # No file patterns are supported.
        #
        # The supported files are:
        #
        # Google Cloud Storage:
        #
        #    storage.googleapis.com/{bucket}/{object}
        #    bucket.storage.googleapis.com/{object}
      "A String",
    ],
    "type": "A String", # The type of Cloud Dataflow job.
    "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
        # If this field is set, the service will ensure its uniqueness.
        # The request to create a job will fail if the service has knowledge of a
        # previously submitted job with the same client's ID and job name.
        # The caller may use this field to ensure idempotence of job
        # creation across retried attempts to create a job.
        # By default, the field is empty and, in that case, the service ignores it.
    "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
        # snapshot.
    "stepsLocation": "A String", # The GCS location where the steps are stored.
    "currentStateTime": "A String", # The timestamp associated with the current state.
    "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
        # Flexible resource scheduling jobs are started with some delay after job
        # creation, so start_time is unset before start and is updated when the
        # job is started by the Cloud Dataflow service. For other jobs, start_time
        # always equals create_time and is immutable and set by the Cloud Dataflow
        # service.
    "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
        # Cloud Dataflow service.
    "requestedState": "A String", # The job's requested state.
        #
        # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
        # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
        # also be used to directly set a job's requested state to
        # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
        # job if it has not already reached a terminal state.
    "name": "A String", # The user-specified Cloud Dataflow job name.
        #
        # Only one Job with a given name may exist in a project at any
        # given time. If a caller attempts to create a Job with the same
        # name as an already-existing Job, the attempt returns the
        # existing Job.
        #
        # The name must match the regular expression
        # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
    "steps": [ # Exactly one of step or steps_location should be specified.
        #
        # The top-level steps that constitute the entire job.
      { # Defines a particular step within a Cloud Dataflow job.
          #
          # A job consists of multiple steps, each of which performs some
          # specific operation as part of the overall job.  Data is typically
          # passed from one step to another as part of the job.
          #
          # Here's an example of a sequence of steps which together implement a
          # Map-Reduce job:
          #
          #   * Read a collection of data from some source, parsing the
          #     collection's elements.
          #
          #   * Validate the elements.
          #
          #   * Apply a user-defined function to map each element to some value
          #     and extract an element-specific key value.
          #
          #   * Group elements with the same key into a single element with
          #     that key, transforming a multiply-keyed collection into a
          #     uniquely-keyed collection.
          #
          #   * Write the elements out to some data sink.
          #
          # Note that the Cloud Dataflow service may be used to run many different
          # types of jobs, not just Map-Reduce.
        "kind": "A String", # The kind of step in the Cloud Dataflow job.
        "properties": { # Named properties associated with the step. Each kind of
            # predefined step has its own required set of properties.
            # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
          "a_key": "", # Properties of the object.
        },
        "name": "A String", # The name that identifies the step. This must be unique for each
            # step with respect to all other steps in the Cloud Dataflow job.
      },
    ],
    "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
        # of the job it replaced.
        #
        # When sending a `CreateJobRequest`, you can update a job by specifying it
        # here. The job named here is stopped, and its intermediate state is
        # transferred to this job.
    "currentState": "A String", # The current state of the job.
        #
        # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
        # specified.
        #
        # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
        # terminal state. After a job has reached a terminal state, no
        # further state updates may be made.
        #
        # This field may be mutated by the Cloud Dataflow service;
        # callers cannot mutate it.
    "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
        # isn't contained in the submitted job.
      "stages": { # A mapping from each stage to the information about that stage.
        "a_key": { # Contains information about how a particular
            # google.dataflow.v1beta3.Step will be executed.
          "stepName": [ # The steps associated with the execution stage.
              # Note that stages may have several steps, and that a given step
              # might be run by more than one stage.
            "A String",
          ],
        },
      },
    },
  }

getMetrics(projectId, location, jobId, startTime=None, x__xgafv=None)

Request the job status.

To request the status of a job, we recommend using
`projects.locations.jobs.getMetrics` with a [regional endpoint]
(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using
`projects.jobs.getMetrics` is not recommended, as you can only request the
status of jobs that are running in `us-central1`.

Args:
projectId: string, A project id. (required)
location: string, The [regional endpoint]
(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
contains the job specified by job_id. (required)
jobId: string, The job to get metrics for. (required)
startTime: string, Return only metric data that has changed since this time.
Default is to return all information about all metrics for the job.
x__xgafv: string, V1 error format.
Allowed values
1 - v1 error format
2 - v2 error format

Returns:
An object of the form:

{ # JobMetrics contains a collection of metrics describing the detailed progress
# of a Dataflow job. Metrics correspond to user-defined and system-defined
# metrics in the job.
#
# This resource captures only the most recent values of each metric;
# time-series data can be queried for them (under the same metric names)
# from Cloud Monitoring.
"metrics": [ # All metrics for this job.
{ # Describes the state of a metric.
"meanCount": "", # Worker-computed aggregate value for the "Mean" aggregation kind.
# This holds the count of the aggregated values and is used in combination
# with mean_sum to obtain the actual mean aggregate value.
# The only possible value type is Long.
"kind": "A String", # Metric aggregation kind. The possible metric aggregation kinds are
# "Sum", "Max", "Min", "Mean", "Set", "And", "Or", and "Distribution".
# The specified aggregation kind is case-insensitive.
#
# If omitted, this is not an aggregated value but instead
# a single metric sample value.
"set": "", # Worker-computed aggregate value for the "Set" aggregation kind. The only
# possible value type is a list of Values whose type can be Long, Double,
# or String, according to the metric's type. All Values in the list must
# be of the same type.
"name": { # Identifies a metric, by describing the source which generated the # Name of the metric.
# metric.
"origin": "A String", # Origin (namespace) of metric name. May be blank for user-defined metrics;
# will be "dataflow" for metrics defined by the Dataflow service or SDK.
"name": "A String", # Worker-defined metric name.
"context": { # Zero or more labeled fields which identify the part of the job this
# metric is associated with, such as the name of a step or collection.
#
# For example, built-in counters associated with steps will have
# context['step'] = <step-name>. Counters associated with PCollections
# in the SDK will have context['pcollection'] = <pcollection-name>.
"a_key": "A String",
},
},
"meanSum": "", # Worker-computed aggregate value for the "Mean" aggregation kind.
# This holds the sum of the aggregated values and is used in combination
# with mean_count to obtain the actual mean aggregate value.
# The only possible value types are Long and Double.
"cumulative": True or False, # True if this metric is reported as the total cumulative aggregate
# value accumulated since the worker started working on this WorkItem.
# By default this is false, indicating that this metric is reported
# as a delta that is not associated with any WorkItem.
"updateTime": "A String", # Timestamp associated with the metric value. Optional when workers are
# reporting work progress; it will be filled in responses from the
# metrics API.
"scalar": "", # Worker-computed aggregate value for aggregation kinds "Sum", "Max", "Min",
# "And", and "Or". The possible value types are Long, Double, and Boolean.
"internal": "", # Worker-computed aggregate value for internal use by the Dataflow
# service.
"gauge": "", # A struct value describing properties of a Gauge.
# Metrics of gauge type show the value of a metric across time, and are
# aggregated based on the newest value.
"distribution": "", # A struct value describing properties of a distribution of numeric values.
},
],
"metricTime": "A String", # Timestamp as of which metric values are current.
}

list(projectId, location, pageSize=None, pageToken=None, x__xgafv=None, filter=None, view=None)

List the jobs of a project.

To list the jobs of a project in a region, we recommend using
`projects.locations.jobs.list` with a [regional endpoint]
(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). To
list all jobs across all regions, use `projects.jobs.aggregated`. Using
`projects.jobs.list` is not recommended, as you can only get the list of
jobs that are running in `us-central1`.

Args:
  projectId: string, The project which owns the jobs. (required)
  location: string, The [regional endpoint]
(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
contains this job. (required)
  pageSize: integer, If there are many jobs, limit response to at most this many.
The actual number of jobs returned will be the lesser of max_responses
and an unspecified server-defined limit.
  pageToken: string, Set this to the 'next_page_token' field of a previous response
to request additional results in a long list.
  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format
  filter: string, The kind of filter to use.
  view: string, Level of information requested in response. Default is `JOB_VIEW_SUMMARY`.

Returns:
  An object of the form:

    { # Response to a request to list Cloud Dataflow jobs.  This may be a partial
      # response, depending on the page size in the ListJobsRequest.
    "nextPageToken": "A String", # Set if there may be more results than fit in this response.
    "failedLocation": [ # Zero or more messages describing the [regional endpoints]
        # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
        # failed to respond.
      { # Indicates which [regional endpoint]
          # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) failed
          # to respond to a request for data.
        "name": "A String", # The name of the [regional endpoint]
            # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
            # failed to respond.
      },
    ],
    "jobs": [ # A subset of the requested job information.
      { # Defines a job to be run by the Cloud Dataflow service.
        "labels": { # User-defined labels for this job.
            #
            # The labels map can contain no more than 64 entries.  Entries of the labels
            # map are UTF8 strings that comply with the following restrictions:
            #
            # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
            # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
            # * Both keys and values are additionally constrained to be <= 128 bytes in
            # size.
          "a_key": "A String",
        },
        "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the # This field is populated by the Dataflow service to support filtering jobs
            # by the metadata values provided here. Populated for ListJobs and all GetJob
            # views SUMMARY and higher.
            # ListJob response and Job SUMMARY view.
          "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
            "versionDisplayName": "A String", # A readable string describing the version of the SDK.
            "version": "A String", # The version of the SDK used to run the job.
            "sdkSupportStatus": "A String", # The support status for this SDK version.
          },
          "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
            { # Metadata for a PubSub connector used by the job.
              "topic": "A String", # Topic accessed in the connection.
              "subscription": "A String", # Subscription used in the connection.
            },
          ],
          "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
            { # Metadata for a Datastore connector used by the job.
              "projectId": "A String", # ProjectId accessed in the connection.
              "namespace": "A String", # Namespace used in the connection.
            },
          ],
          "fileDetails": [ # Identification of a File source used in the Dataflow job.
            { # Metadata for a File connector used by the job.
              "filePattern": "A String", # File Pattern used to access files by the connector.
            },
          ],
          "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
            { # Metadata for a Spanner connector used by the job.
              "instanceId": "A String", # InstanceId accessed in the connection.
              "projectId": "A String", # ProjectId accessed in the connection.
              "databaseId": "A String", # DatabaseId accessed in the connection.
            },
          ],
          "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
            { # Metadata for a BigTable connector used by the job.
              "instanceId": "A String", # InstanceId accessed in the connection.
              "projectId": "A String", # ProjectId accessed in the connection.
              "tableId": "A String", # TableId accessed in the connection.
            },
          ],
          "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
            { # Metadata for a BigQuery connector used by the job.
              "projectId": "A String", # Project accessed in the connection.
              "dataset": "A String", # Dataset accessed in the connection.
              "table": "A String", # Table accessed in the connection.
              "query": "A String", # Query used to access data in the connection.
            },
          ],
        },
        "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
            # A description of the user pipeline and stages through which it is executed.
            # Created by Cloud Dataflow service.  Only retrieved with
            # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
            # form.  This data is provided by the Dataflow service for ease of visualizing
            # the pipeline and interpreting Dataflow provided metrics.
          "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
            { # Description of the type, names/ids, and input/outputs for a transform.
              "kind": "A String", # Type of transform.
              "name": "A String", # User provided name for this transform instance.
              "inputCollectionName": [ # User names for all collection inputs to this transform.
                "A String",
              ],
              "displayData": [ # Transform-specific display data.
                { # Data provided with a pipeline or transform to provide descriptive info.
                  "shortStrValue": "A String", # A possible additional shorter value to display.
                      # For example a java_class_name_value of com.mypackage.MyDoFn
                      # will be stored with MyDoFn as the short_str_value and
                      # com.mypackage.MyDoFn as the java_class_name value.
                      # short_str_value can be displayed and java_class_name_value
                      # will be displayed as a tooltip.
                  "durationValue": "A String", # Contains value if the data is of duration type.
                  "url": "A String", # An optional full URL.
                  "floatValue": 3.14, # Contains value if the data is of float type.
                  "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                      # language namespace (e.g. a Python module) which defines the display data.
                      # This allows a dax monitoring system to specially handle the data
                      # and perform custom rendering.
                  "javaClassValue": "A String", # Contains value if the data is of java class type.
                  "label": "A String", # An optional label to display in a dax UI for the element.
                  "boolValue": True or False, # Contains value if the data is of a boolean type.
                  "strValue": "A String", # Contains value if the data is of string type.
                  "key": "A String", # The key identifying the display data.
                      # This is intended to be used as a label for the display data
                      # when viewed in a dax monitoring system.
                  "int64Value": "A String", # Contains value if the data is of int64 type.
                  "timestampValue": "A String", # Contains value if the data is of timestamp type.
                },
              ],
              "outputCollectionName": [ # User names for all collection outputs to this transform.
                "A String",
              ],
              "id": "A String", # SDK generated id of this transform instance.
            },
          ],
          "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
            { # Description of the composing transforms, names/ids, and input/outputs of a
                # stage of execution.  Some composing transforms and sources may have been
                # generated by the Dataflow service during execution planning.
              "componentSource": [ # Collections produced and consumed by component transforms of this stage.
                { # Description of an interstitial value between transforms in an execution
                    # stage.
                  "userName": "A String", # Human-readable name for this transform; may be user or system generated.
                  "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                      # source is most closely associated.
                  "name": "A String", # Dataflow service generated name for this source.
                },
              ],
              "kind": "A String", # Type of transform this stage is executing.
              "name": "A String", # Dataflow service generated name for this stage.
              "outputSource": [ # Output sources for this stage.
                { # Description of an input or output of an execution stage.
                  "userName": "A String", # Human-readable name for this source; may be user or system generated.
                  "sizeBytes": "A String", # Size of the source, if measurable.
                  "name": "A String", # Dataflow service generated name for this source.
                  "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                      # source is most closely associated.
                },
              ],
              "inputSource": [ # Input sources for this stage.
                { # Description of an input or output of an execution stage.
                  "userName": "A String", # Human-readable name for this source; may be user or system generated.
                  "sizeBytes": "A String", # Size of the source, if measurable.
                  "name": "A String", # Dataflow service generated name for this source.
                  "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                      # source is most closely associated.
                },
              ],
              "componentTransform": [ # Transforms that comprise this execution stage.
                { # Description of a transform executed as part of an execution stage.
                  "userName": "A String", # Human-readable name for this transform; may be user or system generated.
                  "originalTransform": "A String", # User name for the original user transform with which this transform is
                      # most closely associated.
                  "name": "A String", # Dataflow service generated name for this source.
                },
              ],
              "id": "A String", # Dataflow service generated id for this stage.
            },
          ],
          "displayData": [ # Pipeline level display data.
            { # Data provided with a pipeline or transform to provide descriptive info.
              "shortStrValue": "A String", # A possible additional shorter value to display.
                  # For example a java_class_name_value of com.mypackage.MyDoFn
                  # will be stored with MyDoFn as the short_str_value and
                  # com.mypackage.MyDoFn as the java_class_name value.
                  # short_str_value can be displayed and java_class_name_value
                  # will be displayed as a tooltip.
              "durationValue": "A String", # Contains value if the data is of duration type.
              "url": "A String", # An optional full URL.
              "floatValue": 3.14, # Contains value if the data is of float type.
              "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                  # language namespace (e.g. a Python module) which defines the display data.
                  # This allows a dax monitoring system to specially handle the data
                  # and perform custom rendering.
              "javaClassValue": "A String", # Contains value if the data is of java class type.
              "label": "A String", # An optional label to display in a dax UI for the element.
              "boolValue": True or False, # Contains value if the data is of a boolean type.
              "strValue": "A String", # Contains value if the data is of string type.
              "key": "A String", # The key identifying the display data.
                  # This is intended to be used as a label for the display data
                  # when viewed in a dax monitoring system.
              "int64Value": "A String", # Contains value if the data is of int64 type.
              "timestampValue": "A String", # Contains value if the data is of timestamp type.
            },
          ],
        },
        "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
            # callers cannot mutate it.
          { # A message describing the state of a particular execution stage.
            "executionStageName": "A String", # The name of the execution stage.
            "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
            "currentStateTime": "A String", # The time at which the stage transitioned to this state.
          },
        ],
        "id": "A String", # The unique ID of this job.
            #
            # This field is set by the Cloud Dataflow service when the Job is
            # created, and is immutable for the life of the job.
        "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
            # `JOB_STATE_UPDATED`), this field contains the ID of that job.
        "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
        "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
            # corresponding name prefixes of the new job.
          "a_key": "A String",
        },
        "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
          "version": { # A structure describing which components and their versions of the service
              # are required in order to run the job.
            "a_key": "", # Properties of the object.
          },
          "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
          "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data
              # at rest, AKA a Customer Managed Encryption Key (CMEK).
              #
              # Format:
              #   projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
          "internalExperiments": { # Experimental settings.
            "a_key": "", # Properties of the object. Contains field @type with type URL.
          },
          "dataset": "A String", # The dataset for the current project where various workflow
              # related tables are stored.
              #
              # The supported resource type is:
              #
              # Google BigQuery:
              #   bigquery.googleapis.com/{dataset}
          "experiments": [ # The list of experiments to enable.
            "A String",
          ],
          "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
          "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
              # options are passed through the service and are used to recreate the
              # SDK pipeline options on the worker in a language agnostic and platform
              # independent way.
            "a_key": "", # Properties of the object.
          },
          "userAgent": { # A description of the process that generated the request.
            "a_key": "", # Properties of the object.
          },
          "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
              # unspecified, the service will attempt to choose a reasonable
              # default.  This should be in the form of the API service name,
              # e.g. "compute.googleapis.com".
          "workerPools": [ # The worker pools. At least one "harness" worker pool must be
              # specified in order for the job to have workers.
            { # Describes one particular pool of Cloud Dataflow workers to be
                # instantiated by the Cloud Dataflow service in order to perform the
                # computations required by a job.  Note that a workflow job may use
                # multiple pools, in order to match the various computational
                # requirements of the various stages of the job.
              "diskSourceImage": "A String", # Fully qualified source image for disks.
              "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
                  # using the standard Dataflow task runner.  Users should ignore
                  # this field.
                "workflowFileName": "A String", # The file to store the workflow in.
                "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
                    # will not be uploaded.
                    #
                    # The supported resource type is:
                    #
                    # Google Cloud Storage:
                    #   storage.googleapis.com/{bucket}/{object}
                    #   bucket.storage.googleapis.com/{object}
                "commandlinesFileName": "A String", # The file to store preprocessing commands in.
                "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
                  "reportingEnabled": True or False, # Whether to send work progress updates to the service.
                  "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
                      # "shuffle/v1beta1".
                  "workerId": "A String", # The ID of the worker running this pipeline.
                  "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
                      #
                      # When workers access Google Cloud APIs, they logically do so via
                      # relative URLs.  If this field is specified, it supplies the base
                      # URL to use for resolving these relative URLs.  The normative
                      # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                      # Locators".
                      #
                      # If not specified, the default value is "http://www.googleapis.com/"
                  "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
                      # "dataflow/v1b3/projects".
                  "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
                      # storage.
                      #
                      # The supported resource type is:
                      #
                      # Google Cloud Storage:
                      #
                      #   storage.googleapis.com/{bucket}/{object}
                      #   bucket.storage.googleapis.com/{object}
                },
                "vmId": "A String", # The ID string of the VM.
                "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
                "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
                "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
                    # access the Cloud Dataflow API.
                  "A String",
                ],
                "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
                    # taskrunner; e.g. "root".
                "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
                    #
                    # When workers access Google Cloud APIs, they logically do so via
                    # relative URLs.  If this field is specified, it supplies the base
                    # URL to use for resolving these relative URLs.  The normative
                    # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                    # Locators".
                    #
                    # If not specified, the default value is "http://www.googleapis.com/".
                "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
                    # taskrunner; e.g. "wheel".
                "languageHint": "A String", # The suggested backend language.
                "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
                    # console.
                "streamingWorkerMainClass": "A String", # The streaming worker main class name.
                "logDir": "A String", # The directory on the VM to store logs.
                "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
                "harnessCommand": "A String", # The command to launch the worker harness.
                "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
                    # temporary storage.
                    #
                    # The supported resource type is:
                    #
                    # Google Cloud Storage:
                    #   storage.googleapis.com/{bucket}/{object}
                    #   bucket.storage.googleapis.com/{object}
                "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
              },
              "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
                  # are supported.
              "packages": [ # Packages to be installed on workers.
                { # The packages that must be installed in order for a worker to run the
                    # steps of the Cloud Dataflow job that will be assigned to its worker
                    # pool.
                    #
                    # This is the mechanism by which the Cloud Dataflow SDK causes code to
                    # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
                    # might use this to install jars containing the user's code and all of the
                    # various dependencies (libraries, data files, etc.) required in order
                    # for that code to run.
                  "location": "A String", # The resource to read the package from. The supported resource type is:
                      #
                      # Google Cloud Storage:
                      #
                      #   storage.googleapis.com/{bucket}
                      #   bucket.storage.googleapis.com/
                  "name": "A String", # The name of the package.
                },
              ],
              "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
                  # service will attempt to choose a reasonable default.
              "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
                  # the service will use the network "default".
              "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
                  # will attempt to choose a reasonable default.
              "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
                  # attempt to choose a reasonable default.
              "teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.
                  # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
                  # `TEARDOWN_NEVER`.
                  # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
                  # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
                  # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
                  # down.
                  #
                  # If the workers are not torn down by the service, they will
                  # continue to run and use Google Compute Engine VM resources in the
                  # user's project until they are explicitly terminated by the user.
                  # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
                  # policy except for small, manually supervised test jobs.
                  #
                  # If unknown or unspecified, the service will attempt to choose a reasonable
                  # default.
              "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
                  # Compute Engine API.
              "ipConfiguration": "A String", # Configuration for VM IPs.
              "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
                  # service will choose a number of threads (according to the number of cores
                  # on the selected machine type for batch, or 1 by convention for streaming).
              "poolArgs": { # Extra arguments for this worker pool.
                "a_key": "", # Properties of the object. Contains field @type with type URL.
              },
              "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
                  # execute the job.  If zero or unspecified, the service will
                  # attempt to choose a reasonable default.
              "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
                  # harness, residing in Google Container Registry.
              "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
                  # the form "regions/REGION/subnetworks/SUBNETWORK".
              "dataDisks": [ # Data disks that are used by a VM in this workflow.
                { # Describes the data disk used by a workflow job.
                  "mountPoint": "A String", # Directory in a VM where disk is mounted.
                  "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
                      # attempt to choose a reasonable default.
                  "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
                      # must be a disk type appropriate to the project and zone in which
                      # the workers will run.  If unknown or unspecified, the service
                      # will attempt to choose a reasonable default.
                      #
                      # For example, the standard persistent disk type is a resource name
                      # typically ending in "pd-standard".  If SSD persistent disks are
                      # available, the resource name typically ends with "pd-ssd".  The
                      # actual valid values are defined by the Google Compute Engine API,
                      # not by the Cloud Dataflow API; consult the Google Compute Engine
                      # documentation for more information about determining the set of
                      # available disk types for a particular project and zone.
                      #
                      # Google Compute Engine Disk types are local to a particular
                      # project in a particular zone, and so the resource name will
                      # typically look something like this:
                      #
                      # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
                },
              ],
              "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
                "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
                "algorithm": "A String", # The algorithm to use for autoscaling.
              },
              "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
                  # select a default set of packages which are useful to worker
                  # harnesses written in a particular language.
              "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
                  # attempt to choose a reasonable default.
              "metadata": { # Metadata to set on the Google Compute Engine VMs.
                "a_key": "A String",
              },
            },
          ],
          "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
              # storage.  The system will append the suffix "/temp-{JOBNAME}" to
              # this resource prefix, where {JOBNAME} is the value of the
              # job_name field.  The resulting bucket and object prefix is used
              # as the prefix of the resources used to store temporary data
              # needed during the job execution.  NOTE: This will override the
              # value in taskrunner_settings.
              # The supported resource type is:
              #
              # Google Cloud Storage:
              #
              #   storage.googleapis.com/{bucket}/{object}
              #   bucket.storage.googleapis.com/{object}
        },
        "location": "A String", # The [regional endpoint]
            # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
            # contains this job.
        "tempFiles": [ # A set of files the system should be aware of that are used
            # for temporary storage. These temporary files will be
            # removed on job completion.
            # No duplicates are allowed.
            # No file patterns are supported.
            #
            # The supported files are:
            #
            # Google Cloud Storage:
            #
            #    storage.googleapis.com/{bucket}/{object}
            #    bucket.storage.googleapis.com/{object}
          "A String",
        ],
        "type": "A String", # The type of Cloud Dataflow job.
        "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
            # If this field is set, the service will ensure its uniqueness.
            # The request to create a job will fail if the service has knowledge of a
            # previously submitted job with the same client's ID and job name.
            # The caller may use this field to ensure idempotence of job
            # creation across retried attempts to create a job.
            # By default, the field is empty and, in that case, the service ignores it.
        "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
            # snapshot.
        "stepsLocation": "A String", # The GCS location where the steps are stored.
        "currentStateTime": "A String", # The timestamp associated with the current state.
        "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
            # Flexible resource scheduling jobs are started with some delay after job
            # creation, so start_time is unset before start and is updated when the
            # job is started by the Cloud Dataflow service. For other jobs, start_time
            # always equals create_time and is immutable and set by the Cloud Dataflow
            # service.
        "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
            # Cloud Dataflow service.
        "requestedState": "A String", # The job's requested state.
            #
            # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
            # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
            # also be used to directly set a job's requested state to
            # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
            # job if it has not already reached a terminal state.
        "name": "A String", # The user-specified Cloud Dataflow job name.
            #
            # Only one Job with a given name may exist in a project at any
            # given time. If a caller attempts to create a Job with the same
            # name as an already-existing Job, the attempt returns the
            # existing Job.
            #
            # The name must match the regular expression
            # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
        "steps": [ # Exactly one of steps or steps_location should be specified.
            #
            # The top-level steps that constitute the entire job.
          { # Defines a particular step within a Cloud Dataflow job.
              #
              # A job consists of multiple steps, each of which performs some
              # specific operation as part of the overall job.  Data is typically
              # passed from one step to another as part of the job.
              #
              # Here's an example of a sequence of steps which together implement a
              # Map-Reduce job:
              #
              #   * Read a collection of data from some source, parsing the
              #     collection's elements.
              #
              #   * Validate the elements.
              #
              #   * Apply a user-defined function to map each element to some value
              #     and extract an element-specific key value.
              #
              #   * Group elements with the same key into a single element with
              #     that key, transforming a multiply-keyed collection into a
              #     uniquely-keyed collection.
              #
              #   * Write the elements out to some data sink.
              #
              # Note that the Cloud Dataflow service may be used to run many different
              # types of jobs, not just Map-Reduce.
            "kind": "A String", # The kind of step in the Cloud Dataflow job.
            "properties": { # Named properties associated with the step. Each kind of
                # predefined step has its own required set of properties.
                # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
              "a_key": "", # Properties of the object.
            },
            "name": "A String", # The name that identifies the step. This must be unique for each
                # step with respect to all other steps in the Cloud Dataflow job.
          },
        ],
        "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
            # of the job it replaced.
            #
            # When sending a `CreateJobRequest`, you can update a job by specifying it
            # here. The job named here is stopped, and its intermediate state is
            # transferred to this job.
        "currentState": "A String", # The current state of the job.
            #
            # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
            # specified.
            #
            # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
            # terminal state. After a job has reached a terminal state, no
            # further state updates may be made.
            #
            # This field may be mutated by the Cloud Dataflow service;
            # callers cannot mutate it.
        "executionInfo": { # Deprecated. Additional information about how a Cloud Dataflow job will be
            # executed that isn't contained in the submitted job.
          "stages": { # A mapping from each stage to the information about that stage.
            "a_key": { # Contains information about how a particular
                # google.dataflow.v1beta3.Step will be executed.
              "stepName": [ # The steps associated with the execution stage.
                  # Note that stages may have several steps, and that a given step
                  # might be run by more than one stage.
                "A String",
              ],
            },
          },
        },
      },
    ],
  }

list_next(previous_request, previous_response)

Retrieves the next page of results.

Args:
  previous_request: The request for the previous page. (required)
  previous_response: The response from the request for the previous page. (required)

Returns:
  A request object that you can call 'execute()' on to request the next
  page. Returns None if there are no more items in the collection.

snapshot(projectId, location, jobId, body, x__xgafv=None)

Snapshot the state of a streaming job.

Args:
  projectId: string, The project which owns the job to be snapshotted. (required)
  location: string, The location that contains this job. (required)
  jobId: string, The job to be snapshotted. (required)
  body: object, The request body. (required)
    The object takes the form of:

{ # Request to create a snapshot of a job.
    "location": "A String", # The location that contains this job.
    "ttl": "A String", # TTL for the snapshot.
  }

  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format

Returns:
  An object of the form:

    { # Represents a snapshot of a job.
    "sourceJobId": "A String", # The job this snapshot was created from.
    "projectId": "A String", # The project this snapshot belongs to.
    "creationTime": "A String", # The time this snapshot was created.
    "state": "A String", # State of the snapshot.
    "ttl": "A String", # The time after which this snapshot will be automatically deleted.
    "id": "A String", # The unique ID of this snapshot.
  }

update(projectId, location, jobId, body, x__xgafv=None)

Updates the state of an existing Cloud Dataflow job.

To update the state of an existing job, we recommend using
`projects.locations.jobs.update` with a [regional endpoint]
(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using
`projects.jobs.update` is not recommended, as you can only update the state
of jobs that are running in `us-central1`.

Args:
  projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
  location: string, The [regional endpoint]
(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
contains this job. (required)
  jobId: string, The job ID. (required)
  body: object, The request body. (required)
    The object takes the form of:

{ # Defines a job to be run by the Cloud Dataflow service.
  "labels": { # User-defined labels for this job.
      # 
      # The labels map can contain no more than 64 entries.  Entries of the labels
      # map are UTF8 strings that comply with the following restrictions:
      # 
      # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
      # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
      # * Both keys and values are additionally constrained to be <= 128 bytes in
      # size.
    "a_key": "A String",
  },
  "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the
      # ListJob response and Job SUMMARY view.
      #
      # This field is populated by the Dataflow service to support filtering jobs
      # by the metadata values provided here. Populated for ListJobs and all GetJob
      # views SUMMARY and higher.
    "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
      "versionDisplayName": "A String", # A readable string describing the version of the SDK.
      "version": "A String", # The version of the SDK used to run the job.
      "sdkSupportStatus": "A String", # The support status for this SDK version.
    },
    "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
      { # Metadata for a PubSub connector used by the job.
        "topic": "A String", # Topic accessed in the connection.
        "subscription": "A String", # Subscription used in the connection.
      },
    ],
    "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
      { # Metadata for a Datastore connector used by the job.
        "projectId": "A String", # ProjectId accessed in the connection.
        "namespace": "A String", # Namespace used in the connection.
      },
    ],
    "fileDetails": [ # Identification of a File source used in the Dataflow job.
      { # Metadata for a File connector used by the job.
        "filePattern": "A String", # File Pattern used to access files by the connector.
      },
    ],
    "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
      { # Metadata for a Spanner connector used by the job.
        "instanceId": "A String", # InstanceId accessed in the connection.
        "projectId": "A String", # ProjectId accessed in the connection.
        "databaseId": "A String", # DatabaseId accessed in the connection.
      },
    ],
    "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
      { # Metadata for a BigTable connector used by the job.
        "instanceId": "A String", # InstanceId accessed in the connection.
        "projectId": "A String", # ProjectId accessed in the connection.
        "tableId": "A String", # TableId accessed in the connection.
      },
    ],
    "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
      { # Metadata for a BigQuery connector used by the job.
        "projectId": "A String", # Project accessed in the connection.
        "dataset": "A String", # Dataset accessed in the connection.
        "table": "A String", # Table accessed in the connection.
        "query": "A String", # Query used to access data in the connection.
      },
    ],
  },
  "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed
      # form.  This data is provided by the Dataflow service for ease of visualizing
      # the pipeline and interpreting Dataflow provided metrics.
      #
      # Preliminary field: The format of this data may change at any time.
      # A description of the user pipeline and stages through which it is executed.
      # Created by Cloud Dataflow service.  Only retrieved with
      # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
    "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
      { # Description of the type, names/ids, and input/outputs for a transform.
        "kind": "A String", # Type of transform.
        "name": "A String", # User provided name for this transform instance.
        "inputCollectionName": [ # User names for all collection inputs to this transform.
          "A String",
        ],
        "displayData": [ # Transform-specific display data.
          { # Data provided with a pipeline or transform to provide descriptive info.
            "shortStrValue": "A String", # A possible additional shorter value to display.
                # For example a java_class_name_value of com.mypackage.MyDoFn
                # will be stored with MyDoFn as the short_str_value and
                # com.mypackage.MyDoFn as the java_class_name value.
                # short_str_value can be displayed and java_class_name_value
                # will be displayed as a tooltip.
            "durationValue": "A String", # Contains value if the data is of duration type.
            "url": "A String", # An optional full URL.
            "floatValue": 3.14, # Contains value if the data is of float type.
            "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                # language namespace (e.g. Python module) which defines the display data.
                # This allows a dax monitoring system to specially handle the data
                # and perform custom rendering.
            "javaClassValue": "A String", # Contains value if the data is of java class type.
            "label": "A String", # An optional label to display in a dax UI for the element.
            "boolValue": True or False, # Contains value if the data is of a boolean type.
            "strValue": "A String", # Contains value if the data is of string type.
            "key": "A String", # The key identifying the display data.
                # This is intended to be used as a label for the display data
                # when viewed in a dax monitoring system.
            "int64Value": "A String", # Contains value if the data is of int64 type.
            "timestampValue": "A String", # Contains value if the data is of timestamp type.
          },
        ],
        "outputCollectionName": [ # User names for all collection outputs to this transform.
          "A String",
        ],
        "id": "A String", # SDK generated id of this transform instance.
      },
    ],
    "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
      { # Description of the composing transforms, names/ids, and input/outputs of a
          # stage of execution.  Some composing transforms and sources may have been
          # generated by the Dataflow service during execution planning.
        "componentSource": [ # Collections produced and consumed by component transforms of this stage.
          { # Description of an interstitial value between transforms in an execution
              # stage.
            "userName": "A String", # Human-readable name for this source; may be user or system generated.
            "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                # source is most closely associated.
            "name": "A String", # Dataflow service generated name for this source.
          },
        ],
        "kind": "A String", # Type of transform this stage is executing.
        "name": "A String", # Dataflow service generated name for this stage.
        "outputSource": [ # Output sources for this stage.
          { # Description of an input or output of an execution stage.
            "userName": "A String", # Human-readable name for this source; may be user or system generated.
            "sizeBytes": "A String", # Size of the source, if measurable.
            "name": "A String", # Dataflow service generated name for this source.
            "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                # source is most closely associated.
          },
        ],
        "inputSource": [ # Input sources for this stage.
          { # Description of an input or output of an execution stage.
            "userName": "A String", # Human-readable name for this source; may be user or system generated.
            "sizeBytes": "A String", # Size of the source, if measurable.
            "name": "A String", # Dataflow service generated name for this source.
            "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                # source is most closely associated.
          },
        ],
        "componentTransform": [ # Transforms that comprise this execution stage.
          { # Description of a transform executed as part of an execution stage.
            "userName": "A String", # Human-readable name for this transform; may be user or system generated.
            "originalTransform": "A String", # User name for the original user transform with which this transform is
                # most closely associated.
            "name": "A String", # Dataflow service generated name for this transform.
          },
        ],
        "id": "A String", # Dataflow service generated id for this stage.
      },
    ],
    "displayData": [ # Pipeline level display data.
      { # Data provided with a pipeline or transform to provide descriptive info.
        "shortStrValue": "A String", # A possible additional shorter value to display.
            # For example a java_class_name_value of com.mypackage.MyDoFn
            # will be stored with MyDoFn as the short_str_value and
            # com.mypackage.MyDoFn as the java_class_name value.
            # short_str_value can be displayed and java_class_name_value
            # will be displayed as a tooltip.
        "durationValue": "A String", # Contains value if the data is of duration type.
        "url": "A String", # An optional full URL.
        "floatValue": 3.14, # Contains value if the data is of float type.
        "namespace": "A String", # The namespace for the key. This is usually a class name or programming
            # language namespace (e.g. Python module) which defines the display data.
            # This allows a dax monitoring system to specially handle the data
            # and perform custom rendering.
        "javaClassValue": "A String", # Contains value if the data is of java class type.
        "label": "A String", # An optional label to display in a dax UI for the element.
        "boolValue": True or False, # Contains value if the data is of a boolean type.
        "strValue": "A String", # Contains value if the data is of string type.
        "key": "A String", # The key identifying the display data.
            # This is intended to be used as a label for the display data
            # when viewed in a dax monitoring system.
        "int64Value": "A String", # Contains value if the data is of int64 type.
        "timestampValue": "A String", # Contains value if the data is of timestamp type.
      },
    ],
  },
  "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
      # callers cannot mutate it.
    { # A message describing the state of a particular execution stage.
      "executionStageName": "A String", # The name of the execution stage.
      "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
      "currentStateTime": "A String", # The time at which the stage transitioned to this state.
    },
  ],
  "id": "A String", # The unique ID of this job.
      # 
      # This field is set by the Cloud Dataflow service when the Job is
      # created, and is immutable for the life of the job.
  "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
      # `JOB_STATE_UPDATED`), this field contains the ID of that job.
  "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
  "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
      # corresponding name prefixes of the new job.
    "a_key": "A String",
  },
  "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
    "version": { # A structure describing which components and their versions of the service
        # are required in order to run the job.
      "a_key": "", # Properties of the object.
    },
    "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
    "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data
        # at rest, AKA a Customer Managed Encryption Key (CMEK).
        #
        # Format:
        #   projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
    "internalExperiments": { # Experimental settings.
      "a_key": "", # Properties of the object. Contains field @type with type URL.
    },
    "dataset": "A String", # The dataset for the current project where various workflow
        # related tables are stored.
        #
        # The supported resource type is:
        #
        # Google BigQuery:
        #   bigquery.googleapis.com/{dataset}
    "experiments": [ # The list of experiments to enable.
      "A String",
    ],
    "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
    "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
        # options are passed through the service and are used to recreate the
        # SDK pipeline options on the worker in a language agnostic and platform
        # independent way.
      "a_key": "", # Properties of the object.
    },
    "userAgent": { # A description of the process that generated the request.
      "a_key": "", # Properties of the object.
    },
    "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
        # unspecified, the service will attempt to choose a reasonable
        # default.  This should be in the form of the API service name,
        # e.g. "compute.googleapis.com".
    "workerPools": [ # The worker pools. At least one "harness" worker pool must be
        # specified in order for the job to have workers.
      { # Describes one particular pool of Cloud Dataflow workers to be
          # instantiated by the Cloud Dataflow service in order to perform the
          # computations required by a job.  Note that a workflow job may use
          # multiple pools, in order to match the various computational
          # requirements of the various stages of the job.
        "diskSourceImage": "A String", # Fully qualified source image for disks.
        "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
            # using the standard Dataflow task runner.  Users should ignore
            # this field.
          "workflowFileName": "A String", # The file to store the workflow in.
          "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
              # will not be uploaded.
              #
              # The supported resource type is:
              #
              # Google Cloud Storage:
              #   storage.googleapis.com/{bucket}/{object}
              #   bucket.storage.googleapis.com/{object}
          "commandlinesFileName": "A String", # The file to store preprocessing commands in.
          "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
            "reportingEnabled": True or False, # Whether to send work progress updates to the service.
            "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
                # "shuffle/v1beta1".
            "workerId": "A String", # The ID of the worker running this pipeline.
            "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
                #
                # When workers access Google Cloud APIs, they logically do so via
                # relative URLs.  If this field is specified, it supplies the base
                # URL to use for resolving these relative URLs.  The normative
                # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                # Locators".
                #
                # If not specified, the default value is "http://www.googleapis.com/"
            "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
                # "dataflow/v1b3/projects".
            "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
                # storage.
                #
                # The supported resource type is:
                #
                # Google Cloud Storage:
                #
                #   storage.googleapis.com/{bucket}/{object}
                #   bucket.storage.googleapis.com/{object}
          },
          "vmId": "A String", # The ID string of the VM.
          "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
          "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
          "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
              # access the Cloud Dataflow API.
            "A String",
          ],
          "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
              # taskrunner; e.g. "root".
          "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
              #
              # When workers access Google Cloud APIs, they logically do so via
              # relative URLs.  If this field is specified, it supplies the base
              # URL to use for resolving these relative URLs.  The normative
              # algorithm used is defined by RFC 1808, "Relative Uniform Resource
              # Locators".
              #
              # If not specified, the default value is "http://www.googleapis.com/"
          "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
              # taskrunner; e.g. "wheel".
          "languageHint": "A String", # The suggested backend language.
          "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
              # console.
          "streamingWorkerMainClass": "A String", # The streaming worker main class name.
          "logDir": "A String", # The directory on the VM to store logs.
          "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
          "harnessCommand": "A String", # The command to launch the worker harness.
          "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
              # temporary storage.
              #
              # The supported resource type is:
              #
              # Google Cloud Storage:
              #   storage.googleapis.com/{bucket}/{object}
              #   bucket.storage.googleapis.com/{object}
          "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
        },
        "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
            # are supported.
        "packages": [ # Packages to be installed on workers.
          { # The packages that must be installed in order for a worker to run the
              # steps of the Cloud Dataflow job that will be assigned to its worker
              # pool.
              #
              # This is the mechanism by which the Cloud Dataflow SDK causes code to
              # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
              # might use this to install jars containing the user's code and all of the
              # various dependencies (libraries, data files, etc.) required in order
              # for that code to run.
            "location": "A String", # The resource to read the package from. The supported resource type is:
                #
                # Google Cloud Storage:
                #
                #   storage.googleapis.com/{bucket}
                #   bucket.storage.googleapis.com/
            "name": "A String", # The name of the package.
          },
        ],
        "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
            # service will attempt to choose a reasonable default.
        "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
            # the service will use the network "default".
        "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
            # will attempt to choose a reasonable default.
        "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
            # attempt to choose a reasonable default.
        "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
            # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
            # `TEARDOWN_NEVER`.
            # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
            # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
            # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
            # down.
            #
            # If the workers are not torn down by the service, they will
            # continue to run and use Google Compute Engine VM resources in the
            # user's project until they are explicitly terminated by the user.
            # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
            # policy except for small, manually supervised test jobs.
            #
            # If unknown or unspecified, the service will attempt to choose a reasonable
            # default.
        "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
            # Compute Engine API.
        "ipConfiguration": "A String", # Configuration for VM IPs.
        "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
            # service will choose a number of threads (according to the number of cores
            # on the selected machine type for batch, or 1 by convention for streaming).
        "poolArgs": { # Extra arguments for this worker pool.
          "a_key": "", # Properties of the object. Contains field @type with type URL.
        },
        "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
            # execute the job.  If zero or unspecified, the service will
            # attempt to choose a reasonable default.
        "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
            # harness, residing in Google Container Registry.
        "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
            # the form "regions/REGION/subnetworks/SUBNETWORK".
        "dataDisks": [ # Data disks that are used by a VM in this workflow.
          { # Describes the data disk used by a workflow job.
            "mountPoint": "A String", # Directory in a VM where disk is mounted.
            "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
                # attempt to choose a reasonable default.
            "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
                # must be a disk type appropriate to the project and zone in which
                # the workers will run.  If unknown or unspecified, the service
                # will attempt to choose a reasonable default.
                #
                # For example, the standard persistent disk type is a resource name
                # typically ending in "pd-standard".  If SSD persistent disks are
                # available, the resource name typically ends with "pd-ssd".  The
                # actual valid values are defined by the Google Compute Engine API,
                # not by the Cloud Dataflow API; consult the Google Compute Engine
                # documentation for more information about determining the set of
                # available disk types for a particular project and zone.
                #
                # Google Compute Engine Disk types are local to a particular
                # project in a particular zone, and so the resource name will
                # typically look something like this:
                #
                # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
          },
        ],
        "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
          "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
          "algorithm": "A String", # The algorithm to use for autoscaling.
        },
        "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
            # select a default set of packages which are useful to worker
            # harnesses written in a particular language.
        "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
            # attempt to choose a reasonable default.
        "metadata": { # Metadata to set on the Google Compute Engine VMs.
          "a_key": "A String",
        },
      },
    ],
    "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
        # storage.  The system will append the suffix "/temp-{JOBNAME}" to
        # this resource prefix, where {JOBNAME} is the value of the
        # job_name field.  The resulting bucket and object prefix is used
        # as the prefix of the resources used to store temporary data
        # needed during the job execution.  NOTE: This will override the
        # value in taskrunner_settings.
        # The supported resource type is:
        #
        # Google Cloud Storage:
        #
        #   storage.googleapis.com/{bucket}/{object}
        #   bucket.storage.googleapis.com/{object}
  },
  "location": "A String", # The [regional endpoint]
      # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
      # contains this job.
  "tempFiles": [ # A set of files the system should be aware of that are used
      # for temporary storage. These temporary files will be
      # removed on job completion.
      # No duplicates are allowed.
      # No file patterns are supported.
      # 
      # The supported files are:
      # 
      # Google Cloud Storage:
      # 
      #    storage.googleapis.com/{bucket}/{object}
      #    bucket.storage.googleapis.com/{object}
    "A String",
  ],
  "type": "A String", # The type of Cloud Dataflow job.
  "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
      # If this field is set, the service will ensure its uniqueness.
      # The request to create a job will fail if the service has knowledge of a
      # previously submitted job with the same client's ID and job name.
      # The caller may use this field to ensure idempotence of job
      # creation across retried attempts to create a job.
      # By default, the field is empty and, in that case, the service ignores it.
  "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
      # snapshot.
  "stepsLocation": "A String", # The GCS location where the steps are stored.
  "currentStateTime": "A String", # The timestamp associated with the current state.
  "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
      # Flexible resource scheduling jobs are started with some delay after job
      # creation, so start_time is unset before start and is updated when the
      # job is started by the Cloud Dataflow service. For other jobs, start_time
      # always equals create_time and is immutable and set by the Cloud Dataflow
      # service.
  "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
      # Cloud Dataflow service.
  "requestedState": "A String", # The job's requested state.
      # 
      # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
      # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
      # also be used to directly set a job's requested state to
      # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
      # job if it has not already reached a terminal state.
  "name": "A String", # The user-specified Cloud Dataflow job name.
      # 
      # Only one Job with a given name may exist in a project at any
      # given time. If a caller attempts to create a Job with the same
      # name as an already-existing Job, the attempt returns the
      # existing Job.
      # 
      # The name must match the regular expression
      # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
  "steps": [ # Exactly one of steps or steps_location should be specified.
      # 
      # The top-level steps that constitute the entire job.
    { # Defines a particular step within a Cloud Dataflow job.
        #
        # A job consists of multiple steps, each of which performs some
        # specific operation as part of the overall job.  Data is typically
        # passed from one step to another as part of the job.
        #
        # Here's an example of a sequence of steps which together implement a
        # Map-Reduce job:
        #
        #   * Read a collection of data from some source, parsing the
        #     collection's elements.
        #
        #   * Validate the elements.
        #
        #   * Apply a user-defined function to map each element to some value
        #     and extract an element-specific key value.
        #
        #   * Group elements with the same key into a single element with
        #     that key, transforming a multiply-keyed collection into a
        #     uniquely-keyed collection.
        #
        #   * Write the elements out to some data sink.
        #
        # Note that the Cloud Dataflow service may be used to run many different
        # types of jobs, not just Map-Reduce.
      "kind": "A String", # The kind of step in the Cloud Dataflow job.
      "properties": { # Named properties associated with the step. Each kind of
          # predefined step has its own required set of properties.
          # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
        "a_key": "", # Properties of the object.
      },
      "name": "A String", # The name that identifies the step. This must be unique for each
          # step with respect to all other steps in the Cloud Dataflow job.
    },
  ],
  "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
      # of the job it replaced.
      # 
      # When sending a `CreateJobRequest`, you can update a job by specifying it
      # here. The job named here is stopped, and its intermediate state is
      # transferred to this job.
  "currentState": "A String", # The current state of the job.
      # 
      # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
      # specified.
      # 
      # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
      # terminal state. After a job has reached a terminal state, no
      # further state updates may be made.
      # 
      # This field may be mutated by the Cloud Dataflow service;
      # callers cannot mutate it.
  "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that
      # isn't contained in the submitted job.
      #
      # Deprecated.
    "stages": { # A mapping from each stage to the information about that stage.
      "a_key": { # Contains information about how a particular
          # google.dataflow.v1beta3.Step will be executed.
        "stepName": [ # The steps associated with the execution stage.
            # Note that stages may have several steps, and that a given step
            # might be run by more than one stage.
          "A String",
        ],
      },
    },
  },
}

  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format

Returns:
  An object of the form:

    { # Defines a job to be run by the Cloud Dataflow service.
    "labels": { # User-defined labels for this job.
        #
        # The labels map can contain no more than 64 entries.  Entries of the labels
        # map are UTF8 strings that comply with the following restrictions:
        #
        # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
        # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
        # * Both keys and values are additionally constrained to be <= 128 bytes in
        # size.
      "a_key": "A String",
    },
    "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the
        # ListJob response and Job SUMMARY view.
        #
        # This field is populated by the Dataflow service to support filtering jobs
        # by the metadata values provided here. Populated for ListJobs and all GetJob
        # views SUMMARY and higher.
      "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
        "versionDisplayName": "A String", # A readable string describing the version of the SDK.
        "version": "A String", # The version of the SDK used to run the job.
        "sdkSupportStatus": "A String", # The support status for this SDK version.
      },
      "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
        { # Metadata for a PubSub connector used by the job.
          "topic": "A String", # Topic accessed in the connection.
          "subscription": "A String", # Subscription used in the connection.
        },
      ],
      "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
        { # Metadata for a Datastore connector used by the job.
          "projectId": "A String", # ProjectId accessed in the connection.
          "namespace": "A String", # Namespace used in the connection.
        },
      ],
      "fileDetails": [ # Identification of a File source used in the Dataflow job.
        { # Metadata for a File connector used by the job.
          "filePattern": "A String", # File Pattern used to access files by the connector.
        },
      ],
      "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
        { # Metadata for a Spanner connector used by the job.
          "instanceId": "A String", # InstanceId accessed in the connection.
          "projectId": "A String", # ProjectId accessed in the connection.
          "databaseId": "A String", # DatabaseId accessed in the connection.
        },
      ],
      "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
        { # Metadata for a BigTable connector used by the job.
          "instanceId": "A String", # InstanceId accessed in the connection.
          "projectId": "A String", # ProjectId accessed in the connection.
          "tableId": "A String", # TableId accessed in the connection.
        },
      ],
      "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
        { # Metadata for a BigQuery connector used by the job.
          "projectId": "A String", # Project accessed in the connection.
          "dataset": "A String", # Dataset accessed in the connection.
          "table": "A String", # Table accessed in the connection.
          "query": "A String", # Query used to access data in the connection.
        },
      ],
    },
    "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed
        # form.  This data is provided by the Dataflow service for ease of visualizing
        # the pipeline and interpreting Dataflow provided metrics.
        #
        # Preliminary field: The format of this data may change at any time.
        # A description of the user pipeline and stages through which it is executed.
        # Created by Cloud Dataflow service.  Only retrieved with
        # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
      "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
        { # Description of the type, names/ids, and input/outputs for a transform.
          "kind": "A String", # Type of transform.
          "name": "A String", # User provided name for this transform instance.
          "inputCollectionName": [ # User names for all collection inputs to this transform.
            "A String",
          ],
          "displayData": [ # Transform-specific display data.
            { # Data provided with a pipeline or transform to provide descriptive info.
              "shortStrValue": "A String", # A possible additional shorter value to display.
                  # For example a java_class_name_value of com.mypackage.MyDoFn
                  # will be stored with MyDoFn as the short_str_value and
                  # com.mypackage.MyDoFn as the java_class_name value.
                  # short_str_value can be displayed and java_class_name_value
                  # will be displayed as a tooltip.
              "durationValue": "A String", # Contains value if the data is of duration type.
              "url": "A String", # An optional full URL.
              "floatValue": 3.14, # Contains value if the data is of float type.
              "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                  # language namespace (i.e. python module) which defines the display data.
                  # This allows a dax monitoring system to specially handle the data
                  # and perform custom rendering.
              "javaClassValue": "A String", # Contains value if the data is of java class type.
              "label": "A String", # An optional label to display in a dax UI for the element.
              "boolValue": True or False, # Contains value if the data is of a boolean type.
              "strValue": "A String", # Contains value if the data is of string type.
              "key": "A String", # The key identifying the display data.
                  # This is intended to be used as a label for the display data
                  # when viewed in a dax monitoring system.
              "int64Value": "A String", # Contains value if the data is of int64 type.
              "timestampValue": "A String", # Contains value if the data is of timestamp type.
            },
          ],
          "outputCollectionName": [ # User names for all collection outputs to this transform.
            "A String",
          ],
          "id": "A String", # SDK generated id of this transform instance.
        },
      ],
      "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
        { # Description of the composing transforms, names/ids, and input/outputs of a
            # stage of execution.  Some composing transforms and sources may have been
            # generated by the Dataflow service during execution planning.
          "componentSource": [ # Collections produced and consumed by component transforms of this stage.
            { # Description of an interstitial value between transforms in an execution
                # stage.
              "userName": "A String", # Human-readable name for this source; may be user or system generated.
              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                  # source is most closely associated.
              "name": "A String", # Dataflow service generated name for this source.
            },
          ],
          "kind": "A String", # Type of transform this stage is executing.
          "name": "A String", # Dataflow service generated name for this stage.
          "outputSource": [ # Output sources for this stage.
            { # Description of an input or output of an execution stage.
              "userName": "A String", # Human-readable name for this source; may be user or system generated.
              "sizeBytes": "A String", # Size of the source, if measurable.
              "name": "A String", # Dataflow service generated name for this source.
              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                  # source is most closely associated.
            },
          ],
          "inputSource": [ # Input sources for this stage.
            { # Description of an input or output of an execution stage.
              "userName": "A String", # Human-readable name for this source; may be user or system generated.
              "sizeBytes": "A String", # Size of the source, if measurable.
              "name": "A String", # Dataflow service generated name for this source.
              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                  # source is most closely associated.
            },
          ],
          "componentTransform": [ # Transforms that comprise this execution stage.
            { # Description of a transform executed as part of an execution stage.
              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
              "originalTransform": "A String", # User name for the original user transform with which this transform is
                  # most closely associated.
              "name": "A String", # Dataflow service generated name for this transform.
            },
          ],
          "id": "A String", # Dataflow service generated id for this stage.
        },
      ],
      "displayData": [ # Pipeline level display data.
        { # Data provided with a pipeline or transform to provide descriptive info.
          "shortStrValue": "A String", # A possible additional shorter value to display.
              # For example a java_class_name_value of com.mypackage.MyDoFn
              # will be stored with MyDoFn as the short_str_value and
              # com.mypackage.MyDoFn as the java_class_name value.
              # short_str_value can be displayed and java_class_name_value
              # will be displayed as a tooltip.
          "durationValue": "A String", # Contains value if the data is of duration type.
          "url": "A String", # An optional full URL.
          "floatValue": 3.14, # Contains value if the data is of float type.
          "namespace": "A String", # The namespace for the key. This is usually a class name or programming
              # language namespace (i.e. python module) which defines the display data.
              # This allows a dax monitoring system to specially handle the data
              # and perform custom rendering.
          "javaClassValue": "A String", # Contains value if the data is of java class type.
          "label": "A String", # An optional label to display in a dax UI for the element.
          "boolValue": True or False, # Contains value if the data is of a boolean type.
          "strValue": "A String", # Contains value if the data is of string type.
          "key": "A String", # The key identifying the display data.
              # This is intended to be used as a label for the display data
              # when viewed in a dax monitoring system.
          "int64Value": "A String", # Contains value if the data is of int64 type.
          "timestampValue": "A String", # Contains value if the data is of timestamp type.
        },
      ],
    },
    "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
        # callers cannot mutate it.
      { # A message describing the state of a particular execution stage.
        "executionStageName": "A String", # The name of the execution stage.
        "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
        "currentStateTime": "A String", # The time at which the stage transitioned to this state.
      },
    ],
    "id": "A String", # The unique ID of this job.
        #
        # This field is set by the Cloud Dataflow service when the Job is
        # created, and is immutable for the life of the job.
    "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
        # `JOB_STATE_UPDATED`), this field contains the ID of that job.
    "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
    "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
        # corresponding name prefixes of the new job.
      "a_key": "A String",
    },
    "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
      "version": { # A structure describing which components and their versions of the service
          # are required in order to run the job.
        "a_key": "", # Properties of the object.
      },
      "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
      "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data
          # at rest, AKA a Customer Managed Encryption Key (CMEK).
          #
          # Format:
          #   projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
      "internalExperiments": { # Experimental settings.
        "a_key": "", # Properties of the object. Contains field @type with type URL.
      },
      "dataset": "A String", # The dataset for the current project where various workflow
          # related tables are stored.
          #
          # The supported resource type is:
          #
          # Google BigQuery:
          #   bigquery.googleapis.com/{dataset}
      "experiments": [ # The list of experiments to enable.
        "A String",
      ],
      "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
      "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
          # options are passed through the service and are used to recreate the
          # SDK pipeline options on the worker in a language agnostic and platform
          # independent way.
        "a_key": "", # Properties of the object.
      },
      "userAgent": { # A description of the process that generated the request.
        "a_key": "", # Properties of the object.
      },
      "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
          # unspecified, the service will attempt to choose a reasonable
          # default.  This should be in the form of the API service name,
          # e.g. "compute.googleapis.com".
      "workerPools": [ # The worker pools. At least one "harness" worker pool must be
          # specified in order for the job to have workers.
        { # Describes one particular pool of Cloud Dataflow workers to be
            # instantiated by the Cloud Dataflow service in order to perform the
            # computations required by a job.  Note that a workflow job may use
            # multiple pools, in order to match the various computational
            # requirements of the various stages of the job.
          "diskSourceImage": "A String", # Fully qualified source image for disks.
          "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
              # using the standard Dataflow task runner.  Users should ignore
              # this field.
            "workflowFileName": "A String", # The file to store the workflow in.
            "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
                # will not be uploaded.
                #
                # The supported resource type is:
                #
                # Google Cloud Storage:
                #   storage.googleapis.com/{bucket}/{object}
                #   bucket.storage.googleapis.com/{object}
            "commandlinesFileName": "A String", # The file to store preprocessing commands in.
            "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
              "reportingEnabled": True or False, # Whether to send work progress updates to the service.
              "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
                  # "shuffle/v1beta1".
              "workerId": "A String", # The ID of the worker running this pipeline.
              "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
                  #
                  # When workers access Google Cloud APIs, they logically do so via
                  # relative URLs.  If this field is specified, it supplies the base
                  # URL to use for resolving these relative URLs.  The normative
                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                  # Locators".
                  #
                  # If not specified, the default value is "http://www.googleapis.com/"
              "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
                  # "dataflow/v1b3/projects".
              "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
                  # storage.
                  #
                  # The supported resource type is:
                  #
                  # Google Cloud Storage:
                  #
                  #   storage.googleapis.com/{bucket}/{object}
                  #   bucket.storage.googleapis.com/{object}
            },
            "vmId": "A String", # The ID string of the VM.
            "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
            "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
            "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
                # access the Cloud Dataflow API.
              "A String",
            ],
            "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
                # taskrunner; e.g. "root".
            "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
                #
                # When workers access Google Cloud APIs, they logically do so via
                # relative URLs.  If this field is specified, it supplies the base
                # URL to use for resolving these relative URLs.  The normative
                # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                # Locators".
                #
                # If not specified, the default value is "http://www.googleapis.com/"
            "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
                # taskrunner; e.g. "wheel".
            "languageHint": "A String", # The suggested backend language.
            "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
                # console.
            "streamingWorkerMainClass": "A String", # The streaming worker main class name.
            "logDir": "A String", # The directory on the VM to store logs.
            "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
            "harnessCommand": "A String", # The command to launch the worker harness.
            "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
                # temporary storage.
                #
                # The supported resource type is:
                #
                # Google Cloud Storage:
                #   storage.googleapis.com/{bucket}/{object}
                #   bucket.storage.googleapis.com/{object}
            "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
          },
          "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
              # are supported.
          "packages": [ # Packages to be installed on workers.
            { # The packages that must be installed in order for a worker to run the
                # steps of the Cloud Dataflow job that will be assigned to its worker
                # pool.
                #
                # This is the mechanism by which the Cloud Dataflow SDK causes code to
                # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
                # might use this to install jars containing the user's code and all of the
                # various dependencies (libraries, data files, etc.) required in order
                # for that code to run.
              "location": "A String", # The resource to read the package from. The supported resource type is:
                  #
                  # Google Cloud Storage:
                  #
                  #   storage.googleapis.com/{bucket}
                  #   bucket.storage.googleapis.com/
              "name": "A String", # The name of the package.
            },
          ],
          "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
              # service will attempt to choose a reasonable default.
          "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
              # the service will use the network "default".
          "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
              # will attempt to choose a reasonable default.
          "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
              # attempt to choose a reasonable default.
          "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
              # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
              # `TEARDOWN_NEVER`.
              # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
              # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
              # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
              # down.
              #
              # If the workers are not torn down by the service, they will
              # continue to run and use Google Compute Engine VM resources in the
              # user's project until they are explicitly terminated by the user.
              # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
              # policy except for small, manually supervised test jobs.
              #
              # If unknown or unspecified, the service will attempt to choose a reasonable
              # default.
          "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
              # Compute Engine API.
          "ipConfiguration": "A String", # Configuration for VM IPs.
          "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
              # service will choose a number of threads (according to the number of cores
              # on the selected machine type for batch, or 1 by convention for streaming).
          "poolArgs": { # Extra arguments for this worker pool.
            "a_key": "", # Properties of the object. Contains field @type with type URL.
          },
          "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
              # execute the job.  If zero or unspecified, the service will
              # attempt to choose a reasonable default.
          "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
              # harness, residing in Google Container Registry.
          "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
              # the form "regions/REGION/subnetworks/SUBNETWORK".
          "dataDisks": [ # Data disks that are used by a VM in this workflow.
            { # Describes the data disk used by a workflow job.
              "mountPoint": "A String", # Directory in a VM where disk is mounted.
              "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
                  # attempt to choose a reasonable default.
              "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
                  # must be a disk type appropriate to the project and zone in which
                  # the workers will run.  If unknown or unspecified, the service
                  # will attempt to choose a reasonable default.
                  #
                  # For example, the standard persistent disk type is a resource name
                  # typically ending in "pd-standard".  If SSD persistent disks are
                  # available, the resource name typically ends with "pd-ssd".  The
                  # actual valid values are defined by the Google Compute Engine API,
                  # not by the Cloud Dataflow API; consult the Google Compute Engine
                  # documentation for more information about determining the set of
                  # available disk types for a particular project and zone.
                  #
                  # Google Compute Engine Disk types are local to a particular
                  # project in a particular zone, and so the resource name will
                  # typically look something like this:
                  #
                  # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
            },
          ],
          "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
            "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
            "algorithm": "A String", # The algorithm to use for autoscaling.
          },
          "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
              # select a default set of packages which are useful to worker
              # harnesses written in a particular language.
          "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
              # attempt to choose a reasonable default.
          "metadata": { # Metadata to set on the Google Compute Engine VMs.
            "a_key": "A String",
          },
        },
      ],
      "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
          # storage.  The system will append the suffix "/temp-{JOBNAME}" to
          # this resource prefix, where {JOBNAME} is the value of the
          # job_name field.  The resulting bucket and object prefix is used
          # as the prefix of the resources used to store temporary data
          # needed during the job execution.  NOTE: This will override the
          # value in taskrunner_settings.
          # The supported resource type is:
          #
          # Google Cloud Storage:
          #
          #   storage.googleapis.com/{bucket}/{object}
          #   bucket.storage.googleapis.com/{object}
    },
    "location": "A String", # The [regional endpoint]
        # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
        # contains this job.
    "tempFiles": [ # A set of files the system should be aware of that are used
        # for temporary storage. These temporary files will be
        # removed on job completion.
        # No duplicates are allowed.
        # No file patterns are supported.
        #
        # The supported files are:
        #
        # Google Cloud Storage:
        #
        #    storage.googleapis.com/{bucket}/{object}
        #    bucket.storage.googleapis.com/{object}
      "A String",
    ],
    "type": "A String", # The type of Cloud Dataflow job.
    "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
        # If this field is set, the service will ensure its uniqueness.
        # The request to create a job will fail if the service has knowledge of a
        # previously submitted job with the same client's ID and job name.
        # The caller may use this field to ensure idempotence of job
        # creation across retried attempts to create a job.
        # By default, the field is empty and, in that case, the service ignores it.
    "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
        # snapshot.
    "stepsLocation": "A String", # The GCS location where the steps are stored.
    "currentStateTime": "A String", # The timestamp associated with the current state.
    "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
        # Flexible resource scheduling jobs are started with some delay after job
        # creation, so start_time is unset before start and is updated when the
        # job is started by the Cloud Dataflow service. For other jobs, start_time
        # always equals create_time and is immutable and set by the Cloud Dataflow
        # service.
    "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
        # Cloud Dataflow service.
    "requestedState": "A String", # The job's requested state.
        #
        # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
        # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
        # also be used to directly set a job's requested state to
        # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
        # job if it has not already reached a terminal state.
    "name": "A String", # The user-specified Cloud Dataflow job name.
        #
        # Only one Job with a given name may exist in a project at any
        # given time. If a caller attempts to create a Job with the same
        # name as an already-existing Job, the attempt returns the
        # existing Job.
        #
        # The name must match the regular expression
        # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
    "steps": [ # Exactly one of steps or steps_location should be specified.
        #
        # The top-level steps that constitute the entire job.
      { # Defines a particular step within a Cloud Dataflow job.
          #
          # A job consists of multiple steps, each of which performs some
          # specific operation as part of the overall job.  Data is typically
          # passed from one step to another as part of the job.
          #
          # Here's an example of a sequence of steps which together implement a
          # Map-Reduce job:
          #
          #   * Read a collection of data from some source, parsing the
          #     collection's elements.
          #
          #   * Validate the elements.
          #
          #   * Apply a user-defined function to map each element to some value
          #     and extract an element-specific key value.
          #
          #   * Group elements with the same key into a single element with
          #     that key, transforming a multiply-keyed collection into a
          #     uniquely-keyed collection.
          #
          #   * Write the elements out to some data sink.
          #
          # Note that the Cloud Dataflow service may be used to run many different
          # types of jobs, not just Map-Reduce.
        "kind": "A String", # The kind of step in the Cloud Dataflow job.
        "properties": { # Named properties associated with the step. Each kind of
            # predefined step has its own required set of properties.
            # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
          "a_key": "", # Properties of the object.
        },
        "name": "A String", # The name that identifies the step. This must be unique for each
            # step with respect to all other steps in the Cloud Dataflow job.
      },
    ],
    "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
        # of the job it replaced.
        #
        # When sending a `CreateJobRequest`, you can update a job by specifying it
        # here. The job named here is stopped, and its intermediate state is
        # transferred to this job.
    "currentState": "A String", # The current state of the job.
        #
        # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
        # specified.
        #
        # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
        # terminal state. After a job has reached a terminal state, no
        # further state updates may be made.
        #
        # This field may be mutated by the Cloud Dataflow service;
        # callers cannot mutate it.
    "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
        # isn't contained in the submitted job.
      "stages": { # A mapping from each stage to the information about that stage.
        "a_key": { # Contains information about how a particular
            # google.dataflow.v1beta3.Step will be executed.
          "stepName": [ # The steps associated with the execution stage.
              # Note that stages may have several steps, and that a given step
              # might be run by more than one stage.
            "A String",
          ],
        },
      },
    },
  }

Dataflow API . projects . locations . jobs

Instance Methods

Method Details