This repository has been archived by the owner on Nov 29, 2023. It is now read-only.

feat: add optional parameters (tarball_gcs_dir, diagnosis_interval, jobs, yarn_application_ids) in DiagnoseClusterRequest #560

Merged · 2 commits · Sep 15, 2023
72 changes: 50 additions & 22 deletions google/cloud/dataproc_v1/types/clusters.py
@@ -20,6 +20,7 @@
from google.protobuf import duration_pb2 # type: ignore
from google.protobuf import field_mask_pb2 # type: ignore
from google.protobuf import timestamp_pb2 # type: ignore
from google.type import interval_pb2 # type: ignore
import proto # type: ignore

from google.cloud.dataproc_v1.types import shared
@@ -837,26 +838,20 @@ class InstanceGroupConfig(proto.Message):
Instance Group. See `Dataproc -> Minimum CPU
Platform <https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu>`__.
min_num_instances (int):
Optional. The minimum number of instances to create. If
min_num_instances is set, min_num_instances is used for a
criteria to decide the cluster. Cluster creation will be
failed by being an error state if the total number of
instances created is less than the min_num_instances. For
example, given that num_instances = 5 and min_num_instances
= 3,

- if 4 instances are created and then registered
successfully but one instance is failed, the failed VM
will be deleted and the cluster will be resized to 4
instances in running state.
- if 2 instances are created successfully and 3 instances
are failed, the cluster will be in an error state and
does not delete failed VMs for debugging.
- if 2 instance are created and then registered
successfully but 3 instances are failed to initialize,
the cluster will be in an error state and does not delete
failed VMs for debugging. NB: This can only be set for
primary workers now.
Optional. The minimum number of primary worker instances to
create. If ``min_num_instances`` is set, cluster creation
will succeed if the number of primary workers created is at
least equal to the ``min_num_instances`` number.

Example: Cluster creation request with ``num_instances`` =
``5`` and ``min_num_instances`` = ``3``:

- If 4 VMs are created and 1 instance fails, the failed VM
is deleted. The cluster is resized to 4 instances and
placed in a ``RUNNING`` state.
- If 2 instances are created and 3 instances fail, the
cluster is placed in an ``ERROR`` state. The failed VMs
are not deleted.
instance_flexibility_policy (google.cloud.dataproc_v1.types.InstanceFlexibilityPolicy):
Optional. Instance flexibility Policy
allowing a mixture of VM shapes and provisioning
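
For context, a minimal sketch of how the ``min_num_instances`` behavior documented above surfaces through the client; the project, region, endpoint, and cluster names are placeholders, not part of this change:

```python
from google.cloud import dataproc_v1

# Regional endpoint; replace with the region you actually use.
client = dataproc_v1.ClusterControllerClient(
    client_options={"api_endpoint": "us-central1-dataproc.googleapis.com:443"}
)

cluster = dataproc_v1.Cluster(
    project_id="example-project",
    cluster_name="example-cluster",
    config=dataproc_v1.ClusterConfig(
        worker_config=dataproc_v1.InstanceGroupConfig(
            num_instances=5,
            # Creation succeeds if at least 3 primary workers register;
            # failed VMs are deleted and the cluster is resized down.
            min_num_instances=3,
        ),
    ),
)

operation = client.create_cluster(
    project_id="example-project", region="us-central1", cluster=cluster
)
cluster = operation.result()  # blocks until the cluster reaches RUNNING
```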
@@ -1251,13 +1246,13 @@ class NodeGroup(proto.Message):
"""

class Role(proto.Enum):
r"""Node group roles.
r"""Node pool roles.

Values:
ROLE_UNSPECIFIED (0):
Required unspecified role.
DRIVER (1):
Job drivers run on the node group.
Job drivers run on the node pool.
"""
ROLE_UNSPECIFIED = 0
DRIVER = 1
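
As a point of reference, ``DRIVER`` is the role attached to driver node pools; a hypothetical construction (sizing and names are illustrative only):

```python
from google.cloud import dataproc_v1

# A node pool dedicated to job drivers; sizing is illustrative only.
driver_group = dataproc_v1.NodeGroup(
    roles=[dataproc_v1.NodeGroup.Role.DRIVER],
    node_group_config=dataproc_v1.InstanceGroupConfig(num_instances=2),
)
```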
@@ -2359,6 +2354,22 @@ class DiagnoseClusterRequest(proto.Message):
handle the request.
cluster_name (str):
Required. The cluster name.
tarball_gcs_dir (str):
Optional. The output Cloud Storage directory
for the diagnostic tarball. If not specified, a
task-specific directory in the cluster's staging
bucket will be used.
diagnosis_interval (google.type.interval_pb2.Interval):
Optional. Time interval in which diagnosis
should be carried out on the cluster.
jobs (MutableSequence[str]):
Optional. Specifies a list of jobs on which
diagnosis is to be performed. Format:
projects/{project}/regions/{region}/jobs/{job}
yarn_application_ids (MutableSequence[str]):
Optional. Specifies a list of YARN
applications on which diagnosis is to be
performed.
"""

project_id: str = proto.Field(
@@ -2373,6 +2384,23 @@
proto.STRING,
number=2,
)
tarball_gcs_dir: str = proto.Field(
proto.STRING,
number=4,
)
diagnosis_interval: interval_pb2.Interval = proto.Field(
proto.MESSAGE,
number=6,
message=interval_pb2.Interval,
)
jobs: MutableSequence[str] = proto.RepeatedField(
proto.STRING,
number=10,
)
yarn_application_ids: MutableSequence[str] = proto.RepeatedField(
proto.STRING,
number=11,
)


class DiagnoseClusterResults(proto.Message):
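Taken together, the new fields let a caller scope a diagnosis to a time window, specific jobs, and specific YARN applications. A hedged end-to-end sketch (project, bucket, job, and application IDs are placeholders; the timestamps are arbitrary):

```python
from google.cloud import dataproc_v1
from google.protobuf import timestamp_pb2
from google.type import interval_pb2

client = dataproc_v1.ClusterControllerClient(
    client_options={"api_endpoint": "us-central1-dataproc.googleapis.com:443"}
)

# Restrict diagnosis to a two-hour window.
window = interval_pb2.Interval(
    start_time=timestamp_pb2.Timestamp(seconds=1_694_736_000),
    end_time=timestamp_pb2.Timestamp(seconds=1_694_743_200),
)

request = dataproc_v1.DiagnoseClusterRequest(
    project_id="example-project",
    region="us-central1",
    cluster_name="example-cluster",
    tarball_gcs_dir="gs://example-bucket/diagnostics/",
    diagnosis_interval=window,
    jobs=["projects/example-project/regions/us-central1/jobs/example-job"],
    yarn_application_ids=["application_1694736000000_0001"],
)

operation = client.diagnose_cluster(request=request)
print(operation.result().output_uri)  # Cloud Storage URI of the tarball
```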
@@ -8,7 +8,7 @@
],
"language": "PYTHON",
"name": "google-cloud-dataproc",
"version": "5.5.1"
"version": "0.1.0"
},
"snippets": [
{
2 changes: 1 addition & 1 deletion scripts/fixup_dataproc_v1_keywords.py
@@ -50,7 +50,7 @@ class dataprocCallTransformer(cst.CSTTransformer):
'delete_cluster': ('project_id', 'region', 'cluster_name', 'cluster_uuid', 'request_id', ),
'delete_job': ('project_id', 'region', 'job_id', ),
'delete_workflow_template': ('name', 'version', ),
'diagnose_cluster': ('project_id', 'region', 'cluster_name', ),
'diagnose_cluster': ('project_id', 'region', 'cluster_name', 'tarball_gcs_dir', 'diagnosis_interval', 'jobs', 'yarn_application_ids', ),
'get_autoscaling_policy': ('name', ),
'get_batch': ('name', ),
'get_cluster': ('project_id', 'region', 'cluster_name', ),
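For anyone unfamiliar with the fixup scripts: they rewrite pre-1.0-style positional calls into request dicts, and this entry is what lets that rewrite recognize the new keywords instead of dropping them. Roughly, with illustrative values:

```python
# Before fixup: old-style positional call.
client.diagnose_cluster("example-project", "us-central1", "example-cluster")

# After fixup: arguments folded into a request dict, where the new
# optional keys (tarball_gcs_dir, diagnosis_interval, jobs,
# yarn_application_ids) are now accepted as well.
client.diagnose_cluster(
    request={
        "project_id": "example-project",
        "region": "us-central1",
        "cluster_name": "example-cluster",
    }
)
```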
1 change: 1 addition & 0 deletions tests/unit/gapic/dataproc_v1/test_cluster_controller.py
@@ -51,6 +51,7 @@
from google.protobuf import field_mask_pb2 # type: ignore
from google.protobuf import json_format
from google.protobuf import timestamp_pb2 # type: ignore
from google.type import interval_pb2 # type: ignore
import grpc
from grpc.experimental import aio
from proto.marshal.rules import wrappers
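The new import supports test cases along these lines; a minimal sketch of the round-trip the generated tests exercise (field values are placeholders):

```python
from google.cloud.dataproc_v1.types import clusters
from google.protobuf import timestamp_pb2
from google.type import interval_pb2


def test_diagnose_cluster_request_accepts_new_fields():
    # proto-plus marshals the well-known Interval type transparently.
    request = clusters.DiagnoseClusterRequest(
        project_id="example-project",
        region="us-central1",
        cluster_name="example-cluster",
        tarball_gcs_dir="gs://example-bucket/diagnostics/",
        diagnosis_interval=interval_pb2.Interval(
            start_time=timestamp_pb2.Timestamp(seconds=1_694_736_000),
            end_time=timestamp_pb2.Timestamp(seconds=1_694_743_200),
        ),
        jobs=["projects/example-project/regions/us-central1/jobs/j1"],
        yarn_application_ids=["application_1694736000000_0001"],
    )
    assert request.tarball_gcs_dir == "gs://example-bucket/diagnostics/"
    assert list(request.yarn_application_ids) == [
        "application_1694736000000_0001"
    ]
```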