-
Notifications
You must be signed in to change notification settings - Fork 503
/
instance.py
89 lines (81 loc) · 3.65 KB
/
instance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
"""AWS instance provisioning."""
from typing import Dict, List, Any, Optional
from botocore import config
from sky.adaptors import aws
# Passed as 'max_attempts' in the botocore retry config of every EC2
# resource created in this module.
BOTO_MAX_RETRIES = 12
# Tag uniquely identifying all nodes of a cluster
TAG_RAY_CLUSTER_NAME = 'ray-cluster-name'
def _filter_instances(ec2, filters: List[Dict[str, Any]],
included_instances: Optional[List[str]],
excluded_instances: Optional[List[str]]):
instances = ec2.instances.filter(Filters=filters)
if included_instances is not None and excluded_instances is not None:
raise ValueError('"included_instances" and "exclude_instances"'
'cannot be specified at the same time.')
if included_instances is not None:
instances = instances.filter(InstanceIds=included_instances)
elif excluded_instances is not None:
included_instances = []
for inst in list(instances):
if inst.id not in excluded_instances:
included_instances.append(inst.id)
instances = instances.filter(InstanceIds=included_instances)
return instances
def stop_instances(region: str,
                   cluster_name: str,
                   included_instances: Optional[List[str]] = None,
                   excluded_instances: Optional[List[str]] = None) -> None:
    """Request a stop of the cluster's pending/running EC2 instances.

    The interface contract is described in sky/provision/__init__.py.

    Args:
        region: Region name used to create the EC2 resource.
        cluster_name: Value matched against the cluster-name tag.
        included_instances: Optional instance IDs to restrict the stop to.
        excluded_instances: Optional instance IDs to leave untouched.
    """
    retry_config = config.Config(retries={'max_attempts': BOTO_MAX_RETRIES})
    ec2 = aws.resource('ec2', region_name=region, config=retry_config)

    # Only instances that are still coming up or already running.
    state_filter = {
        'Name': 'instance-state-name',
        'Values': ['pending', 'running'],
    }
    # Only instances tagged as belonging to this cluster.
    cluster_filter = {
        'Name': f'tag:{TAG_RAY_CLUSTER_NAME}',
        'Values': [cluster_name],
    }

    targets = _filter_instances(ec2, [state_filter, cluster_filter],
                                included_instances, excluded_instances)
    targets.stop()
    # TODO(suquark): The GCP and Azure implementations currently wait until
    #  the cluster is fully terminated, whereas other clouds only trigger
    #  the process (via an HTTP call) and return. It is unclear which
    #  behavior should be expected. For now nothing is awaited here, since
    #  that is the default behavior of most cloud implementations
    #  (including AWS).
def terminate_instances(region: str,
                        cluster_name: str,
                        included_instances: Optional[List[str]] = None,
                        excluded_instances: Optional[List[str]] = None) -> None:
    """Request termination of the cluster's EC2 instances.

    The interface contract is described in sky/provision/__init__.py.

    Args:
        region: Region name used to create the EC2 resource.
        cluster_name: Value matched against the cluster-name tag.
        included_instances: Optional instance IDs to restrict termination to.
        excluded_instances: Optional instance IDs to leave untouched.
    """
    # Every state except 'shutting-down' and 'terminated'.
    live_states = ['pending', 'running', 'stopping', 'stopped']
    cluster_tag = f'tag:{TAG_RAY_CLUSTER_NAME}'

    ec2 = aws.resource(
        'ec2',
        region_name=region,
        config=config.Config(retries={'max_attempts': BOTO_MAX_RETRIES}),
    )
    # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ec2.html#EC2.Instance
    doomed = _filter_instances(
        ec2,
        [
            {'Name': 'instance-state-name', 'Values': live_states},
            {'Name': cluster_tag, 'Values': [cluster_name]},
        ],
        included_instances,
        excluded_instances,
    )
    doomed.terminate()
    # TODO(suquark): The GCP and Azure implementations currently wait until
    #  the cluster is fully terminated, whereas other clouds only trigger
    #  the termination process (via an HTTP call) and return. It is unclear
    #  which behavior should be expected. For now termination is not
    #  awaited, since that is the default behavior of most cloud
    #  implementations (including AWS).