-
Notifications
You must be signed in to change notification settings - Fork 42
/
manager.py
executable file
·377 lines (314 loc) · 13.7 KB
/
manager.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
import json
import logging
import paramiko
from paramiko import SSHException
from pyparsing import Combine, Literal, OneOrMore, nums, srange, Word
import salt.client
import subprocess
from subprocess import PIPE
import sys
import unittest
from circularlist import CircularList
from etcd import Etcd
from formation import Formation
from load import Load
class ManagerError(Exception):
    '''
    Generic error raised by the Manager.

    Derives from Exception rather than BaseException so that ordinary
    ``except Exception`` handlers can catch it. BaseException is reserved
    for interpreter-level exits such as SystemExit and KeyboardInterrupt.
    '''
    pass
class Manager(object):
    '''
    A manager to orchestrate the creation and
    deletion of container clusters.

    Commands are run on remote hosts through the salt LocalClient and
    formation state is read from / written to ETCD through the Etcd helper.
    '''

    def __init__(self, logger):
        '''
        Build the salt client, the ETCD helper and the formation key parser.

        logger -- logger used for all messages; it is also handed to Etcd.
        '''
        self.salt_client = salt.client.LocalClient()
        self.etcd = Etcd(logger)
        self.logger = logger
        # Parse out the username and formation name
        # from the ETCD directory string
        name_chars = srange("[0-9a-zA-Z_-]")
        self.formation_parser = Literal('/formations/') + \
            Word(name_chars).setResultsName('username') + Literal('/') + \
            Word(name_chars).setResultsName('formation_name')

    def fqdn_to_shortname(self, fqdn):
        '''Return everything before the first '.' of fqdn (or fqdn itself).'''
        # split() returns the whole string as element 0 when there is no dot
        return fqdn.split('.')[0]

    def check_salt_key_used(self, hostname):
        '''
        Return True when hostname appears anywhere in the `salt-key` output.

        This is a substring test on the raw command output, so a hostname
        that is a prefix of another key also counts as used.
        '''
        self.logger.info("Checking if the key for {host} is already used".format(
            host=hostname))
        # The command is a constant, so shell=True carries no injection risk
        s = subprocess.Popen('salt-key', shell=True, stdout=PIPE)
        salt_list = s.communicate()[0]
        # On Python 3 communicate() yields bytes; decode so the `in` test
        # below compares text with text instead of raising TypeError.
        if not isinstance(salt_list, str):
            salt_list = salt_list.decode('utf-8', 'replace')
        return hostname in salt_list

    def check_port_used(self, host, port):
        '''
        Return True when netstat on host shows a listening tcp line that
        matches port.

        NOTE(review): the remote grep is a plain substring match, so port 80
        also matches a listener on 8080 -- confirm whether that is acceptable.
        '''
        self.logger.info("Checking if {port} on {host} is open with salt-client".format(
            host=host, port=port))
        results = self.salt_client.cmd(host, 'cmd.run',
            ['netstat -an | grep %s | grep tcp | grep -i listen' % port],
            expr_form='list')
        self.logger.debug("Salt return: {lsof}".format(lsof=results[host]))
        # Compare by value; `is not ''` tested object identity and only
        # worked by accident of string interning.
        return results[host] != ''

    # TODO
    def check_for_existing_formation(self, formation_name):
        # If the user passed in an existing formation name lets append to it
        pass

    def get_docker_cluster(self):
        '''Return the 'docker_cluster' ETCD key split on ',' or None if unset.'''
        cluster = self.etcd.get_key('docker_cluster')
        if cluster is None:
            return None
        return cluster.split(',')

    def get_load_balancer_cluster(self):
        '''Return the 'nginx_cluster' ETCD key split on ',' or None if unset.'''
        cluster = self.etcd.get_key('nginx_cluster')
        if cluster is None:
            return None
        return cluster.split(',')

    def order_cluster_by_load(self, cluster_list):
        '''
        Return a list of Load objects for cluster_list, sorted ascending by
        the fifteen minute load average.

        Each host is asked for /proc/loadavg through salt. Hosts whose output
        cannot be parsed are logged and left out of the result.
        '''
        # Imported here so the block does not depend on the file-level
        # pyparsing import list being extended.
        from pyparsing import ParseException

        # Sample salt output
        # {'dlceph01.drwg.local': '0.27 0.16 0.15 1/1200 26234'}
        # define grammar
        point = Literal('.')
        number = Word(nums)
        floatnumber = Combine(number + point + number)
        float_list = OneOrMore(floatnumber)

        results = self.salt_client.cmd(','.join(cluster_list), 'cmd.run',
            ['cat /proc/loadavg'], expr_form='list')
        load_list = []
        self.logger.debug("Salt load return: {load}".format(load=results))
        for host in results:
            host_load = results[host]
            # parseString raises on a failed match instead of returning a
            # falsy value, so the failure has to be caught to reach the
            # error branch below.
            try:
                match = float_list.parseString(host_load)
            except ParseException:
                match = None
            if match and len(match) >= 3:
                one_min, five_min, fifteen_min = match[0], match[1], match[2]
                self.logger.debug("Adding Load({host}, {one_min}, {five_min}, {fifteen_min}".format(
                    host=host, one_min=one_min, five_min=five_min, fifteen_min=fifteen_min))
                load_list.append(Load(host, one_min, five_min, fifteen_min))
            else:
                self.logger.error("Could not parse host load output")

        # Sort the list by fifteen min load. The parsed values are strings;
        # float() keeps '10.00' from sorting ahead of '9.00' and is a no-op
        # if Load already stores numbers.
        load_list = sorted(load_list, key=lambda x: float(x.fifteen_min_load))
        for load in load_list:
            self.logger.debug("Sorted load list: " + str(load))
        return load_list

    # Return a list of formations the user owns
    def list_formations(self, username):
        '''
        Return the formation names found under 'formations/<username>' in ETCD.

        Directory entries that do not match the formation key grammar are
        logged and skipped.
        '''
        from pyparsing import ParseException

        formation_list = []
        formations = self.etcd.list_directory('formations/'+username)
        # Tolerate an empty/None listing instead of failing on iteration
        for formation in formations or []:
            # parseString raises rather than returning a falsy result
            try:
                parse_results = self.formation_parser.parseString(formation)
            except ParseException:
                parse_results = None
            if parse_results:
                formation_list.append(parse_results['formation_name'])
            else:
                self.logger.error("Could not parse the ETCD string")
        self.logger.info('Formation list {formations} for user {user}'.format(
            formations=formation_list, user=username))
        return formation_list

    # Load the formation and return a Formation object
    def load_formation_from_etcd(self, username, formation_name):
        '''
        Read '/formations/<username>/<formation_name>' from ETCD and return a
        Formation populated with one app per stored entry.
        '''
        f = Formation(username, formation_name)
        # The stored value is decoded twice, i.e. it is expected to be a JSON
        # string that itself contains JSON.
        app_list = json.loads(json.loads(
            self.etcd.get_key('/formations/{username}/{formation_name}'.format(
                username=username, formation_name=formation_name))))
        for app in app_list:
            # If our host doesn't support swapping we're going to get some garbage
            # message in here
            if "WARNING" in app['container_id']:
                app['container_id'] = app['container_id'].replace("WARNING: Your "\
                    "kernel does not support memory swap capabilities. Limitation discarded.\n","")
                #Message changed in docker 0.8.0
                app['container_id'] = app['container_id'].replace("WARNING: WARNING:"\
                    "Your kernel does not support swap limit capabilities. Limitation "\
                    "discarded.\n","")
            # str.strip returns a new string; the result must be stored or
            # the newline is never actually removed.
            app['container_id'] = app['container_id'].strip('\n')

            # Set volumes if needed
            volumes = None
            if app['volumes']:
                self.logger.info("Setting volumes to: " + ''.join(app['volumes']))
                volumes = app['volumes']

            # NOTE(review): create_containers passes ten arguments to add_app
            # (with docker_image before the volume list) while this call
            # passes nine -- confirm against Formation.add_app.
            f.add_app(app['container_id'], app['hostname'], app['cpu_shares'],
                app['ram'], app['port_list'], app['ssh_port'], 22, app['host_server'], volumes)

        # Return fully parsed and populated formation object
        return f

    def save_formation_to_etcd(self, formation):
        '''Store formation under 'formations/<username>/<name>' in ETCD.'''
        name = formation.name
        username = formation.username
        self.etcd.set_key('formations/{username}/{formation_name}'.format(
            username=username, formation_name=name), formation)

    # TODO write code to add new apps to load balancer
    def add_app_to_nginx(self, app):
        pass

    # TODO write code to add new apps to the load balancer
    def add_app_to_apache(self, app):
        pass

    def start_application(self, app):
        '''
        Run `docker run` for app on app.host_server through salt and record
        the first 12 characters of the returned container id on the app.
        '''
        # Run a salt cmd to startup the formation
        # NOTE(review): create_containers multiplies ram up to bytes, yet the
        # value is emitted here with an 'm' suffix -- confirm intended unit.
        docker_command = "docker run -c={cpu_shares} -d -i -t -h=\"{hostname}\" -m={ram}m "\
            "--name={hostname} {port_list} {volume_list} {image} /sbin/my_init -- bash"
        self.logger.info("Port list %s" % app.port_list)
        port_list = ' '.join('-p ' + port for port in app.port_list)

        # Only create this list if needed
        volume_list = ''
        if app.volume_list:
            volume_list = ' '.join('-v ' + volume for volume in app.volume_list)

        d = docker_command.format(cpu_shares=app.cpu_shares,
            hostname=app.hostname, ram=app.ram, image=app.docker_image,
            port_list=port_list, volume_list=volume_list)

        self.logger.info("Starting up docker container on {host_server} with cmd: {docker_cmd}".format(
            host_server=app.host_server, docker_cmd=d))

        salt_process = self.salt_client.cmd(app.host_server, 'cmd.run', [d], expr_form='list')
        container_id = salt_process[app.host_server]
        if not container_id:
            # Previously this case was skipped without any trace
            self.logger.error("No container id returned by {host_server}".format(
                host_server=app.host_server))
            return

        if "WARNING" in container_id:
            container_id = container_id.replace("WARNING: "\
                "Your kernel does not support swap limit capabilities. Limitation "\
                "discarded.\n","")
        # Keep the stripped value; strip() does not modify in place
        container_id = container_id.strip("\n")
        #Docker only uses the first 12 chars to identify a container
        app.change_container_id(container_id[0:12])

    def bootstrap_application(self, app):
        '''
        Copy bootstrap.sh and start.sh to the container over SFTP and run
        start.sh over SSH.

        When an SSHException is raised the container is stopped and removed
        via delete_container. The SSH client and SFTP transport are closed
        on every path.
        '''
        # Log into the host with paramiko and run the salt bootstrap script
        host_server = self.fqdn_to_shortname(app.host_server)

        self.logger.info("Bootstrapping {hostname} on server: {host_server} port: {port}".format(
            hostname=app.hostname,
            host_server=host_server,
            port=app.ssh_port))

        # NOTE(review): root credentials are hard-coded below; consider
        # moving them to configuration or key based auth.
        ssh = paramiko.SSHClient()
        transport = None
        try:
            ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
            ssh.connect(hostname=host_server, port=app.ssh_port,
                username='root', password='newroot')

            transport = paramiko.Transport((host_server, app.ssh_port))
            transport.connect(username = 'root', password = 'newroot')
            sftp = paramiko.SFTPClient.from_transport(transport)
            sftp.put('bootstrap.sh', '/root/bootstrap.sh')
            sftp.put('start.sh', '/root/start.sh')

            ssh.exec_command("chmod +x /root/bootstrap.sh")
            ssh.exec_command("chmod +x /root/start.sh")

            stdin, stdout, stderr = ssh.exec_command("bash /root/start.sh")
            self.logger.debug(''.join(stdout.readlines()))
        except SSHException:
            self.logger.error("Failed to log into server. Shutting it down and cleaning up the mess.")
            self.delete_container(app.host_server, app.container_id)
        finally:
            # Release both connections whether or not the bootstrap worked
            if transport is not None:
                transport.close()
            ssh.close()

    # Stops and deletes a container
    def delete_container(self, host_server, container_id):
        '''Run `docker stop` then `docker rm` for container_id on host_server.'''
        results = self.salt_client.cmd(host_server, 'cmd.run',
            ['docker stop {container_id}'.format(container_id=container_id)],
            expr_form='list')
        self.logger.debug("Salt return: {stop_cmd}".format(stop_cmd=results[host_server]))

        results = self.salt_client.cmd(host_server, 'cmd.run',
            ['docker rm {container_id}'.format(container_id=container_id)],
            expr_form='list')
        self.logger.debug("Salt return: {rm_cmd}".format(rm_cmd=results[host_server]))

    # Stops and deletes a formation. Use with caution
    def delete_formation(self, user, formation_name):
        '''
        Not implemented yet: only logs an error when formation_name is not
        one of the user's formations.
        '''
        formation_list = self.list_formations(user)
        if formation_name not in formation_list:
            self.logger.error("Formation name not found!")

    def list_containers(self, user, formation_name):
        pass

    def create_containers(self, user, number, formation_name,
            cpu_shares, ram, port_list, hostname_scheme, volume_list,
            docker_image, force_host_server=None):
        '''
        Build a formation of `number` containers, start each one and save
        the formation to ETCD.

        user              -- owner of the formation
        number            -- how many containers to create
        formation_name    -- name the formation is stored under
        cpu_shares        -- passed through to the app definition
        ram               -- memory in MB; multiplied up to bytes before use
        port_list         -- strings, either 'port' or 'host_port:container_port'
        hostname_scheme   -- prefix; a zero padded 3 digit index is appended
        volume_list       -- passed through to the app definition
        docker_image      -- image each container is started from
        force_host_server -- when set, every container is placed on this host

        Raises ManagerError when the docker cluster cannot be read from ETCD.
        Calls sys.exit(-1) when a generated hostname already has a salt key.
        '''
        f = Formation(user, formation_name)
        # Convert ram to bytes from MB
        ram = ram * 1024 * 1024

        # Get the cluster machines on each creation
        cluster_list = self.get_docker_cluster()
        if cluster_list is None:
            # Fail with a clear message instead of a TypeError from join()
            raise ManagerError("Could not read the docker_cluster key from ETCD")
        circular_cluster_list = CircularList(self.order_cluster_by_load(cluster_list))

        # Loop for the requested amount of containers to be created
        for i in range(1, number+1):
            # [{"host_port":ssh_host_port, "container_port":ssh_container_port}]
            ssh_host_port = 9022 + i
            ssh_container_port = 22
            host_server = circular_cluster_list[i].hostname
            hostname = '{hostname}{number}'.format(
                hostname=hostname_scheme,
                number=str(i).zfill(3))

            # First check if we can add this host to salt. If not exit with -1
            if self.check_salt_key_used(hostname):
                self.logger.error('Salt key is already taken for {hostname}'.format(
                    hostname=hostname))
                sys.exit(-1)

            # We are being asked to overwrite this
            if force_host_server:
                host_server = force_host_server

            validated_ports = []

            while self.check_port_used(host_server, ssh_host_port):
                ssh_host_port = ssh_host_port + 1

            # NOTE(review): ports are only checked against what is already
            # listening; containers from this same call start later, so two
            # of them placed on one host can be given the same host port.
            for port in port_list:
                self.logger.info("Checking if port {port} on {host} is in use".format(
                    port=port, host=host_server))
                if ':' in port:
                    ports = port.split(':')
                    # Only check if the host port is free. The container port should be free
                    while self.check_port_used(host_server, ports[0]):
                        ports[0] = int(ports[0]) + 1
                    # Add this to the validated port list
                    validated_ports.append('{host_port}:{container_port}'.format(
                        host_port = str(ports[0]),
                        container_port = str(ports[1])))
                else:
                    while self.check_port_used(host_server, port):
                        port = int(port) + 1
                    validated_ports.append(str(port))

            self.logger.info('Adding app to formation {formation_name}: {hostname} cpu_shares={cpu} '
                'ram={ram} ports={ports} host_server={host_server} docker_image={docker_image}'.format(
                    formation_name=formation_name, hostname=hostname,
                    cpu=cpu_shares, ram=ram, ports=validated_ports, host_server=host_server,
                    docker_image=docker_image))

            f.add_app(None, '{hostname}'.format(hostname=hostname),
                cpu_shares, ram, validated_ports, ssh_host_port,
                ssh_container_port, host_server, docker_image, volume_list)

        # Lets get this party started
        # (bootstrap_application is intentionally not called here yet)
        for app in f.application_list:
            self.start_application(app)

        self.logger.info("Saving the formation to ETCD")
        self.save_formation_to_etcd(f)
class TestManager(unittest.TestCase):
    '''
    Tests for Manager.

    Most cases are placeholders that fail on purpose until a real test is
    written. test_saveFormationToEtcd builds a real Manager, so it needs
    whatever salt and ETCD services Manager and Etcd connect to.
    '''

    def test_checkPortUsed(self):
        # TODO: placeholder, fails until implemented
        self.assertEqual(1, 0)

    def test_getDockerCluster(self):
        # TODO: placeholder, fails until implemented
        self.assertEqual(1, 0)

    def test_getLoadBalancerCluster(self):
        # TODO: placeholder, fails until implemented
        self.assertEqual(1, 0)

    def test_orderClusterByLoad(self):
        # TODO: placeholder, fails until implemented
        self.assertEqual(1, 0)

    def test_deleteContainer(self):
        # TODO: placeholder, fails until implemented
        self.assertEqual(1, 0)

    def test_saveFormationToEtcd(self):
        '''Save a one-app formation and compare the value read back from ETCD.'''
        logger = logging.getLogger()
        stream = logging.StreamHandler(sys.stdout)
        stream.setLevel(logging.DEBUG)
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        stream.setFormatter(formatter)
        logger.addHandler(stream)
        # The root logger outlives this test; detach the handler afterwards
        # so repeated runs do not pile up duplicate output.
        self.addCleanup(logger.removeHandler, stream)

        manager = Manager(logger)
        expected_text = '[{"username": "cholcomb", "cpu-shares": 102, '\
            '"ram": 100, "hostname": "app01", "ports": [{"host_port": 8080, '\
            '"container_port": 8080}], "host_server": "dlceph02"}]'
        username = 'cholcomb'
        formation_name = 'test_formation'
        f = Formation(username, formation_name)
        f.add_app(username, 'app01', 102, 100, [{"host_port":8080, "container_port":8080}], 'dlceph02')
        manager.save_formation_to_etcd(f)
        # NOTE(review): save_formation_to_etcd writes under
        # 'formations/<username>/<name>' but this reads '<username>/<name>'
        # -- confirm which key is intended.
        etcd_ret = manager.etcd.get_key('{username}/{hostname}'.format(username=username, hostname=formation_name))

        logger.debug("Etcd_ret: %s" % etcd_ret)
        logger.debug("Expected text: %s" % expected_text)
        # assertEquals is a deprecated alias that was removed in Python 3.12
        self.assertEqual(etcd_ret, expected_text)