Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Inm/lambda function and gradle task to run grafana on aws with beam #2408

Merged
merged 16 commits into from
Mar 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions aws/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,8 @@ task deploy(type: AWSLambdaInvokeTask) {
"region": "${ext.getParameterValue('region') ?: defaultRegion}",
"shutdown_wait": "${ext.getParameterValue('shutdownWait')}",
"shutdown_behaviour": "${ext.getParameterValue('shutdownBehaviour')}",
"command": "deploy"
"command": "deploy",
"run_grafana" : "${ext.getParameterValue('runGrafana') ?: false}"
}"""
functionName = "simulateBeam"
invocationType = InvocationType.RequestResponse
Expand Down Expand Up @@ -218,7 +219,7 @@ def checkInstanceId() {
}

def buildPayload(command) {
println "command:"+command
println "command:" + command
return """{
"instance_ids": "${project.findProperty('instanceIds')}",
"region": "${project.findProperty('region') ?: defaultRegion}",
Expand All @@ -237,7 +238,7 @@ def getCurrentGitBranch() {

/**
 * Returns the git user e-mail of the working directory with '@' replaced by
 * "AT" and every '.' replaced by '_'.
 *
 * The raw value comes from getGitResultFromWorkingDirUsing('git config user.email', ...);
 * "GitUserEmailNotFound" is passed to that helper as its default result.
 */
def getCurrentGitUserEmail() {
    def rawGitUserEmail = getGitResultFromWorkingDirUsing('git config user.email', "GitUserEmailNotFound")
    // replaceAll takes a regex, hence the escaped dot in the second call.
    return rawGitUserEmail.replaceAll('@', "AT").replaceAll("\\.", "_")
}

def getGitResultFromWorkingDirUsing(command, defaultResult) {
Expand Down
169 changes: 117 additions & 52 deletions aws/src/main/python/beam_lambda/lambda_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@

# Shell command that runs BEAM through gradle's `:run` task for one config file.
# `$MAX_RAM` is substituted by the str.replace chain in deploy_handler; `$cf` is
# left for the shell (presumably the `for cf in $CONFIG` loop variable of the
# init script -- confirm against the full template).
CONFIG_SCRIPT = '''./gradlew --stacktrace :run -PappArgs="['--config', '$cf']" -PmaxRAM=$MAX_RAM'''

# Variant of CONFIG_SCRIPT that first runs the `grafanaStart` gradle task with
# sudo; deploy_handler selects it when run_grafana == TRUE.
# NOTE(review): the second command starts with "- ", presumably so it becomes its
# own item once $RUN_SCRIPT is spliced into the cloud-init `runcmd` list --
# confirm the indentation against the template.
CONFIG_SCRIPT_WITH_GRAFANA = '''sudo ./gradlew --stacktrace grafanaStart
- ./gradlew --stacktrace :run -PappArgs="['--config', '$cf']" -PmaxRAM=$MAX_RAM'''

# Shell command that runs gradle's `:execute` task with main class `$MAIN_CLASS`,
# `$cf` as the application arguments and `$MAX_RAM` as the RAM limit.
EXECUTE_SCRIPT = '''./gradlew --stacktrace :execute -PmainClass=$MAIN_CLASS -PappArgs="$cf" -PmaxRAM=$MAX_RAM'''

# Shell command that invokes ./bin/experiment.sh with `$cf` and the literal
# argument `cloud`.
EXPERIMENT_SCRIPT = '''./bin/experiment.sh $cf cloud'''
Expand All @@ -22,11 +25,10 @@
- finalPath="$path2";
- done;
- done;
- sudo cp /home/ubuntu/git/beam/gc_* "$finalPath"
- sudo cp /var/log/cloud-init-output.log "$finalPath"
- sudo aws --region "$S3_REGION" s3 cp "$finalPath" s3://beam-outputs/"$finalPath" --recursive;
- s3p="$s3p, https://s3.us-east-2.amazonaws.com/beam-outputs/index.html#$finalPath"
- sudo aws --region "$S3_REGION" s3 ls s3://beam-outputs/$finalPath | wc -l
- s3_output_count = $?'''
- s3p="$s3p, https://s3.us-east-2.amazonaws.com/beam-outputs/index.html#$finalPath"'''

# Fallback for the `end_script` event parameter (see deploy_handler): it only
# echoes that no end script was supplied.
END_SCRIPT_DEFAULT = '''echo "End script not provided."'''

Expand Down Expand Up @@ -63,12 +65,35 @@
0 * * * * curl -X POST -H "Content-type: application/json" --data '"'"'{"$(ec2metadata --instance-type) instance $(ec2metadata --instance-id) running... \\n Batch [$UID] completed and instance of type $(ec2metadata --instance-type) is still running in $REGION since last $(($(($(date +%s) - $(cat /tmp/.starttime))) / 3600)) Hour $(($(($(date +%s) - $(cat /tmp/.starttime))) / 60)) Minute."}'"'"
path: /tmp/slack_notification
runcmd:
- ln -sf /var/log/cloud-init-output.log /home/ubuntu/git/beam/cloud-init-output.log
- echo "-------------------Starting Beam Sim----------------------"
- echo $(date +%s) > /tmp/.starttime
- cd /home/ubuntu/git/beam
- rm -rf /home/ubuntu/git/beam/test/input/sf-light/r5/network.dat
- ln -sf /var/log/cloud-init-output.log ./cloud-init-output.log
- hello_msg=$(printf "Run Started \\n Run Name** $TITLED** \\n Instance ID %s \\n Instance type **%s** \\n Host name **%s** \\n Web browser ** http://%s:8000 ** \\n Region $REGION \\n Batch $UID \\n Branch **$BRANCH** \\n Commit $COMMIT" $(ec2metadata --instance-id) $(ec2metadata --instance-type) $(ec2metadata --public-hostname) $(ec2metadata --public-hostname))
- start_json=$(printf "{
\\"command\\":\\"add\\",
\\"type\\":\\"beam\\",
\\"sheet_id\\":\\"$SHEET_ID\\",
\\"run\\":{
\\"status\\":\\"Run Started\\",
\\"name\\":\\"$TITLED\\",
\\"instance_id\\":\\"%s\\",
\\"instance_type\\":\\"%s\\",
\\"host_name\\":\\"%s\\",
\\"browser\\":\\"http://%s:8000\\",
\\"branch\\":\\"$BRANCH\\",
\\"region\\":\\"$REGION\\",
\\"batch\\":\\"$UID\\",
\\"commit\\":\\"$COMMIT\\",
\\"s3_link\\":\\"%s\\",
\\"max_ram\\":\\"$MAX_RAM\\",
\\"config_file\\":\\"$CONFIG\\",
\\"sigopt_client_id\\":\\"$SIGOPT_CLIENT_ID\\",
\\"sigopt_dev_id\\":\\"$SIGOPT_DEV_ID\\"
}
}" $(ec2metadata --instance-id) $(ec2metadata --instance-type) $(ec2metadata --public-hostname) $(ec2metadata --public-hostname))
- echo $start_json
- chmod +x /tmp/slack.sh
- echo "notification sent..."
- echo "notification saved..."
Expand All @@ -91,9 +116,9 @@
- export SIGOPT_DEV_ID="$SIGOPT_DEV_ID"
- echo $MAXRAM
- /tmp/slack.sh "$hello_msg"
- /home/ubuntu/git/glip.sh -i "http://icons.iconarchive.com/icons/uiconstock/socialmedia/32/AWS-icon.png" -a "Run Started" -b "Run Name** $TITLED** \\n Instance ID $(ec2metadata --instance-id) \\n Instance type **$(ec2metadata --instance-type)** \\n Host name **$(ec2metadata --public-hostname)** \\n Web browser **http://$(ec2metadata --public-hostname):8000** \\n Region $REGION \\n Batch $UID \\n Branch **$BRANCH** \\n Commit $COMMIT"

- curl -X POST "https://ca4ircx74d.execute-api.us-east-2.amazonaws.com/production/spreadsheet" -H "Content-Type:application/json" --data "$start_json"
- s3p=""
- s3_output_count=0
- for cf in $CONFIG
- do
- echo "-------------------running $cf----------------------"
Expand All @@ -104,14 +129,32 @@
- then
- s3glip="\\n S3 output url ${s3p#","}"
- fi
- if [ $s3_output_count > 0 ]
- then
- sudo aws ec2 create-tags --region $REGION --resources $(ec2metadata --instance-id) --tags Key=BackedUp,Value=$s3p
- fi
- bye_msg=$(printf "Run Completed \\n Run Name** $TITLED** \\n Instance ID %s \\n Instance type **%s** \\n Host name **%s** \\n Web browser ** http://%s:8000 ** \\n Region $REGION \\n Batch $UID \\n Branch **$BRANCH** \\n Commit $COMMIT %s \\n Shutdown in $SHUTDOWN_WAIT minutes" $(ec2metadata --instance-id) $(ec2metadata --instance-type) $(ec2metadata --public-hostname) $(ec2metadata --public-hostname) "$s3glip")
- echo "$bye_msg"
- stop_json=$(printf "{
\\"command\\":\\"add\\",
\\"type\\":\\"beam\\",
\\"sheet_id\\":\\"$SHEET_ID\\",
\\"run\\":{
\\"status\\":\\"Run Completed\\",
\\"name\\":\\"$TITLED\\",
\\"instance_id\\":\\"%s\\",
\\"instance_type\\":\\"%s\\",
\\"host_name\\":\\"%s\\",
\\"browser\\":\\"http://%s:8000\\",
\\"branch\\":\\"$BRANCH\\",
\\"region\\":\\"$REGION\\",
\\"batch\\":\\"$UID\\",
\\"commit\\":\\"$COMMIT\\",
\\"s3_link\\":\\"%s\\",
\\"max_ram\\":\\"$MAX_RAM\\",
\\"config_file\\":\\"$CONFIG\\",
\\"sigopt_client_id\\":\\"$SIGOPT_CLIENT_ID\\",
\\"sigopt_dev_id\\":\\"$SIGOPT_DEV_ID\\"
}
}" $(ec2metadata --instance-id) $(ec2metadata --instance-type) $(ec2metadata --public-hostname) $(ec2metadata --public-hostname) "${s3p#","}")
- /tmp/slack.sh "$bye_msg"
- /home/ubuntu/git/glip.sh -i "http://icons.iconarchive.com/icons/uiconstock/socialmedia/32/AWS-icon.png" -a "Run Completed" -b "Run Name** $TITLED** \\n Instance ID $(ec2metadata --instance-id) \\n Instance type **$(ec2metadata --instance-type)** \\n Host name **$(ec2metadata --public-hostname)** \\n Web browser **http://$(ec2metadata --public-hostname):8000** \\n Region $REGION \\n Batch $UID \\n Branch **$BRANCH** \\n Commit $COMMIT $s3glip \\n Shutdown in $SHUTDOWN_WAIT minutes"
- curl -X POST "https://ca4ircx74d.execute-api.us-east-2.amazonaws.com/production/spreadsheet" -H "Content-Type:application/json" --data "$stop_json"
- $END_SCRIPT
- sudo shutdown -h +$SHUTDOWN_WAIT
'''))
Expand Down Expand Up @@ -177,30 +220,30 @@ def validate(name):

def deploy(script, instance_type, region_prefix, shutdown_behaviour, instance_name, volume_size):
    """Launch a single EC2 instance and return its instance id.

    Uses the module-level ``ec2`` client (presumably set up by ``init_ec2`` --
    confirm) and reads region-specific settings from environment variables
    whose names are built from ``region_prefix``.

    Args:
        script: Text passed to the instance as ``UserData``.
        instance_type: EC2 instance type to launch.
        region_prefix: Prefix for the ``IMAGE_ID``, ``KEY_NAME`` and
            ``SECURITY_GROUP`` environment variable names; the caller passes
            the region with dashes turned into underscores plus a trailing
            underscore.
        shutdown_behaviour: Value for ``InstanceInitiatedShutdownBehavior``.
        instance_name: Value of the instance's ``Name`` tag.
        volume_size: Size of the ``/dev/sda1`` gp2 volume.

    Returns:
        The ``InstanceId`` of the first (and only) instance in the response.

    Raises:
        KeyError: If one of the required environment variables is not set.
    """
    res = ec2.run_instances(
        BlockDeviceMappings=[
            {
                'DeviceName': '/dev/sda1',
                'Ebs': {
                    'VolumeSize': volume_size,
                    'VolumeType': 'gp2'
                }
            }
        ],
        ImageId=os.environ[region_prefix + 'IMAGE_ID'],
        InstanceType=instance_type,
        UserData=script,
        KeyName=os.environ[region_prefix + 'KEY_NAME'],
        # Exactly one instance per call.
        MinCount=1,
        MaxCount=1,
        SecurityGroupIds=[os.environ[region_prefix + 'SECURITY_GROUP']],
        IamInstanceProfile={'Name': os.environ['IAM_ROLE']},
        InstanceInitiatedShutdownBehavior=shutdown_behaviour,
        TagSpecifications=[
            {
                'ResourceType': 'instance',
                'Tags': [
                    {
                        'Key': 'Name',
                        'Value': instance_name
                    }
                ]
            }
        ]
    )
    return res['Instances'][0]['InstanceId']

def get_dns(instance_id):
Expand Down Expand Up @@ -232,9 +275,17 @@ def terminate_instance(instance_ids):
return ec2.terminate_instances(InstanceIds=instance_ids)

# Handles a "deploy" request: reads run settings from `event`, validates them,
# fills the cloud-init template once per entry in `params`, launches one
# instance per entry through deploy(), and returns a status text. Validation
# failures are reported by returning an error string, not by raising.
# NOTE(review): this span is shown as a diff view -- pre- and post-change lines
# appear together and the "Expand All" markers hide parts of the function, so
# the comments below describe only the lines that are visible.
def deploy_handler(event):
titled = event.get('title', 'hostname-test')
if titled is None:
return "Unable to start the run, runName is required. Please restart with appropriate runName."
# Names of required parameters that were absent; filled in by get_param below.
missing_parameters = []

def parameter_wasnt_specified(parameter_value):
# in gradle if parameter wasn't specified then project.findProperty return 'null'
return parameter_value is None or parameter_value == 'null'

# Looks up a required parameter; a missing one is recorded in
# missing_parameters and the raw value (None or 'null') is still returned.
def get_param(param_name):
param_value = event.get(param_name)
if parameter_wasnt_specified(param_value):
missing_parameters.append(param_name)
return param_value

branch = event.get('branch', BRANCH_DEFAULT)
commit_id = event.get('commit', COMMIT_DEFAULT)
Expand All @@ -245,27 +296,40 @@ def deploy_handler(event):
execute_args = event.get('execute_args', EXECUTE_ARGS_DEFAULT)
batch = event.get('batch', TRUE)
max_ram = event.get('max_ram', MAXRAM_DEFAULT)
s3_publish = event.get('s3_publish', 'true')
instance_type = event.get('instance_type', os.environ['INSTANCE_TYPE'])
s3_publish = event.get('s3_publish', TRUE)
# NOTE(review): compared with integers further down; presumably numeric --
# confirm the caller never sends it as a string.
volume_size = event.get('storage_size', 64)
shutdown_wait = event.get('shutdown_wait', SHUTDOWN_DEFAULT)
region = event.get('region', os.environ['REGION'])
shutdown_behaviour = event.get('shutdown_behaviour', os.environ['SHUTDOWN_BEHAVIOUR'])
sigopt_client_id = event.get('sigopt_client_id', os.environ['SIGOPT_CLIENT_ID'])
sigopt_dev_id = event.get('sigopt_dev_id', os.environ['SIGOPT_DEV_ID'])
end_script = event.get('end_script', END_SCRIPT_DEFAULT)
run_grafana = event.get('run_grafana', 'false')

# Required parameters: no fallback value, absence is collected and reported.
titled = get_param('title')
instance_type = get_param('instance_type')
region = get_param('region')
shutdown_behaviour = get_param('shutdown_behaviour')

if missing_parameters:
return "Unable to start, missing parameters: " + ", ".join(missing_parameters)

if instance_type not in instance_types:
return "Unable to start run, {instance_type} instance type not supported.".format(instance_type=instance_type)
#instance_type = os.environ['INSTANCE_TYPE']

if shutdown_behaviour not in shutdown_behaviours:
shutdown_behaviour = os.environ['SHUTDOWN_BEHAVIOUR']
return "Unable to start run, {shutdown_behaviour} shutdown behaviour not supported.".format(shutdown_behaviour=shutdown_behaviour)

if region not in regions:
return "Unable to start run, {region} region not supported.".format(region=region)

# Out-of-range sizes fall back to 64 (they are not clamped to the nearest bound).
if volume_size < 64 or volume_size > 256:
volume_size = 64

selected_script = CONFIG_SCRIPT
selected_script = ""
# NOTE(review): TRUE is defined outside this view; the gradle payload sends
# run_grafana as a quoted string, so this presumably compares against 'true' -- confirm.
if run_grafana == TRUE:
selected_script = CONFIG_SCRIPT_WITH_GRAFANA
else:
selected_script = CONFIG_SCRIPT

params = configs
if s3_publish == TRUE:
selected_script += S3_PUBLISH_SCRIPT
Expand All @@ -288,36 +352,37 @@ def deploy_handler(event):

# Accumulates the human-readable result returned to the caller.
txt = ''

if region not in regions:
return "Unable to start run, {region} region not supported.".format(region=region)

init_ec2(region)

if validate(branch) and validate(commit_id):
runNum = 1
for arg in params:
# First 8 characters of a random UUID; substituted for $UID and reported as the batch id.
uid = str(uuid.uuid4())[:8]
runName = titled
# NOTE(review): Python str has no toUpperCase(); if shutdown_behaviour is a str
# this raises AttributeError -- .upper() is presumably what was meant. Confirm and fix.
runName = runName + '_' + shutdown_behaviour.toUpperCase()
if len(params) > 1:
# NOTE(review): backtick repr is Python 2-only syntax; on Python 3 this is a
# SyntaxError -- confirm the Lambda runtime, or use str(runNum).
runName += "-" + `runNum`
# Fill in the cloud-init template placeholders for this run; every replacement
# value must be a str for str.replace to accept it.
script = initscript.replace('$RUN_SCRIPT',selected_script).replace('$REGION',region).replace('$S3_REGION',os.environ['REGION']) \
script = initscript.replace('$RUN_SCRIPT',selected_script).replace('$REGION',region).replace('$S3_REGION', os.environ['REGION']) \
.replace('$BRANCH',branch).replace('$COMMIT', commit_id).replace('$CONFIG', arg) \
.replace('$MAIN_CLASS', execute_class).replace('$UID', uid).replace('$SHUTDOWN_WAIT', shutdown_wait) \
.replace('$TITLED', runName).replace('$MAX_RAM', max_ram).replace('$S3_PUBLISH', s3_publish) \
.replace('$SIGOPT_CLIENT_ID', sigopt_client_id).replace('$SIGOPT_DEV_ID', sigopt_dev_id).replace('$END_SCRIPT', end_script) \
.replace('$SLACK_HOOK_WITH_TOKEN', os.environ['SLACK_HOOK_WITH_TOKEN'])
.replace('$SLACK_HOOK_WITH_TOKEN', os.environ['SLACK_HOOK_WITH_TOKEN']) \
.replace('$SHEET_ID', os.environ['SHEET_ID'])
# The env-var prefix is the region with '-' turned into '_' plus a trailing '_'.
instance_id = deploy(script, instance_type, region.replace("-", "_")+'_', shutdown_behaviour, runName, volume_size)
host = get_dns(instance_id)
txt = txt + 'Started batch: {batch} with run name: {titled} for branch/commit {branch}/{commit} at host {dns} (InstanceID: {instance_id}). '.format(branch=branch, titled=runName, commit=commit_id, dns=host, batch=uid, instance_id=instance_id)

# Append the Grafana dashboard URL (port 3003 on the instance host) to the status text.
if run_grafana == TRUE:
txt = txt + 'Grafana will be available at http://{dns}:3003/d/dvib8mbWz/beam-simulation-global-view'.format(dns=host)

runNum += 1
else:
# NOTE(review): "bach" in the message below looks like a typo for "batch".
txt = 'Unable to start bach for branch/commit {branch}/{commit}. '.format(branch=branch, commit=commit_id)

return txt

def instance_handler(event):
region = event.get('region', os.environ['REGION'])
region = event.get('region')
instance_ids = event.get('instance_ids')
command_id = event.get('command')
system_instances = os.environ['SYSTEM_INSTANCES']
Expand Down
4 changes: 2 additions & 2 deletions aws/src/main/python/updateBeamAMI/lambda_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,12 @@ def update_lambda(image_ids, function_name):
'us_west_2_IMAGE_ID': image_ids['us-west-2'],
})
lm.update_function_configuration(
FunctionName='simulateBeam',
FunctionName=function_name,
Environment={
'Variables': en_var
}
)
logger.info('simulateBeam image ids updated')
logger.info(function_name + ' image ids updated')


def check_instance_id(instance_ids):
Expand Down
1 change: 1 addition & 0 deletions gradle.deploy.properties
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ beamBranch=develop
beamCommit=HEAD
beamConfigs=test/input/sf-light/sf-light-25k.conf
instanceType=t2.small
runGrafana=false

#c5.9xlarge (36/141) -> 5 instances -> $1.53 per Hour
#m4.10xlarge (40/160) -> 5 -> $2.00 per Hour
Expand Down