diff --git a/aws/build.gradle b/aws/build.gradle index 84940e5279f..be07ab3ab62 100755 --- a/aws/build.gradle +++ b/aws/build.gradle @@ -90,7 +90,8 @@ task deploy(type: AWSLambdaInvokeTask) { "region": "${ext.getParameterValue('region') ?: defaultRegion}", "shutdown_wait": "${ext.getParameterValue('shutdownWait')}", "shutdown_behaviour": "${ext.getParameterValue('shutdownBehaviour')}", - "command": "deploy" + "command": "deploy", + "run_grafana" : "${ext.getParameterValue('runGrafana') ?: false}" }""" functionName = "simulateBeam" invocationType = InvocationType.RequestResponse @@ -218,7 +219,7 @@ def checkInstanceId() { } def buildPayload(command) { - println "command:"+command + println "command:" + command return """{ "instance_ids": "${project.findProperty('instanceIds')}", "region": "${project.findProperty('region') ?: defaultRegion}", @@ -237,7 +238,7 @@ def getCurrentGitBranch() { def getCurrentGitUserEmail() { def rawGitUserEmail = getGitResultFromWorkingDirUsing('git config user.email', "GitUserEmailNotFound") - return rawGitUserEmail.replaceAll('@',"AT").replaceAll("\\.", "_") + return rawGitUserEmail.replaceAll('@', "AT").replaceAll("\\.", "_") } def getGitResultFromWorkingDirUsing(command, defaultResult) { diff --git a/aws/src/main/python/beam_lambda/lambda_function.py b/aws/src/main/python/beam_lambda/lambda_function.py index 53015985459..7851421168d 100755 --- a/aws/src/main/python/beam_lambda/lambda_function.py +++ b/aws/src/main/python/beam_lambda/lambda_function.py @@ -8,6 +8,9 @@ CONFIG_SCRIPT = '''./gradlew --stacktrace :run -PappArgs="['--config', '$cf']" -PmaxRAM=$MAX_RAM''' +CONFIG_SCRIPT_WITH_GRAFANA = '''sudo ./gradlew --stacktrace grafanaStart + - ./gradlew --stacktrace :run -PappArgs="['--config', '$cf']" -PmaxRAM=$MAX_RAM''' + EXECUTE_SCRIPT = '''./gradlew --stacktrace :execute -PmainClass=$MAIN_CLASS -PappArgs="$cf" -PmaxRAM=$MAX_RAM''' EXPERIMENT_SCRIPT = '''./bin/experiment.sh $cf cloud''' @@ -22,11 +25,10 @@ - finalPath="$path2"; - done; - done; + - sudo cp /home/ubuntu/git/beam/gc_* "$finalPath" - sudo cp /var/log/cloud-init-output.log "$finalPath" - sudo aws --region "$S3_REGION" s3 cp "$finalPath" s3://beam-outputs/"$finalPath" --recursive; - - s3p="$s3p, https://s3.us-east-2.amazonaws.com/beam-outputs/index.html#$finalPath" - - sudo aws --region "$S3_REGION" s3 ls s3://beam-outputs/$finalPath | wc -l - - s3_output_count = $?''' + - s3p="$s3p, https://s3.us-east-2.amazonaws.com/beam-outputs/index.html#$finalPath"''' END_SCRIPT_DEFAULT = '''echo "End script not provided."''' @@ -63,12 +65,35 @@ 0 * * * * curl -X POST -H "Content-type: application/json" --data '"'"'{"$(ec2metadata --instance-type) instance $(ec2metadata --instance-id) running... \\n Batch [$UID] completed and instance of type $(ec2metadata --instance-type) is still running in $REGION since last $(($(($(date +%s) - $(cat /tmp/.starttime))) / 3600)) Hour $(($(($(date +%s) - $(cat /tmp/.starttime))) / 60)) Minute."}'"'" path: /tmp/slack_notification runcmd: + - ln -sf /var/log/cloud-init-output.log /home/ubuntu/git/beam/cloud-init-output.log - echo "-------------------Starting Beam Sim----------------------" - echo $(date +%s) > /tmp/.starttime - cd /home/ubuntu/git/beam - rm -rf /home/ubuntu/git/beam/test/input/sf-light/r5/network.dat - - ln -sf /var/log/cloud-init-output.log ./cloud-init-output.log - hello_msg=$(printf "Run Started \\n Run Name** $TITLED** \\n Instance ID %s \\n Instance type **%s** \\n Host name **%s** \\n Web browser ** http://%s:8000 ** \\n Region $REGION \\n Batch $UID \\n Branch **$BRANCH** \\n Commit $COMMIT" $(ec2metadata --instance-id) $(ec2metadata --instance-type) $(ec2metadata --public-hostname) $(ec2metadata --public-hostname)) + - start_json=$(printf "{ + \\"command\\":\\"add\\", + \\"type\\":\\"beam\\", + \\"sheet_id\\":\\"$SHEET_ID\\", + \\"run\\":{ + \\"status\\":\\"Run Started\\", + \\"name\\":\\"$TITLED\\", + \\"instance_id\\":\\"%s\\", + \\"instance_type\\":\\"%s\\", + \\"host_name\\":\\"%s\\", + \\"browser\\":\\"http://%s:8000\\", + \\"branch\\":\\"$BRANCH\\", + \\"region\\":\\"$REGION\\", + \\"batch\\":\\"$UID\\", + \\"commit\\":\\"$COMMIT\\", + \\"s3_link\\":\\"%s\\", + \\"max_ram\\":\\"$MAX_RAM\\", + \\"config_file\\":\\"$CONFIG\\", + \\"sigopt_client_id\\":\\"$SIGOPT_CLIENT_ID\\", + \\"sigopt_dev_id\\":\\"$SIGOPT_DEV_ID\\" + } + }" $(ec2metadata --instance-id) $(ec2metadata --instance-type) $(ec2metadata --public-hostname) $(ec2metadata --public-hostname)) + - echo $start_json - chmod +x /tmp/slack.sh - echo "notification sent..." - echo "notification saved..." @@ -91,9 +116,9 @@ - export SIGOPT_DEV_ID="$SIGOPT_DEV_ID" - echo $MAXRAM - /tmp/slack.sh "$hello_msg" - - /home/ubuntu/git/glip.sh -i "http://icons.iconarchive.com/icons/uiconstock/socialmedia/32/AWS-icon.png" -a "Run Started" -b "Run Name** $TITLED** \\n Instance ID $(ec2metadata --instance-id) \\n Instance type **$(ec2metadata --instance-type)** \\n Host name **$(ec2metadata --public-hostname)** \\n Web browser **http://$(ec2metadata --public-hostname):8000** \\n Region $REGION \\n Batch $UID \\n Branch **$BRANCH** \\n Commit $COMMIT" + + - curl -X POST "https://ca4ircx74d.execute-api.us-east-2.amazonaws.com/production/spreadsheet" -H "Content-Type:application/json" --data "$start_json" - s3p="" - - s3_output_count=0 - for cf in $CONFIG - do - echo "-------------------running $cf----------------------" @@ -104,14 +129,32 @@ - then - s3glip="\\n S3 output url ${s3p#","}" - fi - - if [ $s3_output_count > 0 ] - - then - - sudo aws ec2 create-tags --region $REGION --resources $(ec2metadata --instance-id) --tags Key=BackedUp,Value=$s3p - - fi - bye_msg=$(printf "Run Completed \\n Run Name** $TITLED** \\n Instance ID %s \\n Instance type **%s** \\n Host name **%s** \\n Web browser ** http://%s:8000 ** \\n Region $REGION \\n Batch $UID \\n Branch **$BRANCH** \\n Commit $COMMIT %s \\n Shutdown in $SHUTDOWN_WAIT minutes" $(ec2metadata --instance-id) $(ec2metadata --instance-type) $(ec2metadata --public-hostname) $(ec2metadata --public-hostname) "$s3glip") - echo "$bye_msg" + - stop_json=$(printf "{ + \\"command\\":\\"add\\", + \\"type\\":\\"beam\\", + \\"sheet_id\\":\\"$SHEET_ID\\", + \\"run\\":{ + \\"status\\":\\"Run Completed\\", + \\"name\\":\\"$TITLED\\", + \\"instance_id\\":\\"%s\\", + \\"instance_type\\":\\"%s\\", + \\"host_name\\":\\"%s\\", + \\"browser\\":\\"http://%s:8000\\", + \\"branch\\":\\"$BRANCH\\", + \\"region\\":\\"$REGION\\", + \\"batch\\":\\"$UID\\", + \\"commit\\":\\"$COMMIT\\", + \\"s3_link\\":\\"%s\\", + \\"max_ram\\":\\"$MAX_RAM\\", + \\"config_file\\":\\"$CONFIG\\", + \\"sigopt_client_id\\":\\"$SIGOPT_CLIENT_ID\\", + \\"sigopt_dev_id\\":\\"$SIGOPT_DEV_ID\\" + } + }" $(ec2metadata --instance-id) $(ec2metadata --instance-type) $(ec2metadata --public-hostname) $(ec2metadata --public-hostname) "${s3p#","}") - /tmp/slack.sh "$bye_msg" - - /home/ubuntu/git/glip.sh -i "http://icons.iconarchive.com/icons/uiconstock/socialmedia/32/AWS-icon.png" -a "Run Completed" -b "Run Name** $TITLED** \\n Instance ID $(ec2metadata --instance-id) \\n Instance type **$(ec2metadata --instance-type)** \\n Host name **$(ec2metadata --public-hostname)** \\n Web browser **http://$(ec2metadata --public-hostname):8000** \\n Region $REGION \\n Batch $UID \\n Branch **$BRANCH** \\n Commit $COMMIT $s3glip \\n Shutdown in $SHUTDOWN_WAIT minutes" + - curl -X POST "https://ca4ircx74d.execute-api.us-east-2.amazonaws.com/production/spreadsheet" -H "Content-Type:application/json" --data "$stop_json" - $END_SCRIPT - sudo shutdown -h +$SHUTDOWN_WAIT ''')) @@ -177,30 +220,30 @@ def validate(name): def deploy(script, instance_type, region_prefix, shutdown_behaviour, instance_name, volume_size): res = ec2.run_instances(BlockDeviceMappings=[ - { - 'DeviceName': '/dev/sda1', - 'Ebs': { - 'VolumeSize': volume_size, - 'VolumeType': 'gp2' - } - } - ], - ImageId=os.environ[region_prefix + 'IMAGE_ID'], - InstanceType=instance_type, - UserData=script, - KeyName=os.environ[region_prefix + 'KEY_NAME'], - MinCount=1, - MaxCount=1, - SecurityGroupIds=[os.environ[region_prefix + 'SECURITY_GROUP']], - IamInstanceProfile={'Name': os.environ['IAM_ROLE'] }, - InstanceInitiatedShutdownBehavior=shutdown_behaviour, - TagSpecifications=[ { - 'ResourceType': 'instance', - 'Tags': [ { - 'Key': 'Name', - 'Value': instance_name - } ] - } ]) + { + 'DeviceName': '/dev/sda1', + 'Ebs': { + 'VolumeSize': volume_size, + 'VolumeType': 'gp2' + } + } + ], + ImageId=os.environ[region_prefix + 'IMAGE_ID'], + InstanceType=instance_type, + UserData=script, + KeyName=os.environ[region_prefix + 'KEY_NAME'], + MinCount=1, + MaxCount=1, + SecurityGroupIds=[os.environ[region_prefix + 'SECURITY_GROUP']], + IamInstanceProfile={'Name': os.environ['IAM_ROLE'] }, + InstanceInitiatedShutdownBehavior=shutdown_behaviour, + TagSpecifications=[ { + 'ResourceType': 'instance', + 'Tags': [ { + 'Key': 'Name', + 'Value': instance_name + } ] + } ]) return res['Instances'][0]['InstanceId'] def get_dns(instance_id): @@ -232,9 +275,17 @@ def terminate_instance(instance_ids): return ec2.terminate_instances(InstanceIds=instance_ids) def deploy_handler(event): - titled = event.get('title', 'hostname-test') - if titled is None: - return "Unable to start the run, runName is required. Please restart with appropriate runName." + missing_parameters = [] + + def parameter_wasnt_specified(parameter_value): + # in gradle if parameter wasn't specified then project.findProperty return 'null' + return parameter_value is None or parameter_value == 'null' + + def get_param(param_name): + param_value = event.get(param_name) + if parameter_wasnt_specified(param_value): + missing_parameters.append(param_name) + return param_value branch = event.get('branch', BRANCH_DEFAULT) commit_id = event.get('commit', COMMIT_DEFAULT) @@ -245,27 +296,40 @@ def deploy_handler(event): execute_args = event.get('execute_args', EXECUTE_ARGS_DEFAULT) batch = event.get('batch', TRUE) max_ram = event.get('max_ram', MAXRAM_DEFAULT) - s3_publish = event.get('s3_publish', 'true') - instance_type = event.get('instance_type', os.environ['INSTANCE_TYPE']) + s3_publish = event.get('s3_publish', TRUE) volume_size = event.get('storage_size', 64) shutdown_wait = event.get('shutdown_wait', SHUTDOWN_DEFAULT) - region = event.get('region', os.environ['REGION']) - shutdown_behaviour = event.get('shutdown_behaviour', os.environ['SHUTDOWN_BEHAVIOUR']) sigopt_client_id = event.get('sigopt_client_id', os.environ['SIGOPT_CLIENT_ID']) sigopt_dev_id = event.get('sigopt_dev_id', os.environ['SIGOPT_DEV_ID']) end_script = event.get('end_script', END_SCRIPT_DEFAULT) + run_grafana = event.get('run_grafana', 'false') + + titled = get_param('title') + instance_type = get_param('instance_type') + region = get_param('region') + shutdown_behaviour = get_param('shutdown_behaviour') + + if missing_parameters: + return "Unable to start, missing parameters: " + ", ".join(missing_parameters) if instance_type not in instance_types: return "Unable to start run, {instance_type} instance type not supported.".format(instance_type=instance_type) - #instance_type = os.environ['INSTANCE_TYPE'] if shutdown_behaviour not in shutdown_behaviours: - shutdown_behaviour = os.environ['SHUTDOWN_BEHAVIOUR'] + return "Unable to start run, {shutdown_behaviour} shutdown behaviour not supported.".format(shutdown_behaviour=shutdown_behaviour) + + if region not in regions: + return "Unable to start run, {region} region not supported.".format(region=region) if volume_size < 64 or volume_size > 256: volume_size = 64 - selected_script = CONFIG_SCRIPT + selected_script = "" + if run_grafana == TRUE: + selected_script = CONFIG_SCRIPT_WITH_GRAFANA + else: + selected_script = CONFIG_SCRIPT + params = configs if s3_publish == TRUE: selected_script += S3_PUBLISH_SCRIPT @@ -288,9 +352,6 @@ def deploy_handler(event): txt = '' - if region not in regions: - return "Unable to start run, {region} region not supported.".format(region=region) - init_ec2(region) if validate(branch) and validate(commit_id): @@ -298,18 +359,22 @@ def deploy_handler(event): for arg in params: uid = str(uuid.uuid4())[:8] runName = titled - runName = runName + '_' + shutdown_behaviour.toUpperCase() if len(params) > 1: runName += "-" + `runNum` - script = initscript.replace('$RUN_SCRIPT',selected_script).replace('$REGION',region).replace('$S3_REGION',os.environ['REGION']) \ + script = initscript.replace('$RUN_SCRIPT',selected_script).replace('$REGION',region).replace('$S3_REGION', os.environ['REGION']) \ .replace('$BRANCH',branch).replace('$COMMIT', commit_id).replace('$CONFIG', arg) \ .replace('$MAIN_CLASS', execute_class).replace('$UID', uid).replace('$SHUTDOWN_WAIT', shutdown_wait) \ .replace('$TITLED', runName).replace('$MAX_RAM', max_ram).replace('$S3_PUBLISH', s3_publish) \ .replace('$SIGOPT_CLIENT_ID', sigopt_client_id).replace('$SIGOPT_DEV_ID', sigopt_dev_id).replace('$END_SCRIPT', end_script) \ - .replace('$SLACK_HOOK_WITH_TOKEN', os.environ['SLACK_HOOK_WITH_TOKEN']) + .replace('$SLACK_HOOK_WITH_TOKEN', os.environ['SLACK_HOOK_WITH_TOKEN']) \ + .replace('$SHEET_ID', os.environ['SHEET_ID']) instance_id = deploy(script, instance_type, region.replace("-", "_")+'_', shutdown_behaviour, runName, volume_size) host = get_dns(instance_id) txt = txt + 'Started batch: {batch} with run name: {titled} for branch/commit {branch}/{commit} at host {dns} (InstanceID: {instance_id}). '.format(branch=branch, titled=runName, commit=commit_id, dns=host, batch=uid, instance_id=instance_id) + + if run_grafana == TRUE: + txt = txt + 'Grafana will be available at http://{dns}:3003/d/dvib8mbWz/beam-simulation-global-view'.format(dns=host) + runNum += 1 else: txt = 'Unable to start bach for branch/commit {branch}/{commit}. '.format(branch=branch, commit=commit_id) @@ -317,7 +382,7 @@ def deploy_handler(event): return txt def instance_handler(event): - region = event.get('region', os.environ['REGION']) + region = event.get('region') instance_ids = event.get('instance_ids') command_id = event.get('command') system_instances = os.environ['SYSTEM_INSTANCES'] diff --git a/aws/src/main/python/updateBeamAMI/lambda_function.py b/aws/src/main/python/updateBeamAMI/lambda_function.py index 53ff85bd4b3..f87b684150c 100644 --- a/aws/src/main/python/updateBeamAMI/lambda_function.py +++ b/aws/src/main/python/updateBeamAMI/lambda_function.py @@ -63,12 +63,12 @@ def update_lambda(image_ids, function_name): 'us_west_2_IMAGE_ID': image_ids['us-west-2'], }) lm.update_function_configuration( - FunctionName='simulateBeam', + FunctionName=function_name, Environment={ 'Variables': en_var } ) - logger.info('simulateBeam image ids updated') + logger.info(function_name + ' image ids updated') def check_instance_id(instance_ids): diff --git a/gradle.deploy.properties b/gradle.deploy.properties index be4d966ac07..9d0b20751dc 100644 --- a/gradle.deploy.properties +++ b/gradle.deploy.properties @@ -3,6 +3,7 @@ beamBranch=develop beamCommit=HEAD beamConfigs=test/input/sf-light/sf-light-25k.conf instanceType=t2.small +runGrafana=false #c5.9xlarge (36/141) -> 5 instances -> $1.53 per Hour #m4.10xlarge (40/160) -> 5 -> $2.00 per Hour