Skip to content

Commit

Permalink
Merge pull request wfau#1176 from stvoutsin/feature/yarn-monitor
Browse files Browse the repository at this point in the history
Introduce temp disk monitor / cleaner
  • Loading branch information
stvoutsin authored Jul 27, 2023
2 parents 2ae4044 + 8e77ba3 commit aee4d41
Show file tree
Hide file tree
Showing 11 changed files with 644 additions and 20 deletions.
2 changes: 1 addition & 1 deletion deployments/common/pip/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ pyarrow==8.0.0
koalas==1.8.2
GaiaXPy==1.1.4
git+https://github.com/wfau/[email protected]

git+https://github.com/stvoutsin/[email protected]
15 changes: 0 additions & 15 deletions deployments/hadoop-yarn/ansible/43-setup-ssl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,21 +39,6 @@
become: true
command: "pip3 install certbot-nginx"

- name: "Install Cron"
become: true
yum:
name:
- cronie
- cronie-anacron
update_cache: yes
state: present

- name: "Start Crond"
service:
name: crond
state: restarted
become: yes

- name: "Generate NGINX configuration"
become: true
template:
Expand Down
45 changes: 45 additions & 0 deletions deployments/hadoop-yarn/ansible/45-setup-cron.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#
# <meta:header>
# <meta:licence>
# Copyright (c) 2023, ROE (http://www.roe.ac.uk/)
#
# This information is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This information is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# </meta:licence>
# </meta:header>
#


# Play: install the cron daemon on the zeppelin host group and make sure it
# is running, so that later plays can register cron jobs on those hosts.
- name: "Setup Cron"
  hosts: zeppelin
  gather_facts: false
  vars_files:
    - config/ansible.yml
    - config/domains.yml
    - /opt/aglais/aglais-status.yml
  tasks:
    # Installs the cronie and cronie-anacron packages, refreshing the
    # package cache first.
    - name: "Install Cron"
      become: true
      ansible.builtin.yum:
        name:
          - cronie
          - cronie-anacron
        update_cache: true
        state: present

    # 'started' (rather than 'restarted') keeps the task idempotent: the
    # daemon is only touched when it is not already running.
    # 'enabled' makes crond come back after a reboot.
    - name: "Start Crond"
      become: true
      ansible.builtin.service:
        name: crond
        state: started
        enabled: true

44 changes: 44 additions & 0 deletions deployments/hadoop-yarn/ansible/46-setup-yarn-monitor.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#
# <meta:header>
# <meta:licence>
# Copyright (c) 2023, ROE (http://www.roe.ac.uk/)
#
# This information is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This information is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# </meta:licence>
# </meta:header>
#


# Play: register a cron job on the zeppelin host group that runs the
# yarncleaner Python package every 5 minutes against the hosts in the
# 'workers' inventory group.
#
# Play variables:
#   sshuser           - user name passed to YarnCleaner as ssh_username, and
#                       used to build the home directory of the key file.
#   sshkeyname        - base name of the key file under ~/.ssh.
#   threshold_percent - value passed to YarnCleaner.clean(threshold_percent=...).
- name: "Setup the Yarn Monitor / Cleaner"
  hosts: zeppelin
  gather_facts: false
  vars_files:
    - config/ansible.yml
    - config/domains.yml
    - /opt/aglais/aglais-status.yml
  vars:
    sshuser: "fedora"
    sshkeyname: "id_rsa"
    threshold_percent: 90
  tasks:
    # The command module does not run through a shell, so it would echo the
    # literal text '$SSH_AUTH_SOCK'; the shell module is needed for the
    # variable to be expanded. This is a read-only probe, so it never
    # reports a change. The registered result is not used by any task in
    # this play.
    - name: Get SSH_AUTH_SOCK value
      ansible.builtin.shell: echo "$SSH_AUTH_SOCK"
      register: ssh_auth_sock
      changed_when: false

    # The job picks the most recently modified ssh-agent socket under /tmp,
    # exports it as SSH_AUTH_SOCK, then runs YarnCleaner. The folded scalar
    # below renders to a single line; each line break becomes one space.
    # NOTE(review): ssh_key_file points at the public key (.pub) - confirm
    # that YarnCleaner does not expect the private key path here.
    - name: "Create Cronjob to run Python command"
      ansible.builtin.cron:
        name: "Run YarnCleaner command every 5 minutes"
        minute: "*/5"
        job: >-
          auth=$(ls -t /tmp/ssh-*/agent.* | head -n 1) &&
          export SSH_AUTH_SOCK=$auth &&
          python3 -c "from yarncleaner import YarnCleaner;
          YarnCleaner(workers={{ groups['workers'] }},
          ssh_username='{{ sshuser }}',
          ssh_key_file='/home/{{ sshuser }}/.ssh/{{ sshkeyname }}.pub').clean(threshold_percent={{ threshold_percent }})"
2 changes: 2 additions & 0 deletions deployments/hadoop-yarn/ansible/create-all.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@
- import_playbook: 33-install-prometheus.yml
- import_playbook: 42-install-nginx.yml
- import_playbook: 44-create-maintenance-page.yml
- import_playbook: 45-setup-cron.yml
- import_playbook: 46-setup-yarn-monitor.yml

- import_playbook: 38-install-user-db.yml
- import_playbook: 39-create-user-scripts.yml
Expand Down
2 changes: 1 addition & 1 deletion deployments/hadoop-yarn/bin/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ EOF
# Install our integration tests.
#[root@ansibler]

pip install git+https://github.com/wfau/[email protected].6
pip install git+https://github.com/wfau/[email protected].7


# -----------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion deployments/zeppelin/test/config/basic.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
{
"name" : "Library_Validation.json",
"filepath" : "https://raw.githubusercontent.com/wfau/aglais-testing/bc9b9787b5b6225e11df5a4ef0272bcec660a44e/notebooks/Library_validation.json",
"totaltime" : 15,
"totaltime" : 10,
"results" : []
}

Expand Down
10 changes: 10 additions & 0 deletions deployments/zeppelin/test/config/disktest.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"notebooks": [
{
"name": "DiskOverflow",
"filepath": "https://raw.githubusercontent.com/stvoutsin/aglais-testing/main/notebooks/disk_overflow.zpln",
"totaltime": 1300,
"results": []
}
]
}
2 changes: 1 addition & 1 deletion deployments/zeppelin/test/config/full.json
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
{
"name" : "Library_Validation.json",
"filepath" : "https://raw.githubusercontent.com/wfau/aglais-testing/bc9b9787b5b6225e11df5a4ef0272bcec660a44e/notebooks/Library_validation.json",
"totaltime" : 15,
"totaltime" : 10,
"results" : []
}

Expand Down
2 changes: 1 addition & 1 deletion deployments/zeppelin/test/config/quick.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
{
"name" : "Library_Validation.json",
"filepath" : "https://raw.githubusercontent.com/wfau/aglais-testing/bc9b9787b5b6225e11df5a4ef0272bcec660a44e/notebooks/Library_validation.json",
"totaltime" : 15,
"totaltime" : 10,
"results" : []
}

Expand Down
Loading

0 comments on commit aee4d41

Please sign in to comment.