Merge pull request #438 from BiBiServ/dev
Keeping master up to date
XaverStiensmeier authored Sep 28, 2023
2 parents 769eb10 + 93cbdd7 commit 1978b9b
Showing 38 changed files with 1,217 additions and 542 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -11,6 +11,7 @@ resources/playbook/group_vars/

# any log files
*.log
log/

# Byte-compiled / optimized / DLL files
__pycache__/
1 change: 1 addition & 0 deletions bibigrid.sh
@@ -1 +1,2 @@
#!/bin/bash
python3 -m bibigrid.core.startup "$@"
18 changes: 14 additions & 4 deletions bibigrid.yml
@@ -1,4 +1,5 @@
# See https://cloud.denbi.de/wiki/Tutorials/BiBiGrid/ (after update)
# See https://github.com/BiBiServ/bibigrid/blob/master/documentation/markdown/features/configuration.md
# First configuration also holds general cluster information and must include the master.
# All other configurations mustn't include another master, but exactly one vpngtw instead (keys like master).

@@ -8,9 +9,10 @@
# -- BEGIN: GENERAL CLUSTER INFORMATION --
## sshPublicKeyFiles listed here will be added to access the cluster. A temporary key is created by bibigrid itself.
#sshPublicKeyFiles:
# - [key one]
# - [public key one]

## Volumes and snapshots that will be mounted to master
# autoMount: False # WARNING: will overwrite unidentified filesystems
#masterMounts: # KEY NOT FULLY IMPLEMENTED YET
# - [mount one]

@@ -36,13 +38,15 @@

## Uncomment if you don't want to assign a public ip to the master; for internal clusters (Tuebingen).
#useMasterWithPublicIp: False # defaults to True; if False, no public ip (floating ip) will be allocated
# deleteTmpKeypairAfter: False
# dontUploadCredentials: False

# Other keys - default False
#localFS: True
#localDNSlookup: True
#zabbix: True
#nfs: True
#ide: True # Very useful to set on True. Use `./bibigrid.sh -i [path-to-bibigrid.yml] -ide -cid [cluster-id]` to start port forwarding to access the ide.
#ide: True # A nice way to view your cluster as if you were using Visual Studio Code

useMasterAsCompute: True # Currently ignored by slurm

@@ -52,15 +56,17 @@
# master configuration
masterInstance:
type: # existing type/flavor on your cloud. See launch instance>flavor for options
image: # existing image on your cloud. See https://openstack.cebitec.uni-bielefeld.de/project/images pick an active one. Currently only ubuntu22.04 is supported
image: # existing active image on your cloud. Consider using regex to prevent image updates from breaking your running cluster
# features: # list

# -- END: GENERAL CLUSTER INFORMATION --

# fallbackOnOtherImage: False # if True, most similar image by name will be picked. A regex can also be given instead.

# worker configuration
#workerInstances:
# - type: # existing type/flavor on your cloud. See launch instance>flavor for options
# image: # same as master
# image: # same as master. Consider using regex to prevent image updates from breaking your running cluster
# count: # any number of workers you would like to create with set type, image combination
# # features: # list

@@ -87,6 +93,10 @@

# Depends on cloud site and project
subnet: # existing subnet on your cloud. See https://openstack.cebitec.uni-bielefeld.de/project/networks/
# or network:
# gateway: # if you want to use a gateway for create.
# ip: # IP of gateway to use
# portFunction: 30000 + oct4 # variables are called: oct1.oct2.oct3.oct4

# Uncomment if no full DNS service for started instances is available.
# Currently, the case in Berlin, DKFZ, Heidelberg and Tuebingen.
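
For readers unfamiliar with the new gateway option: the portFunction above is an arithmetic expression over the master's IP octets. A minimal Python sketch of how such an expression could be evaluated, mirroring the sympy-based substitution used in ide.py further down in this diff (the function name and sample values are illustrative):

import sympy

def resolve_gateway_port(port_function, master_ip):
    # Map each octet of the IP to the variables oct1..oct4 used in the expression.
    octets = {f"oct{index + 1}": int(part) for index, part in enumerate(master_ip.split("."))}
    # Parse the expression (e.g. "30000 + oct4") and substitute the octet values.
    return int(sympy.sympify(port_function).subs(octets))

# Example: a master at 10.0.0.17 with "30000 + oct4" is reached through port 30017.
print(resolve_gateway_port("30000 + oct4", "10.0.0.17"))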
12 changes: 5 additions & 7 deletions bibigrid/core/actions/check.py
@@ -1,21 +1,19 @@
"""
Module that acts as a wrapper and uses validate_configuration to validate given configuration
"""
import logging
from bibigrid.core.utility import validate_configuration

LOG = logging.getLogger("bibigrid")


def check(configurations, providers):
def check(configurations, providers, log):
"""
Uses validate_configuration to validate given configuration.
:param configurations: list of configurations (dicts)
:param providers: list of providers
:param log:
:return:
"""
success = validate_configuration.ValidateConfiguration(configurations, providers).validate()
success = validate_configuration.ValidateConfiguration(configurations, providers, log).validate()
check_result = "succeeded! Cluster is ready to start." if success else "failed!"
print(f"Total check {check_result}")
LOG.info("Total check returned %s.", success)
log.log(42, f"Total check {check_result}")
log.info("Total check returned %s.", success)
return 0
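
The new log.log(42, ...) calls route user-facing output through the logger instead of print(). How that logger is configured is not shown in this diff; the following is a minimal sketch of one possible setup, assuming a custom level above WARNING is registered for such messages (the level name "PRINT" is hypothetical):

import logging

PRINT = 42  # hypothetical constant; any value above WARNING (30) keeps these messages visible
logging.addLevelName(PRINT, "PRINT")

log = logging.getLogger("bibigrid")
log.setLevel(logging.INFO)
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter("%(levelname)s: %(message)s"))
log.addHandler(handler)

# check(configurations, providers, log) would then emit its summary like this:
log.log(PRINT, "Total check succeeded! Cluster is ready to start.")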
181 changes: 104 additions & 77 deletions bibigrid/core/actions/create.py

Large diffs are not rendered by default.

40 changes: 21 additions & 19 deletions bibigrid/core/actions/ide.py
@@ -2,15 +2,16 @@
This module contains methods to establish port forwarding in order to access an ide (theia).
"""

import logging
import random
import re
import signal
import subprocess
import sys
import time
import webbrowser

import sshtunnel
import sympy

from bibigrid.core.utility.handler import cluster_ssh_handler

@@ -20,7 +21,7 @@
LOCAL_BIND_ADDRESS = 9191
MAX_JUMP = 100
LOCALHOST = "127.0.0.1"
LOG = logging.getLogger("bibigrid")



def sigint_handler(caught_signal, frame): # pylint: disable=unused-argument
@@ -49,37 +50,38 @@ def is_used(ip_address):
for line in lines:
is_open = re.match(rf'tcp.*{ip_address}:([0-9][0-9]*).*ESTABLISHED\s*$', line)
if is_open is not None:
print(line)
ports_used.append(is_open[1])


def ide(cluster_id, master_provider, master_configuration):
def ide(cluster_id, master_provider, master_configuration, log):
"""
Creates a port forwarding from LOCAL_BIND_ADDRESS to REMOTE_BIND_ADDRESS from localhost to master of specified
cluster
@param cluster_id: cluster_id or ip
@param master_provider: master's provider
@param master_configuration: master's configuration
@param log:
@return:
"""
LOG.info("Starting port forwarding for ide")
log.info("Starting port forwarding for ide")
master_ip, ssh_user, used_private_key = cluster_ssh_handler.get_ssh_connection_info(cluster_id, master_provider,
master_configuration)
master_configuration, log)
used_local_bind_address = LOCAL_BIND_ADDRESS
if master_ip and ssh_user and used_private_key:
attempts = 0
if master_configuration.get("gateway"):
octets = {f'oct{enum + 1}': int(elem) for enum, elem in enumerate(master_ip.split("."))}
port = sympy.sympify(master_configuration["gateway"]["portFunction"]).subs(dict(octets))
gateway = (master_configuration["gateway"]["ip"], int(port))
else:
gateway = None
while attempts < 16:
attempts += 1
try:
with sshtunnel.SSHTunnelForwarder(
ssh_address_or_host=master_ip, # Raspberry Pi in my network

ssh_username=ssh_user,
ssh_pkey=used_private_key,

local_bind_address=(LOCALHOST, used_local_bind_address),
remote_bind_address=(LOCALHOST, REMOTE_BIND_ADDRESS)
) as server:
with sshtunnel.SSHTunnelForwarder(ssh_address_or_host=gateway or master_ip, ssh_username=ssh_user,
ssh_pkey=used_private_key,
local_bind_address=(LOCALHOST, used_local_bind_address),
remote_bind_address=(LOCALHOST, REMOTE_BIND_ADDRESS)) as server:
print("CTRL+C to close port forwarding when you are done.")
with server:
# opens in existing window if any default program exists
@@ -88,11 +90,11 @@ def ide(cluster_id, master_provider, master_configuration):
time.sleep(5)
except sshtunnel.HandlerSSHTunnelForwarderError:
used_local_bind_address += random.randint(1, MAX_JUMP)
LOG.info("Attempt: %s. Port in use... Trying new port %s", attempts, used_local_bind_address)
log.info("Attempt: %s. Port in use... Trying new port %s", attempts, used_local_bind_address)
if not master_ip:
LOG.warning("Cluster id %s doesn't match an existing cluster with a master.", cluster_id)
log.warning("Cluster id %s doesn't match an existing cluster with a master.", cluster_id)
if not ssh_user:
LOG.warning("No ssh user has been specified in the first configuration.")
log.warning("No ssh user has been specified in the first configuration.")
if not used_private_key:
LOG.warning("No matching sshPublicKeyFiles can be found in the first configuration or in .bibigrid")
log.warning("No matching sshPublicKeyFiles can be found in the first configuration or in .bibigrid")
return 1
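
Worth noting about the refactored tunnel setup: sshtunnel's SSHTunnelForwarder accepts either a plain host or an (ip, port) tuple for ssh_address_or_host, which is how the optional gateway computed above is slotted in. A minimal sketch with placeholder addresses, user, key path and remote port (all hypothetical):

import time
import sshtunnel

gateway = ("192.0.2.10", 30017)  # hypothetical gateway ip and port computed from the portFunction
with sshtunnel.SSHTunnelForwarder(
        ssh_address_or_host=gateway,  # without a gateway, the master ip string is passed instead
        ssh_username="ubuntu",  # placeholder ssh user
        ssh_pkey="keys/tempKey",  # placeholder path to the private key
        local_bind_address=("127.0.0.1", 9191),
        remote_bind_address=("127.0.0.1", 8181)) as server:  # 8181 is a placeholder remote port
    print("Tunnel is up on http://localhost:9191; press CTRL+C to stop.")
    while True:  # keep the tunnel open until interrupted
        time.sleep(5)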
60 changes: 32 additions & 28 deletions bibigrid/core/actions/list_clusters.py
@@ -3,22 +3,22 @@
This includes a method to create a dictionary containing all running clusters and their servers.
"""

import logging
import pprint
import re

from bibigrid.core.actions import create

SERVER_REGEX = re.compile(r"^bibigrid-((master)-([a-zA-Z0-9]+)|(worker|vpngtw)\d+-([a-zA-Z0-9]+)-\d+)$")
LOG = logging.getLogger("bibigrid")

def dict_clusters(providers):

def dict_clusters(providers, log):
"""
Creates a dictionary containing all servers by type and provider information
:param providers: list of all providers
:param log:
:return: list of all clusters in yaml format
"""
LOG.info("Creating cluster dictionary...")
log.info("Creating cluster dictionary...")
cluster_dict = {}
for provider in providers:
servers = provider.list_servers()
@@ -53,56 +53,59 @@ def setup(cluster_dict, cluster_id, server, provider):
server["cloud_specification"] = provider.cloud_specification["identifier"]


def print_list_clusters(cluster_id, providers):
def log_list(cluster_id, providers, log):
"""
Calls dict_clusters and gives a visual representation of the found cluster.
Detail depends on whether a cluster_id is given or not.
:param cluster_id:
:param providers:
:param log:
:return:
"""
cluster_dict = dict_clusters(providers=providers)
if cluster_id: # pylint: disable=too-many-nested-blocks
cluster_dict = dict_clusters(providers=providers, log=log)
if cluster_id: # pylint: disable=too-many-nested-blocks
if cluster_dict.get(cluster_id):
LOG.info("Printing specific cluster_dictionary")
master_count, worker_count, vpn_count = get_size_overview(cluster_dict[cluster_id])
print(f"\tCluster has {master_count} master, {vpn_count} vpngtw and {worker_count} regular workers. "
f"The cluster is spread over {vpn_count + master_count} reachable provider(s).")
log.info("Printing specific cluster_dictionary")
master_count, worker_count, vpn_count = get_size_overview(cluster_dict[cluster_id], log)
log.log(42, f"\tCluster has {master_count} master, {vpn_count} vpngtw and {worker_count} regular workers. "
f"The cluster is spread over {vpn_count + master_count} reachable provider(s).")
pprint.pprint(cluster_dict[cluster_id])
else:
LOG.info("Cluster with cluster-id {cluster_id} not found.")
print(f"Cluster with cluster-id {cluster_id} not found.")
log.info("Cluster with cluster-id {cluster_id} not found.")
log.log(42, f"Cluster with cluster-id {cluster_id} not found.")
else:
LOG.info("Printing overview of cluster all clusters")
log.info("Printing overview of cluster all clusters")
if cluster_dict:
for cluster_key_id, cluster_node_dict in cluster_dict.items():
print(f"Cluster-ID: {cluster_key_id}")
log.log(42, f"Cluster-ID: {cluster_key_id}")
master = cluster_node_dict.get('master')
if master:
for key in ["name", "user_id", "launched_at", "key_name", "public_v4", "public_v6", "provider"]:
value = cluster_node_dict['master'].get(key)
if value:
print(f"\t{key}: {value}")
log.log(42, f"\t{key}: {value}")
security_groups = get_security_groups(cluster_node_dict)
print(f"\tsecurity_groups: {security_groups}")
log.log(42, f"\tsecurity_groups: {security_groups}")
networks = get_networks(cluster_node_dict)
print(f"\tnetwork: {pprint.pformat(networks)}")
log.log(42, f"\tnetwork: {pprint.pformat(networks)}")
else:
LOG.warning("No master for cluster: %s.", cluster_key_id)
master_count, worker_count, vpn_count = get_size_overview(cluster_node_dict)
print(f"\tCluster has {master_count} master, {vpn_count} vpngtw and {worker_count} regular workers. "
f"The cluster is spread over {vpn_count + master_count} reachable provider(s).")
log.warning("No master for cluster: %s.", cluster_key_id)
master_count, worker_count, vpn_count = get_size_overview(cluster_node_dict, log)
log.log(42,
f"\tCluster has {master_count} master, {vpn_count} vpngtw and {worker_count} regular workers. "
f"The cluster is spread over {vpn_count + master_count} reachable provider(s).")
else:
print("No cluster found.")
log.log(42, "No cluster found.")
return 0


def get_size_overview(cluster_dict):
def get_size_overview(cluster_dict, log):
"""
:param cluster_dict: dictionary of cluster to size_overview
:param log:
:return: number of masters, number of workers, number of vpns
"""
LOG.info("Printing size overview")
log.info("Printing size overview")
master_count = int(bool(cluster_dict.get("master")))
worker_count = len(cluster_dict.get("workers") or "")
vpn_count = len(cluster_dict.get("vpngtws") or "")
@@ -135,19 +138,20 @@ def get_security_groups(cluster_dict):
return security_groups


def get_master_access_ip(cluster_id, master_provider):
def get_master_access_ip(cluster_id, master_provider, log):
"""
Returns master's ip of cluster cluster_id
:param master_provider: master's provider
:param cluster_id: Id of cluster
:param log:
:return: public ip of master
"""
LOG.info("Finding master ip for cluster %s...", cluster_id)
log.info("Finding master ip for cluster %s...", cluster_id)
servers = master_provider.list_servers()
for server in servers:
master = create.MASTER_IDENTIFIER(cluster_id=cluster_id)
if server["name"].startswith(master):
return server.get("public_v4") or server.get("public_v6") or server.get("private_v4")
LOG.warning("Cluster %s not found on master_provider %s.", cluster_id,
log.warning("Cluster %s not found on master_provider %s.", cluster_id,
master_provider.cloud_specification["identifier"])
return None
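
As a quick reference for the SERVER_REGEX defined at the top of list_clusters.py above, a short sketch showing which server names it classifies (the cluster id "abc123de" is made up):

import re

SERVER_REGEX = re.compile(r"^bibigrid-((master)-([a-zA-Z0-9]+)|(worker|vpngtw)\d+-([a-zA-Z0-9]+)-\d+)$")

for name in ["bibigrid-master-abc123de",     # master node
             "bibigrid-worker0-abc123de-3",  # worker node
             "bibigrid-vpngtw1-abc123de-0",  # vpn gateway node
             "bibigrid-worker-abc123de"]:    # no match: workers carry a numeric group index
    print(name, "->", "matches" if SERVER_REGEX.match(name) else "no match")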