Skip to content

Commit

Permalink
Merge pull request #2513 from chaoss/dev
Browse files Browse the repository at this point in the history
Release PR
  • Loading branch information
sgoggins authored Sep 5, 2023
2 parents c7b9117 + d6c665e commit 98299a5
Show file tree
Hide file tree
Showing 14 changed files with 662 additions and 37 deletions.
6 changes: 3 additions & 3 deletions augur/application/cli/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,21 +170,21 @@ def determine_worker_processes(ratio,maximum):
sleep_time += 6

#60% of estimate, Maximum value of 45
core_num_processes = determine_worker_processes(.6, 80)
core_num_processes = determine_worker_processes(.6, 45)
logger.info(f"Starting core worker processes with concurrency={core_num_processes}")
core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={core_num_processes} -n core:{uuid.uuid4().hex}@%h"
process_list.append(subprocess.Popen(core_worker.split(" ")))
sleep_time += 6

#20% of estimate, Maximum value of 25
secondary_num_processes = determine_worker_processes(.2, 26)
secondary_num_processes = determine_worker_processes(.2, 25)
logger.info(f"Starting secondary worker processes with concurrency={secondary_num_processes}")
secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={secondary_num_processes} -n secondary:{uuid.uuid4().hex}@%h -Q secondary"
process_list.append(subprocess.Popen(secondary_worker.split(" ")))
sleep_time += 6

#15% of estimate, Maximum value of 20
facade_num_processes = determine_worker_processes(.2, 40)
facade_num_processes = determine_worker_processes(.2, 20)
logger.info(f"Starting facade worker processes with concurrency={facade_num_processes}")
facade_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={facade_num_processes} -n facade:{uuid.uuid4().hex}@%h -Q facade"

Expand Down
2 changes: 1 addition & 1 deletion augur/application/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def get_development_flag():
},
"Celery": {
"worker_process_vmem_cap": 0.25,
"refresh_materialized_views_interval_in_days": 7
"refresh_materialized_views_interval_in_days": 1
},
"Redis": {
"cache_group": 0,
Expand Down
466 changes: 466 additions & 0 deletions augur/application/schema/alembic/versions/25_unique_on_mataview.py

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,34 @@ def contributor_breadth_model() -> None:
tool_version = '0.0.1'
data_source = 'GitHub API'


# This version of the query pulls contributors who have not had any data collected yet
# To the top of the list
cntrb_login_query = s.sql.text("""
SELECT DISTINCT gh_login, cntrb_id
FROM augur_data.contributors
WHERE gh_login IS NOT NULL
SELECT DISTINCT
gh_login,
cntrb_id
FROM
(
SELECT DISTINCT
gh_login,
cntrb_id,
data_collection_date
FROM
(
SELECT DISTINCT
contributors.gh_login,
contributors.cntrb_id,
contributor_repo.data_collection_date :: DATE
FROM
contributor_repo
RIGHT OUTER JOIN contributors ON contributors.cntrb_id = contributor_repo.cntrb_id
AND contributors.gh_login IS NOT NULL
ORDER BY
contributor_repo.data_collection_date :: DATE NULLS FIRST
) A
ORDER BY
data_collection_date DESC NULLS FIRST
) b
""")

result = engine.execute(cntrb_login_query)
Expand Down
125 changes: 113 additions & 12 deletions augur/tasks/db/refresh_materialized_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,27 +6,128 @@

from augur.tasks.init.celery_app import celery_app as celery
from augur.application.db.session import DatabaseSession
from augur.application.logs import AugurLogger


@celery.task
def refresh_materialized_views():
    """Refresh each Augur materialized view used by the API and explorer pages.

    Runs as a periodic Celery task. Each view is refreshed CONCURRENTLY in its
    own DatabaseSession so that one failing view (e.g. a view missing the
    unique index CONCURRENTLY requires) does not prevent the others from
    refreshing; failures are logged and skipped.

    Returns:
        None. Side effect only: issues REFRESH MATERIALIZED VIEW statements.
    """

    # Imported lazily to avoid a circular import with the celery app module.
    from augur.tasks.init.celery_app import engine

    # NOTE: this is a plain function, not a bound task method, so there is no
    # `self` here — use a module-style logger, not `self.logger`.
    logger = logging.getLogger(refresh_materialized_views.__name__)

    # Each view is listed exactly once; the previous copy-paste version
    # refreshed api_get_all_repo_prs twice and never refreshed
    # explorer_contributor_actions due to a `mv_8_refresh`/`mv8_refresh` typo.
    materialized_views = [
        "augur_data.api_get_all_repo_prs",
        "augur_data.api_get_all_repos_commits",
        "augur_data.api_get_all_repos_issues",
        "augur_data.augur_new_contributors",
        "augur_data.explorer_commits_and_committers_daily_count",
        "augur_data.explorer_new_contributors",
        "augur_data.explorer_entry_list",
        "augur_data.explorer_contributor_actions",
    ]

    for view in materialized_views:

        refresh_sql = s.sql.text(f"""
            REFRESH MATERIALIZED VIEW CONCURRENTLY {view} with data;
            COMMIT;
        """)

        try:
            with DatabaseSession(logger, engine) as session:
                session.execute_sql(refresh_sql)
        except Exception as e:
            # Best-effort: log and continue with the remaining views.
            logger.info(f"error is {e}")
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ def get_files(path):
files = list(p.glob('**/*.kt'))
return files

def get_imports_for_file(path):
def get_deps_for_file(path):
with open(path, 'r') as f:
content = f.read()
matches = re.findall('import\s+(.*?)(?:;|\n)', content, re.DOTALL)
Expand Down
6 changes: 3 additions & 3 deletions augur/tasks/git/dependency_tasks/dependency_util/rust_deps.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@ def get_files(path):
files = list(p.glob('**/*.rs'))
return files

def get_deps_for_file(path):
    """Return the crate/module paths pulled in by `use` statements in a Rust file.

    Reads the file at *path* and extracts the first path segment of every
    `use` declaration (the part before any `as` alias or glob).
    """
    with open(path, 'r') as source_file:
        source_text = source_file.read()
    use_pattern = r'use\s+([\w:]+)(\s+as\s+([\w:]+))?(\s*\*\s*)?(;|\n)'
    # findall yields one tuple per match; group 0 is the imported path itself.
    return [groups[0] for groups in re.findall(use_pattern, source_text)]
16 changes: 15 additions & 1 deletion augur/tasks/github/issues/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from sqlalchemy.exc import IntegrityError

from augur.tasks.github.util.github_api_key_handler import GithubApiKeyHandler

from augur.tasks.init.celery_app import celery_app as celery
from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask
Expand All @@ -29,16 +30,29 @@ def collect_issues(repo_git : str) -> int:

augur_db = manifest.augur_db

logger.info(f'this is the manifest.key_auth value: {str(manifest.key_auth)}')

try:

query = augur_db.session.query(Repo).filter(Repo.repo_git == repo_git)
repo_obj = execute_session_query(query, 'one')
repo_id = repo_obj.repo_id

#try this
# the_key = manifest.key_auth
# try:
# randomon = GithubApiKeyHandler(augur_db.session)
# the_key = randomon.get_random_key()
# logger.info(f'The Random Key {the_key}')
# except Exception as e:
# logger.info(f'error: {e}')
# the_key = manifest.key_auth
# pass

owner, repo = get_owner_repo(repo_git)

issue_data = retrieve_all_issue_data(repo_git, logger, manifest.key_auth)

#issue_data = retrieve_all_issue_data(repo_git, logger, the_key)

if issue_data:
total_issues = len(issue_data)
Expand Down
8 changes: 4 additions & 4 deletions augur/tasks/github/repo_info/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,10 +150,10 @@ def repo_info_model(augur_db, key_auth, repo_orm_obj, logger):
pr_merged: pullRequests(states: MERGED) {
totalCount
}
ref(qualifiedName: "master") {
defaultBranchRef {
target {
... on Commit {
history(first: 0){
history {
totalCount
}
}
Expand Down Expand Up @@ -248,15 +248,15 @@ def repo_info_model(augur_db, key_auth, repo_orm_obj, logger):
'security_audit_file': None,
'status': None,
'keywords': None,
'commit_count': data['ref']['target']['history']['totalCount'] if data['ref'] else None,
'commit_count': data['defaultBranchRef']['target']['history']['totalCount'] if data['defaultBranchRef'] else None,
'issues_count': data['issue_count']['totalCount'] if data['issue_count'] else None,
'issues_closed': data['issues_closed']['totalCount'] if data['issues_closed'] else None,
'pull_request_count': data['pr_count']['totalCount'] if data['pr_count'] else None,
'pull_requests_open': data['pr_open']['totalCount'] if data['pr_open'] else None,
'pull_requests_closed': data['pr_closed']['totalCount'] if data['pr_closed'] else None,
'pull_requests_merged': data['pr_merged']['totalCount'] if data['pr_merged'] else None,
'tool_source': 'Repo_info Model',
'tool_version': '0.42',
'tool_version': '0.50.0',
'data_source': "Github"
}

Expand Down
20 changes: 18 additions & 2 deletions augur/tasks/github/util/github_api_key_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from augur.tasks.util.redis_list import RedisList
from augur.application.db.session import DatabaseSession
from augur.application.config import AugurConfig
from sqlalchemy import func


class NoValidKeysError(Exception):
Expand Down Expand Up @@ -39,7 +40,7 @@ def __init__(self, session: DatabaseSession):

self.keys = self.get_api_keys()

# self.logger.debug(f"Retrieved {len(self.keys)} github api keys for use")
self.logger.info(f"Retrieved {len(self.keys)} github api keys for use")

def get_random_key(self):
"""Retrieves a random key from the list of keys
Expand Down Expand Up @@ -71,9 +72,12 @@ def get_api_keys_from_database(self) -> List[str]:
from augur.application.db.models import WorkerOauth

select = WorkerOauth.access_token
# randomizing the order at db time
#select.order_by(func.random())
where = [WorkerOauth.access_token != self.config_key, WorkerOauth.platform == 'github']

return [key_tuple[0] for key_tuple in self.session.query(select).filter(*where).all()]
return [key_tuple[0] for key_tuple in self.session.query(select).filter(*where).order_by(func.random()).all()]
#return [key_tuple[0] for key_tuple in self.session.query(select).filter(*where).all()]


def get_api_keys(self) -> List[str]:
Expand Down Expand Up @@ -130,6 +134,18 @@ def get_api_keys(self) -> List[str]:
if not valid_keys:
raise NoValidKeysError("No valid github api keys found in the config or worker oauth table")


# shuffling the keys so not all processes get the same keys in the same order
valid_now = valid_keys
#try:
#self.logger.info(f'valid keys before shuffle: {valid_keys}')
#valid_keys = random.sample(valid_keys, len(valid_keys))
#self.logger.info(f'valid keys AFTER shuffle: {valid_keys}')
#except Exception as e:
# self.logger.debug(f'{e}')
# valid_keys = valid_now
# pass

return valid_keys

def is_bad_api_key(self, client: httpx.Client, oauth_key: str) -> bool:
Expand Down
2 changes: 2 additions & 0 deletions augur/tasks/github/util/github_random_key_auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from augur.tasks.util.random_key_auth import RandomKeyAuth
from augur.tasks.github.util.github_api_key_handler import GithubApiKeyHandler
from augur.application.db.session import DatabaseSession
import random


class GithubRandomKeyAuth(RandomKeyAuth):
Expand All @@ -16,6 +17,7 @@ def __init__(self, session: DatabaseSession, logger):

# gets the github api keys from the database via the GithubApiKeyHandler
github_api_keys = GithubApiKeyHandler(session).keys
#github_api_keys = random.sample(github_api_keys, len(github_api_keys))

if not github_api_keys:
print("Failed to find github api keys. This is usually because your key has expired")
Expand Down
6 changes: 3 additions & 3 deletions augur/tasks/start_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,16 +261,16 @@ def augur_collection_monitor():
enabled_phase_names = get_enabled_phase_names_from_config(session.logger, session)

if primary_repo_collect_phase.__name__ in enabled_phase_names:
start_primary_collection(session, max_repo=40)
start_primary_collection(session, max_repo=30)

if secondary_repo_collect_phase.__name__ in enabled_phase_names:
start_secondary_collection(session, max_repo=10)

if facade_phase.__name__ in enabled_phase_names:
start_facade_collection(session, max_repo=30)
start_facade_collection(session, max_repo=20)

if machine_learning_phase.__name__ in enabled_phase_names:
start_ml_collection(session,max_repo=5)
start_ml_collection(session,max_repo=1)

# have a pipe of 180

Expand Down
Loading

0 comments on commit 98299a5

Please sign in to comment.