Skip to content

Commit

Permalink
update backup scheduler for Prresults DB
Browse files Browse the repository at this point in the history
  • Loading branch information
dignityc committed Oct 31, 2024
1 parent 14396e1 commit 42f4689
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 3 deletions.
4 changes: 4 additions & 0 deletions common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,10 @@ def analyze_pagepile_processing():
print("No processing records found for Pagepile QIDs")
return None

# Data extraction for time complexity analysis
def time_complexity_analysis():
    """Placeholder for the time complexity data extraction.

    Not implemented yet: the function takes no arguments, performs no
    work, and returns None.
    """
    return None

if __name__ == "__main__":
processed_date = "2024-09-06"
#pagePile_results_extraction(processed_date)
Expand Down
33 changes: 31 additions & 2 deletions eventHandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,31 @@ def update_prior_item_list():

print("prior_item_list.csv has been successfully updated.")

def backup_database(source_db='reference_checked.db',
                    backup_dir='/hpc/scratch/prj/inf_wqp/prove_backup'):
    """
    Back up the SQLite database file into a date-prefixed copy.

    The backup is written to ``<backup_dir>/<YYYYMMDD>_<source file name>``
    (``YYYYMMDD_reference_checked.db`` with the default arguments). A backup
    taken on the same day replaces the earlier one.

    The copy is made with SQLite's online backup API instead of a raw file
    copy, so the snapshot is consistent even if the database is being
    written to (or is in WAL mode) while the backup runs. The data is first
    written to a temporary file and then moved into place, so a failed
    backup never leaves a half-written file under the final name.

    Args:
        source_db: Path of the SQLite database to back up.
        backup_dir: Directory that receives the backup; created if missing.

    Returns:
        The path of the backup file on success, or None if the backup
        failed (the error is printed, never raised, so a scheduled job
        cannot break the caller's loop).
    """
    # Function-scope imports (the original did the same for shutil) so this
    # block does not require new module-level imports.
    import contextlib
    import sqlite3

    temp_path = None
    try:
        # sqlite3.connect() would silently create an empty database for a
        # missing path, so reject a missing source explicitly.
        if not os.path.isfile(source_db):
            raise FileNotFoundError(f"source database not found: {source_db}")

        # Create backup directory if it doesn't exist
        os.makedirs(backup_dir, exist_ok=True)

        # Generate backup filename with date prefix
        date_prefix = datetime.datetime.now().strftime('%Y%m%d')
        backup_filename = f"{date_prefix}_{os.path.basename(source_db)}"
        backup_path = os.path.join(backup_dir, backup_filename)

        # Start from a clean temporary file; a stale one may be left over
        # from an interrupted run.
        temp_path = f"{backup_path}.tmp"
        if os.path.exists(temp_path):
            os.remove(temp_path)

        # Take a consistent snapshot through the SQLite backup API
        with contextlib.closing(sqlite3.connect(source_db)) as src_conn, \
                contextlib.closing(sqlite3.connect(temp_path)) as dst_conn:
            src_conn.backup(dst_conn)

        # Atomically publish the finished backup under its final name
        os.replace(temp_path, backup_path)
        print(f"Database backup created successfully at {backup_path}")
        return backup_path

    except Exception as e:
        print(f"Error during database backup: {e}")
        # Best-effort cleanup of a partially written temporary file
        if temp_path is not None and os.path.exists(temp_path):
            with contextlib.suppress(OSError):
                os.remove(temp_path)
        return None

def main(batch_qids):
reset_database = False # Developer mode to test, it initialize db for getting clean db
Expand All @@ -303,15 +328,19 @@ def main(batch_qids):
print(f"Database file {db_path} has been deleted.")

initialize_database(db_path)

# Schedule both tasks for Monday
schedule.every().monday.do(update_prior_item_list)
schedule.every().monday.do(backup_database)

while True:
try:
prove_process(db_path, batch_qids, algo_version)
schedule.run_pending()
except Exception as e:
print(f"An error occurred in the main loop: {e}")
time.sleep(30)
time.sleep(30)


if __name__ == "__main__":
batch_qids = 2
Expand Down
2 changes: 1 addition & 1 deletion html_fetching.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ def reading_html_by_requests(self, url: str) -> None:

def reading_html_by_chrome(self, driver, url: str) -> None:
try:
response = requests.get(url, timeout=5)
response = requests.get(url, timeout=15)

if response.status_code == 200:
driver.get(url)
Expand Down

0 comments on commit 42f4689

Please sign in to comment.