-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
140 lines (119 loc) · 4.81 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
from minio import Minio
from datetime import datetime, timedelta, time
import logging
import yaml
# Load the runtime settings from config.yml in the current working directory.
# main() reads the MINIO_* keys (URL, credentials, secure flag, region and the
# list of buckets) from the resulting mapping.
# NOTE(review): yaml.load with FullLoader accepts more than plain data types;
# if config.yml only holds plain mappings/lists/scalars, yaml.safe_load would
# be the safer choice -- confirm before switching.
with open('config.yml', 'r') as file:
    config = yaml.load(file, yaml.FullLoader)

# Emit INFO and above as timestamped, level-tagged lines on the root logger.
logging.basicConfig(
    level=logging.INFO,
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s %(levelname)-4s - %(message)s',
)
def _parse_backup_timestamp(object_name):
    """Return the timestamp that prefixes *object_name*, or None.

    Names are expected to look like ``YYYYMMDD-HHMMSS-<rest>``: the first two
    dash-separated fields are parsed with ``%Y%m%d-%H%M%S``. Anything that
    does not match (too few fields, non-date text, folder prefixes) yields
    None instead of raising, so one stray object cannot abort a run.
    """
    fields = object_name.split("-")
    if len(fields) < 2:
        return None
    try:
        return datetime.strptime(f"{fields[0]}-{fields[1]}", "%Y%m%d-%H%M%S")
    except ValueError:
        return None


def _retention_slot(stamp, today):
    """Return the retention slot *stamp* competes for, or None to always keep.

    *today* is midnight of the current day. Backups that map to the same
    slot are redundant with each other; only one of them needs to survive.
    The slot carries the tier name so that backups in different age tiers
    never displace one another.
    """
    # Same day or the day before: keep everything.
    if stamp >= today - timedelta(days=1):
        return None
    # One day to one week old: one backup per calendar day.
    if stamp >= today - timedelta(weeks=1):
        return ("day", stamp.date())
    # One week to four weeks old: one backup per ISO week (Monday-Sunday).
    if stamp >= today - timedelta(weeks=4):
        iso_year, iso_week, _ = stamp.isocalendar()
        return ("week", iso_year, iso_week)
    # Four to 26 weeks old: one backup per calendar month.
    if stamp >= today - timedelta(weeks=26):
        return ("month", stamp.year, stamp.month)
    # 26 to 52 weeks old: one backup per pair of months (Jan-Feb, Mar-Apr, ...).
    if stamp >= today - timedelta(weeks=52):
        return ("two-months", stamp.year, (stamp.month - 1) // 2)
    # Older than 52 weeks: kept for now.
    # TODO: thin to one backup per six months once that policy is confirmed.
    return None


def _select_expired_backups(stamps, today):
    """Return the timestamps from *stamps* that the retention policy discards.

    Timestamps are visited from newest to oldest; the first one seen in each
    retention slot is kept and every later (older) one in that slot is
    reported as expired. The result is ordered newest first.
    """
    claimed_slots = set()
    expired = []
    for stamp in sorted(stamps, reverse=True):
        slot = _retention_slot(stamp, today)
        if slot is None:
            continue
        if slot in claimed_slots:
            expired.append(stamp)
        else:
            claimed_slots.add(slot)
    return expired


def main():
    """Prune old backups from every bucket listed in the configuration.

    For each bucket in ``config["MINIO_BUCKETS"]`` the objects are listed,
    the timestamp is read from each object name, and the backups that the
    retention policy no longer needs are removed:

    * newer than one day before today's midnight: keep all
    * up to one week old: keep the newest backup of each day
    * up to four weeks old: keep the newest backup of each ISO week
    * up to 26 weeks old: keep the newest backup of each month
    * up to 52 weeks old: keep the newest backup of each two-month pair
    * older: keep all (six-month thinning is still a TODO)

    Objects whose names carry no parseable timestamp are left untouched. If
    two objects in a bucket share a timestamp, nothing is deleted from that
    bucket and processing continues with the next one.
    """
    # Create the MinIO client.
    client = Minio(
        config["MINIO_URL"],
        access_key=config["MINIO_ACCESS_KEY"],
        secret_key=config["MINIO_SECRET_KEY"],
        secure=config["MINIO_SECURE"],
        region=config["MINIO_REGION"],
    )

    # Midnight of the current day; every retention cutoff is relative to it.
    today = datetime.combine(datetime.today(), time.min)

    for bucket in config["MINIO_BUCKETS"]:
        logging.info(f"Processing bucket {bucket}...")

        # Index the bucket's backups by the timestamp in their names.
        backups_by_date = {}
        has_duplicates = False
        for obj in client.list_objects(bucket):
            stamp = _parse_backup_timestamp(obj.object_name)
            if stamp is None:
                logging.warning(
                    f"Skipping {obj.object_name}: no backup timestamp in name."
                )
                continue
            if stamp in backups_by_date:
                logging.error(f"Duplicate backup found: {obj.object_name}")
                has_duplicates = True
                continue
            backups_by_date[stamp] = obj

        # Ambiguous contents: refuse to delete anything from this bucket,
        # but do not let it block the remaining buckets.
        if has_duplicates:
            logging.error(
                f"Skipping bucket {bucket}: duplicate backup timestamps."
            )
            continue

        # Delete the backups that don't meet the retention criteria.
        for stamp in _select_expired_backups(backups_by_date, today):
            object_name = backups_by_date[stamp].object_name
            logging.info(f"Deleting {object_name}...")
            client.remove_object(bucket, object_name)

        logging.info(f"Finished processing bucket {bucket}.")
# Run the retention pass only when this file is executed as a script, so that
# importing the module does not trigger main().
if __name__ == "__main__":
    main()