Skip to content

Commit

Permalink
gh-115809: Improve TimedRotatingFileHandler.getFilesToDelete() (GH-11…
Browse files Browse the repository at this point in the history
…5812)

Improve algorithm for computing which rolled-over log files to delete
in logging.TimedRotatingFileHandler. It is now reliable for handlers
without namer and with arbitrary deterministic namer that leaves
the datetime part in the file name unmodified.
  • Loading branch information
serhiy-storchaka authored Mar 3, 2024
1 parent 002a594 commit 87faec2
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 36 deletions.
51 changes: 24 additions & 27 deletions Lib/logging/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,19 +232,19 @@ def __init__(self, filename, when='h', interval=1, backupCount=0,
if self.when == 'S':
self.interval = 1 # one second
self.suffix = "%Y-%m-%d_%H-%M-%S"
self.extMatch = r"^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}(\.\w+)?$"
extMatch = r"(?<!\d)\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}(?!\d)"
elif self.when == 'M':
self.interval = 60 # one minute
self.suffix = "%Y-%m-%d_%H-%M"
self.extMatch = r"^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}(\.\w+)?$"
extMatch = r"(?<!\d)\d{4}-\d{2}-\d{2}_\d{2}-\d{2}(?!\d)"
elif self.when == 'H':
self.interval = 60 * 60 # one hour
self.suffix = "%Y-%m-%d_%H"
self.extMatch = r"^\d{4}-\d{2}-\d{2}_\d{2}(\.\w+)?$"
extMatch = r"(?<!\d)\d{4}-\d{2}-\d{2}_\d{2}(?!\d)"
elif self.when == 'D' or self.when == 'MIDNIGHT':
self.interval = 60 * 60 * 24 # one day
self.suffix = "%Y-%m-%d"
self.extMatch = r"^\d{4}-\d{2}-\d{2}(\.\w+)?$"
extMatch = r"(?<!\d)\d{4}-\d{2}-\d{2}(?!\d)"
elif self.when.startswith('W'):
self.interval = 60 * 60 * 24 * 7 # one week
if len(self.when) != 2:
Expand All @@ -253,11 +253,17 @@ def __init__(self, filename, when='h', interval=1, backupCount=0,
raise ValueError("Invalid day specified for weekly rollover: %s" % self.when)
self.dayOfWeek = int(self.when[1])
self.suffix = "%Y-%m-%d"
self.extMatch = r"^\d{4}-\d{2}-\d{2}(\.\w+)?$"
extMatch = r"(?<!\d)\d{4}-\d{2}-\d{2}(?!\d)"
else:
raise ValueError("Invalid rollover interval specified: %s" % self.when)

self.extMatch = re.compile(self.extMatch, re.ASCII)
# extMatch is a pattern for matching a datetime suffix in a file name.
# After custom naming, it is no longer guaranteed to be separated by
# periods from other parts of the filename. The lookup statements
# (?<!\d) and (?!\d) ensure that the datetime suffix (which itself
# starts and ends with digits) is not preceded or followed by digits.
# This reduces the number of false matches and improves performance.
self.extMatch = re.compile(extMatch, re.ASCII)
self.interval = self.interval * interval # multiply by units requested
# The following line added because the filename passed in could be a
# path object (see Issue #27493), but self.baseFilename will be a string
Expand Down Expand Up @@ -376,31 +382,22 @@ def getFilesToDelete(self):
for fileName in fileNames:
if fileName[:plen] == prefix:
suffix = fileName[plen:]
if self.extMatch.match(suffix):
if self.extMatch.fullmatch(suffix):
result.append(os.path.join(dirName, fileName))
else:
# See bpo-44753: Don't use the extension when computing the prefix.
n, e = os.path.splitext(baseName)
prefix = n + '.'
plen = len(prefix)
for fileName in fileNames:
# Our files could be just about anything after custom naming, but
# likely candidates are of the form
# foo.log.DATETIME_SUFFIX or foo.DATETIME_SUFFIX.log
if (not fileName.startswith(baseName) and fileName.endswith(e) and
len(fileName) > (plen + 1) and not fileName[plen+1].isdigit()):
continue
# Our files could be just about anything after custom naming,
# but they should contain the datetime suffix.
# Try to find the datetime suffix in the file name and verify
# that the file name can be generated by this handler.
m = self.extMatch.search(fileName)
while m:
dfn = self.namer(self.baseFilename + "." + m[0])
if os.path.basename(dfn) == fileName:
result.append(os.path.join(dirName, fileName))
break
m = self.extMatch.search(fileName, m.start() + 1)

if fileName[:plen] == prefix:
suffix = fileName[plen:]
# See bpo-45628: The date/time suffix could be anywhere in the
# filename

parts = suffix.split('.')
for part in parts:
if self.extMatch.match(part):
result.append(os.path.join(dirName, fileName))
break
if len(result) < self.backupCount:
result = []
else:
Expand Down
37 changes: 28 additions & 9 deletions Lib/test/test_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -6260,7 +6260,7 @@ def test_compute_files_to_delete(self):
for i in range(10):
times.append(dt.strftime('%Y-%m-%d_%H-%M-%S'))
dt += datetime.timedelta(seconds=5)
prefixes = ('a.b', 'a.b.c', 'd.e', 'd.e.f')
prefixes = ('a.b', 'a.b.c', 'd.e', 'd.e.f', 'g')
files = []
rotators = []
for prefix in prefixes:
Expand All @@ -6273,10 +6273,22 @@ def test_compute_files_to_delete(self):
if prefix.startswith('a.b'):
for t in times:
files.append('%s.log.%s' % (prefix, t))
else:
rotator.namer = lambda name: name.replace('.log', '') + '.log'
elif prefix.startswith('d.e'):
def namer(filename):
dirname, basename = os.path.split(filename)
basename = basename.replace('.log', '') + '.log'
return os.path.join(dirname, basename)
rotator.namer = namer
for t in times:
files.append('%s.%s.log' % (prefix, t))
elif prefix == 'g':
def namer(filename):
dirname, basename = os.path.split(filename)
basename = 'g' + basename[6:] + '.oldlog'
return os.path.join(dirname, basename)
rotator.namer = namer
for t in times:
files.append('g%s.oldlog' % t)
# Create empty files
for fn in files:
p = os.path.join(wd, fn)
Expand All @@ -6286,18 +6298,23 @@ def test_compute_files_to_delete(self):
for i, prefix in enumerate(prefixes):
rotator = rotators[i]
candidates = rotator.getFilesToDelete()
self.assertEqual(len(candidates), 3)
self.assertEqual(len(candidates), 3, candidates)
if prefix.startswith('a.b'):
p = '%s.log.' % prefix
for c in candidates:
d, fn = os.path.split(c)
self.assertTrue(fn.startswith(p))
else:
elif prefix.startswith('d.e'):
for c in candidates:
d, fn = os.path.split(c)
self.assertTrue(fn.endswith('.log'))
self.assertTrue(fn.endswith('.log'), fn)
self.assertTrue(fn.startswith(prefix + '.') and
fn[len(prefix) + 2].isdigit())
elif prefix == 'g':
for c in candidates:
d, fn = os.path.split(c)
self.assertTrue(fn.endswith('.oldlog'))
self.assertTrue(fn.startswith('g') and fn[1].isdigit())

def test_compute_files_to_delete_same_filename_different_extensions(self):
# See GH-93205 for background
Expand All @@ -6321,6 +6338,8 @@ def test_compute_files_to_delete_same_filename_different_extensions(self):
rotators.append(rotator)
for t in times:
files.append('%s.%s' % (prefix, t))
for t in times:
files.append('a.log.%s.c' % t)
# Create empty files
for f in files:
(wd / f).touch()
Expand All @@ -6329,11 +6348,11 @@ def test_compute_files_to_delete_same_filename_different_extensions(self):
backupCount = i+1
rotator = rotators[i]
candidates = rotator.getFilesToDelete()
self.assertEqual(len(candidates), n_files - backupCount)
matcher = re.compile(r"^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}(\.\w+)?$")
self.assertEqual(len(candidates), n_files - backupCount, candidates)
matcher = re.compile(r"^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}\Z")
for c in candidates:
d, fn = os.path.split(c)
self.assertTrue(fn.startswith(prefix))
self.assertTrue(fn.startswith(prefix+'.'))
suffix = fn[(len(prefix)+1):]
self.assertRegex(suffix, matcher)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Improve algorithm for computing which rolled-over log files to delete in
:class:`logging.TimedRotatingFileHandler`. It is now reliable for handlers
without ``namer`` and with arbitrary deterministic ``namer`` that leaves the
datetime part in the file name unmodified.

0 comments on commit 87faec2

Please sign in to comment.