-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Retention-based Partition Dropping (#44)
* Add command: drop, to calculate partition drops based on retention periods * Deduplicate methods that moved into database_helpers * Add database helper tests * Add dropper tests * More test cleanups * Update to PyLint 2.17.7 to fix Python 3.11 * More tests * pylint needs pytest * Add an assertion for correct ordering of partitions
- Loading branch information
Showing
11 changed files
with
677 additions
and
58 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
""" | ||
Helper functions for database operations | ||
""" | ||
|
||
from datetime import datetime, timezone | ||
import logging | ||
|
||
import partitionmanager.table_append_partition as pm_tap | ||
import partitionmanager.types | ||
|
||
|
||
def get_position_of_table(database, table, map_data):
    """Build a Position from the table's current range-column values.

    The current positions are looked up through
    table_append_partition.get_current_positions and then arranged in the
    order given by map_data["range_cols"].
    """
    current_by_column = pm_tap.get_current_positions(
        database, table, map_data["range_cols"]
    )

    position = partitionmanager.types.Position()

    # Keep the values in range_cols order, whatever order the lookup used.
    ordered_values = []
    for column in map_data["range_cols"]:
        ordered_values.append(current_by_column[column])
    position.set_position(ordered_values)

    return position
|
||
|
||
def calculate_exact_timestamp_via_query(database, table, position_partition):
    """Calculate the exact UTC timestamp of a PositionPartition.

    Runs the table's earliest_utc_timestamp_query with the partition's single
    position value substituted as its argument, then converts the one value
    returned from a Unix timestamp into a timezone-aware (UTC) datetime.

    Raises ValueError if the table has no date query, if position_partition
    is not a PositionPartition, or if its position spans more than one column.
    Raises NoExactTimeException if the query does not produce exactly one row
    holding exactly one non-NULL column.
    """

    log = logging.getLogger(f"calculate_exact_timestamp_via_query:{table.name}")

    if not table.has_date_query:
        raise ValueError("Table has no defined date query")

    if not isinstance(position_partition, partitionmanager.types.PositionPartition):
        raise ValueError("Only PositionPartitions are supported")

    if len(position_partition.position) != 1:
        raise ValueError(
            "This method is only valid for single-column partitions right now"
        )
    arg = position_partition.position.as_sql_input()[0]

    sql_select_cmd = table.earliest_utc_timestamp_query.get_statement_with_argument(arg)
    log.debug(
        "Executing %s to derive partition %s at position %s",
        sql_select_cmd,
        position_partition.name,
        position_partition.position,
    )

    # Wall-clock timing, only used for the debug message below.
    start = datetime.now()
    exact_time_result = database.run(sql_select_cmd)
    end = datetime.now()

    if len(exact_time_result) != 1:
        raise partitionmanager.types.NoExactTimeException("No exact timestamp result")

    row = exact_time_result[0]
    if len(row) != 1:
        raise partitionmanager.types.NoExactTimeException(
            "Unexpected column count for the timestamp result"
        )

    # The column name depends on the configured query text, so take the sole
    # value rather than looking it up by key.
    (value,) = row.values()
    if value is None:
        # A NULL timestamp would otherwise surface as an opaque TypeError from
        # datetime.fromtimestamp; report it like the other "no exact time" cases.
        raise partitionmanager.types.NoExactTimeException(
            "Timestamp query returned NULL"
        )
    exact_time = datetime.fromtimestamp(value, tz=timezone.utc)

    log.debug(
        "Exact time of %s returned for %s at position %s, query took %s",
        exact_time,
        position_partition.name,
        position_partition.position,
        (end - start),
    )
    return exact_time
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
import unittest | ||
|
||
from .database_helpers import get_position_of_table, calculate_exact_timestamp_via_query | ||
|
||
from .types import ( | ||
DatabaseCommand, | ||
NoExactTimeException, | ||
PositionPartition, | ||
SqlInput, | ||
SqlQuery, | ||
Table, | ||
) | ||
|
||
|
||
class MockDatabase(DatabaseCommand):
    """DatabaseCommand stand-in for tests that replays queued responses.

    Responses are handed out in the order they were added. Each one is
    returned only when its expected text occurs within the command being run.
    """

    def __init__(self):
        # Newest entries sit at the front, so pop() yields the oldest one.
        self._responses = []
        self.num_queries = 0

    def add_response(self, expected, response):
        """Queue a response to return for a command containing expected."""
        entry = {"expected": expected, "response": response}
        self._responses.insert(0, entry)

    def run(self, cmd):
        """Count the query and return the next queued response for cmd.

        Raises Exception when nothing is queued, or when the next queued
        entry's expected text is not found in cmd (that entry is still
        consumed).
        """
        self.num_queries += 1
        if not self._responses:
            raise Exception(f"No mock responses available for cmd [{cmd}]")

        queued = self._responses.pop()
        wanted = queued["expected"]
        if wanted not in cmd:
            raise Exception(f"Received command [{cmd}] and expected [{wanted}]")

        return queued["response"]

    def db_name(self):
        """Return the fixed database name used by the tests."""
        return SqlInput("the-database")
|
||
|
||
class TestDatabaseHelpers(unittest.TestCase):
    """Tests for get_position_of_table and calculate_exact_timestamp_via_query."""

    def test_position_of_table(self):
        """The position reflects the value returned by the current-position query."""
        db = MockDatabase()
        db.add_response("SELECT id FROM `burgers` ORDER BY", [{"id": 90210}])

        table = Table("burgers")
        data = {"range_cols": ["id"]}

        pos = get_position_of_table(db, table, data)
        self.assertEqual(pos.as_list(), [90210])

    def test_exact_timestamp_no_query(self):
        """A table without a date query is rejected with ValueError."""
        db = MockDatabase()
        db.add_response("SELECT id FROM `burgers` ORDER BY", [{"id": 42}])

        table = Table("burgers")
        self.assertFalse(table.has_date_query)

        pos = PositionPartition("p_start")
        pos.set_position([42])

        with self.assertRaises(ValueError):
            calculate_exact_timestamp_via_query(db, table, pos)

        # The rejection happens before any statement is sent to the database.
        self.assertEqual(db.num_queries, 0)

    def test_exact_timestamp(self):
        """A single-row, single-column result is converted to a UTC datetime."""
        db = MockDatabase()
        db.add_response(
            "SELECT UNIX_TIMESTAMP(`cooked`)", [{"UNIX_TIMESTAMP": 17541339060}]
        )

        table = Table("burgers")
        table.set_earliest_utc_timestamp_query(
            SqlQuery(
                "SELECT UNIX_TIMESTAMP(`cooked`) FROM `orders` "
                "WHERE `type` = \"burger\" AND `id` > '?' ORDER BY `id` ASC LIMIT 1;"
            )
        )

        pos = PositionPartition("p_start")
        pos.set_position([150])

        ts = calculate_exact_timestamp_via_query(db, table, pos)
        # Use the unittest assertion (not a bare assert) so the check is never
        # stripped under -O and failures report both values.
        self.assertEqual(f"{ts}", "2525-11-11 18:11:00+00:00")

    def test_no_exact_timestamp(self):
        """Multi-row and multi-column results raise NoExactTimeException."""
        db = MockDatabase()
        # More than one row returned.
        db.add_response(
            "SELECT UNIX_TIMESTAMP(`cooked`)",
            [{"UNIX_TIMESTAMP": 17541339060}, {"UNIX_TIMESTAMP": 17541339070}],
        )

        table = Table("burgers")
        table.set_earliest_utc_timestamp_query(
            SqlQuery(
                "SELECT UNIX_TIMESTAMP(`cooked`) FROM `orders` "
                "WHERE `type` = \"burger\" AND `id` > '?' ORDER BY `id` ASC LIMIT 1;"
            )
        )

        pos = PositionPartition("p_start")
        pos.set_position([150])

        with self.assertRaises(NoExactTimeException):
            calculate_exact_timestamp_via_query(db, table, pos)

        # One row, but with more than one column.
        db.add_response(
            "SELECT UNIX_TIMESTAMP(`cooked`)",
            [{"UNIX_TIMESTAMP": 17541339060, "column2": True}],
        )

        with self.assertRaises(NoExactTimeException):
            calculate_exact_timestamp_via_query(db, table, pos)
Oops, something went wrong.