From adf2f1fa0cf1b81ac15b8fe0c037f6907df07369 Mon Sep 17 00:00:00 2001 From: Luis Date: Thu, 22 May 2025 10:46:06 -0400 Subject: [PATCH 1/7] fix(scoring): flush empty list of results and breakdown items SQLAlchemy order of execution together with missing configuration was causing duplicates in score_definition_results and score_definition_results_breakdown tables. Apparently, SQLAlchemy needs a little help with the order of DELETE-INSERT statements in the same transaction. --- testgen/commands/run_refresh_score_cards_results.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/testgen/commands/run_refresh_score_cards_results.py b/testgen/commands/run_refresh_score_cards_results.py index cd0fa099..abd63f2c 100644 --- a/testgen/commands/run_refresh_score_cards_results.py +++ b/testgen/commands/run_refresh_score_cards_results.py @@ -2,7 +2,7 @@ import logging import time -from testgen.common.models import with_database_session +from testgen.common.models import get_current_session, with_database_session from testgen.common.models.scores import ( SCORE_CATEGORIES, ScoreCard, @@ -36,6 +36,7 @@ def run_refresh_score_cards_results( LOG.exception("CurrentStep: Stopping scorecards results refresh after unexpected error") return + db_session = get_current_session() for definition in definitions: LOG.info( "CurrentStep: Refreshing results for scorecard %s in project %s", @@ -45,6 +46,10 @@ def run_refresh_score_cards_results( try: fresh_score_card = definition.as_score_card() + definition.results = [] + definition.breakdown = [] + db_session.flush([definition]) + definition.results = _score_card_to_results(fresh_score_card) definition.breakdown = _score_definition_to_results_breakdown(definition) if add_history_entry: From 40b1e6cc417c4e934ff92dcceca3b0b0415c1426 Mon Sep 17 00:00:00 2001 From: Luis Date: Thu, 22 May 2025 10:49:33 -0400 Subject: [PATCH 2/7] fix(scoring): remove deleted entries when refreshing history --- testgen/common/models/scores.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/testgen/common/models/scores.py b/testgen/common/models/scores.py index bf0039bf..e09a29a0 100644 --- a/testgen/common/models/scores.py +++ b/testgen/common/models/scores.py @@ -361,17 +361,22 @@ def recalculate_scores_history(self) -> None: ) overall_scores = get_current_session().execute(query).mappings().all() current_history: dict[tuple[datetime, str, str], ScoreDefinitionResultHistoryEntry] = {} - for entry in self.history: - current_history[(entry.last_run_time, entry.category,)] = entry - renewed_history: dict[tuple[datetime, str, str], float] = {} + for scores in overall_scores: renewed_history[(scores["last_run_time"], "score",)] = scores["score"] renewed_history[(scores["last_run_time"], "cde_score",)] = scores["cde_score"] + for entry in self.history: + entry_key = (entry.last_run_time, entry.category,) + if entry_key in renewed_history: + current_history[entry_key] = entry + for key, entry in current_history.items(): entry.score = renewed_history[key] + self.history = list(current_history.values()) + def _get_raw_query_filters(self, cde_only: bool = False, prefix: str | None = None) -> list[str]: values_by_field = defaultdict(list) for filter_ in self.filters: From 4d4c819ab4625019679ec6fc0d673d54f41cb24b Mon Sep 17 00:00:00 2001 From: Luis Date: Thu, 22 May 2025 10:49:59 -0400 Subject: [PATCH 3/7] fix(scoring): default to `datetime.min` when comparing datetimes use `datetime.min` instead of `0` when comparing run time for the latest profiling and test runs. --- testgen/ui/views/score_explorer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/testgen/ui/views/score_explorer.py b/testgen/ui/views/score_explorer.py index d88f2461..bbc0400b 100644 --- a/testgen/ui/views/score_explorer.py +++ b/testgen/ui/views/score_explorer.py @@ -1,3 +1,4 @@ +from datetime import datetime from io import BytesIO from typing import ClassVar @@ -246,7 +247,7 @@ def save_score_definition(_) -> None: latest_run = max( profiling_queries.get_latest_run_date(project_code), test_run_queries.get_latest_run_date(project_code), - key=lambda run: getattr(run, "run_time", 0), + key=lambda run: getattr(run, "run_time", datetime.min), ) refresh_kwargs = { From 8aa423670990ebdd73104d386c9232cabc3c3d26 Mon Sep 17 00:00:00 2001 From: Luis Date: Thu, 22 May 2025 10:51:55 -0400 Subject: [PATCH 4/7] misc: rotate log files at midnight --- testgen/common/logs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testgen/common/logs.py b/testgen/common/logs.py index 396fd3db..6c42b353 100644 --- a/testgen/common/logs.py +++ b/testgen/common/logs.py @@ -33,7 +33,7 @@ def configure_logging( file_handler = ConcurrentTimedRotatingFileHandler( get_log_full_path(), - when="D", + when="MIDNIGHT", interval=1, backupCount=int(settings.LOG_FILE_MAX_QTY), ) From f33ee4cde78b4d27e25854bb43e6194c0f84e7fe Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Thu, 22 May 2025 13:34:18 -0400 Subject: [PATCH 5/7] fix(test-schedules): incorrect project parameter passed to cli --- testgen/template/dbupgrade/0137_incremental_upgrade.sql | 5 +++++ testgen/ui/views/test_runs.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 testgen/template/dbupgrade/0137_incremental_upgrade.sql diff --git a/testgen/template/dbupgrade/0137_incremental_upgrade.sql b/testgen/template/dbupgrade/0137_incremental_upgrade.sql new file mode 100644 index 00000000..efa3d78a --- /dev/null +++ b/testgen/template/dbupgrade/0137_incremental_upgrade.sql @@ -0,0 +1,5 @@ +SET SEARCH_PATH TO {SCHEMA_NAME}; + +UPDATE job_schedules + SET kwargs = kwargs - 'project_code' || jsonb_build_object('project_key', kwargs->'project_code') +WHERE key = 'run-tests'; \ No newline at end of file diff --git a/testgen/ui/views/test_runs.py b/testgen/ui/views/test_runs.py index ec6c96cc..6806fe37 100644 --- a/testgen/ui/views/test_runs.py +++ b/testgen/ui/views/test_runs.py @@ -133,7 +133,7 @@ def arg_value_input(self) -> tuple[bool, list[typing.Any], dict[str, typing.Any] display_column="test_suite", required=True, ) - return bool(ts_name), [], {"project_code": self.project_code, "test_suite_key": ts_name} + return bool(ts_name), [], {"project_key": self.project_code, "test_suite_key": ts_name} def render_empty_state(project_code: str, user_can_run: bool) -> bool: From 0d1ec36022a1f86d1828976a380924d2435e228d Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Thu, 22 May 2025 15:28:38 -0400 Subject: [PATCH 6/7] fix(scheduler): cannot cancel scheduled runs --- testgen/common/process_service.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/testgen/common/process_service.py b/testgen/common/process_service.py index 3f720b34..b37460ee 100644 --- a/testgen/common/process_service.py +++ b/testgen/common/process_service.py @@ -13,13 +13,13 @@ def get_current_process_id(): def kill_profile_run(process_id): - keywords = ["run-profile"] + keywords = ["/dk/bin/testgen", "run-profile"] status, message = kill_process(process_id, keywords) return status, message def kill_test_run(process_id): - keywords = ["run-tests"] + keywords = ["/dk/bin/testgen", "run-tests"] status, message = kill_process(process_id, keywords) return status, message @@ -31,7 +31,7 @@ def kill_process(process_id, keywords=None): return False, msg try: process = psutil.Process(process_id) - if process.name().lower() != "testgen": + if process.name().lower() not in ["testgen", "python3"]: message = f"The process was not killed because the process_id {process_id} is not a testgen process. Details: {process.name()}" LOG.error(f"kill_process: {message}") return False, message From 090459e7a6ad8fb4985923dd83be14748ee88564 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Thu, 22 May 2025 20:56:12 -0400 Subject: [PATCH 7/7] release: 4.0.9 -> 4.0.12 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 066d31db..dded5064 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ build-backend = "setuptools.build_meta" [project] name = "dataops-testgen" -version = "4.0.9" +version = "4.0.12" description = "DataKitchen's Data Quality DataOps TestGen" authors = [ { "name" = "DataKitchen, Inc.", "email" = "info@datakitchen.io" },