From a9d8ac2077beab9b7d713e2bde29cf156fcb9128 Mon Sep 17 00:00:00 2001 From: Ricardo Boni Date: Mon, 24 Mar 2025 23:33:26 -0400 Subject: [PATCH 01/34] feat(scheduler): Adding scheduling capabilities Allowing the users to schedule when to run profiling, re-generate tests and run tests --- deploy/testgen.dockerfile | 2 +- pyproject.toml | 3 +- testgen/__main__.py | 83 +++++--- testgen/common/logs.py | 49 +---- testgen/common/models/__init__.py | 1 + testgen/common/models/scheduler.py | 37 ++++ testgen/scheduler/__init__.py | 4 + testgen/scheduler/base.py | 127 ++++++++++++ testgen/scheduler/cli_scheduler.py | 140 +++++++++++++ testgen/settings.py | 4 + .../030_initialize_new_schema_structure.sql | 10 + .../dbupgrade/0135_incremental_upgrade.sql | 11 + testgen/ui/app.py | 6 +- testgen/ui/components/frontend/js/main.js | 2 + .../frontend/js/pages/schedule_list.js | 110 ++++++++++ .../frontend/js/pages/test_suites.js | 12 +- testgen/ui/components/widgets/__init__.py | 1 + .../components/widgets/testgen_component.py | 3 +- testgen/ui/components/widgets/tz_select.py | 9 + testgen/ui/views/dialogs/manage_schedules.py | 126 ++++++++++++ testgen/ui/views/profiling_runs.py | 33 +++ testgen/ui/views/test_runs.py | 33 +++ testgen/ui/views/test_suites.py | 52 +++++ tests/unit/test_scheduler_base.py | 134 ++++++++++++ tests/unit/test_scheduler_cli.py | 191 ++++++++++++++++++ 25 files changed, 1105 insertions(+), 78 deletions(-) create mode 100644 testgen/common/models/scheduler.py create mode 100644 testgen/scheduler/__init__.py create mode 100644 testgen/scheduler/base.py create mode 100644 testgen/scheduler/cli_scheduler.py create mode 100644 testgen/template/dbupgrade/0135_incremental_upgrade.sql create mode 100644 testgen/ui/components/frontend/js/pages/schedule_list.js create mode 100644 testgen/ui/components/widgets/tz_select.py create mode 100644 testgen/ui/views/dialogs/manage_schedules.py create mode 100644 tests/unit/test_scheduler_base.py create mode 100644 tests/unit/test_scheduler_cli.py diff --git a/deploy/testgen.dockerfile b/deploy/testgen.dockerfile index a3f1039b..5b72978c 100644 --- a/deploy/testgen.dockerfile +++ b/deploy/testgen.dockerfile @@ -32,4 +32,4 @@ USER testgen WORKDIR /dk ENTRYPOINT ["testgen"] -CMD [ "ui", "run" ] +CMD [ "run-app" ] diff --git a/pyproject.toml b/pyproject.toml index 43c18c60..8222841d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,6 +62,7 @@ dependencies = [ "reportlab==4.2.2", "pydantic==1.10.13", "streamlit-pydantic==0.6.0", + "cron-converter==1.2.1", # Pinned to match the manually compiled libs or for security "pyarrow==18.1.0", @@ -245,7 +246,7 @@ ignore = ["TRY003", "S608", "S404", "F841", "B023"] "__init__.py" = ["F403"] "testgen/__main__.py" = ["ARG001", "S603"] "tasks.py" = ["F403"] -"tests*" = ["S101", "T201"] +"tests*" = ["S101", "T201", "ARG001"] "invocations/**" = ["ARG001", "T201"] "testgen/common/encrypt.py" = ["S413"] "testgen/ui/pdf/dk_logo.py" = ["T201"] diff --git a/testgen/__main__.py b/testgen/__main__.py index 2fbbe845..b9815f9c 100644 --- a/testgen/__main__.py +++ b/testgen/__main__.py @@ -1,8 +1,8 @@ import logging import os +import signal import subprocess import sys -import typing from dataclasses import dataclass, field import click @@ -40,14 +40,16 @@ get_tg_db, get_tg_host, get_tg_schema, - logs, version_service, ) +from testgen.scheduler import register_scheduler_job, run_scheduler from testgen.ui.queries import profiling_run_queries, test_run_queries from testgen.utils import plugins LOG = logging.getLogger("testgen") 
+APP_MODULES = ["ui", "scheduler"] + @dataclass class Configuration: @@ -58,8 +60,17 @@ class Configuration: pass_configuration = click.make_pass_decorator(Configuration) +class CliGroup(click.Group): + def invoke(self, ctx: Context): + try: + super().invoke(ctx) + except Exception: + LOG.exception("There was an unexpected error") + + @tui() @click.group( + cls=CliGroup, help=f"This version: {settings.VERSION} \n\nLatest version: {version_service.get_latest_version()} \n\nSchema revision: {get_schema_revision()}" ) @click.option( @@ -93,6 +104,7 @@ def cli(ctx: Context, verbose: bool): LOG.debug("Current Step: Main Program") +@register_scheduler_job @cli.command("run-profile", help="Generates a new profile of the table group.") @pass_configuration @click.option( @@ -112,6 +124,7 @@ def run_profile(configuration: Configuration, table_group_id: str): click.echo("\n" + message) +@register_scheduler_job @cli.command("run-test-generation", help="Generates or refreshes the tests for a table group.") @click.option( "-tg", @@ -143,6 +156,7 @@ def run_test_generation(configuration: Configuration, table_group_id: str, test_ click.echo("\n" + message) +@register_scheduler_job @cli.command("run-tests", help="Performs tests defined for a test suite.") @click.option( "-pk", @@ -590,19 +604,19 @@ def list_table_groups(configuration: Configuration, project_key: str, display: b def ui(): ... -@ui.command("run", help="Run the browser application with default settings") -@click.option("-d", "--debug", is_flag=True, default=False) -def run(debug: bool): +@ui.command("plugins", help="List installed application plugins") +def list_ui_plugins(): + installed_plugins = list(plugins.discover()) + + click.echo(click.style(len(installed_plugins), fg="bright_magenta") + click.style(" plugins installed", bold=True)) + for plugin in installed_plugins: + click.echo(click.style(" + ", fg="bright_green") + f"{plugin.package: <30}" + f"\tversion: {plugin.version}") + + +def run_ui(): from testgen.ui.scripts import patch_streamlit - configure_logging( - level=logging.INFO, - log_format="%(message)s", - ) status_code: int = -1 - logger = logging.getLogger("testgen") - stderr: typing.TextIO = typing.cast(typing.TextIO, logs.LogPipe(logger, logging.INFO)) - stdout: typing.TextIO = typing.cast(typing.TextIO, logs.LogPipe(logger, logging.INFO)) use_ssl = os.path.isfile(settings.SSL_CERT_FILE) and os.path.isfile(settings.SSL_KEY_FILE) @@ -624,28 +638,43 @@ def run(debug: bool): f"--server.sslCertFile={settings.SSL_CERT_FILE}" if use_ssl else "", f"--server.sslKeyFile={settings.SSL_KEY_FILE}" if use_ssl else "", "--", - f"{'--debug' if debug else ''}", + f"{'--debug' if settings.IS_DEBUG else ''}", ], - stdout=stdout, - stderr=stderr, ) except Exception: LOG.exception(f"Testgen UI exited with status code {status_code}") - raise - finally: - if stderr: - stderr.close() - if stdout: - stdout.close() -@ui.command("plugins", help="List installed application plugins") -def list_ui_plugins(): - installed_plugins = list(plugins.discover()) +@cli.command("run-app", help="Runs TestGen's application modules") +@click.argument( + "module", + type=click.Choice(["all", *APP_MODULES]), + default="all", +) +def run_app(module): - click.echo(click.style(len(installed_plugins), fg="bright_magenta") + click.style(" plugins installed", bold=True)) - for plugin in installed_plugins: - click.echo(click.style(" + ", fg="bright_green") + f"{plugin.package: <30}" + f"\tversion: {plugin.version}") + match module: + case "ui": + run_ui() + + case 
"scheduler": + run_scheduler() + + case "all": + children = [ + subprocess.Popen([sys.executable, sys.argv[0], "run-app", m], start_new_session=True) + for m in APP_MODULES + ] + + def term_children(signum, _): + for child in children: + child.send_signal(signum) + + signal.signal(signal.SIGINT, term_children) + signal.signal(signal.SIGTERM, term_children) + + for child in children: + child.wait() if __name__ == "__main__": diff --git a/testgen/common/logs.py b/testgen/common/logs.py index ff4511be..396fd3db 100644 --- a/testgen/common/logs.py +++ b/testgen/common/logs.py @@ -1,11 +1,9 @@ __all__ = ["configure_logging"] -import io import logging import logging.handlers import os import sys -import threading from concurrent_log_handler import ConcurrentTimedRotatingFileHandler @@ -13,8 +11,8 @@ def configure_logging( - level: int = logging.DEBUG, - log_format: str = "[PID: %(process)s] %(asctime)s - %(levelname)s - %(message)s", + level: int = logging.INFO, + log_format: str = "[PID: %(process)s] %(asctime)s %(levelname)+7s %(message)s", ) -> None: """ Configures the testgen logger. @@ -22,22 +20,13 @@ def configure_logging( logger = logging.getLogger("testgen") logger.setLevel(level) - if not any(isinstance(handler, logging.StreamHandler) for handler in logger.handlers): - formatter = logging.Formatter(log_format) - - console_out_handler = logging.StreamHandler(stream=sys.stdout) - if settings.IS_DEBUG: - console_out_handler.setLevel(level) - else: - console_out_handler.setLevel(logging.WARNING) - console_out_handler.setFormatter(formatter) + if not logger.hasHandlers(): - console_err_handler = logging.StreamHandler(stream=sys.stderr) - console_err_handler.setLevel(logging.WARNING) - console_err_handler.setFormatter(formatter) + formatter = logging.Formatter(log_format) - logger.addHandler(console_out_handler) - logger.addHandler(console_err_handler) + console_handler = logging.StreamHandler(stream=sys.stdout) + console_handler.setFormatter(formatter) + logger.addHandler(console_handler) if settings.LOG_TO_FILE: os.makedirs(settings.LOG_FILE_PATH, exist_ok=True) @@ -48,33 +37,9 @@ def configure_logging( interval=1, backupCount=int(settings.LOG_FILE_MAX_QTY), ) - file_handler.setLevel(level) file_handler.setFormatter(formatter) - logger.addHandler(file_handler) def get_log_full_path() -> str: return os.path.join(settings.LOG_FILE_PATH, "app.log") - - -class LogPipe(threading.Thread, io.TextIOBase): - def __init__(self, logger: logging.Logger, log_level: int) -> None: - threading.Thread.__init__(self) - - self.daemon = False - self.logger = logger - self.level = log_level - self.readDescriptor, self.writeDescriptor = os.pipe() - self.start() - - def run(self) -> None: - with os.fdopen(self.readDescriptor) as reader: - for line in iter(reader.readline, ""): - self.logger.log(self.level, line.strip("\n")) - - def fileno(self) -> int: - return self.writeDescriptor - - def close(self) -> None: - os.close(self.writeDescriptor) diff --git a/testgen/common/models/__init__.py b/testgen/common/models/__init__.py index 26b9b505..3734b11b 100644 --- a/testgen/common/models/__init__.py +++ b/testgen/common/models/__init__.py @@ -23,6 +23,7 @@ ) Base = declarative_base() + Session = sessionmaker( engine, expire_on_commit=False, diff --git a/testgen/common/models/scheduler.py b/testgen/common/models/scheduler.py new file mode 100644 index 00000000..b2cdf61e --- /dev/null +++ b/testgen/common/models/scheduler.py @@ -0,0 +1,37 @@ +import uuid +from collections.abc import Iterable +from datetime import 
datetime +from typing import Any, Self + +from cron_converter import Cron +from sqlalchemy import JSON, Column, String, select +from sqlalchemy.dialects.postgresql import UUID +from sqlalchemy.orm import InstrumentedAttribute + +from testgen.common.models import Base, get_current_session + + +class JobSchedule(Base): + __tablename__ = "job_schedules" + + id: UUID = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + project_code: str = Column(String) + + key: str = Column(String, nullable=False) + args: list[Any] = Column(JSON, nullable=False, default=[]) + kwargs: dict[str, Any] = Column(JSON, nullable=False, default={}) + cron_expr: str = Column(String, nullable=False) + cron_tz: str = Column(String, nullable=False) + + @classmethod + def select_where(cls, *clauses, order_by: str | InstrumentedAttribute | None = None) -> Iterable[Self]: + query = select(cls).where(*clauses).order_by(order_by) + return get_current_session().execute(query) + + def get_sample_triggering_timestamps(self, n=3) -> list[datetime]: + schedule = Cron(cron_string=self.cron_expr).schedule(timezone_str=self.cron_tz) + return [schedule.next() for _ in range(n)] + + @property + def cron_tz_str(self) -> str: + return self.cron_tz.replace("_", " ") diff --git a/testgen/scheduler/__init__.py b/testgen/scheduler/__init__.py new file mode 100644 index 00000000..ef9c0d46 --- /dev/null +++ b/testgen/scheduler/__init__.py @@ -0,0 +1,4 @@ +__all__ = ["register_scheduler_job", "run_scheduler"] + + +from .cli_scheduler import register_scheduler_job, run_scheduler diff --git a/testgen/scheduler/base.py b/testgen/scheduler/base.py new file mode 100644 index 00000000..74ac4f76 --- /dev/null +++ b/testgen/scheduler/base.py @@ -0,0 +1,127 @@ +import logging +import threading +from collections.abc import Iterable +from dataclasses import dataclass +from datetime import UTC, datetime, timedelta +from enum import Enum, auto +from zoneinfo import ZoneInfo + +from cron_converter import Cron + +MAX_WORKERS = 3 + +LOG = logging.getLogger("testgen") + +class DelayedPolicy(Enum): + SKIP = auto() + ONCE = auto() + ALL = auto() + + +@dataclass +class Job: + cron_expr: str + cron_tz: str + delayed_policy: DelayedPolicy + + def get_triggering_times(self, base_time: datetime): + cron = Cron(self.cron_expr) + scheduler = cron.schedule(base_time.astimezone(ZoneInfo(self.cron_tz))) + while True: + yield scheduler.next() + + +class Scheduler: + + def __init__(self): + self.base_time = None + self._stopping = threading.Event() + self._reload_event = threading.Event() + self.thread: threading.Thread | None = None + + def get_jobs(self) -> Iterable[Job]: + raise NotImplementedError + + def start_job(self, job: Job, triggering_time: datetime) -> None: + raise NotImplementedError + + def reload_jobs(self): + self._reload_event.set() + + def start(self, base_time: datetime): + self.base_time = base_time + + if self.thread: + raise RuntimeError("The scheduler can be started only once") + self.thread = threading.Thread(target=self._run) + self.thread.start() + + def shutdown(self): + self._stopping.set() + self._reload_event.set() + + def wait(self, timeout: float | None = None): + self.thread.join(timeout=timeout) + + def _get_now(self): + return datetime.now(UTC) + + def _get_next_jobs(self): + + job_list_head = [] + for job in self.get_jobs(): + gen = job.get_triggering_times(self.base_time) + job_list_head.append((next(gen), gen, job)) + self._reload_event.clear() + + while job_list_head and not self._stopping.is_set(): + + 
job_list_head.sort(key=lambda t: t[0]) + jobs = [] + now = self._get_now() + + while True: + triggering_time, gen, job = job_list_head.pop(0) + + next_trigger_time = next(gen) + while job.delayed_policy in (DelayedPolicy.SKIP, DelayedPolicy.ONCE) and next_trigger_time < now: + next_trigger_time = next(gen) + job_list_head.append((next_trigger_time, gen, job)) + + if triggering_time >= now or job.delayed_policy in (DelayedPolicy.ALL, DelayedPolicy.ONCE): + jobs.append(job) + + if triggering_time < job_list_head[0][0]: + break + + if jobs: + yield triggering_time, jobs + + def _wait_until(self, triggering_time: datetime): + timeout = (triggering_time - datetime.now(UTC)).total_seconds() + if timeout > 0: + if self._reload_event.wait(timeout): + return False + else: + return True + else: + return True + + def _run(self): + while not self._stopping.is_set(): + next_jobs = self._get_next_jobs() + + while True: + try: + triggering_time, jobs = next(next_jobs) + except StopIteration: + self._reload_event.wait() + break + + if self._wait_until(triggering_time): + LOG.info("%d jobs to start at %s", len(jobs), triggering_time) + for job in jobs: + self.start_job(job, triggering_time) + self.base_time = triggering_time + timedelta(seconds=1) + else: + break diff --git a/testgen/scheduler/cli_scheduler.py b/testgen/scheduler/cli_scheduler.py new file mode 100644 index 00000000..9ebc7e78 --- /dev/null +++ b/testgen/scheduler/cli_scheduler.py @@ -0,0 +1,140 @@ +import logging +import signal +import subprocess +import sys +import threading +from collections.abc import Iterable +from dataclasses import dataclass +from datetime import UTC, datetime +from itertools import chain +from typing import Any + +from click import Command + +from testgen.common.models import with_database_session +from testgen.common.models.scheduler import JobSchedule +from testgen.scheduler.base import DelayedPolicy, Job, Scheduler + +LOG = logging.getLogger("testgen") + +JOB_REGISTRY: dict[str, Command] = {} + +@dataclass +class CliJob(Job): + key: str + args: Iterable[Any] + kwargs: dict[str, Any] + + +class CliScheduler(Scheduler): + + def __init__(self): + self._running_jobs: set[subprocess.Popen] = set() + self._running_jobs_cond = threading.Condition() + self.reload_timer = None + LOG.info("Starting CLI Scheduler with registered jobs: %s", ", ".join(JOB_REGISTRY.keys())) + super().__init__() + + @with_database_session + def get_jobs(self) -> Iterable[CliJob]: + + # Scheduling the next reload to the next 50th second of a minute + self.reload_timer = threading.Timer((110 - datetime.now().second) % 60 or 60, self.reload_jobs) + self.reload_timer.start() + + job_list = [] + for (job_model,) in JobSchedule.select_where(): + if job_model.key not in JOB_REGISTRY: + LOG.error("Job '%s' scheduled but not registered", job_model.key) + continue + + job_list.append( + CliJob( + cron_expr=job_model.cron_expr, + cron_tz=job_model.cron_tz, + delayed_policy=DelayedPolicy.SKIP, + key=job_model.key, + args=job_model.args, + kwargs=job_model.kwargs + ) + ) + + LOG.info("Loaded %s schedules", len(job_list)) + + return job_list + + def start_job(self, job: CliJob, triggering_time: datetime) -> None: + cmd = JOB_REGISTRY[job.key] + + LOG.info("Starting job '%s' due '%s'", job.key, triggering_time) + + exec_cmd = [ + sys.executable, + sys.argv[0], + cmd.name, + *map(str, job.args), + *chain(*chain((opt.opts[0], str(job.kwargs[opt.name])) for opt in cmd.params if opt.name in job.kwargs)), + ] + + LOG.info("Executing %s"," ".join(exec_cmd)) + + proc 
= subprocess.Popen(exec_cmd, start_new_session=True) # noqa: S603 + threading.Thread(target=self._proc_wrapper, args=(proc,)).start() + + def _proc_wrapper(self, proc: subprocess.Popen): + with self._running_jobs_cond: + self._running_jobs.add(proc) + try: + ret_code = proc.wait() + LOG.info("Job PID %d ended with code %d", proc.pid, ret_code) + except Exception: + LOG.exception("Error running job PID %d", proc.pid) + finally: + with self._running_jobs_cond: + self._running_jobs.remove(proc) + self._running_jobs_cond.notify() + + def run(self): + interrupted = threading.Event() + + def sig_handler(signum, _): + sig = signal.Signals(signum) + if interrupted.is_set(): + LOG.info("Received signal %s, propagating to %d running jobs", sig.name, len(self._running_jobs)) + for job in self._running_jobs: + job.send_signal(signum) + else: + LOG.info("Received signal %s for the fist time, starting the shutdown process.", sig.name) + interrupted.set() + + signal.signal(signal.SIGINT, sig_handler) + signal.signal(signal.SIGTERM, sig_handler) + + try: + self.start(datetime.now(UTC)) + interrupted.wait() + if self.reload_timer: + self.reload_timer.cancel() + self.shutdown() + self.wait() + finally: + LOG.info("The scheduler has been shut down. No new jobs will be started.") + + with self._running_jobs_cond: + if self._running_jobs: + LOG.info("Waiting %d running jobs to complete", len(self._running_jobs)) + self._running_jobs_cond.wait_for(lambda: len(self._running_jobs) == 0) + + LOG.info("All jobs terminated") + + +def run_scheduler(): + scheduler = CliScheduler() + scheduler.run() + +def register_scheduler_job(cmd: Command): + if cmd.name in JOB_REGISTRY: + raise ValueError(f"A job with the '{cmd.name}' key is already registered.") + + JOB_REGISTRY[cmd.name] = cmd + return cmd diff --git a/testgen/settings.py b/testgen/settings.py index 5b79d9e4..4ccd8be2 100644 --- a/testgen/settings.py +++ b/testgen/settings.py @@ -484,3 +484,7 @@ """ Disables sending usage data when set to any value except "true" and "yes". 
Defaults to "yes" """ + +SCHEDULER_ADDRESS: str = os.getenv("TG_SCHEDULER_ADDRESS", "localhost:8510") + +SCHEDULER_AUTH_KEY: str = os.getenv("TG_SCHEDULER_AUTH_KEY", "scheduler_key") diff --git a/testgen/template/dbsetup/030_initialize_new_schema_structure.sql b/testgen/template/dbsetup/030_initialize_new_schema_structure.sql index f44b4276..84982a89 100644 --- a/testgen/template/dbsetup/030_initialize_new_schema_structure.sql +++ b/testgen/template/dbsetup/030_initialize_new_schema_structure.sql @@ -862,5 +862,15 @@ CREATE INDEX shlast_runs_pro_run CREATE INDEX shlast_runs_tst_run ON score_history_latest_runs(last_test_run_id); +CREATE TABLE job_schedules ( + id UUID NOT NULL PRIMARY KEY, + project_code VARCHAR(30) NOT NULL, + key VARCHAR(100) NOT NULL, + args JSON NOT NULL, + kwargs JSON NOT NULL, + cron_expr VARCHAR(50) NOT NULL, + cron_tz VARCHAR(30) NOT NULL +); + INSERT INTO tg_revision (component, revision) VALUES ('metadata_db', 0); diff --git a/testgen/template/dbupgrade/0135_incremental_upgrade.sql b/testgen/template/dbupgrade/0135_incremental_upgrade.sql new file mode 100644 index 00000000..71f8ddc3 --- /dev/null +++ b/testgen/template/dbupgrade/0135_incremental_upgrade.sql @@ -0,0 +1,11 @@ +SET SEARCH_PATH TO {SCHEMA_NAME}; + +CREATE TABLE job_schedules ( + id UUID NOT NULL PRIMARY KEY, + project_code VARCHAR(30) NOT NULL, + key VARCHAR(100) NOT NULL, + args JSON NOT NULL, + kwargs JSON NOT NULL, + cron_expr VARCHAR(50) NOT NULL, + cron_tz VARCHAR(30) NOT NULL +) diff --git a/testgen/ui/app.py b/testgen/ui/app.py index 8dd99e55..6cc88b39 100644 --- a/testgen/ui/app.py +++ b/testgen/ui/app.py @@ -1,5 +1,4 @@ import logging -import sys import streamlit as st @@ -72,7 +71,4 @@ def set_locale(): if __name__ == "__main__": - log_level = logging.INFO - if settings.IS_DEBUG_LOG_LEVEL or "--debug" in sys.argv: - log_level = logging.DEBUG - render(log_level=log_level) + render(log_level=logging.DEBUG if settings.IS_DEBUG_LOG_LEVEL else logging.INFO) diff --git a/testgen/ui/components/frontend/js/main.js b/testgen/ui/components/frontend/js/main.js index 61c1d3a3..e01a2cda 100644 --- a/testgen/ui/components/frontend/js/main.js +++ b/testgen/ui/components/frontend/js/main.js @@ -24,6 +24,7 @@ import { QualityDashboard } from './pages/quality_dashboard.js'; import { ScoreDetails } from './pages/score_details.js'; import { ScoreExplorer } from './pages/score_explorer.js'; import { ColumnProfilingResults } from './data_profiling/column_profiling_results.js'; +import { ScheduleList } from './pages/schedule_list.js'; let currentWindowVan = van; let topWindowVan = window.top.van; @@ -47,6 +48,7 @@ const TestGenComponent = (/** @type {string} */ id, /** @type {object} */ props) quality_dashboard: QualityDashboard, score_details: ScoreDetails, score_explorer: ScoreExplorer, + schedule_list: ScheduleList, }; if (Object.keys(componentById).includes(id)) { diff --git a/testgen/ui/components/frontend/js/pages/schedule_list.js b/testgen/ui/components/frontend/js/pages/schedule_list.js new file mode 100644 index 00000000..bfdab632 --- /dev/null +++ b/testgen/ui/components/frontend/js/pages/schedule_list.js @@ -0,0 +1,110 @@ +/** + * @typedef Schedule + * @type {object} + * @property {string} argValue + * @property {string} cronExpr + * @property {string} cronTz + * @property {string[]} sample + * + * @typedef Properties + * @type {object} + * @property {Schedule[]} items + * @property {Permissions} permissions + * @property {string} argLabel + */ +import van from '../van.min.js'; +import { Button } from 
'../components/button.js'; +import { Streamlit } from '../streamlit.js'; +import { emitEvent, getValue, resizeFrameHeightToElement } from '../utils.js'; +import { withTooltip } from '../components/tooltip.js'; + + +const { div, span, i, rawHTML } = van.tags; + +const ScheduleList = (/** @type Properties */ props) => { + window.testgen.isPage = false; + + const scheduleItems = van.derive(() => { + let items = []; + try { + items = JSON.parse(getValue(props.items)); + } catch (e) { + console.log(e) + } + Streamlit.setFrameHeight(100 * items.length); + return items; + }); + const columns = ['40%', '50%', '10%']; + + const tableId = 'profiling-schedules-table'; + resizeFrameHeightToElement(tableId); + + return div( + { class: 'table', id: tableId }, + div( + { class: 'table-header flex-row' }, + span( + { style: `flex: ${columns[0]}` }, + getValue(props.argLabel), + ), + span( + { style: `flex: ${columns[1]}` }, + 'Cron Expression | Timezone', + ), + span( + { style: `flex: ${columns[2]}` }, + 'Actions', + ), + ), + () => div( + scheduleItems.val.map(item => ScheduleListItem(item, columns)), + ), + ); +} + +const ScheduleListItem = ( + /** @type Schedule */ item, + /** @type string[] */ columns, +) => { + return div( + { class: 'table-row flex-row' }, + div( + { style: `flex: ${columns[0]}` }, + div(item.argValue), + ), + div( + { class: 'flex-row', style: `flex: ${columns[1]}` }, + div( + div( + { style: 'font-family: \'Roboto Mono\', monospace; font-size: 12px' }, + item.cronExpr, + withTooltip( + i( + { + class: 'material-symbols-rounded text-secondary ml-1', + style: 'position: relative; font-size: 16px;', + }, + 'info', + ), + { text: [div("Sample triggering timestamps:"), ...item.sample?.map(v => div(v))] }, + ), + ), + div( + { class: 'text-caption mt-1' }, + item.cronTz, + ), + ), + ), + div( + { style: `flex: ${columns[2]}` }, + Button({ + type: 'stroked', + label: 'Delete', + style: 'width: auto; height: 32px;', + onclick: () => emitEvent('DeleteSchedule', { payload: item }), + }), + ), + ); +} + +export { ScheduleList }; diff --git a/testgen/ui/components/frontend/js/pages/test_suites.js b/testgen/ui/components/frontend/js/pages/test_suites.js index 9e62ef67..ff0bc5da 100644 --- a/testgen/ui/components/frontend/js/pages/test_suites.js +++ b/testgen/ui/components/frontend/js/pages/test_suites.js @@ -84,13 +84,23 @@ const TestSuites = (/** @type Properties */ props) => { style: 'font-size: 14px;', onChange: (value) => emitEvent('FilterApplied', {payload: value}), }), + userCanEdit + ? Button({ + icon: 'today', + type: 'stroked', + label: 'Test Generation Schedules', + width: 'fit-content', + style: 'margin-left: auto; background: var(--dk-card-background);', + onclick: () => emitEvent('SchedulesClicked', {}), + }) + : '', userCanEdit ? 
Button({ icon: 'add', type: 'stroked', label: 'Add Test Suite', width: 'fit-content', - style: 'margin-left: auto; background: var(--dk-card-background);', + style: 'margin-left: 10px; background: var(--dk-card-background);', onclick: () => emitEvent('AddTestSuiteClicked', {}), }) : '', diff --git a/testgen/ui/components/widgets/__init__.py b/testgen/ui/components/widgets/__init__.py index d6c3fdc2..fcda10a0 100644 --- a/testgen/ui/components/widgets/__init__.py +++ b/testgen/ui/components/widgets/__init__.py @@ -26,4 +26,5 @@ from testgen.ui.components.widgets.sorting_selector import sorting_selector from testgen.ui.components.widgets.summary_bar import summary_bar from testgen.ui.components.widgets.testgen_component import testgen_component +from testgen.ui.components.widgets.tz_select import tz_select from testgen.ui.components.widgets.wizard import WizardStep, wizard diff --git a/testgen/ui/components/widgets/testgen_component.py b/testgen/ui/components/widgets/testgen_component.py index 74e939fc..ae692c14 100644 --- a/testgen/ui/components/widgets/testgen_component.py +++ b/testgen/ui/components/widgets/testgen_component.py @@ -17,6 +17,7 @@ "test_suites", "quality_dashboard", "score_details", + "schedule_list", ] @@ -52,7 +53,7 @@ def on_change(): if event_id != session.testgen_event_id: session.testgen_event_id = event_id handler(event_data.get("payload")) - + event_data = component( id_=component_id, key=key, diff --git a/testgen/ui/components/widgets/tz_select.py b/testgen/ui/components/widgets/tz_select.py new file mode 100644 index 00000000..74293d1d --- /dev/null +++ b/testgen/ui/components/widgets/tz_select.py @@ -0,0 +1,9 @@ +import zoneinfo + +import streamlit as st + + +def tz_select(*, default="America/New_York", **kwargs): + tz_options = sorted(zoneinfo.available_timezones()) + index = tz_options.index(st.session_state.get("browser_timezone", default)) + return st.selectbox(options=tz_options, index=index, format_func=lambda v: v.replace("_", " "), **kwargs) diff --git a/testgen/ui/views/dialogs/manage_schedules.py b/testgen/ui/views/dialogs/manage_schedules.py new file mode 100644 index 00000000..2acfedde --- /dev/null +++ b/testgen/ui/views/dialogs/manage_schedules.py @@ -0,0 +1,126 @@ +import json +import zoneinfo +from datetime import datetime +from typing import Any +from uuid import UUID + +import cron_converter +import streamlit as st + +from testgen.common.models import Session +from testgen.common.models.scheduler import JobSchedule +from testgen.ui.components import widgets as testgen +from testgen.ui.components.widgets import tz_select +from testgen.ui.services import user_session_service + + +class ScheduleDialog: + + title: str = "" + arg_label: str = "" + job_key: str = "" + + def init(self, project_code: str) -> None: + raise NotImplementedError + + def get_arg_value(self, job): + raise NotImplementedError + + def arg_value_input(self) -> tuple[bool, list[Any], dict[str, Any]]: + raise NotImplementedError + + def open(self, project_code: str) -> None: + return st.dialog(title=self.title)(self._open)(project_code) + + def _open(self, project_code: str) -> None: + + self.init(project_code) + + with Session() as db_session: + + scheduled_jobs = ( + db_session.query(JobSchedule) + .where(JobSchedule.project_code == project_code, JobSchedule.key == self.job_key) + ) + scheduled_jobs_json = [] + for job in scheduled_jobs: + job_json = { + "id": str(job.id), + "argValue": self.get_arg_value(job), + "cronExpr": job.cron_expr, + "cronTz": job.cron_tz_str, + 
"sample": [ + sample.strftime("%a %b %-d %Y, %-I:%M %p %z") + for sample in job.get_sample_triggering_timestamps(2) + ], + } + scheduled_jobs_json.append(job_json) + + def on_delete_sched(item): + with Session() as db_session: + try: + sched, = db_session.query(JobSchedule).where(JobSchedule.id == UUID(item["id"])) + db_session.delete(sched) + except ValueError: + db_session.rollback() + else: + db_session.commit() + st.rerun(scope="fragment") + + testgen.testgen_component( + "schedule_list", + props={ + "items": json.dumps(scheduled_jobs_json), + "argLabel": self.arg_label, + "readOnly": not user_session_service.user_can_edit(), + }, + event_handlers={"DeleteSchedule": on_delete_sched} + ) + + with st.expander("Add schedule", expanded=len(scheduled_jobs_json) < 6): + + arg_column, expr_column, tz_column, button_column = st.columns([.3, .4, .3, .1], vertical_alignment="bottom") + + with arg_column: + args_valid, args, kwargs = self.arg_value_input() + + with expr_column: + cron_expr = st.text_input(label="Cron Expression") + + with tz_column: + cron_tz = tz_select(label="Timezone") + + is_cron_valid = False + if cron_expr: + try: + cron_schedule = cron_converter.Cron(cron_expr).schedule(datetime.now(zoneinfo.ZoneInfo(cron_tz))) + sample = [cron_schedule.next().strftime("%a %b %-d %Y, %-I:%M %p %z") for _ in range(2)] + except ValueError as e: + st.warning(str(e), icon=":material/warning:") + except Exception as e: + st.error("Error validating the Cron expression") + else: + st.success(f"**Next Runs:** {' — '.join(sample)}", icon=":material/check:") + is_cron_valid = True + + with button_column: + add_button = st.button( + "Add", + use_container_width=True, + disabled=not args_valid or not is_cron_valid, + ) + + if add_button: + with Session() as db_session: + sched_model = JobSchedule( + project_code=project_code, + key=self.job_key, + cron_expr=cron_expr, + cron_tz=cron_tz, + args=args, + kwargs=kwargs, + ) + db_session.add(sched_model) + db_session.commit() + + st.rerun(scope="fragment") diff --git a/testgen/ui/views/profiling_runs.py b/testgen/ui/views/profiling_runs.py index 14504afe..e26428ef 100644 --- a/testgen/ui/views/profiling_runs.py +++ b/testgen/ui/views/profiling_runs.py @@ -15,6 +15,7 @@ from testgen.ui.queries import profiling_run_queries, project_queries from testgen.ui.services import user_session_service from testgen.ui.session import session +from testgen.ui.views.dialogs.manage_schedules import ScheduleDialog from testgen.ui.views.dialogs.run_profiling_dialog import run_profiling_dialog from testgen.utils import friendly_score, to_int @@ -66,6 +67,11 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N testgen.flex_row_end() if user_can_run: + st.button( + ":material/today: Profiling Schedules", + help="Manages when profiling should run for a given table group", + on_click=partial(ProfilingScheduleDialog().open, project_code) + ) st.button( ":material/play_arrow: Run Profiling", help="Run profiling for a table group", @@ -96,6 +102,33 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N ) +class ProfilingScheduleDialog(ScheduleDialog): + + title = "Manage Profiling Schedules" + arg_label = "Table Group" + job_key = "run-profile" + table_groups: pd.DataFrame | None = None + + def init(self, project_code: str) -> None: + self.table_groups = get_db_table_group_choices(project_code) + + def get_arg_value(self, job): + return self.table_groups.loc[ + self.table_groups["id"] == job.kwargs["table_group_id"], 
"table_groups_name" + ].iloc[0] + + def arg_value_input(self) -> tuple[bool, list[typing.Any], dict[str, typing.Any]]: + tg_id = testgen.select( + label="Table Group", + options=self.table_groups, + value_column="id", + display_column="table_groups_name", + required=True, + ) + return bool(tg_id), [], {"table_group_id": tg_id} + + + def render_empty_state(project_code: str, user_can_run: bool) -> bool: project_summary_df = project_queries.get_summary_by_code(project_code) if project_summary_df["profiling_runs_ct"]: diff --git a/testgen/ui/views/test_runs.py b/testgen/ui/views/test_runs.py index 765e46b6..b82680c9 100644 --- a/testgen/ui/views/test_runs.py +++ b/testgen/ui/views/test_runs.py @@ -15,6 +15,7 @@ from testgen.ui.queries import project_queries, test_run_queries from testgen.ui.services import user_session_service from testgen.ui.session import session +from testgen.ui.views.dialogs.manage_schedules import ScheduleDialog from testgen.ui.views.dialogs.run_tests_dialog import run_tests_dialog from testgen.utils import friendly_score, to_int @@ -76,6 +77,12 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N testgen.flex_row_end(actions_column) if user_can_run: + st.button( + ":material/today: Test Run Schedules", + help="Manages when a test suite should run.", + on_click=partial(TestRunScheduleDialog().open, project_code) + ) + st.button( ":material/play_arrow: Run Tests", help="Run tests for a test suite", @@ -105,6 +112,32 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N ) +class TestRunScheduleDialog(ScheduleDialog): + + title = "Manage Test Run Schedules" + arg_label = "Test Suite" + job_key = "run-tests" + test_suites: pd.DataFrame | None = None + + def init(self, project_code: str) -> None: + self.test_suites = get_db_test_suite_choices(project_code) + + def get_arg_value(self, job): + return self.test_suites.loc[ + self.test_suites["id"] == job.kwargs["test_suite_id"], "test_suite" + ].iloc[0] + + def arg_value_input(self) -> tuple[bool, list[typing.Any], dict[str, typing.Any]]: + ts_id = testgen.select( + label="Test Suite", + options=self.test_suites, + value_column="id", + display_column="test_suite", + required=True, + ) + return bool(ts_id), [], {"test_suite_id": ts_id} + + def render_empty_state(project_code: str, user_can_run: bool) -> bool: project_summary_df = project_queries.get_summary_by_code(project_code) if project_summary_df["test_runs_ct"]: diff --git a/testgen/ui/views/test_suites.py b/testgen/ui/views/test_suites.py index 9246b1a2..47354e80 100644 --- a/testgen/ui/views/test_suites.py +++ b/testgen/ui/views/test_suites.py @@ -2,8 +2,10 @@ import typing from functools import partial +import pandas as pd import streamlit as st +import testgen.ui.services.database_service as db import testgen.ui.services.form_service as fm import testgen.ui.services.query_service as dq import testgen.ui.services.test_suite_service as test_suite_service @@ -17,6 +19,7 @@ from testgen.ui.services.string_service import empty_if_null from testgen.ui.session import session from testgen.ui.views.dialogs.generate_tests_dialog import generate_tests_dialog +from testgen.ui.views.dialogs.manage_schedules import ScheduleDialog from testgen.ui.views.dialogs.run_tests_dialog import run_tests_dialog from testgen.utils import format_field @@ -100,10 +103,59 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N "DeleteActionClicked": delete_test_suite_dialog, "RunTestsClicked": lambda 
test_suite_id: run_tests_dialog(project_code, test_suite_service.get_by_id(test_suite_id)), "GenerateTestsClicked": lambda test_suite_id: generate_tests_dialog(test_suite_service.get_by_id(test_suite_id)), + "SchedulesClicked": lambda _: TestGenerationScheduleDialog().open(project_code), }, ) +class TestGenerationScheduleDialog(ScheduleDialog): + + title = "Manage Test Generation Schedules" + arg_label = "Test Suite" + job_key = "run-test-generation" + test_suites: pd.DataFrame | None = None + + def init(self, project_code: str) -> None: + self.test_suites = get_db_test_suite_choices(project_code) + + def get_arg_value(self, job): + return self.test_suites.loc[ + ( + (self.test_suites["test_suite"] == job.kwargs["test_suite_key"]) & + (self.test_suites["table_groups_id"] == job.kwargs["table_group_id"]) + ), + "test_suite" + ].iloc[0] + + def arg_value_input(self) -> tuple[bool, list[typing.Any], dict[str, typing.Any]]: + ts_id = testgen.select( + label="Test Suite", + options=self.test_suites, + value_column="id", + display_column="test_suite", + required=True, + ) + ts_row = self.test_suites.loc[self.test_suites["id"] == ts_id].iloc[0] + + return bool(ts_id), [], {"test_suite_key": ts_row["test_suite"], "table_group_id": ts_row["table_groups_id"]} + + + +@st.cache_data(show_spinner=False) +def get_db_test_suite_choices(project_code: str) -> pd.DataFrame: + schema = st.session_state["dbschema"] + + sql = f""" + SELECT id::VARCHAR, test_suite, table_groups_id::VARCHAR + FROM {schema}.test_suites + WHERE project_code = '{project_code}' + ORDER BY test_suite + """ + + return db.retrieve_data(sql) + + + def on_test_suites_filtered(table_group_id: str | None = None) -> None: Router().set_query_params({ "table_group_id": table_group_id }) diff --git a/tests/unit/test_scheduler_base.py b/tests/unit/test_scheduler_base.py new file mode 100644 index 00000000..f78efc20 --- /dev/null +++ b/tests/unit/test_scheduler_base.py @@ -0,0 +1,134 @@ +import time +from contextlib import contextmanager +from datetime import UTC, datetime, timedelta +from itertools import islice +from unittest.mock import Mock, patch + +import pytest + +from testgen.scheduler.base import DelayedPolicy, Job, Scheduler + + +@contextmanager +def assert_finishes_within(**kwargs): + start = datetime.now() + yield + assert datetime.now() < start + timedelta(**kwargs), f"Code block took more than {kwargs!s} to complete" + + +@pytest.fixture +def scheduler_instance() -> Scheduler: + class TestScheduler(Scheduler): + get_jobs = Mock() + start_job = Mock() + + return TestScheduler() + + +@pytest.fixture +def no_wait(scheduler_instance): + mock = Mock(side_effect=lambda _: not scheduler_instance._reload_event.is_set()) + with patch.object(scheduler_instance, "_wait_until", mock): + yield mock + + +@pytest.fixture +def base_time(scheduler_instance): + dt = datetime(2025, 4, 15, 9, 0, 0, tzinfo=UTC) + with patch.object(scheduler_instance, "base_time", dt): + yield dt + + +@pytest.fixture +def now_5_min_ahead(scheduler_instance, base_time): + now = scheduler_instance.base_time + timedelta(minutes=5) + def now_func(): + return max(scheduler_instance.base_time, now) + with patch.object(scheduler_instance, "_get_now", now_func): + yield now_func + + + +@pytest.mark.unit +@pytest.mark.parametrize( + ("expr", "dpol", "expected_minutes"), + [ + ("* * * * *", DelayedPolicy.ALL, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), + ("* * * * *", DelayedPolicy.ONCE, [0, 5, 6, 7, 8, 9, 10, 11, 12, 13]), + ("* * * * *", DelayedPolicy.SKIP, [5, 6, 7, 8, 9, 10, 11, 12, 13, 
14]), + ]) +def test_delayed_jobs_policies(expr, dpol, expected_minutes, scheduler_instance, base_time, now_5_min_ahead): + scheduler_instance.get_jobs.return_value = [Job(cron_expr=expr, cron_tz="UTC", delayed_policy=dpol)] + triggering_times = [tt for tt, jobs in islice(scheduler_instance._get_next_jobs(), 10)] + expected_triggering_times = [base_time + timedelta(minutes=m) for m in expected_minutes] + assert triggering_times == expected_triggering_times + + +@pytest.mark.unit +def test_jobs_start_in_order(scheduler_instance, base_time): + jobs = { + 3: Job(cron_expr="*/3 * * * *", cron_tz="UTC", delayed_policy=DelayedPolicy.ALL), + 2: Job(cron_expr="*/2 * * * *", cron_tz="UTC", delayed_policy=DelayedPolicy.ALL), + 4: Job(cron_expr="*/4 * * * *", cron_tz="UTC", delayed_policy=DelayedPolicy.ALL), + 5: Job(cron_expr="*/5 * * * *", cron_tz="UTC", delayed_policy=DelayedPolicy.ALL), + } + + scheduler_instance.get_jobs.return_value = list(jobs.values()) + next_jobs = scheduler_instance._get_next_jobs() + + for triggering_time, triggred_jobs in islice(next_jobs, 12): + for divisor, job in jobs.items(): + assert job not in triggred_jobs or triggering_time.minute % divisor == 0 + assert job in triggred_jobs or triggering_time.minute % divisor != 0 + + +@pytest.mark.unit +@pytest.mark.parametrize("with_job", (True, False)) +def test_reloads_and_shutdowns_immediately(with_job, scheduler_instance, base_time): + jobs = [Job(cron_expr="0 0 * * *", cron_tz="UTC", delayed_policy=DelayedPolicy.ALL)] if with_job else [] + scheduler_instance.get_jobs.return_value = jobs + + scheduler_instance.start(base_time) + time.sleep(0.05) + assert scheduler_instance.get_jobs.call_count == 1 + with assert_finishes_within(milliseconds=100): + scheduler_instance.reload_jobs() + time.sleep(0.05) + assert scheduler_instance.get_jobs.call_count == 2 + scheduler_instance.shutdown() + scheduler_instance.wait() + + +@pytest.mark.unit +def test_job_start_is_called(scheduler_instance, base_time, no_wait): + jobs = [ + Job(cron_expr="* * * * *", cron_tz="UTC", delayed_policy=DelayedPolicy.ALL), + Job(cron_expr="*/2 * * * *", cron_tz="UTC", delayed_policy=DelayedPolicy.ALL), + ] + with ( + patch.object(scheduler_instance, "start_job") as start_job_mock, + patch.object( + scheduler_instance, + "get_jobs", + side_effect=lambda: iter(jobs), + ) as get_jobs_mock, + patch.object( + scheduler_instance, + "_get_next_jobs", + side_effect=lambda: islice(Scheduler._get_next_jobs(scheduler_instance), 4), + ) as get_next_mock, + ): + scheduler_instance.start(base_time) + + for multiplier in (1, 2): + while start_job_mock.call_count != 6 * multiplier: + time.sleep(0.01) + + assert get_jobs_mock.call_count == multiplier + assert get_next_mock.call_count == multiplier + + if multiplier == 1: + scheduler_instance.reload_jobs() + + scheduler_instance.shutdown() + scheduler_instance.wait() diff --git a/tests/unit/test_scheduler_cli.py b/tests/unit/test_scheduler_cli.py new file mode 100644 index 00000000..4c4c493e --- /dev/null +++ b/tests/unit/test_scheduler_cli.py @@ -0,0 +1,191 @@ +import signal +import threading +import time +from datetime import UTC, datetime +from unittest.mock import MagicMock, Mock, patch + +import pytest + +from testgen.common.models.scheduler import JobSchedule +from testgen.scheduler.base import DelayedPolicy +from testgen.scheduler.cli_scheduler import CliJob, CliScheduler + + +@pytest.fixture +def scheduler_instance() -> CliScheduler: + with patch("testgen.scheduler.cli_scheduler.threading.Timer"): + yield 
CliScheduler() + + +@pytest.fixture +def popen_barrier(): + yield threading.Barrier(2) + + +@pytest.fixture +def popen_proc_mock(popen_barrier): + mock = MagicMock() + mock.wait.side_effect = popen_barrier.wait + yield mock + + +@pytest.fixture +def popen_mock(popen_proc_mock): + with patch("testgen.scheduler.cli_scheduler.subprocess.Popen", return_value=popen_proc_mock) as mock: + yield mock + + +@pytest.fixture +def db_jobs(scheduler_instance): + with ( + patch("testgen.scheduler.cli_scheduler.JobSchedule.select_where") as mock, + ): + yield mock + + +@pytest.fixture +def cmd_mock(): + opt_mock = Mock() + opt_mock.opts = ["-b"] + opt_mock.name = "b" + + cmd_mock = Mock() + cmd_mock.params = [opt_mock] + cmd_mock.name = "test-job" + return cmd_mock + + +@pytest.fixture +def job_data(cmd_mock): + with patch.dict("testgen.scheduler.cli_scheduler.JOB_REGISTRY", {"test-job": cmd_mock}): + yield { + "cron_expr": "*/5 9-17 * * *", + "cron_tz": "UTC", + "key": "test-job", + "args": ["a"], + "kwargs": {"b": "c"}, + } + + +@pytest.fixture +def job_sched(job_data): + yield JobSchedule(**job_data) + + +@pytest.fixture +def cli_job(job_data): + yield CliJob(**job_data, delayed_policy=DelayedPolicy.SKIP) + + +@pytest.mark.unit +def test_get_jobs(scheduler_instance, db_jobs, job_sched): + db_jobs.return_value = iter([[job_sched]]) + + jobs = list(scheduler_instance.get_jobs()) + + assert len(jobs) == 1 + assert isinstance(jobs[0], CliJob) + for attr in ("cron_expr", "cron_tz", "key", "args", "kwargs"): + assert getattr(jobs[0], attr) == getattr(job_sched, attr), f"Attribute '{attr}' does not match" + + +@pytest.mark.unit +def test_job_start(scheduler_instance, cli_job, cmd_mock, popen_mock, popen_proc_mock): + with patch("testgen.scheduler.cli_scheduler.threading.Thread") as thread_mock: + scheduler_instance.start_job(cli_job, datetime.now(UTC)) + + call_args = popen_mock.call_args[0][0] + assert call_args[2] == cmd_mock.name + assert call_args[3] == cli_job.args[0] + assert call_args[4], call_args[5] == cli_job.kwargs.items()[0] + + thread_mock.assert_called_once_with(target=scheduler_instance._proc_wrapper, args=(popen_proc_mock,)) + + +@pytest.mark.unit +@pytest.mark.parametrize("proc_side_effect", (lambda: None, RuntimeError)) +def test_proc_wrapper(proc_side_effect, scheduler_instance): + with ( + patch.object(scheduler_instance, "_running_jobs_cond") as cond_mock, + patch.object(scheduler_instance, "_running_jobs") as set_mock, + ): + cond_mock.__enter__.return_value = True + proc_mock = Mock() + proc_mock.pid = 555 + proc_mock.wait = Mock(side_effect=proc_side_effect) + + scheduler_instance._proc_wrapper(proc_mock) + + set_mock.add.assert_called_once() + set_mock.remove.assert_called_once() + cond_mock.notify.assert_called_once() + + +@pytest.mark.unit +def test_shutdown_no_jobs(scheduler_instance): + with ( + patch.object(scheduler_instance, "start") as start_mock, + patch.object(scheduler_instance, "shutdown") as shutdown_mock, + patch.object(scheduler_instance, "wait") as wait_mock, + patch("testgen.scheduler.cli_scheduler.signal.signal") as signal_mock, + ): + start_called = threading.Event() + start_mock.side_effect = lambda *_: start_called.set() + + thread = threading.Thread(target=scheduler_instance.run) + thread.start() + + start_called.wait() + sig_hanlder = signal_mock.call_args[0][1] + shutdown_mock.assert_not_called() + + sig_hanlder(15, None) + + thread.join() + + shutdown_mock.assert_called_once() + wait_mock.assert_called_once() + assert not scheduler_instance._running_jobs + + 
+@pytest.mark.unit +@pytest.mark.parametrize("sig", [signal.SIGINT, signal.SIGTERM]) +def test_shutdown(scheduler_instance, sig): + with ( + patch.object(scheduler_instance, "start") as start_mock, + patch.object(scheduler_instance, "shutdown") as shutdown_mock, + patch.object(scheduler_instance, "wait") as wait_mock, + patch("testgen.scheduler.cli_scheduler.signal.signal") as signal_mock, + ): + start_called = threading.Event() + start_mock.side_effect = lambda *_: start_called.set() + + jobs = [MagicMock() for _ in range(5)] + + thread = threading.Thread(target=scheduler_instance.run) + thread.start() + + start_called.wait() + sig_handler = signal_mock.call_args[0][1] + shutdown_mock.assert_not_called() + + for job in jobs: + scheduler_instance._running_jobs.add(job) + + for send_sig_count in range(3): + sig_handler(sig, None) + time.sleep(0.05) + for job in jobs: + assert job.send_signal.call_count == send_sig_count + if send_sig_count: + job.send_signal.assert_called_with(sig) + + scheduler_instance._running_jobs.clear() + with scheduler_instance._running_jobs_cond: + scheduler_instance._running_jobs_cond.notify() + + thread.join() + + shutdown_mock.assert_called_once() + wait_mock.assert_called_once() + assert not scheduler_instance._running_jobs From c2c7143911df49bd68d9b45af7d0f6eb0bebabe9 Mon Sep 17 00:00:00 2001 From: Ricardo Boni Date: Thu, 24 Apr 2025 14:38:54 -0400 Subject: [PATCH 02/34] fix(scheduler): Preventing the scheduler to crash on recoverable failures --- testgen/__main__.py | 2 +- testgen/common/models/scheduler.py | 4 ++++ testgen/scheduler/base.py | 13 +++++++++++-- testgen/scheduler/cli_scheduler.py | 24 ++++++++++++++++++++---- testgen/settings.py | 4 ---- tests/unit/test_scheduler_base.py | 28 ++++++++++++++++++---------- 6 files changed, 54 insertions(+), 21 deletions(-) diff --git a/testgen/__main__.py b/testgen/__main__.py index b9815f9c..7d4cb456 100644 --- a/testgen/__main__.py +++ b/testgen/__main__.py @@ -94,7 +94,7 @@ def cli(ctx: Context, verbose: bool): sys.exit(1) if ( - ctx.invoked_subcommand not in ["ui", "tui", "setup-system-db", "upgrade-system-version", "quick-start"] + ctx.invoked_subcommand not in ["run-app", "scheduler", "ui", "tui", "setup-system-db", "upgrade-system-version", "quick-start"] and not is_db_revision_up_to_date() ): click.secho("The system database schema is outdated. 
Automatically running the following command:", fg="red") diff --git a/testgen/common/models/scheduler.py b/testgen/common/models/scheduler.py index b2cdf61e..854dc3a2 100644 --- a/testgen/common/models/scheduler.py +++ b/testgen/common/models/scheduler.py @@ -28,6 +28,10 @@ def select_where(cls, *clauses, order_by: str | InstrumentedAttribute | None = N query = select(cls).where(*clauses).order_by(order_by) return get_current_session().execute(query) + @classmethod + def count(cls): + return get_current_session().query(cls).count() + def get_sample_triggering_timestamps(self, n=3) -> list[datetime]: schedule = Cron(cron_string=self.cron_expr).schedule(timezone_str=self.cron_tz) return [schedule.next() for _ in range(n)] diff --git a/testgen/scheduler/base.py b/testgen/scheduler/base.py index 74ac4f76..d737dff9 100644 --- a/testgen/scheduler/base.py +++ b/testgen/scheduler/base.py @@ -68,8 +68,14 @@ def _get_now(self): def _get_next_jobs(self): + try: + all_jobs = self.get_jobs() + except Exception as e: + LOG.error("Error obtaining jobs: %r", e) # noqa: TRY400 + return + job_list_head = [] - for job in self.get_jobs(): + for job in all_jobs: gen = job.get_triggering_times(self.base_time) job_list_head.append((next(gen), gen, job)) self._reload_event.clear() @@ -121,7 +127,10 @@ def _run(self): if self._wait_until(triggering_time): LOG.info("%d jobs to start at %s", len(jobs), triggering_time) for job in jobs: - self.start_job(job, triggering_time) + try: + self.start_job(job, triggering_time) + except Exception as e: + LOG.error("Error starting %r: %r", job, e) # noqa: TRY400 self.base_time = triggering_time + timedelta(seconds=1) else: break diff --git a/testgen/scheduler/cli_scheduler.py b/testgen/scheduler/cli_scheduler.py index 9ebc7e78..f0c5e05e 100644 --- a/testgen/scheduler/cli_scheduler.py +++ b/testgen/scheduler/cli_scheduler.py @@ -3,6 +3,7 @@ import subprocess import sys import threading +import time from collections.abc import Iterable from dataclasses import dataclass from datetime import UTC, datetime @@ -59,7 +60,7 @@ def get_jobs(self) -> Iterable[CliJob]: ) ) - LOG.info("Loaded %s schedules", len(job_list)) + LOG.info("Loaded %s schedule(s)", len(job_list)) return job_list @@ -76,7 +77,7 @@ def start_job(self, job: CliJob, triggering_time: datetime) -> None: *chain(*chain((opt.opts[0], str(job.kwargs[opt.name])) for opt in cmd.params if opt.name in job.kwargs)), ] - LOG.info("Executing %s"," ".join(exec_cmd)) + LOG.info("Executing '%s'", " ".join(exec_cmd)) proc = subprocess.Popen(exec_cmd, start_new_session=True) # noqa: S603 threading.Thread(target=self._proc_wrapper, args=(proc,)).start() @@ -100,7 +101,7 @@ def run(self): def sig_handler(signum, _): sig = signal.Signals(signum) if interrupted.is_set(): - LOG.info("Received signal %s, propagating to %d running jobs", sig.name, len(self._running_jobs)) + LOG.info("Received signal %s, propagating to %d running job(s)", sig.name, len(self._running_jobs)) for job in self._running_jobs: job.send_signal(signum) else: @@ -122,16 +123,31 @@ def sig_handler(signum, _): with self._running_jobs_cond: if self._running_jobs: - LOG.info("Waiting %d running jobs to complete", len(self._running_jobs)) + LOG.info("Waiting %d running job(s) to complete", len(self._running_jobs)) self._running_jobs_cond.wait_for(lambda: len(self._running_jobs) == 0) LOG.info("All jobs terminated") +@with_database_session +def check_db_is_ready() -> bool: + try: + count = JobSchedule.count() + except Exception: + LOG.info("Database is not ready yet.") + 
return False + else: + LOG.info("Database is ready. A total of %d schedule(s) were found.", count) + return True + + def run_scheduler(): + while not check_db_is_ready(): + time.sleep(10) scheduler = CliScheduler() scheduler.run() + def register_scheduler_job(cmd: Command): if cmd.name in JOB_REGISTRY: raise ValueError(f"A job with the '{cmd.name}' key is already registered.") diff --git a/testgen/settings.py b/testgen/settings.py index 4ccd8be2..5b79d9e4 100644 --- a/testgen/settings.py +++ b/testgen/settings.py @@ -484,7 +484,3 @@ """ Disables sending usage data when set to any value except "true" and "yes". Defaults to "yes" """ - -SCHEDULER_ADDRESS: str = os.getenv("TG_SCHEDULER_ADDRESS", "localhost:8510") - -SCHEDULER_AUTH_KEY: str = os.getenv("TG_SCHEDULER_AUTH_KEY", "scheduler_key") diff --git a/tests/unit/test_scheduler_base.py b/tests/unit/test_scheduler_base.py index f78efc20..4dbb126a 100644 --- a/tests/unit/test_scheduler_base.py +++ b/tests/unit/test_scheduler_base.py @@ -22,7 +22,7 @@ class TestScheduler(Scheduler): get_jobs = Mock() start_job = Mock() - return TestScheduler() + yield TestScheduler() @pytest.fixture @@ -48,6 +48,17 @@ def now_func(): yield now_func +@pytest.mark.unit +def test_getting_jobs_wont_crash(scheduler_instance, base_time): + scheduler_instance.get_jobs.side_effect = Exception + scheduler_instance.start(base_time) + + time.sleep(0.05) + assert scheduler_instance.thread.is_alive() + + scheduler_instance.shutdown() + scheduler_instance.wait() + @pytest.mark.unit @pytest.mark.parametrize( @@ -100,18 +111,15 @@ def test_reloads_and_shutdowns_immediately(with_job, scheduler_instance, base_ti @pytest.mark.unit -def test_job_start_is_called(scheduler_instance, base_time, no_wait): +@pytest.mark.parametrize("start_side_effect", (lambda *_: None, Exception)) +def test_job_start_is_called(start_side_effect, scheduler_instance, base_time, no_wait): jobs = [ Job(cron_expr="* * * * *", cron_tz="UTC", delayed_policy=DelayedPolicy.ALL), Job(cron_expr="*/2 * * * *", cron_tz="UTC", delayed_policy=DelayedPolicy.ALL), ] + scheduler_instance.get_jobs.side_effect = lambda: iter(jobs) + scheduler_instance.start_job.side_effect = start_side_effect with ( - patch.object(scheduler_instance, "start_job") as start_job_mock, - patch.object( - scheduler_instance, - "get_jobs", - side_effect=lambda: iter(jobs), - ) as get_jobs_mock, patch.object( scheduler_instance, "_get_next_jobs", @@ -121,10 +129,10 @@ def test_job_start_is_called(scheduler_instance, base_time, no_wait): scheduler_instance.start(base_time) for multiplier in (1, 2): - while start_job_mock.call_count != 6 * multiplier: + while scheduler_instance.start_job.call_count != 6 * multiplier: time.sleep(0.01) - assert get_jobs_mock.call_count == multiplier + assert scheduler_instance.get_jobs.call_count == multiplier assert get_next_mock.call_count == multiplier if multiplier == 1: From bfca74f0c5135d70edc23e565880334f2728e3dd Mon Sep 17 00:00:00 2001 From: Ricardo Boni Date: Mon, 28 Apr 2025 15:54:19 -0400 Subject: [PATCH 03/34] misc(scheduler): Removing the test generation scheduling --- testgen/__main__.py | 1 - .../frontend/js/pages/test_suites.js | 12 +---- testgen/ui/views/test_suites.py | 52 ------------------- 3 files changed, 1 insertion(+), 64 deletions(-) diff --git a/testgen/__main__.py b/testgen/__main__.py index 7d4cb456..e36208e0 100644 --- a/testgen/__main__.py +++ b/testgen/__main__.py @@ -124,7 +124,6 @@ def run_profile(configuration: Configuration, table_group_id: str): click.echo("\n" + message) 
-@register_scheduler_job @cli.command("run-test-generation", help="Generates or refreshes the tests for a table group.") @click.option( "-tg", diff --git a/testgen/ui/components/frontend/js/pages/test_suites.js b/testgen/ui/components/frontend/js/pages/test_suites.js index ff0bc5da..9e62ef67 100644 --- a/testgen/ui/components/frontend/js/pages/test_suites.js +++ b/testgen/ui/components/frontend/js/pages/test_suites.js @@ -84,23 +84,13 @@ const TestSuites = (/** @type Properties */ props) => { style: 'font-size: 14px;', onChange: (value) => emitEvent('FilterApplied', {payload: value}), }), - userCanEdit - ? Button({ - icon: 'today', - type: 'stroked', - label: 'Test Generation Schedules', - width: 'fit-content', - style: 'margin-left: auto; background: var(--dk-card-background);', - onclick: () => emitEvent('SchedulesClicked', {}), - }) - : '', userCanEdit ? Button({ icon: 'add', type: 'stroked', label: 'Add Test Suite', width: 'fit-content', - style: 'margin-left: 10px; background: var(--dk-card-background);', + style: 'margin-left: auto; background: var(--dk-card-background);', onclick: () => emitEvent('AddTestSuiteClicked', {}), }) : '', diff --git a/testgen/ui/views/test_suites.py b/testgen/ui/views/test_suites.py index 47354e80..9246b1a2 100644 --- a/testgen/ui/views/test_suites.py +++ b/testgen/ui/views/test_suites.py @@ -2,10 +2,8 @@ import typing from functools import partial -import pandas as pd import streamlit as st -import testgen.ui.services.database_service as db import testgen.ui.services.form_service as fm import testgen.ui.services.query_service as dq import testgen.ui.services.test_suite_service as test_suite_service @@ -19,7 +17,6 @@ from testgen.ui.services.string_service import empty_if_null from testgen.ui.session import session from testgen.ui.views.dialogs.generate_tests_dialog import generate_tests_dialog -from testgen.ui.views.dialogs.manage_schedules import ScheduleDialog from testgen.ui.views.dialogs.run_tests_dialog import run_tests_dialog from testgen.utils import format_field @@ -103,59 +100,10 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N "DeleteActionClicked": delete_test_suite_dialog, "RunTestsClicked": lambda test_suite_id: run_tests_dialog(project_code, test_suite_service.get_by_id(test_suite_id)), "GenerateTestsClicked": lambda test_suite_id: generate_tests_dialog(test_suite_service.get_by_id(test_suite_id)), - "SchedulesClicked": lambda _: TestGenerationScheduleDialog().open(project_code), }, ) -class TestGenerationScheduleDialog(ScheduleDialog): - - title = "Manage Test Generation Schedules" - arg_label = "Test Suite" - job_key = "run-test-generation" - test_suites: pd.DataFrame | None = None - - def init(self, project_code: str) -> None: - self.test_suites = get_db_test_suite_choices(project_code) - - def get_arg_value(self, job): - return self.test_suites.loc[ - ( - (self.test_suites["test_suite"] == job.kwargs["test_suite_key"]) & - (self.test_suites["table_groups_id"] == job.kwargs["table_group_id"]) - ), - "test_suite" - ].iloc[0] - - def arg_value_input(self) -> tuple[bool, list[typing.Any], dict[str, typing.Any]]: - ts_id = testgen.select( - label="Test Suite", - options=self.test_suites, - value_column="id", - display_column="test_suite", - required=True, - ) - ts_row = self.test_suites.loc[self.test_suites["id"] == ts_id].iloc[0] - - return bool(ts_id), [], {"test_suite_key": ts_row["test_suite"], "table_group_id": ts_row["table_groups_id"]} - - - -@st.cache_data(show_spinner=False) -def 
get_db_test_suite_choices(project_code: str) -> pd.DataFrame: - schema = st.session_state["dbschema"] - - sql = f""" - SELECT id::VARCHAR, test_suite, table_groups_id::VARCHAR - FROM {schema}.test_suites - WHERE project_code = '{project_code}' - ORDER BY test_suite - """ - - return db.retrieve_data(sql) - - - def on_test_suites_filtered(table_group_id: str | None = None) -> None: Router().set_query_params({ "table_group_id": table_group_id }) From 6b1d34dc1018957611794e3dd9570b3dd81aa0ca Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Tue, 22 Apr 2025 00:37:38 -0400 Subject: [PATCH 04/34] fix(navigation): upgrade libraries and prevent events handled twice --- deploy/testgen.dockerfile | 1 - pyproject.toml | 7 +--- testgen/__main__.py | 6 ++-- testgen/ui/app.py | 11 ++++--- testgen/ui/assets.py | 4 +-- testgen/ui/assets/style.css | 28 ++++++++-------- .../frontend/js/components/empty_state.js | 4 +-- .../frontend/js/components/sidebar.js | 8 ++--- testgen/ui/components/widgets/breadcrumbs.py | 7 +++- testgen/ui/components/widgets/link.py | 7 +++- testgen/ui/components/widgets/sidebar.py | 7 ++++ testgen/ui/navigation/router.py | 32 ++++++------------- testgen/ui/session.py | 5 ++- testgen/ui/views/hygiene_issues.py | 19 ----------- testgen/ui/views/login.py | 2 +- 15 files changed, 64 insertions(+), 84 deletions(-) diff --git a/deploy/testgen.dockerfile b/deploy/testgen.dockerfile index a3f1039b..4bd97fc3 100644 --- a/deploy/testgen.dockerfile +++ b/deploy/testgen.dockerfile @@ -25,7 +25,6 @@ RUN chown -R testgen:testgen /var/lib/testgen /dk/lib/python3.12/site-packages/s ENV TESTGEN_VERSION=${TESTGEN_VERSION} ENV TESTGEN_DOCKER_HUB_REPO=${TESTGEN_DOCKER_HUB_REPO} ENV TG_RELEASE_CHECK=docker -ENV STREAMLIT_SERVER_MAX_UPLOAD_SIZE=200 USER testgen diff --git a/pyproject.toml b/pyproject.toml index 56321182..d4dca0c4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,17 +38,12 @@ dependencies = [ "pycryptodome==3.21", "prettytable==3.7.0", "requests_extensions==1.1.3", - "bz2file==0.98", - "trogon==0.4.0", "numpy==1.26.4", "pandas==2.1.4", - "streamlit==1.38.0", + "streamlit==1.44.1", "streamlit-extras==0.3.0", "streamlit-aggrid==0.3.4.post3", - "streamlit-antd-components==0.2.2", - "streamlit-plotly-events==0.0.6", "plotly_express==0.4.1", - "streamlit-option-menu==0.3.6", "streamlit-authenticator==0.2.3", "streamlit-javascript==0.1.5", "progress==1.6", diff --git a/testgen/__main__.py b/testgen/__main__.py index 2fbbe845..d0c92af6 100644 --- a/testgen/__main__.py +++ b/testgen/__main__.py @@ -8,7 +8,6 @@ import click from click.core import Context from progress.spinner import MoonSpinner -from trogon import tui from testgen import settings from testgen.commands.run_execute_tests import run_execution_steps @@ -58,7 +57,6 @@ class Configuration: pass_configuration = click.make_pass_decorator(Configuration) -@tui() @click.group( help=f"This version: {settings.VERSION} \n\nLatest version: {version_service.get_latest_version()} \n\nSchema revision: {get_schema_revision()}" ) @@ -83,7 +81,7 @@ def cli(ctx: Context, verbose: bool): sys.exit(1) if ( - ctx.invoked_subcommand not in ["ui", "tui", "setup-system-db", "upgrade-system-version", "quick-start"] + ctx.invoked_subcommand not in ["ui", "setup-system-db", "upgrade-system-version", "quick-start"] and not is_db_revision_up_to_date() ): click.secho("The system database schema is outdated. 
Automatically running the following command:", fg="red") @@ -621,6 +619,8 @@ def run(debug: bool): "run", app_file, "--browser.gatherUsageStats=false", + "--client.showErrorDetails=none", + "--client.toolbarMode=minimal", f"--server.sslCertFile={settings.SSL_CERT_FILE}" if use_ssl else "", f"--server.sslKeyFile={settings.SSL_KEY_FILE}" if use_ssl else "", "--", diff --git a/testgen/ui/app.py b/testgen/ui/app.py index 8dd99e55..ddaedd78 100644 --- a/testgen/ui/app.py +++ b/testgen/ui/app.py @@ -20,7 +20,9 @@ def render(log_level: int = logging.INFO): page_title="TestGen", page_icon=get_asset_path("favicon.ico"), layout="wide", - initial_sidebar_state="collapsed" if user_session_service.user_has_catalog_role() else "auto" + # Collapse when logging out because the sidebar takes some time to be removed from the DOM + # Collapse for Catalog role since they only have access to one page + initial_sidebar_state="collapsed" if session.logging_out or user_session_service.user_has_catalog_role() else "auto" ) application = get_application(log_level=log_level) @@ -39,15 +41,14 @@ def render(log_level: int = logging.INFO): if not session.project: session.project = st.query_params.get("project_code") if not session.project and len(projects) > 0: - project_service.set_current_project(projects[0]["code"]) + session.project = projects[0]["code"] if session.authentication_status is None and not session.logging_out: user_session_service.load_user_session() application.logo.render() - hide_sidebar = not session.authentication_status or session.logging_in - if not hide_sidebar: + if session.authentication_status and not session.logging_in: with st.sidebar: testgen.sidebar( projects=projects, @@ -57,7 +58,7 @@ def render(log_level: int = logging.INFO): current_page=session.current_page, ) - application.router.run(hide_sidebar) + application.router.run() @st.cache_resource(validate=lambda _: not settings.IS_DEBUG, show_spinner=False) diff --git a/testgen/ui/assets.py b/testgen/ui/assets.py index 9ea10f18..114ade32 100644 --- a/testgen/ui/assets.py +++ b/testgen/ui/assets.py @@ -1,6 +1,6 @@ import pathlib -from streamlit.elements.image import WidthBehaviour, image_to_url +from streamlit.elements.lib.image_utils import WidthBehavior, image_to_url def get_asset_path(path: str) -> str: @@ -11,7 +11,7 @@ def get_asset_data_url(path: str) -> str: absolute_path = get_asset_path(path) return image_to_url( absolute_path, - int(WidthBehaviour.ORIGINAL), + int(WidthBehavior.ORIGINAL), clamp=False, channels="RGB", output_format="auto", diff --git a/testgen/ui/assets/style.css b/testgen/ui/assets/style.css index b444185b..25802d26 100644 --- a/testgen/ui/assets/style.css +++ b/testgen/ui/assets/style.css @@ -37,21 +37,13 @@ body { img.dk-logo-img { margin: 0 0 30px 0; - width:100%; -} - -/* Header bar */ -MainMenu { - display: none; + width: 100%; } +/* Streamlit header */ header { display: none !important; } - -footer { - display: none !important; -} /* ... 
*/ /* Sidebar */ @@ -68,11 +60,11 @@ section[data-testid="stSidebar"] { /* */ /* Main content */ -.appview-container > :nth-child(2 of section) { +div[data-testid="stAppViewContainer"] > :nth-child(2 of section) { background-color: #f8f9fa; } -section.main > :nth-child(1 of div).block-container { +div[data-testid="stMainBlockContainer"] { padding: 12px 24px 24px; } @@ -80,12 +72,16 @@ div[data-testid="stVerticalBlock"] { gap: 0.5rem; } -.appview-container:has(section[data-testid="stSidebar"]) div[data-testid="stSidebarCollapsedControl"] { +div[data-testid="stAppViewContainer"]:has(section[data-testid="stSidebar"]) div[data-testid="stSidebarCollapsedControl"] { top: 0.5rem; border-radius: 4px; background-color: var(--border-color); padding: 3px 0 0 8px; } + +div[data-testid="stAppViewContainer"]:has(section[data-testid="stSidebar"][aria-expanded="true"]) div[data-testid="stSidebarCollapsedControl"] { + display: none; +} /* */ /* Dialog - sets the width of all st.dialog */ @@ -275,11 +271,13 @@ Use as testgen.text("text", "extra_styles") */ .tg-header { margin: 0; padding: 0; + font-size: 26px; font-weight: 500; + line-height: 1.2; transition: padding 0.3s; } -[data-testid="stSidebarCollapsedControl"] ~ section.main .tg-header { +[data-testid="stSidebar"][aria-expanded="false"] ~ [data-testid="stMain"] .tg-header { padding-left: 80px; } @@ -380,7 +378,7 @@ div[data-testid="stVerticalBlockBorderWrapper"]:has(> div > div[data-testid="stV } /* Main content */ - .appview-container > :nth-child(2 of section) { + div[data-testid="stAppViewContainer"] > :nth-child(2 of section) { background-color: rgb(14, 17, 23); } /* */ diff --git a/testgen/ui/components/frontend/js/components/empty_state.js b/testgen/ui/components/frontend/js/components/empty_state.js index b89863e9..7c243b50 100644 --- a/testgen/ui/components/frontend/js/components/empty_state.js +++ b/testgen/ui/components/frontend/js/components/empty_state.js @@ -8,7 +8,7 @@ * @type {object} * @property {string} href * @property {string} label -* +* * @typedef Properties * @type {object} * @property {string} icon @@ -78,7 +78,7 @@ stylesheet.replace(` .tg-empty-state { margin-top: 80px; border: 1px solid var(--border-color); - padding: 112px 0px !important; + padding: 112px 24px !important; } .tg-empty-state--title { diff --git a/testgen/ui/components/frontend/js/components/sidebar.js b/testgen/ui/components/frontend/js/components/sidebar.js index 239c3a2c..dbc2c344 100644 --- a/testgen/ui/components/frontend/js/components/sidebar.js +++ b/testgen/ui/components/frontend/js/components/sidebar.js @@ -198,7 +198,7 @@ const VersionRow = (/** @type string */ label, /** @type string */ version, icon function emitEvent(/** @type Object */ data) { if (Sidebar.StreamlitInstance) { - Sidebar.StreamlitInstance.sendData(data); + Sidebar.StreamlitInstance.sendData({ ...data, _id: Math.random() }); // Identify the event so its handler is called once } } @@ -209,9 +209,7 @@ function navigate(/** @type object */ event, /** @type string */ path, /** @type // Prevent Streamlit from reacting to event event.stopPropagation(); - if (Sidebar.StreamlitInstance && path !== currentPage) { - Sidebar.StreamlitInstance.sendData({ path }); - } + emitEvent({ path }); } function isCurrentPage(/** @type string */ itemPath, /** @type string */ currentPage) { @@ -234,7 +232,7 @@ stylesheet.replace(` display: flex; flex-direction: column; justify-content: space-between; - height: calc(100% - 76px); + height: calc(100% - 78px); } .menu .menu--project { diff --git 
a/testgen/ui/components/widgets/breadcrumbs.py b/testgen/ui/components/widgets/breadcrumbs.py index ecfc88a4..4f9012fb 100644 --- a/testgen/ui/components/widgets/breadcrumbs.py +++ b/testgen/ui/components/widgets/breadcrumbs.py @@ -2,6 +2,7 @@ from testgen.ui.components.utils.component import component from testgen.ui.navigation.router import Router +from testgen.ui.session import session def breadcrumbs( @@ -23,7 +24,11 @@ def breadcrumbs( props={"breadcrumbs": breadcrumbs}, ) if data: - Router().navigate(to=data["href"], with_args=data["params"]) + # Prevent handling the same event multiple times + event_id = data.get("_id") + if event_id != session.breadcrumb_event_id: + session.breadcrumb_event_id = event_id + Router().navigate(to=data["href"], with_args=data["params"]) class Breadcrumb(typing.TypedDict): path: str | None diff --git a/testgen/ui/components/widgets/link.py b/testgen/ui/components/widgets/link.py index ce3e26c5..431af75d 100644 --- a/testgen/ui/components/widgets/link.py +++ b/testgen/ui/components/widgets/link.py @@ -2,6 +2,7 @@ from testgen.ui.components.utils.component import component from testgen.ui.navigation.router import Router +from testgen.ui.session import session TooltipPosition = typing.Literal["left", "right"] @@ -51,4 +52,8 @@ def link( clicked = component(id_="link", key=key, props=props) if clicked: - Router().navigate(to=href, with_args=params) + # Prevent handling the same event multiple times + event_id = clicked.get("_id") + if event_id != session.link_event_id: + session.link_event_id = event_id + Router().navigate(to=href, with_args=params) diff --git a/testgen/ui/components/widgets/sidebar.py b/testgen/ui/components/widgets/sidebar.py index 312f19a1..95bb8360 100644 --- a/testgen/ui/components/widgets/sidebar.py +++ b/testgen/ui/components/widgets/sidebar.py @@ -56,6 +56,13 @@ def on_change(): # So we store the path and navigate on the next run event_data = getattr(session, SIDEBAR_KEY) + + # Prevent handling the same event multiple times + event_id = event_data.get("_id") + if event_id == session.sidebar_event_id: + return + session.sidebar_event_id = event_id + project = event_data.get("project") path = event_data.get("path") view_logs = event_data.get("view_logs") diff --git a/testgen/ui/navigation/router.py b/testgen/ui/navigation/router.py index c6c2c3e1..d0911033 100644 --- a/testgen/ui/navigation/router.py +++ b/testgen/ui/navigation/router.py @@ -10,7 +10,6 @@ from testgen.utils.singleton import Singleton LOG = logging.getLogger("testgen") -COOKIES_READY_RERUNS = 2 class Router(Singleton): @@ -24,29 +23,13 @@ def __init__( self._routes = {route.path: route(self) for route in routes} if routes else {} self._pending_navigation: dict | None = None - def run(self, hide_sidebar=False) -> None: + def run(self) -> None: streamlit_pages = [route.streamlit_page for route in self._routes.values()] - # Don't use position="hidden" when our custom sidebar needs to be displayed - # The default [data-testid="stSidebarNav"] element seems to be needed to keep the sidebar DOM stable - # Otherwise anything custom in the sidebar randomly flickers on page navigation - current_page = st.navigation(streamlit_pages, position="hidden" if hide_sidebar else "sidebar") + current_page = st.navigation(streamlit_pages, position="hidden") session.current_page_args = st.query_params - # This hack is needed because the auth cookie is not retrieved on the first run - # We have to store the page and wait for the second or third run - if not session.cookies_ready: - 
session.cookies_ready = 1 - session.page_pending_cookies = current_page - # Set this anyway so that sidebar displays initial selection correctly - session.current_page = current_page.url_path - st.rerun() - - # Sometimes the cookie is ready on the second rerun and other times only on the third -_- - # so we have to make sure the page renders correctly in both cases - # and also handle the login page! - elif session.cookies_ready == COOKIES_READY_RERUNS or session.authentication_status or (session.page_pending_cookies and not session.page_pending_cookies.url_path): - session.cookies_ready = COOKIES_READY_RERUNS + if session.cookies_ready: current_page = session.page_pending_cookies or current_page session.page_pending_cookies = None @@ -58,8 +41,11 @@ def run(self, hide_sidebar=False) -> None: session.current_page = current_page.url_path current_page.run() else: - session.cookies_ready += 1 - time.sleep(0.3) + # This hack is needed because the auth cookie is not retrieved on the first run + # We have to store the page and wait for the next run + session.page_pending_cookies = current_page + session.cookies_ready = True + st.rerun() def queue_navigation(self, /, to: str, with_args: dict | None = None) -> None: self._pending_navigation = {"to": to, "with_args": with_args or {}} @@ -91,6 +77,8 @@ def navigate(self, /, to: str, with_args: dict = {}) -> None: # noqa: B006 } if not session.current_page.startswith("quality-dashboard") and not to.startswith("quality-dashboard"): st.cache_data.clear() + + session.current_page = to st.switch_page(route.streamlit_page) except KeyError as k: diff --git a/testgen/ui/session.py b/testgen/ui/session.py index 826fcc6e..aa0820ed 100644 --- a/testgen/ui/session.py +++ b/testgen/ui/session.py @@ -12,7 +12,7 @@ class TestgenSession(Singleton): - cookies_ready: int + cookies_ready: bool logging_in: bool logging_out: bool page_pending_cookies: st.Page # type: ignore @@ -36,6 +36,9 @@ class TestgenSession(Singleton): latest_version: str | None testgen_event_id: str | None + sidebar_event_id: str | None + link_event_id: str | None + breadcrumb_event_id: str | None def __init__(self, state: SessionStateProxy) -> None: super().__setattr__("_state", state) diff --git a/testgen/ui/views/hygiene_issues.py b/testgen/ui/views/hygiene_issues.py index f2dd6bea..adc96efc 100644 --- a/testgen/ui/views/hygiene_issues.py +++ b/testgen/ui/views/hygiene_issues.py @@ -3,7 +3,6 @@ from io import BytesIO import pandas as pd -import plotly.express as px import streamlit as st import testgen.ui.queries.profiling_queries as profiling_queries @@ -463,24 +462,6 @@ def get_source_data(hi_data): return get_source_data_uncached(hi_data) -def write_frequency_graph(df_tests): - # Count the frequency of each test_name - df_count = df_tests["anomaly_name"].value_counts().reset_index() - df_count.columns = ["anomaly_name", "frequency"] - - # Sort the DataFrame by frequency in ascending order for display - df_count = df_count.sort_values(by="frequency", ascending=True) - - # Create a horizontal bar chart using Plotly Express - fig = px.bar(df_count, x="frequency", y="anomaly_name", orientation="h", title="Issue Frequency") - fig.update_layout(title_font={"color": "green"}, paper_bgcolor="rgba(0,0,0,0)", plot_bgcolor="rgba(0,0,0,0)") - if len(df_count) <= 5: - # fig.update_layout(bargap=0.9) - fig.update_layout(height=300) - - st.plotly_chart(fig) - - @st.dialog(title="Source Data") def source_data_dialog(selected_row): st.markdown(f"#### {selected_row['anomaly_name']}") diff --git 
a/testgen/ui/views/login.py b/testgen/ui/views/login.py index 9fbabab5..ed972fab 100644 --- a/testgen/ui/views/login.py +++ b/testgen/ui/views/login.py @@ -38,7 +38,7 @@ def render(self, **_kwargs) -> None: with login_column: st.html("""


-    [welcome banner markup not recovered]
-    Welcome to DataKitchen DataOps TestGen
+    [welcome banner markup not recovered]
+    Welcome to DataKitchen DataOps TestGen
""") name, authentication_status, username = authenticator.login("Login") From 8baea1471caf25791c782b41bd068217196ee6b1 Mon Sep 17 00:00:00 2001 From: Ricardo Boni Date: Thu, 1 May 2025 22:30:17 -0400 Subject: [PATCH 05/34] misc(scheduler): Addressing code review feedback --- README.md | 6 +- docs/local_development.md | 22 ++- testgen/common/models/scheduler.py | 8 +- testgen/scheduler/base.py | 15 +- .../030_initialize_new_schema_structure.sql | 10 +- .../dbupgrade/0135_incremental_upgrade.sql | 11 +- .../frontend/js/pages/schedule_list.js | 17 ++- testgen/ui/components/widgets/page.py | 4 +- testgen/ui/views/dialogs/manage_schedules.py | 139 ++++++++++++------ testgen/ui/views/profiling_runs.py | 18 +-- testgen/ui/views/test_runs.py | 18 +-- tests/unit/test_scheduler_base.py | 1 + 12 files changed, 170 insertions(+), 99 deletions(-) diff --git a/README.md b/README.md index 1878ff21..5b8b0947 100644 --- a/README.md +++ b/README.md @@ -149,12 +149,12 @@ Make sure the PostgreSQL database server is up and running. Initialize the appli testgen setup-system-db --yes ``` -### Run the TestGen UI +### Run the application modules -Run the following command to start the TestGen UI. It will open the browser at [http://localhost:8501](http://localhost:8501). +Run the following command to start TestGen. It will open the browser at [http://localhost:8501](http://localhost:8501). ```shell -testgen ui run +testgen run-app ``` Verify that you can login to the UI with the `TESTGEN_USERNAME` and `TESTGEN_PASSWORD` values that you configured in the environment variables. diff --git a/docs/local_development.md b/docs/local_development.md index 09c8f9d9..ae704e3b 100644 --- a/docs/local_development.md +++ b/docs/local_development.md @@ -98,8 +98,24 @@ testgen run-tests --project-key DEFAULT --test-suite-key default-suite-1 testgen quick-start --simulate-fast-forward ``` -### Run Streamlit -Run the local Streamlit-based TestGen application. It will open the browser at [http://localhost:8501](http://localhost:8501). +### Run the Application + +TheGen has two modules that have to be running: The web user interface (UI) and the Scheduler. +The scheduler starts jobs (profiling, test execution, ...) at their scheduled times. 
+ +The following command starts both modules, each in their own process: + +```shell +testgen run-app +``` + +Alternatively, you can run each individually: + + +```shell +testgen run-app ui +``` + ```shell -testgen ui run +testgen run-app scheduler ``` diff --git a/testgen/common/models/scheduler.py b/testgen/common/models/scheduler.py index 854dc3a2..99b7ed33 100644 --- a/testgen/common/models/scheduler.py +++ b/testgen/common/models/scheduler.py @@ -4,8 +4,8 @@ from typing import Any, Self from cron_converter import Cron -from sqlalchemy import JSON, Column, String, select -from sqlalchemy.dialects.postgresql import UUID +from sqlalchemy import Column, String, select +from sqlalchemy.dialects.postgresql import JSONB, UUID from sqlalchemy.orm import InstrumentedAttribute from testgen.common.models import Base, get_current_session @@ -18,8 +18,8 @@ class JobSchedule(Base): project_code: str = Column(String) key: str = Column(String, nullable=False) - args: list[Any] = Column(JSON, nullable=False, default=[]) - kwargs: dict[str, Any] = Column(JSON, nullable=False, default={}) + args: list[Any] = Column(JSONB, nullable=False, default=[]) + kwargs: dict[str, Any] = Column(JSONB, nullable=False, default={}) cron_expr: str = Column(String, nullable=False) cron_tz: str = Column(String, nullable=False) diff --git a/testgen/scheduler/base.py b/testgen/scheduler/base.py index d737dff9..0c0e5bc6 100644 --- a/testgen/scheduler/base.py +++ b/testgen/scheduler/base.py @@ -68,17 +68,18 @@ def _get_now(self): def _get_next_jobs(self): + job_list_head = [] + try: all_jobs = self.get_jobs() except Exception as e: LOG.error("Error obtaining jobs: %r", e) # noqa: TRY400 - return - - job_list_head = [] - for job in all_jobs: - gen = job.get_triggering_times(self.base_time) - job_list_head.append((next(gen), gen, job)) - self._reload_event.clear() + else: + for job in all_jobs: + gen = job.get_triggering_times(self.base_time) + job_list_head.append((next(gen), gen, job)) + finally: + self._reload_event.clear() while job_list_head and not self._stopping.is_set(): diff --git a/testgen/template/dbsetup/030_initialize_new_schema_structure.sql b/testgen/template/dbsetup/030_initialize_new_schema_structure.sql index 84982a89..8907c1aa 100644 --- a/testgen/template/dbsetup/030_initialize_new_schema_structure.sql +++ b/testgen/template/dbsetup/030_initialize_new_schema_structure.sql @@ -866,11 +866,15 @@ CREATE TABLE job_schedules ( id UUID NOT NULL PRIMARY KEY, project_code VARCHAR(30) NOT NULL, key VARCHAR(100) NOT NULL, - args JSON NOT NULL, - kwargs JSON NOT NULL, + args JSONB NOT NULL, + kwargs JSONB NOT NULL, cron_expr VARCHAR(50) NOT NULL, - cron_tz VARCHAR(30) NOT NULL + cron_tz VARCHAR(30) NOT NULL, + UNIQUE (project_code, key, args, kwargs, cron_expr, cron_tz) ); +CREATE INDEX job_schedules_idx ON job_schedules (project_code, key); + + INSERT INTO tg_revision (component, revision) VALUES ('metadata_db', 0); diff --git a/testgen/template/dbupgrade/0135_incremental_upgrade.sql b/testgen/template/dbupgrade/0135_incremental_upgrade.sql index 71f8ddc3..bd0b4de4 100644 --- a/testgen/template/dbupgrade/0135_incremental_upgrade.sql +++ b/testgen/template/dbupgrade/0135_incremental_upgrade.sql @@ -4,8 +4,11 @@ CREATE TABLE job_schedules ( id UUID NOT NULL PRIMARY KEY, project_code VARCHAR(30) NOT NULL, key VARCHAR(100) NOT NULL, - args JSON NOT NULL, - kwargs JSON NOT NULL, + args JSONB NOT NULL, + kwargs JSONB NOT NULL, cron_expr VARCHAR(50) NOT NULL, - cron_tz VARCHAR(30) NOT NULL -) + cron_tz VARCHAR(30) NOT NULL, 
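+ -- The UNIQUE constraint below backs the duplicate-schedule check in the UI:
+ -- inserting an identical schedule raises IntegrityError ("This schedule already exists.").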
+ UNIQUE (project_code, key, args, kwargs, cron_expr, cron_tz) +); + +CREATE INDEX job_schedules_idx ON job_schedules (project_code, key); diff --git a/testgen/ui/components/frontend/js/pages/schedule_list.js b/testgen/ui/components/frontend/js/pages/schedule_list.js index bfdab632..a4621c5d 100644 --- a/testgen/ui/components/frontend/js/pages/schedule_list.js +++ b/testgen/ui/components/frontend/js/pages/schedule_list.js @@ -6,11 +6,15 @@ * @property {string} cronTz * @property {string[]} sample * + * @typedef Permissions + * @type {object} + * @property {boolean} can_edit + * * @typedef Properties * @type {object} * @property {Schedule[]} items * @property {Permissions} permissions - * @property {string} argLabel + * @property {string} arg_label */ import van from '../van.min.js'; import { Button } from '../components/button.js'; @@ -45,7 +49,7 @@ const ScheduleList = (/** @type Properties */ props) => { { class: 'table-header flex-row' }, span( { style: `flex: ${columns[0]}` }, - getValue(props.argLabel), + getValue(props.arg_label), ), span( { style: `flex: ${columns[1]}` }, @@ -57,7 +61,7 @@ const ScheduleList = (/** @type Properties */ props) => { ), ), () => div( - scheduleItems.val.map(item => ScheduleListItem(item, columns)), + scheduleItems.val.map(item => ScheduleListItem(item, columns, getValue(props.permissions))), ), ); } @@ -65,6 +69,7 @@ const ScheduleList = (/** @type Properties */ props) => { const ScheduleListItem = ( /** @type Schedule */ item, /** @type string[] */ columns, + /** @type Permissions */ permissions, ) => { return div( { class: 'table-row flex-row' }, @@ -86,7 +91,7 @@ const ScheduleListItem = ( }, 'info', ), - { text: [div("Sample triggering timestamps:"), ...item.sample?.map(v => div(v))] }, + { text: [div("Next runs:"), ...item.sample?.map(v => div(v))] }, ), ), div( @@ -97,12 +102,12 @@ const ScheduleListItem = ( ), div( { style: `flex: ${columns[2]}` }, - Button({ + permissions.can_edit ? Button({ type: 'stroked', label: 'Delete', style: 'width: auto; height: 32px;', onclick: () => emitEvent('DeleteSchedule', { payload: item }), - }), + }) : null, ), ); } diff --git a/testgen/ui/components/widgets/page.py b/testgen/ui/components/widgets/page.py index ab397f23..02a8c34e 100644 --- a/testgen/ui/components/widgets/page.py +++ b/testgen/ui/components/widgets/page.py @@ -38,8 +38,8 @@ def page_links(help_topic: str | None = None): st.link_button(":material/school:", TRAINING_URL, help="Training Portal") -def whitespace(size: float, container: DeltaGenerator | None = None): - _apply_html(f'
', container) +def whitespace(size: float, unit: str = "rem", container: DeltaGenerator | None = None): + _apply_html(f'
', container) def divider(margin_top: int = 0, margin_bottom: int = 0, container: DeltaGenerator | None = None): diff --git a/testgen/ui/views/dialogs/manage_schedules.py b/testgen/ui/views/dialogs/manage_schedules.py index 2acfedde..a0863885 100644 --- a/testgen/ui/views/dialogs/manage_schedules.py +++ b/testgen/ui/views/dialogs/manage_schedules.py @@ -6,6 +6,7 @@ import cron_converter import streamlit as st +from sqlalchemy.exc import IntegrityError from testgen.common.models import Session from testgen.common.models.scheduler import JobSchedule @@ -20,7 +21,10 @@ class ScheduleDialog: arg_label: str = "" job_key: str = "" - def init(self, project_code: str) -> None: + def __init__(self): + self.project_code = None + + def init(self) -> None: raise NotImplementedError def get_arg_value(self, job): @@ -30,17 +34,17 @@ def arg_value_input(self) -> tuple[bool, list[Any], dict[str, Any]]: raise NotImplementedError def open(self, project_code: str) -> None: - return st.dialog(title=self.title)(self._open)(project_code) - - def _open(self, project_code: str) -> None: - - self.init(project_code) + st.session_state["schedule_form_success"] = None + st.session_state["schedule_cron_expr"] = "" + self.project_code = project_code + self.init() + return st.dialog(title=self.title)(self.render)() + def render(self) -> None: with Session() as db_session: - scheduled_jobs = ( db_session.query(JobSchedule) - .where(JobSchedule.project_code == project_code, JobSchedule.key == self.job_key) + .where(JobSchedule.project_code == self.project_code, JobSchedule.key == self.job_key) ) scheduled_jobs_json = [] for job in scheduled_jobs: @@ -50,7 +54,7 @@ def _open(self, project_code: str) -> None: "cronExpr": job.cron_expr, "cronTz": job.cron_tz_str, "sample": [ - sample.strftime("%a %b %-d %Y, %-I:%M %p %z") + sample.strftime("%a %b %-d, %-I:%M %p") for sample in job.get_sample_triggering_timestamps(2) ], } @@ -71,56 +75,93 @@ def on_delete_sched(item): "schedule_list", props={ "items": json.dumps(scheduled_jobs_json), - "argLabel": self.arg_label, - "readOnly": not user_session_service.user_can_edit(), + "arg_abel": self.arg_label, + "permissions": {"can_edit": user_session_service.user_can_edit()}, }, event_handlers={"DeleteSchedule": on_delete_sched} ) - with st.expander("Add schedule", expanded=len(scheduled_jobs_json) < 6): - - arg_column, expr_column, tz_column, button_column = st.columns([.3, .4, .3, .1], vertical_alignment="bottom") - - with arg_column: - args_valid, args, kwargs = self.arg_value_input() - - with expr_column: - cron_expr = st.text_input(label="Cron Expression") + if user_session_service.user_can_edit(): + with st.container(border=True): + self.add_schedule_form() + + def add_schedule_form(self): + st.html("Add schedule") + arg_column, expr_column, tz_column, button_column = st.columns([.3, .4, .3, .1], vertical_alignment="bottom") + status_container = st.empty() + + with status_container: + match st.session_state.get("schedule_form_success", None): + case True: + st.success("Schedule added.", icon=":material/check:") + st.session_state["schedule_cron_expr"] = "" + st.session_state["schedule_cron_tz"] = st.session_state.get( + "browser_timezone", st.session_state["schedule_cron_tz"] + ) + st.session_state["schedule_form_success"] = None + case False: + st.error("This schedule already exists.", icon=":material/block:") + case None: + testgen.whitespace(56, "px") + + with arg_column: + args_valid, args, kwargs = self.arg_value_input() + + with expr_column: + cron_expr = st.text_input( + 
label="Cron Expression", + help="Examples: Every day at 6:00 AM: 0 6 * * * — Every Monday at 5:30 PM: 30 17 * * 1", + key="schedule_cron_expr", + ) - with tz_column: - cron_tz = tz_select(label="Timezone") + with tz_column: + cron_tz = tz_select(label="Timezone", key="schedule_cron_tz") - is_cron_valid = False - if cron_expr: + cron_obj = None + if cron_expr: + with status_container: try: - cron_schedule = cron_converter.Cron(cron_expr).schedule(datetime.now(zoneinfo.ZoneInfo(cron_tz))) - sample = [cron_schedule.next().strftime("%a %b %-d %Y, %-I:%M %p %z") for _ in range(2)] + cron_obj = cron_converter.Cron(cron_expr) + cron_schedule = cron_obj.schedule(datetime.now(zoneinfo.ZoneInfo(cron_tz))) + sample = [cron_schedule.next().strftime("%a %b %-d, %-I:%M %p") for _ in range(3)] except ValueError as e: st.warning(str(e), icon=":material/warning:") except Exception as e: st.error("Error validating the Cron expression") else: - st.success(f"**Next Runs:** {' — '.join(sample)}", icon=":material/check:") - is_cron_valid = True - - with button_column: - add_button = st.button( - "Add", - use_container_width=True, - disabled=not args_valid or not is_cron_valid, - ) - - if add_button: - with Session() as db_session: - sched_model = JobSchedule( - project_code=project_code, - key=self.job_key, - cron_expr=cron_expr, - cron_tz=cron_tz, - args=args, - kwargs=kwargs, - ) - db_session.add(sched_model) - db_session.commit() + # We postpone the validation status update when the previous reran was a failed + # attempt to insert an schedule so that it does not override the error message + if st.session_state.get("schedule_form_success", None) is None: + st.success( + f"**Next runs:** {' | '.join(sample)} ({cron_tz.replace('_', ' ')})", + icon=":material/check:", + ) + else: + st.session_state["schedule_form_success"] = None + + with button_column: + add_button = st.button( + "Add", + use_container_width=True, + disabled=not args_valid or not cron_obj, + ) - st.rerun(scope="fragment") + if add_button: + with Session() as db_session: + with status_container: + try: + sched_model = JobSchedule( + project_code=self.project_code, + key=self.job_key, + cron_expr=cron_obj.to_string(), + cron_tz=cron_tz, + args=args, + kwargs=kwargs, + ) + db_session.add(sched_model) + db_session.commit() + except IntegrityError: + st.session_state["schedule_form_success"] = False + else: + st.session_state["schedule_form_success"] = True + st.rerun(scope="fragment") diff --git a/testgen/ui/views/profiling_runs.py b/testgen/ui/views/profiling_runs.py index e26428ef..9daa3b26 100644 --- a/testgen/ui/views/profiling_runs.py +++ b/testgen/ui/views/profiling_runs.py @@ -66,12 +66,13 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N with actions_column: testgen.flex_row_end() + st.button( + ":material/today: Profiling Schedules", + help="Manages when profiling should run for a given table group", + on_click=partial(ProfilingScheduleDialog().open, project_code) + ) + if user_can_run: - st.button( - ":material/today: Profiling Schedules", - help="Manages when profiling should run for a given table group", - on_click=partial(ProfilingScheduleDialog().open, project_code) - ) st.button( ":material/play_arrow: Run Profiling", help="Run profiling for a table group", @@ -104,13 +105,13 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N class ProfilingScheduleDialog(ScheduleDialog): - title = "Manage Profiling Schedules" + title = "Profiling Schedules" arg_label = "Table Group" 
job_key = "run-profile" table_groups: pd.DataFrame | None = None - def init(self, project_code: str) -> None: - self.table_groups = get_db_table_group_choices(project_code) + def init(self) -> None: + self.table_groups = get_db_table_group_choices(self.project_code) def get_arg_value(self, job): return self.table_groups.loc[ @@ -128,7 +129,6 @@ def arg_value_input(self) -> tuple[bool, list[typing.Any], dict[str, typing.Any] return bool(tg_id), [], {"table_group_id": tg_id} - def render_empty_state(project_code: str, user_can_run: bool) -> bool: project_summary_df = project_queries.get_summary_by_code(project_code) if project_summary_df["profiling_runs_ct"]: diff --git a/testgen/ui/views/test_runs.py b/testgen/ui/views/test_runs.py index b82680c9..04dd12ec 100644 --- a/testgen/ui/views/test_runs.py +++ b/testgen/ui/views/test_runs.py @@ -76,13 +76,13 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N with actions_column: testgen.flex_row_end(actions_column) - if user_can_run: - st.button( - ":material/today: Test Run Schedules", - help="Manages when a test suite should run.", - on_click=partial(TestRunScheduleDialog().open, project_code) - ) + st.button( + ":material/today: Test Run Schedules", + help="Manages when a test suite should run.", + on_click=partial(TestRunScheduleDialog().open, project_code) + ) + if user_can_run: st.button( ":material/play_arrow: Run Tests", help="Run tests for a test suite", @@ -114,13 +114,13 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N class TestRunScheduleDialog(ScheduleDialog): - title = "Manage Test Run Schedules" + title = "Test Run Schedules" arg_label = "Test Suite" job_key = "run-tests" test_suites: pd.DataFrame | None = None - def init(self, project_code: str) -> None: - self.test_suites = get_db_test_suite_choices(project_code) + def init(self) -> None: + self.test_suites = get_db_test_suite_choices(self.project_code) def get_arg_value(self, job): return self.test_suites.loc[ diff --git a/tests/unit/test_scheduler_base.py b/tests/unit/test_scheduler_base.py index 4dbb126a..e92037c2 100644 --- a/tests/unit/test_scheduler_base.py +++ b/tests/unit/test_scheduler_base.py @@ -55,6 +55,7 @@ def test_getting_jobs_wont_crash(scheduler_instance, base_time): time.sleep(0.05) assert scheduler_instance.thread.is_alive() + assert not scheduler_instance._reload_event.is_set() scheduler_instance.shutdown() scheduler_instance.wait() From 6aac326722eca6e9a30440ff5f7196734bef574c Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Fri, 25 Apr 2025 16:32:23 -0400 Subject: [PATCH 06/34] refactor(routing): improve session management and navigation --- docs/local_development.md | 1 + testgen/ui/app.py | 17 +++--- .../frontend/js/components/sidebar.js | 54 +++++++++++++------ .../frontend/js/data_profiling/data_issues.js | 5 +- .../js/data_profiling/data_profiling_utils.js | 2 + .../frontend/js/pages/project_dashboard.js | 14 ++--- .../frontend/js/pages/quality_dashboard.js | 24 ++++++--- .../frontend/js/pages/score_explorer.js | 3 +- .../frontend/js/pages/test_suites.js | 2 + testgen/ui/components/widgets/sidebar.py | 30 +++++------ testgen/ui/navigation/page.py | 30 ++++------- testgen/ui/navigation/router.py | 21 ++++++-- testgen/ui/queries/profiling_queries.py | 3 ++ testgen/ui/services/project_service.py | 7 +-- testgen/ui/services/test_suite_service.py | 4 ++ testgen/ui/services/user_session_service.py | 3 ++ testgen/ui/session.py | 6 +-- testgen/ui/views/connections/page.py | 4 +- 
testgen/ui/views/data_catalog.py | 6 ++- testgen/ui/views/hygiene_issues.py | 4 +- testgen/ui/views/login.py | 12 +---- testgen/ui/views/profiling_results.py | 4 +- testgen/ui/views/profiling_runs.py | 5 +- testgen/ui/views/project_dashboard.py | 5 +- testgen/ui/views/project_settings.py | 4 +- testgen/ui/views/quality_dashboard.py | 4 ++ testgen/ui/views/score_details.py | 9 ++-- testgen/ui/views/score_explorer.py | 22 ++++++-- testgen/ui/views/table_groups/page.py | 4 +- testgen/ui/views/test_definitions.py | 6 +-- testgen/ui/views/test_results.py | 9 ++-- testgen/ui/views/test_runs.py | 6 ++- testgen/ui/views/test_suites.py | 5 +- 33 files changed, 202 insertions(+), 133 deletions(-) diff --git a/docs/local_development.md b/docs/local_development.md index 09c8f9d9..2da237b2 100644 --- a/docs/local_development.md +++ b/docs/local_development.md @@ -63,6 +63,7 @@ Create a `local.env` file with the following environment variables, replacing th ```shell export TESTGEN_DEBUG=yes export TESTGEN_LOG_TO_FILE=no +export TG_ANALYTICS=no export TESTGEN_USERNAME= export TESTGEN_PASSWORD= export TG_DECRYPT_SALT= diff --git a/testgen/ui/app.py b/testgen/ui/app.py index ddaedd78..fdd11a20 100644 --- a/testgen/ui/app.py +++ b/testgen/ui/app.py @@ -22,7 +22,9 @@ def render(log_level: int = logging.INFO): layout="wide", # Collapse when logging out because the sidebar takes some time to be removed from the DOM # Collapse for Catalog role since they only have access to one page - initial_sidebar_state="collapsed" if session.logging_out or user_session_service.user_has_catalog_role() else "auto" + initial_sidebar_state="collapsed" + if session.logging_out or user_session_service.user_has_catalog_role() + else "auto", ) application = get_application(log_level=log_level) @@ -36,12 +38,9 @@ def render(log_level: int = logging.INFO): set_locale() session.dbschema = db.get_schema() - - projects = project_service.get_projects() - if not session.project: - session.project = st.query_params.get("project_code") - if not session.project and len(projects) > 0: - session.project = projects[0]["code"] + session.sidebar_project = ( + session.page_args_pending_router and session.page_args_pending_router.get("project_code") + ) or st.query_params.get("project_code", session.sidebar_project) if session.authentication_status is None and not session.logging_out: user_session_service.load_user_session() @@ -51,8 +50,8 @@ def render(log_level: int = logging.INFO): if session.authentication_status and not session.logging_in: with st.sidebar: testgen.sidebar( - projects=projects, - current_project=session.project, + projects=project_service.get_projects(), + current_project=session.sidebar_project, menu=application.menu.update_version(application.get_version()), username=session.username, current_page=session.current_page, diff --git a/testgen/ui/components/frontend/js/components/sidebar.js b/testgen/ui/components/frontend/js/components/sidebar.js index dbc2c344..6e83f3f8 100644 --- a/testgen/ui/components/frontend/js/components/sidebar.js +++ b/testgen/ui/components/frontend/js/components/sidebar.js @@ -40,6 +40,8 @@ const van = window.top.van; const { a, button, div, i, img, label, option, select, span } = van.tags; +const PROJECT_CODE_QUERY_PARAM = "project_code" + const Sidebar = (/** @type {Properties} */ props) => { if (Sidebar.StreamlitInstance) { Sidebar.StreamlitInstance.setFrameHeight(1); @@ -60,7 +62,7 @@ const Sidebar = (/** @type {Properties} */ props) => { div({ class: 'caption' }, 'Project'), () => 
props.projects.val.length > 1 ? ProjectSelect(props.projects, currentProject) - : div(currentProject.val.name), + : div(currentProject.val?.name ?? '...'), ), () => { const menuItems = props.menu?.val.items || []; @@ -68,8 +70,8 @@ const Sidebar = (/** @type {Properties} */ props) => { {class: 'content'}, menuItems.map(item => item.items?.length > 0 - ? MenuSection(item, props.current_page) - : MenuItem(item, props.current_page)) + ? MenuSection(item, props.current_page, currentProject.val?.code) + : MenuItem(item, props.current_page, currentProject.val?.code)) ); }, ), @@ -119,18 +121,19 @@ const ProjectSelect = (/** @type Project[] */ projects, /** @type string */ curr class: 'project-select--label', onclick: () => opened.val = !opened.val, }, - div(currentProject.val.name), + div(currentProject.val?.name ?? '...'), i({ class: 'material-symbols-rounded' }, 'arrow_drop_down'), ), () => opened.val ? div( { class: 'project-select--options-wrapper' }, - projects.val.map(({ name, code }) => div( + projects.val.map(({ name, code }) => a( { - class: `project-select--option ${code === currentProject.val.code ? 'selected' : ''}`, - onclick: () => { + class: `project-select--option ${code === currentProject.val?.code ? 'selected' : ''}`, + href: `/?${PROJECT_CODE_QUERY_PARAM}=${code}`, + onclick: (event) => { opened.val = false; - emitEvent({ project: code }); + navigate(event, '', { [PROJECT_CODE_QUERY_PARAM]: code }); }, }, name, @@ -140,18 +143,26 @@ const ProjectSelect = (/** @type Project[] */ projects, /** @type string */ curr ); }; -const MenuSection = (/** @type {MenuItem} */ item, /** @type {string} */ currentPage) => { +const MenuSection = ( + /** @type {MenuItem} */ item, + /** @type {string} */ currentPage, + /** @type {string} */ projectCode, +) => { return div( {class: 'menu--section'}, div({class: 'menu--section--label'}, item.label), div( {class: 'menu--section--items'}, - ...item.items.map(child => MenuItem(child, currentPage)), + ...item.items.map(child => MenuItem(child, currentPage, projectCode)), ) ); } -const MenuItem = (/** @type {MenuItem} */ item, /** @type {string} */ currentPage) => { +const MenuItem = ( + /** @type {MenuItem} */ item, + /** @type {string} */ currentPage, + /** @type {string} */ projectCode, +) => { const classes = van.derive(() => { if (isCurrentPage(item.page, currentPage?.val)) { return 'menu--item active'; @@ -160,7 +171,11 @@ const MenuItem = (/** @type {MenuItem} */ item, /** @type {string} */ currentPag }); return a( - {class: classes, href: `/${item.page}`, onclick: (event) => navigate(event, item.page, currentPage?.val)}, + { + class: classes, + href: `/${item.page}?${PROJECT_CODE_QUERY_PARAM}=${projectCode}`, + onclick: (event) => navigate(event, item.page, { [PROJECT_CODE_QUERY_PARAM]: projectCode }), + }, i({class: 'menu--item--icon material-symbols-rounded'}, item.icon), span({class: 'menu--item--label'}, item.label), ); @@ -202,14 +217,18 @@ function emitEvent(/** @type Object */ data) { } } -function navigate(/** @type object */ event, /** @type string */ path, /** @type string */ currentPage = null) { +function navigate( + /** @type object */ event, + /** @type string */ path, + /** @type object */ params = {}, +) { // Needed to prevent page refresh // Returning false does not work because VanJS does not use inline handlers -> https://github.com/vanjs-org/van/discussions/246 event.preventDefault(); // Prevent Streamlit from reacting to event event.stopPropagation(); - emitEvent({ path }); + emitEvent({ path, params }); } function 
isCurrentPage(/** @type string */ itemPath, /** @type string */ currentPage) { @@ -260,7 +279,7 @@ stylesheet.replace(` z-index: 99; } -.project-select--option { +.project-select .project-select--option { display: flex; align-items: center; height: 40px; @@ -269,11 +288,12 @@ stylesheet.replace(` font-size: 14px; color: var(--primary-text-color); } -.project-select--option:hover { +.project-select .project-select--option:hover { background: var(--select-hover-background); } -.project-select--option.selected { +.project-select .project-select--option.selected { + pointer-events: none; background: var(--select-hover-background); color: var(--primary-color); } diff --git a/testgen/ui/components/frontend/js/data_profiling/data_issues.js b/testgen/ui/components/frontend/js/data_profiling/data_issues.js index f62720b7..c0b1d568 100644 --- a/testgen/ui/components/frontend/js/data_profiling/data_issues.js +++ b/testgen/ui/components/frontend/js/data_profiling/data_issues.js @@ -158,7 +158,10 @@ const TestIssuesCard = (/** @type Properties */ props, /** @type Table | Column `No test results yet for ${item.type}.`, props.noLinks ? null : Link({ href: 'test-suites', - params: { table_group_id: item.table_group_id }, + params: { + project_code: item.project_code, + table_group_id: item.table_group_id, + }, open_new: true, label: 'Go to Test Suites', right_icon: 'chevron_right', diff --git a/testgen/ui/components/frontend/js/data_profiling/data_profiling_utils.js b/testgen/ui/components/frontend/js/data_profiling/data_profiling_utils.js index 232f1192..0aafa3d2 100644 --- a/testgen/ui/components/frontend/js/data_profiling/data_profiling_utils.js +++ b/testgen/ui/components/frontend/js/data_profiling/data_profiling_utils.js @@ -27,6 +27,7 @@ * @property {string} schema_name * @property {string} table_group_id * @property {string} connection_id + * @property {string} project_code * * Characteristics * @property {'A' | 'B' | 'D' | 'N' | 'T' | 'X'} general_type * @property {string} column_type @@ -135,6 +136,7 @@ * @property {string} schema_name * @property {string} table_group_id * @property {string} connection_id + * @property {string} project_code * * Characteristics * @property {string} functional_table_type * @property {number} record_ct diff --git a/testgen/ui/components/frontend/js/pages/project_dashboard.js b/testgen/ui/components/frontend/js/pages/project_dashboard.js index 38f0bdf0..30a93d9d 100644 --- a/testgen/ui/components/frontend/js/pages/project_dashboard.js +++ b/testgen/ui/components/frontend/js/pages/project_dashboard.js @@ -1,6 +1,7 @@ /** * @typedef ProjectSummary * @type {object} + * @property {string} project_code * @property {number} table_groups_count * @property {number} test_suites_count * @property {number} test_definitions_count @@ -8,7 +9,7 @@ * @property {number} profiling_runs_count * @property {number} connections_count * @property {string} default_connection_id - * + * * @typedef TestSuiteSummary * @type {object} * @property {string} id @@ -23,7 +24,7 @@ * @property {number} last_run_failed_ct * @property {number} last_run_error_ct * @property {number} last_run_dismissed_ct - * + * * @typedef TableGroupSummary * @type {object} * @property {string} id @@ -51,13 +52,13 @@ * @property {number} latest_tests_dismissed_ct * @property {TestSuiteSummary[]} test_suites * @property {boolean} expanded - * + * * @typedef SortOption * @type {object} * @property {string} value * @property {string} label * @property {boolean} selected - * + * * @typedef Properties * @type {object} * 
@property {ProjectSummary} project @@ -177,13 +178,13 @@ const ProjectDashboard = (/** @type Properties */ props) => { style: 'font-size: 14px;', }), ) - : undefined, + : '', () => !getValue(isEmpty) ? div( { class: 'flex-column mt-2' }, getValue(filteredTableGroups).map(tableGroup => TableGroupCard(tableGroup)), ) - : undefined, + : '', ); } @@ -361,6 +362,7 @@ const ConditionalEmptyState = (/** @type ProjectSummary */ project) => { link: { label: 'Go to Connections', href: 'connections', + params: { project_code: project.project_code }, }, }; const forTablegroups = { diff --git a/testgen/ui/components/frontend/js/pages/quality_dashboard.js b/testgen/ui/components/frontend/js/pages/quality_dashboard.js index 135e1808..41175648 100644 --- a/testgen/ui/components/frontend/js/pages/quality_dashboard.js +++ b/testgen/ui/components/frontend/js/pages/quality_dashboard.js @@ -1,18 +1,19 @@ /** * @import { Score } from '../components/score_card.js'; - * + * * @typedef ProjectSummary * @type {object} + * @property {string} project_code * @property {number} connections_count * @property {string} default_connection_id * @property {number} table_groups_count * @property {number} profiling_runs_count - * + * * @typedef Category * @type {object} * @property {string} label * @property {number} score - * + * * @typedef Properties * @type {object} * @property {ProjectSummary} project_summary @@ -63,6 +64,7 @@ const QualityDashboard = (/** @type {Properties} */ props) => { }, filterTerm, sortedBy, + getValue(props.project_summary), ), () => div( { class: 'flex-row fx-flex-wrap fx-gap-4' }, @@ -82,7 +84,12 @@ const QualityDashboard = (/** @type {Properties} */ props) => { ); }; -const Toolbar = (options, /** @type {string} */ filterBy, /** @type {string} */ sortedBy) => { +const Toolbar = ( + options, + /** @type {string} */ filterBy, + /** @type {string} */ sortedBy, + /** @type ProjectSummary */ projectSummary +) => { const sortOptions = [ { label: "Score Name", value: "name" }, { label: "Lowest Score", value: "score" }, @@ -116,7 +123,10 @@ const Toolbar = (options, /** @type {string} */ filterBy, /** @type {string} */ label: 'Score Explorer', color: 'primary', style: 'background: var(--button-generic-background-color); width: unset; margin-right: 16px;', - onclick: () => emitEvent('LinkClicked', { href: 'quality-dashboard:explorer' }), + onclick: () => emitEvent('LinkClicked', { + href: 'quality-dashboard:explorer', + params: { project_code: projectSummary.project_code }, + }), }), Button({ type: 'icon', @@ -135,6 +145,7 @@ const ConditionalEmptyState = (/** @type ProjectSummary */ projectSummary) => { link: { label: 'Score Explorer', href: 'quality-dashboard:explorer', + params: { project_code: projectSummary.project_code }, }, }; @@ -144,6 +155,7 @@ const ConditionalEmptyState = (/** @type ProjectSummary */ projectSummary) => { link: { label: 'Go to Connections', href: 'connections', + params: { project_code: projectSummary.project_code }, }, }; } else if (projectSummary.profiling_runs_count <= 0) { @@ -167,4 +179,4 @@ const ConditionalEmptyState = (/** @type ProjectSummary */ projectSummary) => { const stylesheet = new CSSStyleSheet(); stylesheet.replace(''); -export { QualityDashboard }; \ No newline at end of file +export { QualityDashboard }; diff --git a/testgen/ui/components/frontend/js/pages/score_explorer.js b/testgen/ui/components/frontend/js/pages/score_explorer.js index 6c3fdf2e..cd19054b 100644 --- a/testgen/ui/components/frontend/js/pages/score_explorer.js +++ 
b/testgen/ui/components/frontend/js/pages/score_explorer.js @@ -7,6 +7,7 @@ * @typedef ScoreDefinition * @type {object} * @property {string} name + * @property {string} project_code * @property {boolean} total_score * @property {boolean} cde_score * @property {string} category @@ -306,7 +307,7 @@ const Toolbar = ( () => { const isNew_ = getValue(isNew); let href = 'quality-dashboard'; - let params = {}; + let params = {project_code: definition.project_code}; if (!isNew_) { href = 'quality-dashboard:score-details'; params = {definition_id: definition.id}; diff --git a/testgen/ui/components/frontend/js/pages/test_suites.js b/testgen/ui/components/frontend/js/pages/test_suites.js index 9e62ef67..a3f906f8 100644 --- a/testgen/ui/components/frontend/js/pages/test_suites.js +++ b/testgen/ui/components/frontend/js/pages/test_suites.js @@ -1,6 +1,7 @@ /** * @typedef ProjectSummary * @type {object} + * @property {string} project_code * @property {number} test_suites_ct * @property {number} connections_ct * @property {number} table_groups_ct @@ -211,6 +212,7 @@ const ConditionalEmptyState = ( link: { label: 'Go to Connections', href: 'connections', + params: { project_code: projectSummary.project_code }, }, }; } else if (projectSummary.table_groups_ct <= 0) { diff --git a/testgen/ui/components/widgets/sidebar.py b/testgen/ui/components/widgets/sidebar.py index 95bb8360..44356750 100644 --- a/testgen/ui/components/widgets/sidebar.py +++ b/testgen/ui/components/widgets/sidebar.py @@ -4,7 +4,7 @@ from testgen.ui.components.utils.component import component from testgen.ui.navigation.menu import Menu from testgen.ui.navigation.router import Router -from testgen.ui.services import javascript_service, project_service, user_session_service +from testgen.ui.services import javascript_service, user_session_service from testgen.ui.session import session from testgen.ui.views.dialogs.application_logs_dialog import application_logs_dialog @@ -49,6 +49,7 @@ def sidebar( on_change=on_change, ) + def on_change(): # We cannot navigate directly here # because st.switch_page uses st.rerun under the hood @@ -63,21 +64,14 @@ def on_change(): return session.sidebar_event_id = event_id - project = event_data.get("project") - path = event_data.get("path") - view_logs = event_data.get("view_logs") - - if project: - project_service.set_current_project(project) - Router().queue_navigation(to="") - - if path: - if path == LOGOUT_PATH: - javascript_service.clear_component_states() - user_session_service.end_user_session() - Router().queue_navigation(to="", with_args={ "project_code": session.project }) - else: - Router().queue_navigation(to=path, with_args={ "project_code": session.project }) - - if view_logs: + if event_data.get("view_logs"): application_logs_dialog() + elif event_data.get("path") == LOGOUT_PATH: + javascript_service.clear_component_states() + user_session_service.end_user_session() + Router().queue_navigation(to="") + else: + Router().queue_navigation( + to=event_data.get("path") or session.user_default_page, + with_args=event_data.get("params", {}), + ) diff --git a/testgen/ui/navigation/page.py b/testgen/ui/navigation/page.py index ca4ed5f4..1ba4cfff 100644 --- a/testgen/ui/navigation/page.py +++ b/testgen/ui/navigation/page.py @@ -33,17 +33,20 @@ def _navigate(self) -> None: self.router.navigate_to_pending() for guard in self.can_activate or []: can_activate = guard() - if type(can_activate) == str: - return self.router.navigate(to=can_activate) + if can_activate != True: + session.sidebar_project = 
session.sidebar_project or project_service.get_projects()[0]["code"] + + if type(can_activate) == str: + return self.router.navigate(to=can_activate, with_args={ "project_code": session.sidebar_project }) - if not can_activate: session.page_pending_login = self.path - return self.router.navigate(to=session.user_default_page or "") + session.page_args_pending_login = st.query_params.to_dict() - session.current_page_args = session.current_page_args or {} - self._validate_project_query_param() + default_page = session.user_default_page or "" + with_args = { "project_code": session.sidebar_project } if default_page else {} + return self.router.navigate(to=default_page, with_args=with_args) - self.render(**self._query_params_to_kwargs(session.current_page_args)) + self.render(**self._query_params_to_kwargs(st.query_params)) def _query_params_to_kwargs(self, query_params: dict | QueryParamsProxy) -> dict: if not isinstance(query_params, QueryParamsProxy): @@ -55,19 +58,6 @@ def _query_params_to_kwargs(self, query_params: dict | QueryParamsProxy) -> dict kwargs[key] = values_list if len(values_list) > 1 else query_params.get(key) return kwargs - def _validate_project_query_param(self) -> None: - if self.path != "" and ":" not in self.path: - project_param = session.current_page_args.get("project_code") - valid_project_codes = [ project["code"] for project in project_service.get_projects() ] - - if project_param not in valid_project_codes: # Ensure top-level pages have valid project_code - session.current_page_args.update({ "project_code": session.project}) - self.router.set_query_params({ "project_code": session.project}) - elif project_param != session.project: # Sync session state with query param - project_service.set_current_project(project_param) - else: - session.current_page_args.pop("project_code", None) - @abc.abstractmethod def render(self, **kwargs) -> None: raise NotImplementedError diff --git a/testgen/ui/navigation/router.py b/testgen/ui/navigation/router.py index d0911033..754edc7b 100644 --- a/testgen/ui/navigation/router.py +++ b/testgen/ui/navigation/router.py @@ -27,14 +27,28 @@ def run(self) -> None: streamlit_pages = [route.streamlit_page for route in self._routes.values()] current_page = st.navigation(streamlit_pages, position="hidden") - session.current_page_args = st.query_params + + # This hack is needed because the auth cookie is not set if navigation happens immediately after login + # We have to navigate on the next run + if session.logging_in: + session.logging_in = False + + pending_route = session.page_pending_login or session.user_default_page or "" + pending_args = ( + (session.page_args_pending_login or {}) + if session.page_pending_login + else {"project_code": session.sidebar_project} + ) + session.page_pending_login = None + session.page_args_pending_login = None + + self.navigate(to=pending_route, with_args=pending_args) if session.cookies_ready: current_page = session.page_pending_cookies or current_page session.page_pending_cookies = None if session.page_args_pending_router is not None: - session.current_page_args = session.page_args_pending_router st.query_params.from_dict(session.page_args_pending_router) session.page_args_pending_router = None @@ -42,9 +56,8 @@ def run(self) -> None: current_page.run() else: # This hack is needed because the auth cookie is not retrieved on the first run - # We have to store the page and wait for the next run + # We have to store the page and wait until cookies are ready session.page_pending_cookies = current_page - 
session.cookies_ready = True st.rerun() def queue_navigation(self, /, to: str, with_args: dict | None = None) -> None: diff --git a/testgen/ui/queries/profiling_queries.py b/testgen/ui/queries/profiling_queries.py index ec42541a..ab18f736 100644 --- a/testgen/ui/queries/profiling_queries.py +++ b/testgen/ui/queries/profiling_queries.py @@ -74,6 +74,9 @@ @st.cache_data(show_spinner="Loading data ...") def get_run_by_id(profile_run_id: str) -> pd.Series: + if not is_uuid4(profile_run_id): + return pd.Series() + schema: str = st.session_state["dbschema"] sql = f""" SELECT profiling_starttime, table_groups_id::VARCHAR, table_groups_name, pr.project_code, pr.dq_score_profiling, diff --git a/testgen/ui/services/project_service.py b/testgen/ui/services/project_service.py index 18063ba7..9ede797f 100644 --- a/testgen/ui/services/project_service.py +++ b/testgen/ui/services/project_service.py @@ -16,9 +16,10 @@ def get_projects(): return projects -def set_current_project(project_code: str) -> None: - session.project = project_code - Router().set_query_params({ "project_code": project_code }) +def set_sidebar_project(project_code: str) -> None: + if project_code != session.sidebar_project: + session.sidebar_project = project_code + st.rerun() @st.cache_data(show_spinner=False) diff --git a/testgen/ui/services/test_suite_service.py b/testgen/ui/services/test_suite_service.py index b8779631..9a6326a1 100644 --- a/testgen/ui/services/test_suite_service.py +++ b/testgen/ui/services/test_suite_service.py @@ -3,6 +3,7 @@ import testgen.ui.queries.test_suite_queries as test_suite_queries import testgen.ui.services.test_definition_service as test_definition_service +from testgen.utils import is_uuid4 def get_by_project(project_code, table_group_id=None): @@ -11,6 +12,9 @@ def get_by_project(project_code, table_group_id=None): def get_by_id(test_suite_id: str) -> pd.Series: + if not is_uuid4(test_suite_id): + return pd.Series() + schema = st.session_state["dbschema"] df = test_suite_queries.get_by_id(schema, test_suite_id) if not df.empty: diff --git a/testgen/ui/services/user_session_service.py b/testgen/ui/services/user_session_service.py index a5b8c186..be0f27ae 100644 --- a/testgen/ui/services/user_session_service.py +++ b/testgen/ui/services/user_session_service.py @@ -23,6 +23,9 @@ def load_user_session() -> None: # Replacing this with st.context.cookies does not work # Because it does not update when cookies are deleted on logout cookies = stx.CookieManager(key="testgen.cookies.get") + if cookies.cookies: + session.cookies_ready = True + token = cookies.get(AUTH_TOKEN_COOKIE_NAME) if token is not None: try: diff --git a/testgen/ui/session.py b/testgen/ui/session.py index aa0820ed..b82cbc21 100644 --- a/testgen/ui/session.py +++ b/testgen/ui/session.py @@ -17,11 +17,9 @@ class TestgenSession(Singleton): logging_out: bool page_pending_cookies: st.Page # type: ignore page_pending_login: str - page_pending_sidebar: str + page_args_pending_login: dict page_args_pending_router: dict - current_page: str - current_page_args: dict dbschema: str @@ -31,7 +29,7 @@ class TestgenSession(Singleton): auth_role: Literal["admin", "data_quality", "analyst", "business", "catalog"] user_default_page: str - project: str + sidebar_project: str add_project: bool latest_version: str | None diff --git a/testgen/ui/views/connections/page.py b/testgen/ui/views/connections/page.py index 10cab979..92b87ce9 100644 --- a/testgen/ui/views/connections/page.py +++ b/testgen/ui/views/connections/page.py @@ -30,6 +30,7 @@ class 
ConnectionsPage(Page): can_activate: typing.ClassVar = [ lambda: session.authentication_status, lambda: not user_session_service.user_has_catalog_role(), + lambda: "project_code" in st.query_params, ] menu_item = MenuItem( icon="database", @@ -342,7 +343,8 @@ def execute_setup( on_click=lambda: ( st.session_state.__setattr__("setup_data_config:navigate-to", "profiling-runs") or st.session_state.__setattr__("setup_data_config:navigate-to-args", { - "table_group": table_group_id + "project_code": table_group["project_code"], + "table_group": table_group_id, }) ), ) diff --git a/testgen/ui/views/data_catalog.py b/testgen/ui/views/data_catalog.py index c131b81c..553720e6 100644 --- a/testgen/ui/views/data_catalog.py +++ b/testgen/ui/views/data_catalog.py @@ -30,15 +30,15 @@ class DataCatalogPage(Page): path = "data-catalog" can_activate: typing.ClassVar = [ lambda: session.authentication_status, + lambda: "project_code" in st.query_params, ] menu_item = MenuItem(icon=PAGE_ICON, label=PAGE_TITLE, section="Data Profiling", order=0) - def render(self, project_code: str | None = None, table_group_id: str | None = None, selected: str | None = None, **_kwargs) -> None: + def render(self, project_code: str, table_group_id: str | None = None, selected: str | None = None, **_kwargs) -> None: testgen.page_header( PAGE_TITLE, ) - project_code = project_code or session.project user_can_navigate = not user_session_service.user_has_catalog_role() if render_empty_state(project_code, user_can_navigate): @@ -62,6 +62,7 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N columns_df = get_table_group_columns(table_group_id) selected_item = get_selected_item(selected, table_group_id) if selected_item: + selected_item["project_code"] = project_code selected_item["connection_id"] = str( table_groups_df.loc[table_groups_df["id"] == table_group_id].iloc[0]["connection_id"]) else: @@ -166,6 +167,7 @@ def render_empty_state(project_code: str, user_can_navigate: bool) -> bool: action_label="Go to Connections", action_disabled=not user_can_navigate, link_href="connections", + link_params={ "project_code": project_code }, ) else: testgen.empty_state( diff --git a/testgen/ui/views/hygiene_issues.py b/testgen/ui/views/hygiene_issues.py index adc96efc..654326c0 100644 --- a/testgen/ui/views/hygiene_issues.py +++ b/testgen/ui/views/hygiene_issues.py @@ -27,7 +27,7 @@ class HygieneIssuesPage(Page): can_activate: typing.ClassVar = [ lambda: session.authentication_status, lambda: not user_session_service.user_has_catalog_role(), - lambda: "run_id" in session.current_page_args or "profiling-runs", + lambda: "run_id" in st.query_params or "profiling-runs", ] def render( @@ -48,7 +48,7 @@ def render( return run_date = date_service.get_timezoned_timestamp(st.session_state, run_df["profiling_starttime"]) - project_service.set_current_project(run_df["project_code"]) + project_service.set_sidebar_project(run_df["project_code"]) testgen.page_header( "Hygiene Issues", diff --git a/testgen/ui/views/login.py b/testgen/ui/views/login.py index ed972fab..3f08d190 100644 --- a/testgen/ui/views/login.py +++ b/testgen/ui/views/login.py @@ -52,13 +52,5 @@ def render(self, **_kwargs) -> None: if authentication_status: user_session_service.start_user_session(name, username) - - # This hack is needed because the auth cookie is not set if navigation happens immediately - if session.logging_in: - session.logging_in = False - next_route = session.page_pending_login or session.user_default_page - 
session.page_pending_login = None - self.router.navigate(next_route) - else: - session.logging_in = True - MixpanelService().send_event("login") + session.logging_in = True + MixpanelService().send_event("login") diff --git a/testgen/ui/views/profiling_results.py b/testgen/ui/views/profiling_results.py index 24cd6da7..c09dd7ce 100644 --- a/testgen/ui/views/profiling_results.py +++ b/testgen/ui/views/profiling_results.py @@ -22,7 +22,7 @@ class ProfilingResultsPage(Page): can_activate: typing.ClassVar = [ lambda: session.authentication_status, lambda: not user_session_service.user_has_catalog_role(), - lambda: "run_id" in session.current_page_args or "profiling-runs", + lambda: "run_id" in st.query_params or "profiling-runs", ] def render(self, run_id: str, table_name: str | None = None, column_name: str | None = None, **_kwargs) -> None: @@ -35,7 +35,7 @@ def render(self, run_id: str, table_name: str | None = None, column_name: str | return run_date = date_service.get_timezoned_timestamp(st.session_state, run_df["profiling_starttime"]) - project_service.set_current_project(run_df["project_code"]) + project_service.set_sidebar_project(run_df["project_code"]) testgen.page_header( "Data Profiling Results", diff --git a/testgen/ui/views/profiling_runs.py b/testgen/ui/views/profiling_runs.py index 14504afe..1bf04411 100644 --- a/testgen/ui/views/profiling_runs.py +++ b/testgen/ui/views/profiling_runs.py @@ -29,6 +29,7 @@ class DataProfilingPage(Page): can_activate: typing.ClassVar = [ lambda: session.authentication_status, lambda: not user_session_service.user_has_catalog_role(), + lambda: "project_code" in st.query_params, ] menu_item = MenuItem( icon=PAGE_ICON, @@ -38,13 +39,12 @@ class DataProfilingPage(Page): roles=[ role for role in typing.get_args(user_session_service.RoleType) if role != "catalog" ], ) - def render(self, project_code: str | None = None, table_group_id: str | None = None, **_kwargs) -> None: + def render(self, project_code: str, table_group_id: str | None = None, **_kwargs) -> None: testgen.page_header( PAGE_TITLE, "investigate-profiling", ) - project_code = project_code or session.project user_can_run = user_session_service.user_can_edit() if render_empty_state(project_code, user_can_run): return @@ -110,6 +110,7 @@ def render_empty_state(project_code: str, user_can_run: bool) -> bool: message=testgen.EmptyStateMessage.Connection, action_label="Go to Connections", link_href="connections", + link_params={ "project_code": project_code }, ) elif not project_summary_df["table_groups_ct"]: testgen.empty_state( diff --git a/testgen/ui/views/project_dashboard.py b/testgen/ui/views/project_dashboard.py index be3a2e82..3515e68c 100644 --- a/testgen/ui/views/project_dashboard.py +++ b/testgen/ui/views/project_dashboard.py @@ -22,6 +22,7 @@ class ProjectDashboardPage(Page): can_activate: typing.ClassVar = [ lambda: session.authentication_status, lambda: not user_session_service.user_has_catalog_role(), + lambda: "project_code" in st.query_params, ] menu_item = MenuItem( icon=PAGE_ICON, @@ -30,13 +31,12 @@ class ProjectDashboardPage(Page): roles=[ role for role in typing.get_args(user_session_service.RoleType) if role != "catalog" ], ) - def render(self, project_code: str | None = None, **_kwargs): + def render(self, project_code: str, **_kwargs): testgen.page_header( PAGE_TITLE, "introduction-to-dataops-testgen", ) - project_code = project_code or session.project table_groups = get_table_groups_summary(project_code) project_summary_df = 
project_queries.get_summary_by_code(project_code) @@ -83,6 +83,7 @@ def render(self, project_code: str | None = None, **_kwargs): "project_dashboard", props={ "project": { + "project_code": project_code, "table_groups_count": len(table_groups.index), "test_suites_count": int(table_groups["latest_tests_suite_ct"].sum()), "test_definitions_count": int(table_groups["latest_tests_ct"].sum()), diff --git a/testgen/ui/views/project_settings.py b/testgen/ui/views/project_settings.py index 21e0059e..18e9980a 100644 --- a/testgen/ui/views/project_settings.py +++ b/testgen/ui/views/project_settings.py @@ -20,7 +20,7 @@ class ProjectSettingsPage(Page): can_activate: typing.ClassVar = [ lambda: session.authentication_status, lambda: user_session_service.user_is_admin(), - lambda: session.project is not None, + lambda: "project_code" in st.query_params, ] menu_item = MenuItem( icon="settings", @@ -34,7 +34,7 @@ class ProjectSettingsPage(Page): existing_names: list[str] | None = None def render(self, project_code: str | None = None, **_kwargs) -> None: - self.project = project_service.get_project_by_code(project_code or session.project) + self.project = project_service.get_project_by_code(project_code) testgen.page_header( PAGE_TITLE, diff --git a/testgen/ui/views/quality_dashboard.py b/testgen/ui/views/quality_dashboard.py index 3a09fe7d..107ba49b 100644 --- a/testgen/ui/views/quality_dashboard.py +++ b/testgen/ui/views/quality_dashboard.py @@ -1,5 +1,7 @@ from typing import ClassVar, get_args +import streamlit as st + from testgen.ui.components import widgets as testgen from testgen.ui.navigation.menu import MenuItem from testgen.ui.navigation.page import Page @@ -17,6 +19,7 @@ class QualityDashboardPage(Page): can_activate: ClassVar = [ lambda: session.authentication_status, lambda: not user_session_service.user_has_catalog_role(), + lambda: "project_code" in st.query_params, ] menu_item = MenuItem( icon="readiness_score", @@ -33,6 +36,7 @@ def render(self, *, project_code: str, **_kwargs) -> None: "quality_dashboard", props={ "project_summary": { + "project_code": project_code, "connections_count": int(project_summary["connections_ct"]), "default_connection_id": str(project_summary["default_connection_id"]), "table_groups_count": int(project_summary["table_groups_ct"]), diff --git a/testgen/ui/views/score_details.py b/testgen/ui/views/score_details.py index 749ce1aa..c96fd98c 100644 --- a/testgen/ui/views/score_details.py +++ b/testgen/ui/views/score_details.py @@ -14,7 +14,7 @@ from testgen.ui.navigation.router import Router from testgen.ui.pdf import hygiene_issue_report, test_result_report from testgen.ui.queries.scoring_queries import get_all_score_cards, get_score_card_issue_reports -from testgen.ui.services import user_session_service +from testgen.ui.services import project_service, user_session_service from testgen.ui.session import session, temp_value from testgen.ui.views.dialogs.profiling_results_dialog import profiling_results_dialog from testgen.utils import format_score_card, format_score_card_breakdown, format_score_card_issues @@ -27,7 +27,7 @@ class ScoreDetailsPage(Page): can_activate: ClassVar = [ lambda: session.authentication_status, lambda: not user_session_service.user_has_catalog_role(), - lambda: "definition_id" in session.current_page_args or "quality-dashboard", + lambda: "definition_id" in st.query_params or "quality-dashboard", ] def render( @@ -39,7 +39,6 @@ def render( drilldown: str | None = None, **_kwargs ): - project_code: str = session.project score_definition: 
ScoreDefinition = ScoreDefinition.get(definition_id) if not score_definition: @@ -48,11 +47,13 @@ def render( "quality-dashboard", ) return + + project_service.set_sidebar_project(score_definition.project_code) testgen.page_header( "Score Details", breadcrumbs=[ - {"path": "quality-dashboard", "label": "Quality Dashboard", "params": {"project_code": project_code}}, + {"path": "quality-dashboard", "label": "Quality Dashboard", "params": {"project_code": score_definition.project_code}}, {"label": score_definition.name}, ], ) diff --git a/testgen/ui/views/score_explorer.py b/testgen/ui/views/score_explorer.py index 01e92cdd..17edfaaf 100644 --- a/testgen/ui/views/score_explorer.py +++ b/testgen/ui/views/score_explorer.py @@ -30,6 +30,7 @@ class ScoreExplorerPage(Page): can_activate: ClassVar = [ lambda: session.authentication_status, lambda: not user_session_service.user_has_catalog_role(), + lambda: "definition_id" in st.query_params or "project_code" in st.query_params or "quality-dashboard", ] def render( @@ -43,13 +44,22 @@ def render( breakdown_score_type: str | None = "score", drilldown: str | None = None, definition_id: str | None = None, + project_code: str | None = None, **_kwargs ): - project_code: str = session.project page_title: str = "Score Explorer" last_breadcrumb: str = page_title if definition_id: original_score_definition = ScoreDefinition.get(definition_id) + + if not original_score_definition: + self.router.navigate_with_warning( + f"Scorecard with ID '{definition_id}' does not exist. Redirecting to Quality Dashboard ...", + "quality-dashboard", + ) + return + + project_code = original_score_definition.project_code page_title = "Edit Scorecard" last_breadcrumb = original_score_definition.name testgen.page_header(page_title, breadcrumbs=[ @@ -202,6 +212,7 @@ def get_report_file_data(update_progress, issue) -> FILE_DATA_TYPE: def save_score_definition(_) -> None: + project_code = st.query_params.get("project_code") definition_id = st.query_params.get("definition_id") name = st.query_params.get("name") total_score = st.query_params.get("total_score") @@ -221,11 +232,12 @@ def save_score_definition(_) -> None: if definition_id: is_new = False score_definition = ScoreDefinition.get(definition_id) + project_code = score_definition.project_code if is_new: latest_run = max( - profiling_queries.get_latest_run_date(session.project), - test_run_queries.get_latest_run_date(session.project), + profiling_queries.get_latest_run_date(project_code), + test_run_queries.get_latest_run_date(project_code), key=lambda run: getattr(run, "run_time", 0), ) @@ -234,7 +246,7 @@ def save_score_definition(_) -> None: "refresh_date": latest_run.run_time if latest_run else None, } - score_definition.project_code = session.project + score_definition.project_code = project_code score_definition.name = name score_definition.total_score = total_score and total_score.lower() == "true" score_definition.cde_score = cde_score and cde_score.lower() == "true" @@ -248,7 +260,7 @@ def save_score_definition(_) -> None: get_all_score_cards.clear() if not is_new: - run_recalculate_score_card(project_code=score_definition.project_code, definition_id=score_definition.id) + run_recalculate_score_card(project_code=project_code, definition_id=score_definition.id) Router().set_query_params({ "name": None, diff --git a/testgen/ui/views/table_groups/page.py b/testgen/ui/views/table_groups/page.py index f97a6c45..d557e33c 100644 --- a/testgen/ui/views/table_groups/page.py +++ b/testgen/ui/views/table_groups/page.py @@ -23,7 
+23,7 @@ class TableGroupsPage(Page): can_activate: typing.ClassVar = [ lambda: session.authentication_status, lambda: not user_session_service.user_has_catalog_role(), - lambda: "connection_id" in session.current_page_args or "connections", + lambda: "connection_id" in st.query_params or "connections", ] def render(self, connection_id: str, **_kwargs) -> None: @@ -35,7 +35,7 @@ def render(self, connection_id: str, **_kwargs) -> None: ) project_code = connection["project_code"] - project_service.set_current_project(project_code) + project_service.set_sidebar_project(project_code) user_can_edit = user_session_service.user_can_edit() testgen.page_header( diff --git a/testgen/ui/views/test_definitions.py b/testgen/ui/views/test_definitions.py index 90e4f686..d14d8a27 100644 --- a/testgen/ui/views/test_definitions.py +++ b/testgen/ui/views/test_definitions.py @@ -28,7 +28,7 @@ class TestDefinitionsPage(Page): can_activate: typing.ClassVar = [ lambda: session.authentication_status, lambda: not user_session_service.user_has_catalog_role(), - lambda: "test_suite_id" in session.current_page_args or "test-suites", + lambda: "test_suite_id" in st.query_params or "test-suites", ] def render(self, test_suite_id: str, table_name: str | None = None, column_name: str | None = None, **_kwargs) -> None: @@ -41,7 +41,7 @@ def render(self, test_suite_id: str, table_name: str | None = None, column_name: table_group = table_group_service.get_by_id(test_suite["table_groups_id"]) project_code = table_group["project_code"] - project_service.set_current_project(project_code) + project_service.set_sidebar_project(project_code) user_can_edit = user_session_service.user_can_edit() user_can_disposition = user_session_service.user_can_disposition() @@ -91,7 +91,7 @@ def render(self, test_suite_id: str, table_name: str | None = None, column_name: add_test_dialog(project_code, table_group, test_suite, table_name, column_name) selected = show_test_defs_grid( - session.project, test_suite["test_suite"], table_name, column_name, do_multi_select, table_actions_column, + project_code, test_suite["test_suite"], table_name, column_name, do_multi_select, table_actions_column, table_group["id"] ) fm.render_refresh_button(table_actions_column) diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py index 42d6001f..f64251d2 100644 --- a/testgen/ui/views/test_results.py +++ b/testgen/ui/views/test_results.py @@ -23,7 +23,7 @@ from testgen.ui.session import session from testgen.ui.views.dialogs.profiling_results_dialog import view_profiling_button from testgen.ui.views.test_definitions import show_test_form_by_id -from testgen.utils import friendly_score +from testgen.utils import friendly_score, is_uuid4 ALWAYS_SPIN = False @@ -33,7 +33,7 @@ class TestResultsPage(Page): can_activate: typing.ClassVar = [ lambda: session.authentication_status, lambda: not user_session_service.user_has_catalog_role(), - lambda: "run_id" in session.current_page_args or "test-runs", + lambda: "run_id" in st.query_params or "test-runs", ] def render( @@ -54,7 +54,7 @@ def render( return run_date = date_service.get_timezoned_timestamp(st.session_state, run_df["test_starttime"]) - project_service.set_current_project(run_df["project_code"]) + project_service.set_sidebar_project(run_df["project_code"]) testgen.page_header( "Test Results", @@ -228,6 +228,9 @@ def refresh_score(project_code: str, run_id: str, table_group_id: str | None) -> @st.cache_data(show_spinner=ALWAYS_SPIN) def get_run_by_id(test_run_id: str) -> pd.Series: + if 
not is_uuid4(test_run_id): + return pd.Series() + schema: str = st.session_state["dbschema"] sql = f""" SELECT tr.test_starttime, diff --git a/testgen/ui/views/test_runs.py b/testgen/ui/views/test_runs.py index 765e46b6..fea81f76 100644 --- a/testgen/ui/views/test_runs.py +++ b/testgen/ui/views/test_runs.py @@ -28,6 +28,7 @@ class TestRunsPage(Page): can_activate: typing.ClassVar = [ lambda: session.authentication_status, lambda: not user_session_service.user_has_catalog_role(), + lambda: "project_code" in st.query_params, ] menu_item = MenuItem( icon=PAGE_ICON, @@ -37,13 +38,12 @@ class TestRunsPage(Page): roles=[ role for role in typing.get_args(user_session_service.RoleType) if role != "catalog" ], ) - def render(self, project_code: str | None = None, table_group_id: str | None = None, test_suite_id: str | None = None, **_kwargs) -> None: + def render(self, project_code: str, table_group_id: str | None = None, test_suite_id: str | None = None, **_kwargs) -> None: testgen.page_header( PAGE_TITLE, "test-results", ) - project_code = project_code or session.project user_can_run = user_session_service.user_can_edit() if render_empty_state(project_code, user_can_run): return @@ -119,6 +119,7 @@ def render_empty_state(project_code: str, user_can_run: bool) -> bool: message=testgen.EmptyStateMessage.Connection, action_label="Go to Connections", link_href="connections", + link_params={ "project_code": project_code }, ) elif not project_summary_df["table_groups_ct"]: testgen.empty_state( @@ -136,6 +137,7 @@ def render_empty_state(project_code: str, user_can_run: bool) -> bool: message=testgen.EmptyStateMessage.TestSuite, action_label="Go to Test Suites", link_href="test-suites", + link_params={ "project_code": project_code }, ) else: testgen.empty_state( diff --git a/testgen/ui/views/test_suites.py b/testgen/ui/views/test_suites.py index 9246b1a2..efe7c19d 100644 --- a/testgen/ui/views/test_suites.py +++ b/testgen/ui/views/test_suites.py @@ -29,6 +29,7 @@ class TestSuitesPage(Page): can_activate: typing.ClassVar = [ lambda: session.authentication_status, lambda: not user_session_service.user_has_catalog_role(), + lambda: "project_code" in st.query_params, ] menu_item = MenuItem( icon=PAGE_ICON, @@ -38,13 +39,12 @@ class TestSuitesPage(Page): roles=[ role for role in typing.get_args(user_session_service.RoleType) if role != "catalog" ], ) - def render(self, project_code: str | None = None, table_group_id: str | None = None, **_kwargs) -> None: + def render(self, project_code: str, table_group_id: str | None = None, **_kwargs) -> None: testgen.page_header( PAGE_TITLE, "create-a-test-suite", ) - project_code = project_code or session.project table_groups = get_db_table_group_choices(project_code) user_can_edit = user_session_service.user_can_edit() test_suites = test_suite_service.get_by_project(project_code, table_group_id) @@ -71,6 +71,7 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N "test_suites", props={ "project_summary": { + "project_code": project_code, "test_suites_ct": format_field(project_summary["test_suites_ct"]), "connections_ct": format_field(project_summary["connections_ct"]), "table_groups_ct": format_field(project_summary["table_groups_ct"]), From 4cd2c6c00b69b6c11050face198230e9c3d88b91 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Fri, 2 May 2025 01:32:11 -0400 Subject: [PATCH 07/34] refactor: remove dead code --- testgen/ui/services/form_service.py | 131 ---------------------------- 1 file changed, 131 deletions(-) diff --git 
a/testgen/ui/services/form_service.py b/testgen/ui/services/form_service.py index 20fe0a14..e350bc96 100644 --- a/testgen/ui/services/form_service.py +++ b/testgen/ui/services/form_service.py @@ -1,16 +1,12 @@ -# For render_logo -import base64 import typing from builtins import float from enum import Enum from io import BytesIO -from os.path import splitext from pathlib import Path from time import sleep import pandas as pd import streamlit as st -from attrs import validators from pandas.api.types import is_datetime64_any_dtype from st_aggrid import AgGrid, ColumnsAutoSizeMode, DataReturnMode, GridOptionsBuilder, GridUpdateMode, JsCode from streamlit_extras.no_default_selectbox import selectbox @@ -515,63 +511,6 @@ def render_html_list(dct_row, lst_columns, str_section_header=None, int_data_wid st.divider() -def render_markdown_list(dct_row, lst_columns, str_header=None): - # Renders sets of values as vertical markdown list - - str_blank_line = "
" # chr(10) + chr(10) - - if str_header: - # Header with extra line - str_markdown = f":green[**{str_header}**]" + str_blank_line - else: - str_markdown = "" - - for col in lst_columns: - # Column: Value with extra line - str_markdown += f"**{ut_prettify_header(col)}**:  `{dct_row[col]!s}`" + str_blank_line - - # Drop last blank line - i = str_markdown.rfind(str_blank_line) - if i != -1: - str_markdown = str_markdown[:i] - - with st.container(): - st.markdown(str_markdown, unsafe_allow_html=True) - st.divider() - - -def render_markdown_table(df, lst_columns): - # Filter the DataFrame to include only the specified columns - - df_filtered = df[lst_columns] - - # Initialize markdown string - md_str = "" - # Add headers - headers = "|".join([f" {ut_prettify_header(col)} " for col in lst_columns]) - md_str += f"|{headers}|\n" - # Add alignment row - alignments = [] - for col in lst_columns: - if pd.api.types.is_numeric_dtype(df_filtered[col]): - alignments.append("---:") - else: - alignments.append(":---") - md_str += f"|{'|'.join(alignments)}|\n" - - # Add rows - for _, row in df_filtered.iterrows(): - row_str = [] - for col in lst_columns: - if pd.api.types.is_numeric_dtype(df_filtered[col]): - row_str.append(f" {row[col]} ") - else: - row_str.append(f" {row[col]} ") - md_str += f"|{'|'.join(row_str)}|\n" - - st.markdown(md_str) - - def render_grid_select( df: pd.DataFrame, show_columns, @@ -766,73 +705,3 @@ def render_grid_select( if bind_to_query_name and bind_to_query_prop: Router().set_query_params({bind_to_query_name: selected_rows[0][bind_to_query_prop]}) return selected_rows - - -def render_logo(logo_path: str = logo_file): - st.markdown( - f"""""", - unsafe_allow_html=True, - ) - - -def render_icon_link(target_url, width=20, height=20, icon_path=help_icon): - # left, right = st.columns([0.5, 0.5]) - # with left: - - # Check if the icon_path is a URL or a local path - if validators.url(icon_path): - img_data = icon_path - else: - # If local path, convert the image to base64 - img_data = base64.b64encode(Path(icon_path).read_bytes()).decode() - - # Get the image extension - img_format = splitext(icon_path)[-1].replace(".", "") - - base_html = f""" - - - - """ - if validators.url(icon_path): - html_code = base_html.format(img_data) - else: - html_code = base_html.format(f"data:image/{img_format};base64,{img_data}") - - st.markdown(html_code, unsafe_allow_html=True) - - -def render_icon_link_new(target_url, width=20, height=20, icon_path=help_icon): - # FIXME: Why doesn't this work? 
- - # Check if the icon_path is a URL or a local path - if validators.url(icon_path): - img_data = icon_path - else: - # If local path, convert the image to base64 - img_data = base64.b64encode(Path(icon_path).read_bytes()).decode() - - # Get the image extension - img_format = splitext(icon_path)[-1].replace(".", "") - - if not validators.url(icon_path): - img_data = f"data:image/{img_format};base64,{img_data}" - - html_code = f""" - - - - -""" - - st.markdown(html_code, unsafe_allow_html=True) From c9f98cdd8b6f2b3a49306b92c8969767add9ecaf Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Fri, 2 May 2025 11:17:22 -0400 Subject: [PATCH 08/34] lint: remove unused import --- testgen/ui/services/project_service.py | 1 - 1 file changed, 1 deletion(-) diff --git a/testgen/ui/services/project_service.py b/testgen/ui/services/project_service.py index 9ede797f..2f4443cf 100644 --- a/testgen/ui/services/project_service.py +++ b/testgen/ui/services/project_service.py @@ -1,6 +1,5 @@ import streamlit as st -from testgen.ui.navigation.router import Router from testgen.ui.queries import project_queries from testgen.ui.services import database_service, query_service from testgen.ui.session import session From 01ab812ef03708c9962b6e37494c5c014036abb4 Mon Sep 17 00:00:00 2001 From: Ricardo Boni Date: Fri, 2 May 2025 13:29:55 -0400 Subject: [PATCH 09/34] misc(scheduler): Addressing code review feedback --- docs/local_development.md | 2 +- testgen/ui/components/widgets/tz_select.py | 40 +++++++++++++++-- testgen/ui/views/dialogs/manage_schedules.py | 45 +++++++++----------- 3 files changed, 59 insertions(+), 28 deletions(-) diff --git a/docs/local_development.md b/docs/local_development.md index ae704e3b..df6d3684 100644 --- a/docs/local_development.md +++ b/docs/local_development.md @@ -100,7 +100,7 @@ testgen quick-start --simulate-fast-forward ### Run the Application -TheGen has two modules that have to be running: The web user interface (UI) and the Scheduler. +TestGen has two modules that have to be running: The web user interface (UI) and the Scheduler. The scheduler starts jobs (profiling, test execution, ...) at their scheduled times. 
The following command starts both modules, each in their own process: diff --git a/testgen/ui/components/widgets/tz_select.py b/testgen/ui/components/widgets/tz_select.py index 74293d1d..81e1dfe7 100644 --- a/testgen/ui/components/widgets/tz_select.py +++ b/testgen/ui/components/widgets/tz_select.py @@ -2,8 +2,42 @@ import streamlit as st +MOST_RELEVANT_TIMEZONES = [ + "Africa/Johannesburg", # +02:00 + "America/Chicago", # -05:00 + "America/Denver", # -06:00 + "America/Halifax", # -03:00 + "America/Los_Angeles", # -07:00 + "America/Mexico_City", # -06:00 + "America/New_York", # -04:00 + "America/Phoenix", # -07:00 + "America/Sao_Paulo", # -03:00 + "America/Vancouver", # -07:00 + "Asia/Bangkok", # +07:00 + "Asia/Dubai", # +04:00 + "Asia/Kolkata", # +05:30 + "Asia/Riyadh", # +03:00 + "Asia/Seoul", # +09:00 + "Asia/Shanghai", # +08:00 + "Asia/Singapore", # +08:00 + "Asia/Tokyo", # +09:00 + "Australia/Sydney", # +10:00 + "Europe/Berlin", # +02:00 + "Europe/Istanbul", # +03:00 + "Europe/London", # +01:00 + "Europe/Moscow", # +03:00 + "Europe/Paris", # +02:00 + "Pacific/Auckland", # +12:00 +] + def tz_select(*, default="America/New_York", **kwargs): - tz_options = sorted(zoneinfo.available_timezones()) - index = tz_options.index(st.session_state.get("browser_timezone", default)) - return st.selectbox(options=tz_options, index=index, format_func=lambda v: v.replace("_", " "), **kwargs) + tz_options = MOST_RELEVANT_TIMEZONES[:] + tz_options.extend(sorted(tz for tz in zoneinfo.available_timezones() if tz not in MOST_RELEVANT_TIMEZONES)) + + if "index" in kwargs: + raise ValueError("Use the Session State API instead.") + + if "key" not in kwargs or kwargs["key"] not in st.session_state: + kwargs["index"] = tz_options.index(st.session_state.get("browser_timezone", default)) + return st.selectbox(options=tz_options, format_func=lambda v: v.replace("_", " "), **kwargs) diff --git a/testgen/ui/views/dialogs/manage_schedules.py b/testgen/ui/views/dialogs/manage_schedules.py index a0863885..a641ca26 100644 --- a/testgen/ui/views/dialogs/manage_schedules.py +++ b/testgen/ui/views/dialogs/manage_schedules.py @@ -129,8 +129,8 @@ def add_schedule_form(self): except Exception as e: st.error("Error validating the Cron expression") else: - # We postpone the validation status update when the previous reran was a failed - # attempt to insert an schedule so that it does not override the error message + # We postpone the validation status update when the previous rerun had a failed + # attempt to insert a schedule. 
This prevents the error message from being overridden if st.session_state.get("schedule_form_success", None) is None: st.success( f"**Next runs:** {' | '.join(sample)} ({cron_tz.replace('_', ' ')})", @@ -139,29 +139,26 @@ def add_schedule_form(self): else: st.session_state["schedule_form_success"] = None + is_form_valid = bool(args_valid and cron_obj) with button_column: - add_button = st.button( - "Add", - use_container_width=True, - disabled=not args_valid or not cron_obj, - ) + add_button = st.button("Add", use_container_width=True, disabled=not is_form_valid) - if add_button: + # We also check for `is_form_valid` here because apparently it's possible to click a disabled button =) + if add_button and is_form_valid: with Session() as db_session: - with status_container: - try: - sched_model = JobSchedule( - project_code=self.project_code, - key=self.job_key, - cron_expr=cron_obj.to_string(), - cron_tz=cron_tz, - args=args, - kwargs=kwargs, - ) - db_session.add(sched_model) - db_session.commit() - except IntegrityError: - st.session_state["schedule_form_success"] = False - else: - st.session_state["schedule_form_success"] = True + try: + sched_model = JobSchedule( + project_code=self.project_code, + key=self.job_key, + cron_expr=cron_obj.to_string(), + cron_tz=cron_tz, + args=args, + kwargs=kwargs, + ) + db_session.add(sched_model) + db_session.commit() + except IntegrityError: + st.session_state["schedule_form_success"] = False + else: + st.session_state["schedule_form_success"] = True st.rerun(scope="fragment") From d08e0662db86527808bc8f11c3ce5f99c10590a6 Mon Sep 17 00:00:00 2001 From: Ricardo Boni Date: Fri, 2 May 2025 18:47:02 -0400 Subject: [PATCH 10/34] misc(scheduler): Fixing how we clean up the timezone selectbox --- testgen/ui/components/widgets/tz_select.py | 51 +++++++++++++++----- testgen/ui/views/dialogs/manage_schedules.py | 8 ++- 2 files changed, 42 insertions(+), 17 deletions(-) diff --git a/testgen/ui/components/widgets/tz_select.py b/testgen/ui/components/widgets/tz_select.py index 81e1dfe7..bbd09a0b 100644 --- a/testgen/ui/components/widgets/tz_select.py +++ b/testgen/ui/components/widgets/tz_select.py @@ -2,42 +2,69 @@ import streamlit as st -MOST_RELEVANT_TIMEZONES = [ +HANDY_TIMEZONES = [ + "Africa/Abidjan", # +00:00 "Africa/Johannesburg", # +02:00 + "Africa/Lagos", # +01:00 + "America/Anchorage", # -09:00 + "America/Argentina/Buenos_Aires", # -03:00 + "America/Bogota", # -05:00 "America/Chicago", # -06:00 "America/Denver", # -07:00 "America/Halifax", # -03:00 "America/Los_Angeles", # -08:00 "America/Mexico_City", # -06:00 "America/New_York", # -04:00 (during DST) "America/Phoenix", # -07:00 "America/Sao_Paulo", # -03:00 + "America/Toronto", # -04:00 (during DST) "America/Vancouver", # -08:00 + "Asia/Almaty", # +06:00 + "Asia/Baku", # +04:00 "Asia/Bangkok", # +07:00 + "Asia/Colombo", # +05:30 + "Asia/Dhaka", # +06:00 "Asia/Dubai", # +04:00 + "Asia/Jakarta", # +07:00 + "Asia/Kabul", # +04:30 "Asia/Kolkata", # +05:30 + "Asia/Manila", # +08:00 "Asia/Riyadh", # +03:00 "Asia/Seoul", # +09:00 "Asia/Shanghai", # +08:00 "Asia/Singapore", # +08:00 "Asia/Tokyo", # +09:00 + "Atlantic/Azores", # -01:00 + "Atlantic/South_Georgia", # -02:00 "Australia/Sydney", # +10:00 + "Europe/Amsterdam", # +01:00 + "Europe/Athens", # +02:00 "Europe/Berlin", # +01:00 + "Europe/Bucharest", # +02:00 + 
"Europe/Helsinki", # +02:00 "Europe/Istanbul", # +03:00 - "Europe/London", # +01:00 + "Europe/London", # +00:00 "Europe/Moscow", # +03:00 - "Europe/Paris", # +02:00 + "Europe/Paris", # +01:00 "Pacific/Auckland", # +12:00 + "Pacific/Honolulu", # -10:00 + "Pacific/Noumea", # +11:00 + "Pacific/Port_Moresby", # +10:00 ] def tz_select(*, default="America/New_York", **kwargs): - tz_options = MOST_RELEVANT_TIMEZONES[:] - tz_options.extend(sorted(tz for tz in zoneinfo.available_timezones() if tz not in MOST_RELEVANT_TIMEZONES)) + tz_options = HANDY_TIMEZONES[:] + tz_options.extend(sorted(tz for tz in zoneinfo.available_timezones() if tz not in HANDY_TIMEZONES)) if "index" in kwargs: raise ValueError("Use the Session State API instead.") - if "key" not in kwargs or kwargs["key"] not in st.session_state: + # This is wierd, but apparently Streamlit likes it this way + if "key" in kwargs and st.session_state.get(kwargs["key"], None) in tz_options: + kwargs["index"] = tz_options.index(st.session_state[kwargs["key"]]) + del st.session_state[kwargs["key"]] + else: kwargs["index"] = tz_options.index(st.session_state.get("browser_timezone", default)) + return st.selectbox(options=tz_options, format_func=lambda v: v.replace("_", " "), **kwargs) diff --git a/testgen/ui/views/dialogs/manage_schedules.py b/testgen/ui/views/dialogs/manage_schedules.py index a641ca26..b597dd1d 100644 --- a/testgen/ui/views/dialogs/manage_schedules.py +++ b/testgen/ui/views/dialogs/manage_schedules.py @@ -94,11 +94,9 @@ def add_schedule_form(self): match st.session_state.get("schedule_form_success", None): case True: st.success("Schedule added.", icon=":material/check:") - st.session_state["schedule_cron_expr"] = "" - st.session_state["schedule_cron_tz"] = st.session_state.get( - "browser_timezone", st.session_state["schedule_cron_tz"] - ) - st.session_state["schedule_form_success"] = None + del st.session_state["schedule_cron_expr"] + del st.session_state["schedule_cron_tz"] + del st.session_state["schedule_form_success"] case False: st.error("This schedule already exists.", icon=":material/block:") case None: From 82ac7fc56bd5b4d98ac4157a665e1b22cba07065 Mon Sep 17 00:00:00 2001 From: Luis Date: Wed, 23 Apr 2025 14:27:47 -0400 Subject: [PATCH 11/34] misc(ui): add testid to elements for ease of locatability --- .../frontend/js/components/button.js | 2 ++ .../components/frontend/js/components/card.js | 3 ++- .../frontend/js/components/checkbox.js | 6 +++-- .../frontend/js/components/input.js | 2 ++ .../frontend/js/components/line_chart.js | 2 ++ .../frontend/js/components/score_breakdown.js | 23 ++++++++++++------- .../frontend/js/components/score_card.js | 13 ++++++----- .../frontend/js/components/score_history.js | 2 +- .../frontend/js/components/score_issues.js | 2 +- .../frontend/js/components/select.js | 12 ++++++---- .../frontend/js/components/spark_line.js | 3 ++- .../frontend/js/pages/profiling_runs.js | 13 +++++++---- .../frontend/js/pages/project_dashboard.js | 4 ++++ .../frontend/js/pages/quality_dashboard.js | 4 ++++ .../frontend/js/pages/score_explorer.js | 16 +++++++++---- .../frontend/js/pages/test_suites.js | 6 +++-- 16 files changed, 77 insertions(+), 36 deletions(-) diff --git a/testgen/ui/components/frontend/js/components/button.js b/testgen/ui/components/frontend/js/components/button.js index 9c08d1ce..d90b0034 100644 --- a/testgen/ui/components/frontend/js/components/button.js +++ b/testgen/ui/components/frontend/js/components/button.js @@ -13,6 +13,7 @@ * @property {(Function|null)} onclick * @property 
{(bool)} disabled * @property {string?} style + * @property {string?} testId */ import { emitEvent, enforceElementWidth, getValue, loadStylesheet } from '../utils.js'; import van from '../van.min.js'; @@ -67,6 +68,7 @@ const Button = (/** @type Properties */ props) => { disabled: props.disabled, onmouseenter: props.tooltip ? (() => showTooltip.val = true) : undefined, onmouseleave: props.tooltip ? (() => showTooltip.val = false) : undefined, + 'data-testid': getValue(props.testId) ?? '', }, () => window.testgen.isPage && getValue(props.tooltip) ? Tooltip({ text: props.tooltip, diff --git a/testgen/ui/components/frontend/js/components/card.js b/testgen/ui/components/frontend/js/components/card.js index 35dfee9e..b883b9b7 100644 --- a/testgen/ui/components/frontend/js/components/card.js +++ b/testgen/ui/components/frontend/js/components/card.js @@ -7,6 +7,7 @@ * @property {boolean?} border * @property {string?} id * @property {string?} class + * @property {string?} testId */ import { loadStylesheet } from '../utils.js'; import van from '../van.min.js'; @@ -17,7 +18,7 @@ const Card = (/** @type Properties */ props) => { loadStylesheet('card', stylesheet); return div( - { class: `tg-card mb-4 ${props.border ? 'tg-card-border' : ''} ${props.class}`, id: props.id ?? '' }, + { class: `tg-card mb-4 ${props.border ? 'tg-card-border' : ''} ${props.class}`, id: props.id ?? '', 'data-testid': props.testId ?? '' }, () => props.title || props.actionContent ? div( diff --git a/testgen/ui/components/frontend/js/components/checkbox.js b/testgen/ui/components/frontend/js/components/checkbox.js index d41d3039..b2619b9e 100644 --- a/testgen/ui/components/frontend/js/components/checkbox.js +++ b/testgen/ui/components/frontend/js/components/checkbox.js @@ -6,11 +6,12 @@ * @property {boolean?} indeterminate * @property {function(boolean, Event)?} onChange * @property {number?} width + * @property {testId?} testId */ import van from '../van.min.js'; import { getValue, loadStylesheet } from '../utils.js'; -const { input, label } = van.tags; +const { input, label, span } = van.tags; const Checkbox = (/** @type Properties */ props) => { loadStylesheet('checkbox', stylesheet); @@ -18,6 +19,7 @@ const Checkbox = (/** @type Properties */ props) => { return label( { class: 'flex-row fx-gap-2 clickable', + 'data-testid': props.testId ?? '', style: () => `width: ${props.width ? getValue(props.width) + 'px' : 'auto'}`, }, input({ @@ -30,7 +32,7 @@ const Checkbox = (/** @type Properties */ props) => { return onChange ? (/** @type Event */ event) => onChange(event.target.checked, event) : null; }), }), - props.label, + span({'data-testid': 'checkbox-label'}, props.label), ); }; diff --git a/testgen/ui/components/frontend/js/components/input.js b/testgen/ui/components/frontend/js/components/input.js index 009a8246..0d86d080 100644 --- a/testgen/ui/components/frontend/js/components/input.js +++ b/testgen/ui/components/frontend/js/components/input.js @@ -13,6 +13,7 @@ * @property {number?} width * @property {number?} height * @property {string?} style + * @property {string?} testId */ import van from '../van.min.js'; import { debounce, getValue, loadStylesheet, getRandomId } from '../utils.js'; @@ -55,6 +56,7 @@ const Input = (/** @type Properties */ props) => { id: domId, class: 'flex-column fx-gap-1 tg-input--label', style: () => `width: ${props.width ? getValue(props.width) + 'px' : 'auto'}; ${getValue(props.style)}`, + 'data-testid': props.testId ?? 
'', }, div( { class: 'flex-row fx-gap-1 text-caption' }, diff --git a/testgen/ui/components/frontend/js/components/line_chart.js b/testgen/ui/components/frontend/js/components/line_chart.js index 3eb7acdb..fd16bd06 100644 --- a/testgen/ui/components/frontend/js/components/line_chart.js +++ b/testgen/ui/components/frontend/js/components/line_chart.js @@ -223,6 +223,7 @@ const LineChart = ( tooltipExtraStyle.val = ''; showTooltip.val = false; }, + testId: lineId, }, line, ) @@ -263,6 +264,7 @@ const Legend = (options, lines) => { style: 'width: 32px; height: 32px;', tooltip: options?.refreshTooltip || null, onclick: options?.onRefreshClicked, + 'data-testid': 'refresh-history', }) : null, div( diff --git a/testgen/ui/components/frontend/js/components/score_breakdown.js b/testgen/ui/components/frontend/js/components/score_breakdown.js index 28c329e5..ee23e306 100644 --- a/testgen/ui/components/frontend/js/components/score_breakdown.js +++ b/testgen/ui/components/frontend/js/components/score_breakdown.js @@ -11,7 +11,7 @@ const ScoreBreakdown = (score, breakdown, category, scoreType, onViewDetails) => loadStylesheet('score-breakdown', stylesheet); return div( - { class: 'table' }, + { class: 'table', 'data-testid': 'score-breakdown' }, div( { class: 'flex-row fx-justify-space-between fx-align-flex-start text-caption' }, div( @@ -24,6 +24,7 @@ const ScoreBreakdown = (score, breakdown, category, scoreType, onViewDetails) => value: selectedCategory, options: ['table_name', 'column_name', 'semantic_data_type', 'dq_dimension'].map((c) => ({ label: CATEGORY_LABEL[c], value: c })), onChange: (value) => emitEvent('CategoryChanged', { payload: value }), + testId: 'groupby-selector', }); }, span('for'), @@ -39,6 +40,7 @@ const ScoreBreakdown = (score, breakdown, category, scoreType, onViewDetails) => value: selectedScoreType, options: scoreTypeOptions.map((s) => ({ label: SCORE_TYPE_LABEL[s], value: s, selected: s === scoreType })), onChange: (value) => emitEvent('ScoreTypeChanged', { payload: value }), + testId: 'score-type-selector', }); }, ), @@ -59,7 +61,7 @@ const ScoreBreakdown = (score, breakdown, category, scoreType, onViewDetails) => const columns = breakdownValue?.columns; return div( breakdownValue?.items?.map((row) => div( - { class: 'table-row flex-row' }, + { class: 'table-row flex-row', 'data-testid': 'score-breakdown-row' }, columns.map((columnName) => TableCell(row, columnName, scoreValue, categoryValue, scoreTypeValue, onViewDetails)), )), ); @@ -106,7 +108,7 @@ const TableCell = (row, column, score=undefined, category=undefined, scoreType=u const size = BREAKDOWN_COLUMNS_SIZES[column]; return div( - { style: `flex: ${size}; max-width: ${size}; word-wrap: break-word;` }, + { style: `flex: ${size}; max-width: ${size}; word-wrap: break-word;`, 'data-testid': 'score-breakdown-cell' }, span(row[column]), ); }; @@ -114,7 +116,7 @@ const TableCell = (row, column, score=undefined, category=undefined, scoreType=u const BreakdownColumnCell = (value, row) => { const size = BREAKDOWN_COLUMNS_SIZES.column_name; return div( - { class: 'flex-column', style: `flex: ${size}; max-width: ${size}; word-wrap: break-word;` }, + { class: 'flex-column', style: `flex: ${size}; max-width: ${size}; word-wrap: break-word;`, 'data-testid': 'score-breakdown-cell' }, Caption({ content: row.table_name, style: 'font-size: 12px;' }), span(value), ); @@ -122,7 +124,7 @@ const BreakdownColumnCell = (value, row) => { const ImpactCell = (value) => { return div( - { class: 'flex-row', style: `flex: 
${BREAKDOWN_COLUMNS_SIZES.impact}` }, + { class: 'flex-row', style: `flex: ${BREAKDOWN_COLUMNS_SIZES.impact}`, 'data-testid': 'score-breakdown-cell' }, value && !String(value).startsWith('-') ? i( {class: 'material-symbols-rounded', style: 'font-size: 20px; color: #E57373;'}, @@ -135,7 +137,7 @@ const ImpactCell = (value) => { const ScoreCell = (value) => { return div( - { class: 'flex-row', style: `flex: ${BREAKDOWN_COLUMNS_SIZES.score}` }, + { class: 'flex-row', style: `flex: ${BREAKDOWN_COLUMNS_SIZES.score}`, 'data-testid': 'score-breakdown-cell' }, dot({ class: 'mr-2' }, getScoreColor(value)), span(value ?? '--'), ); @@ -150,11 +152,16 @@ const IssueCountCell = (value, row, score, category, scoreType, onViewDetails) = } return div( - { class: 'flex-row', style: `flex: ${BREAKDOWN_COLUMNS_SIZES.issue_ct}` }, + { class: 'flex-row', style: `flex: ${BREAKDOWN_COLUMNS_SIZES.issue_ct}`, 'data-testid': 'score-breakdown-cell' }, span({ class: 'mr-2', style: 'min-width: 40px;' }, value || '-'), (value && onViewDetails) ? div( - { class: 'flex-row clickable', style: 'color: var(--link-color);', onclick: () => onViewDetails(score.project_code, score.name, scoreType, category, drilldown) }, + { + class: 'flex-row clickable', + style: 'color: var(--link-color);', + 'data-testid': 'view-issues', + onclick: () => onViewDetails(score.project_code, score.name, scoreType, category, drilldown), + }, span('View'), i({class: 'material-symbols-rounded', style: 'font-size: 20px;'}, 'chevron_right'), ) diff --git a/testgen/ui/components/frontend/js/components/score_card.js b/testgen/ui/components/frontend/js/components/score_card.js index 7085a42d..7b191677 100644 --- a/testgen/ui/components/frontend/js/components/score_card.js +++ b/testgen/ui/components/frontend/js/components/score_card.js @@ -53,6 +53,7 @@ const ScoreCard = (score, actions, options) => { title: title, actionContent: actions, class: 'tg-score-card', + testId: 'scorecard', content: () => { const score_ = getValue(score); const categories = score_.dimensions ?? score_.categories ?? []; @@ -94,11 +95,11 @@ const ScoreCard = (score, actions, options) => { div( { class: 'tg-score-card--categories' }, categories.map(category => div( - { class: 'flex-row fx-align-flex-center fx-gap-2' }, + { class: 'flex-row fx-align-flex-center fx-gap-2', 'data-testid': 'scorecard-category' }, dot({}, getScoreColor(category.score)), - span({ class: 'tg-score-card--category-score' }, category.score ?? '--'), + span({ class: 'tg-score-card--category-score', 'data-testid': 'scorecard-category-score' }, category.score ?? 
'--'), span( - { class: 'tg-score-card--category-label', title: category.label, style: 'position: relative;' }, + { class: 'tg-score-card--category-label', title: category.label, 'data-testid': 'scorecard-category-label', style: 'position: relative;' }, category.label, ), )), @@ -138,11 +139,11 @@ const ScoreChart = (label, score, history, showHistory, trendColor) => { const yRanges = {old: {min: Math.min(...yValues), max: Math.max(...yValues)}, new: {min: 0, max: yLength}}; return svg( - { class: 'tg-score-chart', width: 100, height: 100, viewBox: "0 0 100 100", overflow: 'visible', style }, + { class: 'tg-score-chart', width: 100, height: 100, viewBox: "0 0 100 100", overflow: 'visible', 'data-testid': 'score-chart', style }, circle({ class: 'tg-score-chart--bg' }), circle({ class: 'tg-score-chart--fg' }), - text({ x: '50%', y: '40%', 'dominant-baseline': 'middle', 'text-anchor': 'middle', fill: 'var(--primary-text-color)', 'font-size': '18px', 'font-weight': 500 }, score ?? '-'), - text({ x: '50%', y: '40%', 'dominant-baseline': 'middle', 'text-anchor': 'middle', fill: 'var(--secondary-text-color)', 'font-size': '14px', class: 'tg-score-chart--label' }, label), + text({ x: '50%', y: '40%', 'dominant-baseline': 'middle', 'text-anchor': 'middle', fill: 'var(--primary-text-color)', 'font-size': '18px', 'font-weight': 500, 'data-testid': 'score-chart-value' }, score ?? '-'), + text({ x: '50%', y: '40%', 'dominant-baseline': 'middle', 'text-anchor': 'middle', fill: 'var(--secondary-text-color)', 'font-size': '14px', class: 'tg-score-chart--label', 'data-testid': 'score-chart-text' }, label), showHistory ? g( {fill: 'none', style: 'transform: translate(10px, 70px);'}, diff --git a/testgen/ui/components/frontend/js/components/score_history.js b/testgen/ui/components/frontend/js/components/score_history.js index 8d00ebde..93b7b115 100644 --- a/testgen/ui/components/frontend/js/components/score_history.js +++ b/testgen/ui/components/frontend/js/components/score_history.js @@ -34,7 +34,7 @@ const ScoreHistory = (props, ...entries) => { }; return div( - { ...props, class: `tg-score-trend flex-row ${props?.class ?? ''}` }, + { ...props, class: `tg-score-trend flex-row ${props?.class ?? ''}`, 'data-testid': 'score-trend' }, LineChart( { width: 600, diff --git a/testgen/ui/components/frontend/js/components/score_issues.js b/testgen/ui/components/frontend/js/components/score_issues.js index f102c777..cd37e0e1 100644 --- a/testgen/ui/components/frontend/js/components/score_issues.js +++ b/testgen/ui/components/frontend/js/components/score_issues.js @@ -67,7 +67,7 @@ const IssuesTable = ( const selectedIssues = van.state([]); return div( - { class: 'table' }, + { class: 'table', 'data-testid': 'score-issues' }, div( { class: 'flex-row fx-justify-space-between fx-align-flex-start'}, div( diff --git a/testgen/ui/components/frontend/js/components/select.js b/testgen/ui/components/frontend/js/components/select.js index 088bb7f5..78903659 100644 --- a/testgen/ui/components/frontend/js/components/select.js +++ b/testgen/ui/components/frontend/js/components/select.js @@ -17,6 +17,7 @@ * @property {number?} width * @property {number?} height * @property {string?} style + * @property {string?} testId */ import van from '../van.min.js'; import { getRandomId, getValue, loadStylesheet, isState, isEqual } from '../utils.js'; @@ -74,19 +75,21 @@ const Select = (/** @type {Properties} */ props) => { class: () => `flex-column fx-gap-1 text-caption tg-select--label ${getValue(props.disabled) ? 
'disabled' : ''}`, style: () => `width: ${props.width ? getValue(props.width) + 'px' : 'auto'}; ${getValue(props.style)}`, onclick: van.derive(() => !getValue(props.disabled) ? () => opened.val = !opened.val : null), + 'data-testid': getValue(props.testId) ?? '', }, - props.label, + span({'data-testid': 'select-label'}, props.label), div( { class: () => `flex-row tg-select--field ${opened.val ? 'opened' : ''}`, style: () => getValue(props.height) ? `height: ${getValue(props.height)}px;` : '', + 'data-testid': 'select-input', }, div( - { class: 'tg-select--field--content' }, + { class: 'tg-select--field--content', 'data-testid': 'select-input-display' }, valueLabel, ), div( - { class: 'tg-select--field--icon' }, + { class: 'tg-select--field--icon', 'data-testid': 'select-input-trigger' }, i( { class: 'material-symbols-rounded' }, 'expand_more', @@ -96,7 +99,7 @@ const Select = (/** @type {Properties} */ props) => { Portal( {target: domId.val, targetRelative: true, opened}, () => div( - { class: 'tg-select--options-wrapper mt-1' }, + { class: 'tg-select--options-wrapper mt-1', 'data-testid': 'select-options' }, getValue(options).map(option => div( { @@ -105,6 +108,7 @@ const Select = (/** @type {Properties} */ props) => { changeSelection(option); event.stopPropagation(); }, + 'data-testid': 'select-options-item', }, span(option.label), ) diff --git a/testgen/ui/components/frontend/js/components/spark_line.js b/testgen/ui/components/frontend/js/components/spark_line.js index 79d2f9fb..89985808 100644 --- a/testgen/ui/components/frontend/js/components/spark_line.js +++ b/testgen/ui/components/frontend/js/components/spark_line.js @@ -8,6 +8,7 @@ * @property {boolean?} interactive * @property {Function?} onPointMouseEnter * @property {Function?} onPointMouseLeave + * @property {string?} testId * * @typedef Point * @type {object} @@ -34,7 +35,7 @@ const SparkLine = ( ) => { const display = van.derive(() => getValue(options.hidden) === true ? 'none' : ''); return g( - { fill: 'none', opacity: options.opacity ?? 1, style: 'overflow: visible;', display }, + { fill: 'none', opacity: options.opacity ?? 1, style: 'overflow: visible;', 'data-testid': options.testId, display }, polyline({ points: line.map(point => `${point.x} ${point.y}`).join(', '), style: `stroke: ${options.color}; stroke-width: ${options.stroke ?? 
1};`, diff --git a/testgen/ui/components/frontend/js/pages/profiling_runs.js b/testgen/ui/components/frontend/js/pages/profiling_runs.js index 993530cf..dc955cb9 100644 --- a/testgen/ui/components/frontend/js/pages/profiling_runs.js +++ b/testgen/ui/components/frontend/js/pages/profiling_runs.js @@ -93,12 +93,12 @@ const ProfilingRunItem = ( /** @type boolean */ userCanRun, ) => { return div( - { class: 'table-row flex-row' }, + { class: 'table-row flex-row', 'data-testid': 'profiling-run-item' }, div( { style: `flex: ${columns[0]}` }, - div(formatTimestamp(item.start_time)), + div({'data-testid': 'profiling-run-item-starttime'}, formatTimestamp(item.start_time)), div( - { class: 'text-caption mt-1' }, + { class: 'text-caption mt-1', 'data-testid': 'profiling-run-item-tablegroup' }, item.table_groups_name, ), ), @@ -107,7 +107,7 @@ const ProfilingRunItem = ( div( ProfilingRunStatus(item), div( - { class: 'text-caption mt-1' }, + { class: 'text-caption mt-1', 'data-testid': 'profiling-run-item-duration' }, formatDuration(item.duration), ), ), @@ -120,11 +120,12 @@ const ProfilingRunItem = ( ), div( { style: `flex: ${columns[2]}` }, - div(item.schema_name), + div({'data-testid': 'profiling-run-item-schema'}, item.schema_name), div( { class: 'text-caption mt-1 mb-1', style: item.status === 'Complete' && !item.column_ct ? 'color: var(--red);' : '', + 'data-testid': 'profiling-run-item-counts', }, item.status === 'Complete' ? `${item.table_ct || 0} tables, ${item.column_ct || 0} columns` : null, ), @@ -155,6 +156,7 @@ const ProfilingRunItem = ( underline: true, right_icon: 'chevron_right', style: 'margin-top: 8px;', + 'data-testid': 'profiling-run-item-viewissues' }) : null, ), div( @@ -176,6 +178,7 @@ function ProfilingRunStatus(/** @type ProfilingRun */ item) { { class: 'flex-row', style: `color: var(--${attributes.color});`, + 'data-testid': 'profiling-run-item-status' }, attributes.label, () => { diff --git a/testgen/ui/components/frontend/js/pages/project_dashboard.js b/testgen/ui/components/frontend/js/pages/project_dashboard.js index 30a93d9d..940d7708 100644 --- a/testgen/ui/components/frontend/js/pages/project_dashboard.js +++ b/testgen/ui/components/frontend/js/pages/project_dashboard.js @@ -128,6 +128,7 @@ const ProjectDashboard = (/** @type Properties */ props) => { Card({ id: 'overview-project-summary', class: 'tg-overview--project', + testId: 'project-summary', border: true, content: [ () => div( @@ -167,6 +168,7 @@ const ProjectDashboard = (/** @type Properties */ props) => { icon: 'search', clearable: true, placeholder: 'Search table group names', + testId: 'table-groups-filter', onChange: (value) => tableGroupsSearchTerm.val = value, }), span({ style: 'margin-right: 1rem;' }), @@ -176,6 +178,7 @@ const ProjectDashboard = (/** @type Properties */ props) => { options: props.table_groups_sort_options?.val ?? 
[], height: 38, style: 'font-size: 14px;', + testId: 'table-groups-sort', }), ) : '', @@ -190,6 +193,7 @@ const ProjectDashboard = (/** @type Properties */ props) => { const TableGroupCard = (/** @type TableGroupSummary */ tableGroup) => { return Card({ + testId: 'table-group-summary-card', border: true, title: tableGroup.table_groups_name, actionContent: () => ExpanderToggle({ diff --git a/testgen/ui/components/frontend/js/pages/quality_dashboard.js b/testgen/ui/components/frontend/js/pages/quality_dashboard.js index 41175648..093fe249 100644 --- a/testgen/ui/components/frontend/js/pages/quality_dashboard.js +++ b/testgen/ui/components/frontend/js/pages/quality_dashboard.js @@ -106,6 +106,7 @@ const Toolbar = ( placeholder: 'Search scores', value: filterBy, onChange: options?.onsearch, + testId: 'scorecards-filter', }), Select({ id: 'score-dashboard-sort', @@ -115,6 +116,7 @@ const Toolbar = ( value: sortedBy, options: sortOptions, onChange: options?.onsort, + testId: 'scorecards-sort', }), span({ style: 'margin: 0 auto;' }), Button({ @@ -126,6 +128,7 @@ const Toolbar = ( onclick: () => emitEvent('LinkClicked', { href: 'quality-dashboard:explorer', params: { project_code: projectSummary.project_code }, + testId: 'scorecards-goto-explorer', }), }), Button({ @@ -135,6 +138,7 @@ const Toolbar = ( tooltipPosition: 'left', style: 'border: var(--button-stroked-border); border-radius: 4px;', onclick: () => emitEvent('RefreshData', {}), + testId: 'scorecards-refresh', }), ); }; diff --git a/testgen/ui/components/frontend/js/pages/score_explorer.js b/testgen/ui/components/frontend/js/pages/score_explorer.js index cd19054b..d887e033 100644 --- a/testgen/ui/components/frontend/js/pages/score_explorer.js +++ b/testgen/ui/components/frontend/js/pages/score_explorer.js @@ -259,11 +259,13 @@ const Toolbar = ( Checkbox({ label: 'Total Score', checked: displayTotalScore, + testId: 'include-total-score', onChange: (checked) => displayTotalScore.val = checked, }), Checkbox({ label: 'CDE Score', checked: displayCDEScore, + testId: 'include-cde-score', onChange: (checked) => displayCDEScore.val = checked, }), div( @@ -271,6 +273,7 @@ const Toolbar = ( Checkbox({ label: 'Category:', checked: displayCategory, + testId: 'include-category', onChange: (checked) => displayCategory.val = checked, }), Select({ @@ -279,6 +282,7 @@ const Toolbar = ( value: selectedCategory, options: categories.map((c) => ({ value: c, label: TRANSLATIONS[c] })), disabled: van.derive(() => !getValue(displayCategory)), + testId: 'category-selector', }), ), ), @@ -290,6 +294,7 @@ const Toolbar = ( height: 40, style: 'margin-right: 16px;', value: scoreName, + testId: 'scorecard-name-input', onChange: debounce((name) => scoreName.val = name, 300), }), () => { @@ -348,11 +353,11 @@ const Filter = ( }; return div( - { id, class: 'flex-row score-explorer--filter' }, - span({ class: 'text-secondary mr-1' }, `${TRANSLATIONS[field]} =`), + { id, class: 'flex-row score-explorer--filter', 'data-testid': 'explorer-filter' }, + span({ class: 'text-secondary mr-1', 'data-testid': 'explorer-filter-label' }, `${TRANSLATIONS[field]} =`), div( - { class: 'flex-row clickable', onclick: () => opened.val = true }, - () => span({}, getValue(value) ?? '(Empty)'), + { class: 'flex-row clickable', 'data-testid': 'explorer-filter-trigger', onclick: () => opened.val = true }, + () => span({'data-testid': 'explorer-filter-selected'}, getValue(value) ?? 
'(Empty)'), i({class: 'material-symbols-rounded'}, 'arrow_drop_down'), ), Portal( @@ -362,6 +367,7 @@ const Filter = ( i( { class: 'material-symbols-rounded clickable text-secondary', + 'data-testid': 'explorer-filter-remove', onclick: () => onRemove(position), }, 'clear', @@ -371,7 +377,7 @@ const Filter = ( const FilterFieldSelector = (/** @type string[] */ options, /** @type string */ value, /** @type Function */ onSelect) => { return div( - { class: 'flex-column score-explorer--selector mt-1' }, + { class: 'flex-column score-explorer--selector mt-1', 'data-testid': 'explorer-filter-field-selector' }, (options?.length ?? 0) > 0 ? options.map((v) => span({ class: () => `pr-4 pl-4 pt-3 pb-3 ${getValue(value) === v ? 'selected' : ''}`, style: 'cursor: pointer;', onclick: () => onSelect(v) }, TRANSLATIONS[v] ?? v) diff --git a/testgen/ui/components/frontend/js/pages/test_suites.js b/testgen/ui/components/frontend/js/pages/test_suites.js index a3f906f8..8ebf10e0 100644 --- a/testgen/ui/components/frontend/js/pages/test_suites.js +++ b/testgen/ui/components/frontend/js/pages/test_suites.js @@ -83,6 +83,7 @@ const TestSuites = (/** @type Properties */ props) => { allowNull: true, height: 38, style: 'font-size: 14px;', + testId: 'table-group-filter', onChange: (value) => emitEvent('FilterApplied', {payload: value}), }), userCanEdit @@ -100,8 +101,9 @@ const TestSuites = (/** @type Properties */ props) => { { class: 'flex-column' }, getValue(testSuites).map((/** @type TestSuite */ testSuite) => Card({ border: true, + testId: 'test-suite-card', title: () => div( - { class: 'flex-column tg-test-suites--card-title' }, + { class: 'flex-column tg-test-suites--card-title', 'data-testid': 'test-suite-title' }, h4(testSuite.test_suite), small(`${testSuite.connection_name} > ${testSuite.table_groups_name}`), ), @@ -128,7 +130,7 @@ const TestSuites = (/** @type Properties */ props) => { class: 'mb-4', }), Caption({ content: 'Description', style: 'margin-bottom: 2px;' }), - span(testSuite.test_suite_description ?? '--'), + span({'data-testid': 'test-suite-description'}, testSuite.test_suite_description ?? 
'--'), ), div( { class: 'flex-column' }, From 40b23834da04a8737ea841a5570915776adf5e9e Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Mon, 5 May 2025 16:18:35 -0400 Subject: [PATCH 12/34] fix(cli): make test result statuses consistent with ui --- .../template/get_entities/get_test_results_for_run_cli.sql | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/testgen/template/get_entities/get_test_results_for_run_cli.sql b/testgen/template/get_entities/get_test_results_for_run_cli.sql index 97161677..240cb014 100644 --- a/testgen/template/get_entities/get_test_results_for_run_cli.sql +++ b/testgen/template/get_entities/get_test_results_for_run_cli.sql @@ -3,8 +3,9 @@ SELECT ts.test_suite as test_suite_key, column_names as column_name, r.test_type, CASE - WHEN result_code = 1 THEN 'Pass' - WHEN result_code = 0 THEN 'Fail' + WHEN result_code = 1 THEN 'Passed' + WHEN result_code = 0 AND r.severity = 'Warning' THEN 'Warning' + WHEN result_code = 0 AND r.severity = 'Fail' THEN 'Failed' END as result, COALESCE(r.result_message, '') as result_message, result_measure, From 52c0925c5eb6a17936176cf15906677899a96888 Mon Sep 17 00:00:00 2001 From: Ricardo Boni Date: Mon, 5 May 2025 16:30:15 -0400 Subject: [PATCH 13/34] misc(scheduler): Improving how loaded jobs are logged --- testgen/scheduler/cli_scheduler.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/testgen/scheduler/cli_scheduler.py b/testgen/scheduler/cli_scheduler.py index f0c5e05e..31bd3338 100644 --- a/testgen/scheduler/cli_scheduler.py +++ b/testgen/scheduler/cli_scheduler.py @@ -33,6 +33,7 @@ def __init__(self): self._running_jobs: set[subprocess.Popen] = set() self._running_jobs_cond = threading.Condition() self.reload_timer = None + self._current_jobs = {} LOG.info("Starting CLI Scheduler with registered jobs: %s", ", ".join(JOB_REGISTRY.keys())) super().__init__() @@ -43,26 +44,30 @@ def get_jobs(self) -> Iterable[CliJob]: self.reload_timer = threading.Timer((110 - datetime.now().second) % 60 or 60, self.reload_jobs) self.reload_timer.start() - job_list = [] + jobs = {} for (job_model,) in JobSchedule.select_where(): if job_model.key not in JOB_REGISTRY: LOG.error("Job '%s' scheduled but not registered", job_model.key) continue - job_list.append( - CliJob( - cron_expr=job_model.cron_expr, - cron_tz=job_model.cron_tz, - delayed_policy=DelayedPolicy.SKIP, - key=job_model.key, - args=job_model.args, - kwargs=job_model.kwargs - ) + jobs[job_model.id] = CliJob( + cron_expr=job_model.cron_expr, + cron_tz=job_model.cron_tz, + delayed_policy=DelayedPolicy.SKIP, + key=job_model.key, + args=job_model.args, + kwargs=job_model.kwargs ) - LOG.info("Loaded %s schedule(s)", len(job_list)) + for job_id in jobs.keys() - self._current_jobs.keys(): + LOG.info("Enabled job: %s", jobs[job_id]) - return job_list + for job_id in self._current_jobs.keys() - jobs.keys(): + LOG.info("Disabled job: %s", self._current_jobs[job_id]) + + self._current_jobs = jobs + + return jobs.values() def start_job(self, job: CliJob, triggering_time: datetime) -> None: cmd = JOB_REGISTRY[job.key] From 3b01cf298dd192c144b7462bda1add6e04639a30 Mon Sep 17 00:00:00 2001 From: Ricardo Boni Date: Mon, 5 May 2025 16:35:43 -0400 Subject: [PATCH 14/34] misc(scheduler): Changing how we clean the expr field --- testgen/ui/views/dialogs/manage_schedules.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testgen/ui/views/dialogs/manage_schedules.py b/testgen/ui/views/dialogs/manage_schedules.py index 
b597dd1d..cc96ecc5 100644 --- a/testgen/ui/views/dialogs/manage_schedules.py +++ b/testgen/ui/views/dialogs/manage_schedules.py @@ -94,7 +94,7 @@ def add_schedule_form(self): match st.session_state.get("schedule_form_success", None): case True: st.success("Schedule added.", icon=":material/check:") - del st.session_state["schedule_cron_expr"] + st.session_state["schedule_cron_expr"] = "" del st.session_state["schedule_cron_tz"] del st.session_state["schedule_form_success"] case False: From d4219afffcec85f91aa0bfb5057ab27cadeca663 Mon Sep 17 00:00:00 2001 From: Luis Date: Tue, 6 May 2025 12:33:39 -0400 Subject: [PATCH 15/34] feat(scoring): add all categories for breakdown grouping --- .../run_refresh_score_cards_results.py | 3 +- testgen/common/models/scores.py | 46 +++++++++++++++++-- .../030_initialize_new_schema_structure.sql | 9 ++++ .../dbupgrade/0136_incremental_upgrade.sql | 12 +++++ .../frontend/js/components/score_breakdown.js | 27 +++++++---- 5 files changed, 84 insertions(+), 13 deletions(-) create mode 100644 testgen/template/dbupgrade/0136_incremental_upgrade.sql diff --git a/testgen/commands/run_refresh_score_cards_results.py b/testgen/commands/run_refresh_score_cards_results.py index dc4f93b2..cd0fa099 100644 --- a/testgen/commands/run_refresh_score_cards_results.py +++ b/testgen/commands/run_refresh_score_cards_results.py @@ -4,6 +4,7 @@ from testgen.common.models import with_database_session from testgen.common.models.scores import ( + SCORE_CATEGORIES, ScoreCard, ScoreDefinition, ScoreDefinitionBreakdownItem, @@ -122,7 +123,7 @@ def _score_card_to_results(score_card: ScoreCard) -> list[ScoreDefinitionResult] def _score_definition_to_results_breakdown(score_definition: ScoreDefinition) -> list[ScoreDefinitionBreakdownItem]: score_types = ["score", "cde_score"] - categories = ["column_name", "table_name", "dq_dimension", "semantic_data_type"] + categories = SCORE_CATEGORIES all_breakdown_items = [] for category in categories: diff --git a/testgen/common/models/scores.py b/testgen/common/models/scores.py index 1fd437bc..814dc233 100644 --- a/testgen/common/models/scores.py +++ b/testgen/common/models/scores.py @@ -14,6 +14,37 @@ from testgen.common.models import Base, engine, get_current_session from testgen.utils import is_uuid4 +SCORE_CATEGORIES = [ + "column_name", + "table_name", + "dq_dimension", + "semantic_data_type", + "table_groups_name", + "data_location", + "data_source", + "source_system", + "source_process", + "business_domain", + "stakeholder_group", + "transform_level", + "data_product", +] +Categories = Literal[ + "column_name", + "table_name", + "dq_dimension", + "semantic_data_type", + "table_groups_name", + "data_location", + "data_source", + "source_system", + "source_process", + "business_domain", + "stakeholder_group", + "transform_level", + "data_product", +] + class ScoreCategory(enum.Enum): table_groups_name = "table_groups_name" @@ -213,7 +244,7 @@ def as_cached_score_card(self) -> "ScoreCard": def get_score_card_breakdown( self, score_type: Literal["score", "cde_score"], - group_by: Literal["column_name", "table_name", "dq_dimension", "semantic_data_type"], + group_by: Categories, ) -> list[dict]: """ Executes a raw query to filter and aggregate the score details @@ -264,7 +295,7 @@ def get_score_card_breakdown( def get_score_card_issues( self, score_type: Literal["score", "cde_score"], - group_by: Literal["column_name", "table_name", "dq_dimension", "semantic_data_type"], + group_by: Categories, value: str, ): """ @@ -391,6 +422,15 @@ class 
ScoreDefinitionBreakdownItem(Base): column_name: str = Column(String, nullable=True) dq_dimension: str = Column(String, nullable=True) semantic_data_type: str = Column(String, nullable=True) + table_groups_name: str = Column(String, nullable=True) + data_location: str = Column(String, nullable=True) + data_source: str = Column(String, nullable=True) + source_system: str = Column(String, nullable=True) + source_process: str = Column(String, nullable=True) + business_domain: str = Column(String, nullable=True) + stakeholder_group: str = Column(String, nullable=True) + transform_level: str = Column(String, nullable=True) + data_product: str = Column(String, nullable=True) impact: float = Column(Float) score: float = Column(Float) issue_ct: int = Column(Integer) @@ -400,7 +440,7 @@ def filter( cls, *, definition_id: str, - category: Literal["column_name", "table_name", "dq_dimension", "semantic_data_type"], + category: Categories, score_type: Literal["score", "cde_score"], ) -> "Iterable[Self]": items = [] diff --git a/testgen/template/dbsetup/030_initialize_new_schema_structure.sql b/testgen/template/dbsetup/030_initialize_new_schema_structure.sql index 8907c1aa..f7978a4e 100644 --- a/testgen/template/dbsetup/030_initialize_new_schema_structure.sql +++ b/testgen/template/dbsetup/030_initialize_new_schema_structure.sql @@ -686,6 +686,15 @@ CREATE TABLE IF NOT EXISTS score_definition_results_breakdown ( column_name TEXT DEFAULT NULL, dq_dimension TEXT DEFAULT NULL, semantic_data_type TEXT DEFAULT NULL, + table_groups_name TEXT DEFAULT NULL, + data_location TEXT DEFAULT NULL, + data_source TEXT DEFAULT NULL, + source_system TEXT DEFAULT NULL, + source_process TEXT DEFAULT NULL, + business_domain TEXT DEFAULT NULL, + stakeholder_group TEXT DEFAULT NULL, + transform_level TEXT DEFAULT NULL, + data_product TEXT DEFAULT NULL, impact DOUBLE PRECISION DEFAULT 0.0, score DOUBLE PRECISION DEFAULT 0.0, issue_ct INTEGER DEFAULT 0 diff --git a/testgen/template/dbupgrade/0136_incremental_upgrade.sql b/testgen/template/dbupgrade/0136_incremental_upgrade.sql new file mode 100644 index 00000000..9341bfd1 --- /dev/null +++ b/testgen/template/dbupgrade/0136_incremental_upgrade.sql @@ -0,0 +1,12 @@ +SET SEARCH_PATH TO {SCHEMA_NAME}; + +ALTER TABLE score_definition_results_breakdown + ADD COLUMN table_groups_name TEXT DEFAULT NULL, + ADD COLUMN data_location TEXT DEFAULT NULL, + ADD COLUMN data_source TEXT DEFAULT NULL, + ADD COLUMN source_system TEXT DEFAULT NULL, + ADD COLUMN source_process TEXT DEFAULT NULL, + ADD COLUMN business_domain TEXT DEFAULT NULL, + ADD COLUMN stakeholder_group TEXT DEFAULT NULL, + ADD COLUMN transform_level TEXT DEFAULT NULL, + ADD COLUMN data_product TEXT DEFAULT NULL; diff --git a/testgen/ui/components/frontend/js/components/score_breakdown.js b/testgen/ui/components/frontend/js/components/score_breakdown.js index ee23e306..e75f79b8 100644 --- a/testgen/ui/components/frontend/js/components/score_breakdown.js +++ b/testgen/ui/components/frontend/js/components/score_breakdown.js @@ -22,7 +22,9 @@ const ScoreBreakdown = (score, breakdown, category, scoreType, onViewDetails) => return Select({ label: '', value: selectedCategory, - options: ['table_name', 'column_name', 'semantic_data_type', 'dq_dimension'].map((c) => ({ label: CATEGORY_LABEL[c], value: c })), + options: Object.entries(CATEGORIES) + .sort((A, B) => A[1].localeCompare(B[1])) + .map(([value, label]) => ({ value, label })), onChange: (value) => emitEvent('CategoryChanged', { payload: value }), testId: 'groupby-selector', }); 
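For reference, a minimal standalone sketch of the option-building pattern used in the hunk above: Object.entries over a label map, sorted by display label, then mapped to the { value, label } shape the Select component consumes. The three-entry CATEGORIES object below is a trimmed stand-in for the full map added later in this patch, not the actual constant.

    // Sketch of the Select options construction above, assuming a trimmed
    // three-entry stand-in for the full CATEGORIES map defined in this patch.
    const CATEGORIES = {
        table_name: 'Tables',
        column_name: 'Columns',
        dq_dimension: 'Quality Dimensions',
    };

    // Sort by display label (index 1 of each [value, label] entry), then map
    // each entry to a { value, label } option object.
    const options = Object.entries(CATEGORIES)
        .sort((A, B) => A[1].localeCompare(B[1]))
        .map(([value, label]) => ({ value, label }));

    console.log(options);
    // [ { value: 'column_name', label: 'Columns' },
    //   { value: 'dq_dimension', label: 'Quality Dimensions' },
    //   { value: 'table_name', label: 'Tables' } ]
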
@@ -49,7 +51,7 @@ const ScoreBreakdown = (score, breakdown, category, scoreType, onViewDetails) => () => div( { class: 'table-header breakdown-columns flex-row' }, getValue(breakdown)?.columns?.map(column => span({ - style: `flex: ${BREAKDOWN_COLUMNS_SIZES[column]};` }, + style: `flex: ${BREAKDOWN_COLUMNS_SIZES[column] ?? COLUMN_DEFAULT_SIZE};` }, getReadableColumn(column, getValue(scoreType)), )), ), @@ -106,7 +108,7 @@ const TableCell = (row, column, score=undefined, category=undefined, scoreType=u return componentByColumn[column](row[column], row, score, category, scoreType, onViewDetails); } - const size = BREAKDOWN_COLUMNS_SIZES[column]; + const size = BREAKDOWN_COLUMNS_SIZES[column] ?? COLUMN_DEFAULT_SIZE; return div( { style: `flex: ${size}; max-width: ${size}; word-wrap: break-word;`, 'data-testid': 'score-breakdown-cell' }, span(row[column]), @@ -114,7 +116,7 @@ const TableCell = (row, column, score=undefined, category=undefined, scoreType=u }; const BreakdownColumnCell = (value, row) => { - const size = BREAKDOWN_COLUMNS_SIZES.column_name; + const size = COLUMN_DEFAULT_SIZE; return div( { class: 'flex-column', style: `flex: ${size}; max-width: ${size}; word-wrap: break-word;`, 'data-testid': 'score-breakdown-cell' }, Caption({ content: row.table_name, style: 'font-size: 12px;' }), @@ -169,14 +171,24 @@ const IssueCountCell = (value, row, score, category, scoreType, onViewDetails) = ); }; -const CATEGORY_LABEL = { +const CATEGORIES = { table_name: 'Tables', column_name: 'Columns', semantic_data_type: 'Semantic Data Types', dq_dimension: 'Quality Dimensions', + table_groups_name: 'Table Group', + data_location: 'Data Location', + data_source: 'Data Source', + source_system: 'Source System', + source_process: 'Source Process', + business_domain: 'Business Domain', + stakeholder_group: 'Stakeholder Group', + transform_level: 'Transform Level', + data_product: 'Data Product', }; const BREAKDOWN_COLUMN_LABEL = { + ...CATEGORIES, table_name: 'Table', column_name: 'Table | Column', semantic_data_type: 'Semantic Data Type', @@ -191,11 +203,8 @@ const SCORE_TYPE_LABEL = { cde_score: 'CDE Score', }; +const COLUMN_DEFAULT_SIZE = '40%'; const BREAKDOWN_COLUMNS_SIZES = { - table_name: '40%', - column_name: '40%', - semantic_data_type: '40%', - dq_dimension: '40%', impact: '20%', score: '20%', issue_ct: '20%', From c893444ce159015270b151d3c0299b5f8fe2c5eb Mon Sep 17 00:00:00 2001 From: Ricardo Boni Date: Tue, 6 May 2025 14:12:49 -0400 Subject: [PATCH 16/34] feat(auth): The JWT signing token is now configurable BREAKING-CHANGE: The JWT signing key has to be set through env --- .../testgen-app/templates/_environment.yaml | 5 +++++ .../charts/testgen-app/templates/secrets.yaml | 1 + testgen/settings.py | 5 +++++ testgen/ui/services/user_session_service.py | 21 ++++++++++++++++--- 4 files changed, 29 insertions(+), 3 deletions(-) diff --git a/deploy/charts/testgen-app/templates/_environment.yaml b/deploy/charts/testgen-app/templates/_environment.yaml index 5d8bed73..a630b1ee 100644 --- a/deploy/charts/testgen-app/templates/_environment.yaml +++ b/deploy/charts/testgen-app/templates/_environment.yaml @@ -9,6 +9,11 @@ secretKeyRef: name: {{ .Values.testgen.authSecrets.name | quote }} key: "decrypt-password" +- name: TG_JWT_HASHING_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.testgen.authSecrets.name | quote }} + key: "jwt-hashing-key" - name: TG_METADATA_DB_HOST value: {{ .Values.testgen.databaseHost | quote }} - name: TG_METADATA_DB_NAME diff --git 
a/deploy/charts/testgen-app/templates/secrets.yaml b/deploy/charts/testgen-app/templates/secrets.yaml index 17227a69..b5f3ed0e 100644 --- a/deploy/charts/testgen-app/templates/secrets.yaml +++ b/deploy/charts/testgen-app/templates/secrets.yaml @@ -12,4 +12,5 @@ type: Opaque data: decrypt-salt: {{ randAlphaNum 32 | b64enc | quote }} decrypt-password: {{ randAlphaNum 32 | b64enc | quote }} + jwt-hashing-key: {{ randBytes 32 | b64enc | quote }} {{- end }} diff --git a/testgen/settings.py b/testgen/settings.py index 5b79d9e4..8669b4d6 100644 --- a/testgen/settings.py +++ b/testgen/settings.py @@ -484,3 +484,8 @@ """ Disables sending usage data when set to any value except "true" and "yes". Defaults to "yes" """ + +JWT_HASHING_KEY_B64: str = os.getenv("TG_JWT_HASHING_KEY") +""" +Random key used to sign/verify the authentication token +""" diff --git a/testgen/ui/services/user_session_service.py b/testgen/ui/services/user_session_service.py index be0f27ae..a6741d20 100644 --- a/testgen/ui/services/user_session_service.py +++ b/testgen/ui/services/user_session_service.py @@ -1,3 +1,4 @@ +import base64 import datetime import logging import typing @@ -6,12 +7,12 @@ import jwt import streamlit as st +from testgen import settings from testgen.ui.queries import user_queries from testgen.ui.session import session RoleType = typing.Literal["admin", "data_quality", "analyst", "business", "catalog"] -JWT_HASHING_KEY = "dk_signature_key" AUTH_TOKEN_COOKIE_NAME = "dk_cookie_name" # noqa: S105 AUTH_TOKEN_EXPIRATION_DAYS = 1 DISABLED_ACTION_TEXT = "You do not have permissions to perform this action. Contact your administrator." @@ -19,6 +20,16 @@ LOG = logging.getLogger("testgen") +def _get_jwt_hashing_key() -> bytes: + try: + return base64.b64decode(settings.JWT_HASHING_KEY_B64.encode("ascii")) + except Exception as e: + raise ValueError( + "Error reading the JWT signing key from settings. Make sure you have a valid base 64 " + "string assigned to the TG_JWT_HASHING_KEY environment variable." 
+ ) from e + + def load_user_session() -> None: # Replacing this with st.context.cookies does not work # Because it does not update when cookies are deleted on logout @@ -29,7 +40,7 @@ def load_user_session() -> None: token = cookies.get(AUTH_TOKEN_COOKIE_NAME) if token is not None: try: - token = jwt.decode(token, JWT_HASHING_KEY, algorithms=["HS256"]) + token = jwt.decode(token, _get_jwt_hashing_key(), algorithms=["HS256"]) if token["exp_date"] > datetime.datetime.utcnow().timestamp(): start_user_session(token["name"], token["username"]) except Exception: @@ -77,7 +88,11 @@ def get_auth_data(): return { "credentials": {"usernames": usernames}, - "cookie": {"expiry_days": AUTH_TOKEN_EXPIRATION_DAYS, "key": JWT_HASHING_KEY, "name": AUTH_TOKEN_COOKIE_NAME}, + "cookie": { + "expiry_days": AUTH_TOKEN_EXPIRATION_DAYS, + "key": _get_jwt_hashing_key(), + "name": AUTH_TOKEN_COOKIE_NAME, + }, "preauthorized": {"emails": preauthorized_list}, } From df630cd55c78cc9c4d8766e829d6ababbb538e3a Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Thu, 8 May 2025 00:30:27 -0400 Subject: [PATCH 17/34] misc(analytics): add events for nav, execution, and dialogs --- testgen/__main__.py | 33 +++++++++--- testgen/commands/run_execute_cat_tests.py | 34 ++++++++---- testgen/commands/run_execute_tests.py | 16 +++--- testgen/commands/run_generate_tests.py | 19 +++++-- testgen/commands/run_profiling_bridge.py | 53 +++++++++++++------ testgen/common/database/database_service.py | 10 ++-- testgen/common/mixpanel_service.py | 2 +- testgen/ui/navigation/router.py | 3 ++ .../ui/views/dialogs/generate_tests_dialog.py | 2 +- testgen/ui/views/hygiene_issues.py | 11 ++++ testgen/ui/views/profiling_runs.py | 2 +- testgen/ui/views/score_details.py | 10 +++- testgen/ui/views/score_explorer.py | 10 +++- testgen/ui/views/test_results.py | 14 ++++- testgen/ui/views/test_runs.py | 2 +- 15 files changed, 167 insertions(+), 54 deletions(-) diff --git a/testgen/__main__.py b/testgen/__main__.py index 83e21c67..87207243 100644 --- a/testgen/__main__.py +++ b/testgen/__main__.py @@ -113,12 +113,19 @@ def cli(ctx: Context, verbose: bool): help="The identifier for the table group used during a profile run. Use a table_group_id shown in list-table-groups.", default=None, ) -def run_profile(configuration: Configuration, table_group_id: str): +@click.option( + "-s", + "--source", + required=False, + type=click.STRING, + default="cli", +) +def run_profile(configuration: Configuration, table_group_id: str, source: str): click.echo(f"run-profile with table_group_id: {table_group_id}") spinner = None if not configuration.verbose: spinner = MoonSpinner("Processing ... 
") - message = run_profiling_queries(table_group_id, spinner=spinner) + message = run_profiling_queries(table_group_id, spinner=spinner, source=source) click.echo("\n" + message) @@ -145,10 +152,17 @@ def run_profile(configuration: Configuration, table_group_id: str): required=False, default=None, ) +@click.option( + "-s", + "--source", + required=False, + type=click.STRING, + default="cli", +) @pass_configuration -def run_test_generation(configuration: Configuration, table_group_id: str, test_suite_key: str, generation_set: str): +def run_test_generation(configuration: Configuration, table_group_id: str, test_suite_key: str, generation_set: str, source: str): LOG.info("CurrentStep: Generate Tests - Main Procedure") - message = run_test_gen_queries(table_group_id, test_suite_key, generation_set) + message = run_test_gen_queries(table_group_id, test_suite_key, generation_set, source) LOG.info("Current Step: Generate Tests - Main Procedure Complete") click.echo("\n" + message) @@ -170,13 +184,20 @@ def run_test_generation(configuration: Configuration, table_group_id: str, test_ required=False, default=settings.DEFAULT_TEST_SUITE_KEY, ) +@click.option( + "-s", + "--source", + required=False, + type=click.STRING, + default="cli", +) @pass_configuration -def run_tests(configuration: Configuration, project_key: str, test_suite_key: str): +def run_tests(configuration: Configuration, project_key: str, test_suite_key: str, source: str): click.echo(f"run-tests for suite: {test_suite_key}") spinner = None if not configuration.verbose: spinner = MoonSpinner("Processing ... ") - message = run_execution_steps(project_key, test_suite_key, spinner=spinner) + message = run_execution_steps(project_key, test_suite_key, spinner=spinner, source=source) click.echo("\n" + message) diff --git a/testgen/commands/run_execute_cat_tests.py b/testgen/commands/run_execute_cat_tests.py index 496e2d45..02fa49e4 100644 --- a/testgen/commands/run_execute_cat_tests.py +++ b/testgen/commands/run_execute_cat_tests.py @@ -1,4 +1,5 @@ import logging +from datetime import UTC, datetime from testgen.commands.queries.execute_cat_tests_query import CCATExecutionSQL from testgen.commands.run_refresh_score_cards_results import run_refresh_score_cards_results @@ -9,6 +10,7 @@ WriteListToDB, date_service, ) +from testgen.common.mixpanel_service import MixpanelService LOG = logging.getLogger("testgen") @@ -60,14 +62,26 @@ def ParseCATResults(clsCATExecute): RunActionQueryList("DKTG", [strQuery]) -def FinalizeTestRun(clsCATExecute: CCATExecutionSQL): - lstQueries = [clsCATExecute.FinalizeTestResultsSQL(), - clsCATExecute.PushTestRunStatusUpdateSQL(), - clsCATExecute.FinalizeTestSuiteUpdateSQL(), - clsCATExecute.CalcPrevalenceTestResultsSQL(), - clsCATExecute.TestScoringRollupRunSQL(), - clsCATExecute.TestScoringRollupTableGroupSQL()] - RunActionQueryList(("DKTG"), lstQueries) +def FinalizeTestRun(clsCATExecute: CCATExecutionSQL, source: str): + _, row_counts = RunActionQueryList(("DKTG"), [ + clsCATExecute.FinalizeTestResultsSQL(), + clsCATExecute.PushTestRunStatusUpdateSQL(), + clsCATExecute.FinalizeTestSuiteUpdateSQL(), + ]) + + MixpanelService().send_event( + "run-tests", + source=source, + sql_flavor=clsCATExecute.flavor, + test_count=row_counts[0], + duration=(datetime.now(UTC) - date_service.parse_now(clsCATExecute.run_date)).total_seconds(), + ) + + RunActionQueryList(("DKTG"), [ + clsCATExecute.CalcPrevalenceTestResultsSQL(), + clsCATExecute.TestScoringRollupRunSQL(), + clsCATExecute.TestScoringRollupTableGroupSQL(), + ]) 
run_refresh_score_cards_results( project_code=clsCATExecute.project_code, add_history_entry=True, @@ -76,7 +90,7 @@ def FinalizeTestRun(clsCATExecute: CCATExecutionSQL): def run_cat_test_queries( - dctParms, strTestRunID, strTestTime, strProjectCode, strTestSuite, error_msg, minutes_offset=0, spinner=None + dctParms, strTestRunID, strTestTime, strProjectCode, strTestSuite, error_msg, minutes_offset=0, spinner=None, source=None ): booErrors = False @@ -146,4 +160,4 @@ def run_cat_test_queries( finally: LOG.info("Finalizing test run") - FinalizeTestRun(clsCATExecute) + FinalizeTestRun(clsCATExecute, source) diff --git a/testgen/commands/run_execute_tests.py b/testgen/commands/run_execute_tests.py index 321f59f1..b2de422e 100644 --- a/testgen/commands/run_execute_tests.py +++ b/testgen/commands/run_execute_tests.py @@ -103,10 +103,8 @@ def run_execution_steps_in_background(project_code, test_suite): empty_cache() background_thread = threading.Thread( target=run_execution_steps, - args=( - project_code, - test_suite - ), + args=(project_code, test_suite), + kwargs={"source": "ui"}, ) background_thread.start() else: @@ -115,7 +113,13 @@ def run_execution_steps_in_background(project_code, test_suite): subprocess.Popen(script) # NOQA S603 -def run_execution_steps(project_code: str, test_suite: str, minutes_offset: int=0, spinner: Spinner=None) -> str: +def run_execution_steps( + project_code: str, + test_suite: str, + minutes_offset: int=0, + spinner: Spinner=None, + source: str | None=None, +) -> str: # Initialize required parms for all steps has_errors = False error_msg = "" @@ -165,7 +169,7 @@ def run_execution_steps(project_code: str, test_suite: str, minutes_offset: int= LOG.info("CurrentStep: Execute Step - CAT Test Execution") if run_cat_test_queries( - test_exec_params, test_run_id, test_time, project_code, test_suite, error_msg, minutes_offset, spinner + test_exec_params, test_run_id, test_time, project_code, test_suite, error_msg, minutes_offset, spinner, source ): has_errors = True diff --git a/testgen/commands/run_generate_tests.py b/testgen/commands/run_generate_tests.py index bac1bfac..6e209d8f 100644 --- a/testgen/commands/run_generate_tests.py +++ b/testgen/commands/run_generate_tests.py @@ -2,11 +2,12 @@ from testgen.commands.queries.generate_tests_query import CDeriveTestsSQL from testgen.common import AssignConnectParms, RetrieveDBResultsToDictList, RetrieveTestGenParms, RunActionQueryList +from testgen.common.mixpanel_service import MixpanelService LOG = logging.getLogger("testgen") -def run_test_gen_queries(strTableGroupsID, strTestSuite, strGenerationSet=None): +def run_test_gen_queries(strTableGroupsID, strTestSuite, strGenerationSet=None, source=None): if strTableGroupsID is None: raise ValueError("Table Group ID was not specified") @@ -56,7 +57,8 @@ def run_test_gen_queries(strTableGroupsID, strTestSuite, strGenerationSet=None): LOG.info("CurrentStep: Creating new Test Suite") strQuery = clsTests.GetInsertTestSuiteSQL(booClean) if strQuery: - clsTests.test_suite_id, = RunActionQueryList("DKTG", [strQuery]) + insert_ids, _ = RunActionQueryList("DKTG", [strQuery]) + clsTests.test_suite_id = insert_ids[0] else: raise ValueError("Test Suite not found and could not be created") @@ -104,6 +106,15 @@ def run_test_gen_queries(strTableGroupsID, strTestSuite, strGenerationSet=None): if lstQueries: LOG.info("Running Test Generation Template Queries") RunActionQueryList("DKTG", lstQueries) - return "Test generation completed successfully." 
+ message = "Test generation completed successfully." else: - return "No TestGen Queries were compiled." + message = "No TestGen Queries were compiled." + + MixpanelService().send_event( + "generate-tests", + source=source, + sql_flavor=clsTests.sql_flavor, + generation_set=clsTests.generation_set, + ) + + return message diff --git a/testgen/commands/run_profiling_bridge.py b/testgen/commands/run_profiling_bridge.py index d72423c3..17590e02 100644 --- a/testgen/commands/run_profiling_bridge.py +++ b/testgen/commands/run_profiling_bridge.py @@ -2,6 +2,7 @@ import subprocess import threading import uuid +from datetime import UTC, datetime import pandas as pd @@ -20,6 +21,7 @@ date_service, ) from testgen.common.database.database_service import empty_cache +from testgen.common.mixpanel_service import MixpanelService booClean = True LOG = logging.getLogger("testgen") @@ -234,7 +236,11 @@ def run_profiling_in_background(table_group_id): if settings.IS_DEBUG: LOG.info(msg + ". Running in debug mode (new thread instead of new process).") empty_cache() - background_thread = threading.Thread(target=run_profiling_queries, args=(table_group_id,)) + background_thread = threading.Thread( + target=run_profiling_queries, + args=(table_group_id), + kwargs={"source": "ui"}, + ) background_thread.start() else: LOG.info(msg) @@ -242,7 +248,7 @@ def run_profiling_in_background(table_group_id): subprocess.Popen(script) # NOQA S603 -def run_profiling_queries(strTableGroupsID, spinner=None): +def run_profiling_queries(strTableGroupsID, spinner=None, source=None): if strTableGroupsID is None: raise ValueError("Table Group ID was not specified") @@ -308,28 +314,29 @@ def run_profiling_queries(strTableGroupsID, spinner=None): if spinner: spinner.next() + table_count = 0 + column_count = 0 try: # Retrieve Column Metadata LOG.info("CurrentStep: Getting DDF from project") strQuery = clsProfiling.GetDDFQuery() lstResult = RetrieveDBResultsToDictList("PROJECT", strQuery) - - if len(lstResult) == 0: - LOG.warning("SQL retrieved 0 records") + column_count = len(lstResult) if lstResult: - if clsProfiling.profile_use_sampling == "Y": - # Get distinct tables - distinct_tables = set() - for item in lstResult: - schema_name = item["table_schema"] - table_name = item["table_name"] - distinct_tables.add(f"{schema_name}.{table_name}") + # Get distinct tables + distinct_tables = set() + for item in lstResult: + schema_name = item["table_schema"] + table_name = item["table_name"] + distinct_tables.add(f"{schema_name}.{table_name}") - # Convert the set to a list - distinct_tables_list = list(distinct_tables) + # Convert the set to a list + distinct_tables_list = list(distinct_tables) + table_count = len(distinct_tables_list) + if clsProfiling.profile_use_sampling == "Y": # Sampling tables lstQueries = [] for parm_sampling_table in distinct_tables_list: @@ -494,12 +501,24 @@ def run_profiling_queries(strTableGroupsID, spinner=None): raise finally: LOG.info("Updating the profiling run record") - lstProfileRunQuery = [ + RunActionQueryList("DKTG", [ clsProfiling.GetProfileRunInfoRecordUpdateQuery(), + ]) + + MixpanelService().send_event( + "run-profiling", + source=source, + sql_flavor=clsProfiling.flavor, + sampling=clsProfiling.profile_use_sampling == "Y", + table_count=table_count, + column_count=column_count, + duration=(datetime.now(UTC) - date_service.parse_now(clsProfiling.run_date)).total_seconds(), + ) + + RunActionQueryList("DKTG", [ clsProfiling.GetAnomalyScoringRollupRunQuery(), 
clsProfiling.GetAnomalyScoringRollupTableGroupQuery(), - ] - RunActionQueryList("DKTG", lstProfileRunQuery) + ]) run_refresh_score_cards_results( project_code=dctParms["project_code"], add_history_entry=True, diff --git a/testgen/common/database/database_service.py b/testgen/common/database/database_service.py index 38fa87a6..714aae01 100644 --- a/testgen/common/database/database_service.py +++ b/testgen/common/database/database_service.py @@ -393,7 +393,8 @@ def RunActionQueryList(strCredentialSet, lstQueries, strAdminNDS="N", user_overr ) as con: i = 0 n = len(lstQueries) - lstInsertedIds = [] + insert_ids = [] + row_counts = [] if n == 0: LOG.info("No queries to process") for q in lstQueries: @@ -402,20 +403,21 @@ def RunActionQueryList(strCredentialSet, lstQueries, strAdminNDS="N", user_overr LOG.info(f"(Processing {i} of {n})") tx = con.begin() exQ = con.execute(text(q)) + row_counts.append(exQ.rowcount) if exQ.rowcount == -1: strMsg = "Action query processed no records." else: strMsg = str(exQ.rowcount) + " records processed." try: - lstInsertedIds.append(exQ.fetchone()[0]) + insert_ids.append(exQ.fetchone()[0]) except Exception: - lstInsertedIds.append(None) + insert_ids.append(None) tx.commit() LOG.info(strMsg) - return lstInsertedIds + return insert_ids, row_counts diff --git a/testgen/common/mixpanel_service.py b/testgen/common/mixpanel_service.py index 53adefd4..703ff76b 100644 --- a/testgen/common/mixpanel_service.py +++ b/testgen/common/mixpanel_service.py @@ -35,7 +35,7 @@ def instance_id(self): @cached_property def distinct_id(self): - return self._hash_value(session.username) + return self._hash_value(session.username or self.instance_id) def _hash_value(self, value: bytes | str, digest_size: int = 8) -> str: if isinstance(value, str): diff --git a/testgen/ui/navigation/router.py b/testgen/ui/navigation/router.py index 754edc7b..7729672b 100644 --- a/testgen/ui/navigation/router.py +++ b/testgen/ui/navigation/router.py @@ -6,6 +6,7 @@ import streamlit as st import testgen.ui.navigation.page +from testgen.common.mixpanel_service import MixpanelService from testgen.ui.session import session from testgen.utils.singleton import Singleton @@ -83,6 +84,8 @@ def navigate(self, /, to: str, with_args: dict = {}) -> None: # noqa: B006 len((st.query_params or {}).keys()) != len(final_args.keys()) or any(st.query_params.get(name) != value for name, value in final_args.items()) ) + if is_different_page: + MixpanelService().send_event(f"nav-{to}") if is_different_page or query_params_changed: route = self._routes[to] session.page_args_pending_router = { diff --git a/testgen/ui/views/dialogs/generate_tests_dialog.py b/testgen/ui/views/dialogs/generate_tests_dialog.py index 76476450..ebd7e0fb 100644 --- a/testgen/ui/views/dialogs/generate_tests_dialog.py +++ b/testgen/ui/views/dialogs/generate_tests_dialog.py @@ -71,7 +71,7 @@ def generate_tests_dialog(test_suite: pd.Series) -> None: status_container.info("Starting test generation ...") try: - run_test_gen_queries(table_group_id, test_suite_name, selected_set) + run_test_gen_queries(table_group_id, test_suite_name, selected_set, "ui") except Exception as e: status_container.error(f"Test generation encountered errors: {e!s}.") diff --git a/testgen/ui/views/hygiene_issues.py b/testgen/ui/views/hygiene_issues.py index 654326c0..fb2b5ba9 100644 --- a/testgen/ui/views/hygiene_issues.py +++ b/testgen/ui/views/hygiene_issues.py @@ -11,6 +11,7 @@ import testgen.ui.services.query_service as dq from testgen.commands.run_rollup_scores import 
run_profile_rollup_scoring_queries from testgen.common import date_service +from testgen.common.mixpanel_service import MixpanelService from testgen.ui.components import widgets as testgen from testgen.ui.components.widgets.download_dialog import FILE_DATA_TYPE, download_dialog, zip_multi_file_data from testgen.ui.navigation.page import Page @@ -228,12 +229,22 @@ def render( if st.button( ":material/visibility: Source Data", help="View current source data for highlighted issue", use_container_width=True ): + MixpanelService().send_event( + "view-source-data", + page=self.path, + issue_type=selected_row["anomaly_name"], + ) source_data_dialog(selected_row) if st.button( ":material/download: Issue Report", use_container_width=True, help="Generate a PDF report for each selected issue", ): + MixpanelService().send_event( + "download-issue-report", + page=self.path, + issue_count=len(selected), + ) dialog_title = "Download Issue Report" if len(selected) == 1: download_dialog( diff --git a/testgen/ui/views/profiling_runs.py b/testgen/ui/views/profiling_runs.py index e67b58aa..4beb76ac 100644 --- a/testgen/ui/views/profiling_runs.py +++ b/testgen/ui/views/profiling_runs.py @@ -126,7 +126,7 @@ def arg_value_input(self) -> tuple[bool, list[typing.Any], dict[str, typing.Any] display_column="table_groups_name", required=True, ) - return bool(tg_id), [], {"table_group_id": tg_id} + return bool(tg_id), [], {"table_group_id": tg_id, "source": "scheduler"} def render_empty_state(project_code: str, user_can_run: bool) -> bool: diff --git a/testgen/ui/views/score_details.py b/testgen/ui/views/score_details.py index c96fd98c..66a9408f 100644 --- a/testgen/ui/views/score_details.py +++ b/testgen/ui/views/score_details.py @@ -6,6 +6,7 @@ import streamlit as st from testgen.commands.run_refresh_score_cards_results import run_recalculate_score_card +from testgen.common.mixpanel_service import MixpanelService from testgen.common.models import with_database_session from testgen.common.models.scores import ScoreDefinition, ScoreDefinitionBreakdownItem, SelectedIssue from testgen.ui.components import widgets as testgen @@ -20,10 +21,11 @@ from testgen.utils import format_score_card, format_score_card_breakdown, format_score_card_issues LOG = logging.getLogger("testgen") +PAGE_PATH = "quality-dashboard:score-details" class ScoreDetailsPage(Page): - path = "quality-dashboard:score-details" + path = PAGE_PATH can_activate: ClassVar = [ lambda: session.authentication_status, lambda: not user_session_service.user_has_catalog_role(), @@ -118,6 +120,12 @@ def select_score_type(score_type: str) -> None: def export_issue_reports(selected_issues: list[SelectedIssue]) -> None: + MixpanelService().send_event( + "download-issue-report", + page=PAGE_PATH, + issue_count=len(selected_issues), + ) + issues_data = get_score_card_issue_reports(selected_issues) dialog_title = "Download Issue Reports" if len(issues_data) == 1: diff --git a/testgen/ui/views/score_explorer.py b/testgen/ui/views/score_explorer.py index 17edfaaf..d88f2461 100644 --- a/testgen/ui/views/score_explorer.py +++ b/testgen/ui/views/score_explorer.py @@ -8,6 +8,7 @@ run_recalculate_score_card, run_refresh_score_cards_results, ) +from testgen.common.mixpanel_service import MixpanelService from testgen.common.models.scores import ScoreCategory, ScoreDefinition, ScoreDefinitionFilter, SelectedIssue from testgen.ui.components import widgets as testgen from testgen.ui.components.widgets.download_dialog import FILE_DATA_TYPE, download_dialog, zip_multi_file_data @@ 
-24,9 +25,10 @@ from testgen.ui.session import session from testgen.utils import format_score_card, format_score_card_breakdown, format_score_card_issues +PAGE_PATH = "quality-dashboard:explorer" class ScoreExplorerPage(Page): - path = "quality-dashboard:explorer" + path = PAGE_PATH can_activate: ClassVar = [ lambda: session.authentication_status, lambda: not user_session_service.user_has_catalog_role(), @@ -176,6 +178,12 @@ def set_breakdown_drilldown(drilldown: str | None) -> None: def export_issue_reports(selected_issues: list[SelectedIssue]) -> None: + MixpanelService().send_event( + "download-issue-report", + page=PAGE_PATH, + issue_count=len(selected_issues), + ) + issues_data = get_score_card_issue_reports(selected_issues) dialog_title = "Download Issue Reports" if len(issues_data) == 1: diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py index f64251d2..e0307873 100644 --- a/testgen/ui/views/test_results.py +++ b/testgen/ui/views/test_results.py @@ -14,6 +14,7 @@ import testgen.ui.services.query_service as dq from testgen.commands.run_rollup_scores import run_test_rollup_scoring_queries from testgen.common import date_service +from testgen.common.mixpanel_service import MixpanelService from testgen.ui.components import widgets as testgen from testgen.ui.components.widgets.download_dialog import FILE_DATA_TYPE, download_dialog, zip_multi_file_data from testgen.ui.navigation.page import Page @@ -26,10 +27,11 @@ from testgen.utils import friendly_score, is_uuid4 ALWAYS_SPIN = False +PAGE_PATH = "test-runs:results" class TestResultsPage(Page): - path = "test-runs:results" + path = PAGE_PATH can_activate: typing.ClassVar = [ lambda: session.authentication_status, lambda: not user_session_service.user_has_catalog_role(), @@ -566,6 +568,11 @@ def show_result_detail( ":material/visibility: Source Data", help="View current source data for highlighted result", use_container_width=True ): + MixpanelService().send_event( + "view-source-data", + page=PAGE_PATH, + test_type=selected_row["test_name_short"], + ) source_data_dialog(selected_row) with v_col4: @@ -588,6 +595,11 @@ def show_result_detail( disabled=not report_eligible_rows, help=report_btn_help, ): + MixpanelService().send_event( + "download-issue-report", + page=PAGE_PATH, + issue_count=len(report_eligible_rows), + ) dialog_title = "Download Issue Report" if len(report_eligible_rows) == 1: download_dialog( diff --git a/testgen/ui/views/test_runs.py b/testgen/ui/views/test_runs.py index 621df7ab..979de69d 100644 --- a/testgen/ui/views/test_runs.py +++ b/testgen/ui/views/test_runs.py @@ -135,7 +135,7 @@ def arg_value_input(self) -> tuple[bool, list[typing.Any], dict[str, typing.Any] display_column="test_suite", required=True, ) - return bool(ts_id), [], {"test_suite_id": ts_id} + return bool(ts_id), [], {"test_suite_id": ts_id, "source": "scheduler"} def render_empty_state(project_code: str, user_can_run: bool) -> bool: From 9582fa58e0895c2b2bebd593e5453f2dd36ec8ca Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Thu, 8 May 2025 15:23:19 -0400 Subject: [PATCH 18/34] fix(analytics): simplify source datapoint - address review feedback --- testgen/__main__.py | 34 ++++--------------- testgen/commands/run_execute_cat_tests.py | 25 ++++++++------ testgen/commands/run_execute_tests.py | 4 +-- testgen/commands/run_generate_tests.py | 5 +-- testgen/commands/run_profiling_bridge.py | 27 ++++++++------- testgen/common/mixpanel_service.py | 2 +- testgen/scheduler/cli_scheduler.py | 3 +- testgen/settings.py | 5 +++ 
.../ui/views/dialogs/generate_tests_dialog.py | 2 +- testgen/ui/views/profiling_runs.py | 2 +- testgen/ui/views/test_runs.py | 2 +- 11 files changed, 50 insertions(+), 61 deletions(-) diff --git a/testgen/__main__.py b/testgen/__main__.py index 87207243..2f09b169 100644 --- a/testgen/__main__.py +++ b/testgen/__main__.py @@ -113,19 +113,12 @@ def cli(ctx: Context, verbose: bool): help="The identifier for the table group used during a profile run. Use a table_group_id shown in list-table-groups.", default=None, ) -@click.option( - "-s", - "--source", - required=False, - type=click.STRING, - default="cli", -) -def run_profile(configuration: Configuration, table_group_id: str, source: str): +def run_profile(configuration: Configuration, table_group_id: str): click.echo(f"run-profile with table_group_id: {table_group_id}") spinner = None if not configuration.verbose: spinner = MoonSpinner("Processing ... ") - message = run_profiling_queries(table_group_id, spinner=spinner, source=source) + message = run_profiling_queries(table_group_id, spinner=spinner) click.echo("\n" + message) @@ -152,17 +145,10 @@ def run_profile(configuration: Configuration, table_group_id: str, source: str): required=False, default=None, ) -@click.option( - "-s", - "--source", - required=False, - type=click.STRING, - default="cli", -) @pass_configuration -def run_test_generation(configuration: Configuration, table_group_id: str, test_suite_key: str, generation_set: str, source: str): +def run_test_generation(configuration: Configuration, table_group_id: str, test_suite_key: str, generation_set: str): LOG.info("CurrentStep: Generate Tests - Main Procedure") - message = run_test_gen_queries(table_group_id, test_suite_key, generation_set, source) + message = run_test_gen_queries(table_group_id, test_suite_key, generation_set) LOG.info("Current Step: Generate Tests - Main Procedure Complete") click.echo("\n" + message) @@ -184,20 +170,13 @@ def run_test_generation(configuration: Configuration, table_group_id: str, test_ required=False, default=settings.DEFAULT_TEST_SUITE_KEY, ) -@click.option( - "-s", - "--source", - required=False, - type=click.STRING, - default="cli", -) @pass_configuration -def run_tests(configuration: Configuration, project_key: str, test_suite_key: str, source: str): +def run_tests(configuration: Configuration, project_key: str, test_suite_key: str): click.echo(f"run-tests for suite: {test_suite_key}") spinner = None if not configuration.verbose: spinner = MoonSpinner("Processing ... 
") - message = run_execution_steps(project_key, test_suite_key, spinner=spinner, source=source) + message = run_execution_steps(project_key, test_suite_key, spinner=spinner) click.echo("\n" + message) @@ -660,6 +639,7 @@ def run_ui(): "--", f"{'--debug' if settings.IS_DEBUG else ''}", ], + env={**os.environ, "TG_JOB_SOURCE": "UI"} ) except Exception: LOG.exception(f"Testgen UI exited with status code {status_code}") diff --git a/testgen/commands/run_execute_cat_tests.py b/testgen/commands/run_execute_cat_tests.py index 02fa49e4..28848bda 100644 --- a/testgen/commands/run_execute_cat_tests.py +++ b/testgen/commands/run_execute_cat_tests.py @@ -1,6 +1,7 @@ import logging from datetime import UTC, datetime +from testgen import settings from testgen.commands.queries.execute_cat_tests_query import CCATExecutionSQL from testgen.commands.run_refresh_score_cards_results import run_refresh_score_cards_results from testgen.common import ( @@ -62,20 +63,13 @@ def ParseCATResults(clsCATExecute): RunActionQueryList("DKTG", [strQuery]) -def FinalizeTestRun(clsCATExecute: CCATExecutionSQL, source: str): +def FinalizeTestRun(clsCATExecute: CCATExecutionSQL): _, row_counts = RunActionQueryList(("DKTG"), [ clsCATExecute.FinalizeTestResultsSQL(), clsCATExecute.PushTestRunStatusUpdateSQL(), clsCATExecute.FinalizeTestSuiteUpdateSQL(), ]) - - MixpanelService().send_event( - "run-tests", - source=source, - sql_flavor=clsCATExecute.flavor, - test_count=row_counts[0], - duration=(datetime.now(UTC) - date_service.parse_now(clsCATExecute.run_date)).total_seconds(), - ) + end_time = datetime.now(UTC) RunActionQueryList(("DKTG"), [ clsCATExecute.CalcPrevalenceTestResultsSQL(), @@ -88,9 +82,18 @@ def FinalizeTestRun(clsCATExecute: CCATExecutionSQL, source: str): refresh_date=date_service.parse_now(clsCATExecute.run_date), ) + MixpanelService().send_event( + "run-tests", + source=settings.ANALYTICS_JOB_SOURCE, + sql_flavor=clsCATExecute.flavor, + test_count=row_counts[0], + run_duration=(end_time - date_service.parse_now(clsCATExecute.run_date)).total_seconds(), + scoring_duration=(datetime.now(UTC) - end_time).total_seconds(), + ) + def run_cat_test_queries( - dctParms, strTestRunID, strTestTime, strProjectCode, strTestSuite, error_msg, minutes_offset=0, spinner=None, source=None + dctParms, strTestRunID, strTestTime, strProjectCode, strTestSuite, error_msg, minutes_offset=0, spinner=None ): booErrors = False @@ -160,4 +163,4 @@ def run_cat_test_queries( finally: LOG.info("Finalizing test run") - FinalizeTestRun(clsCATExecute, source) + FinalizeTestRun(clsCATExecute) diff --git a/testgen/commands/run_execute_tests.py b/testgen/commands/run_execute_tests.py index b2de422e..dc8028d1 100644 --- a/testgen/commands/run_execute_tests.py +++ b/testgen/commands/run_execute_tests.py @@ -104,7 +104,6 @@ def run_execution_steps_in_background(project_code, test_suite): background_thread = threading.Thread( target=run_execution_steps, args=(project_code, test_suite), - kwargs={"source": "ui"}, ) background_thread.start() else: @@ -118,7 +117,6 @@ def run_execution_steps( test_suite: str, minutes_offset: int=0, spinner: Spinner=None, - source: str | None=None, ) -> str: # Initialize required parms for all steps has_errors = False @@ -169,7 +167,7 @@ def run_execution_steps( LOG.info("CurrentStep: Execute Step - CAT Test Execution") if run_cat_test_queries( - test_exec_params, test_run_id, test_time, project_code, test_suite, error_msg, minutes_offset, spinner, source + test_exec_params, test_run_id, test_time, project_code, 
test_suite, error_msg, minutes_offset, spinner ): has_errors = True diff --git a/testgen/commands/run_generate_tests.py b/testgen/commands/run_generate_tests.py index 6e209d8f..17d97266 100644 --- a/testgen/commands/run_generate_tests.py +++ b/testgen/commands/run_generate_tests.py @@ -1,5 +1,6 @@ import logging +from testgen import settings from testgen.commands.queries.generate_tests_query import CDeriveTestsSQL from testgen.common import AssignConnectParms, RetrieveDBResultsToDictList, RetrieveTestGenParms, RunActionQueryList from testgen.common.mixpanel_service import MixpanelService @@ -7,7 +8,7 @@ LOG = logging.getLogger("testgen") -def run_test_gen_queries(strTableGroupsID, strTestSuite, strGenerationSet=None, source=None): +def run_test_gen_queries(strTableGroupsID, strTestSuite, strGenerationSet=None): if strTableGroupsID is None: raise ValueError("Table Group ID was not specified") @@ -112,7 +113,7 @@ def run_test_gen_queries(strTableGroupsID, strTestSuite, strGenerationSet=None, MixpanelService().send_event( "generate-tests", - source=source, + source=settings.ANALYTICS_JOB_SOURCE, sql_flavor=clsTests.sql_flavor, generation_set=clsTests.generation_set, ) diff --git a/testgen/commands/run_profiling_bridge.py b/testgen/commands/run_profiling_bridge.py index 17590e02..a766c6bd 100644 --- a/testgen/commands/run_profiling_bridge.py +++ b/testgen/commands/run_profiling_bridge.py @@ -238,8 +238,7 @@ def run_profiling_in_background(table_group_id): empty_cache() background_thread = threading.Thread( target=run_profiling_queries, - args=(table_group_id), - kwargs={"source": "ui"}, + args=(table_group_id,), ) background_thread.start() else: @@ -248,7 +247,7 @@ def run_profiling_in_background(table_group_id): subprocess.Popen(script) # NOQA S603 -def run_profiling_queries(strTableGroupsID, spinner=None, source=None): +def run_profiling_queries(strTableGroupsID, spinner=None): if strTableGroupsID is None: raise ValueError("Table Group ID was not specified") @@ -504,16 +503,7 @@ def run_profiling_queries(strTableGroupsID, spinner=None, source=None): RunActionQueryList("DKTG", [ clsProfiling.GetProfileRunInfoRecordUpdateQuery(), ]) - - MixpanelService().send_event( - "run-profiling", - source=source, - sql_flavor=clsProfiling.flavor, - sampling=clsProfiling.profile_use_sampling == "Y", - table_count=table_count, - column_count=column_count, - duration=(datetime.now(UTC) - date_service.parse_now(clsProfiling.run_date)).total_seconds(), - ) + end_time = datetime.now(UTC) RunActionQueryList("DKTG", [ clsProfiling.GetAnomalyScoringRollupRunQuery(), @@ -525,6 +515,17 @@ def run_profiling_queries(strTableGroupsID, spinner=None, source=None): refresh_date=date_service.parse_now(clsProfiling.run_date), ) + MixpanelService().send_event( + "run-profiling", + source=settings.ANALYTICS_JOB_SOURCE, + sql_flavor=clsProfiling.flavor, + sampling=clsProfiling.profile_use_sampling == "Y", + table_count=table_count, + column_count=column_count, + run_duration=(end_time - date_service.parse_now(clsProfiling.run_date)).total_seconds(), + scoring_duration=(datetime.now(UTC) - end_time).total_seconds(), + ) + return f""" Profiling completed {"with errors. Check log for details." 
if has_errors else "successfully."} Run ID: {profiling_run_id} diff --git a/testgen/common/mixpanel_service.py b/testgen/common/mixpanel_service.py index 703ff76b..77396eb9 100644 --- a/testgen/common/mixpanel_service.py +++ b/testgen/common/mixpanel_service.py @@ -35,7 +35,7 @@ def instance_id(self): @cached_property def distinct_id(self): - return self._hash_value(session.username or self.instance_id) + return self._hash_value(session.username or "") def _hash_value(self, value: bytes | str, digest_size: int = 8) -> str: if isinstance(value, str): diff --git a/testgen/scheduler/cli_scheduler.py b/testgen/scheduler/cli_scheduler.py index 31bd3338..7c82809e 100644 --- a/testgen/scheduler/cli_scheduler.py +++ b/testgen/scheduler/cli_scheduler.py @@ -1,4 +1,5 @@ import logging +import os import signal import subprocess import sys @@ -84,7 +85,7 @@ def start_job(self, job: CliJob, triggering_time: datetime) -> None: LOG.info("Executing '%s'", " ".join(exec_cmd)) - proc = subprocess.Popen(exec_cmd, start_new_session=True) # noqa: S603 + proc = subprocess.Popen(exec_cmd, start_new_session=True, env={**os.environ, "TG_JOB_SOURCE": "SCHEDULER"}) # noqa: S603 threading.Thread(target=self._proc_wrapper, args=(proc,)).start() def _proc_wrapper(self, proc: subprocess.Popen): diff --git a/testgen/settings.py b/testgen/settings.py index 8669b4d6..2d2c91c7 100644 --- a/testgen/settings.py +++ b/testgen/settings.py @@ -485,6 +485,11 @@ Disables sending usage data when set to any value except "true" and "yes". Defaults to "yes" """ +ANALYTICS_JOB_SOURCE: str = os.getenv("TG_JOB_SOURCE", "CLI") +""" +Identifies the job trigger for analytics purposes. +""" + JWT_HASHING_KEY_B64: str = os.getenv("TG_JWT_HASHING_KEY") """ Random key used to sign/verify the authentication token diff --git a/testgen/ui/views/dialogs/generate_tests_dialog.py b/testgen/ui/views/dialogs/generate_tests_dialog.py index ebd7e0fb..76476450 100644 --- a/testgen/ui/views/dialogs/generate_tests_dialog.py +++ b/testgen/ui/views/dialogs/generate_tests_dialog.py @@ -71,7 +71,7 @@ def generate_tests_dialog(test_suite: pd.Series) -> None: status_container.info("Starting test generation ...") try: - run_test_gen_queries(table_group_id, test_suite_name, selected_set, "ui") + run_test_gen_queries(table_group_id, test_suite_name, selected_set) except Exception as e: status_container.error(f"Test generation encountered errors: {e!s}.") diff --git a/testgen/ui/views/profiling_runs.py b/testgen/ui/views/profiling_runs.py index 4beb76ac..e67b58aa 100644 --- a/testgen/ui/views/profiling_runs.py +++ b/testgen/ui/views/profiling_runs.py @@ -126,7 +126,7 @@ def arg_value_input(self) -> tuple[bool, list[typing.Any], dict[str, typing.Any] display_column="table_groups_name", required=True, ) - return bool(tg_id), [], {"table_group_id": tg_id, "source": "scheduler"} + return bool(tg_id), [], {"table_group_id": tg_id} def render_empty_state(project_code: str, user_can_run: bool) -> bool: diff --git a/testgen/ui/views/test_runs.py b/testgen/ui/views/test_runs.py index 979de69d..621df7ab 100644 --- a/testgen/ui/views/test_runs.py +++ b/testgen/ui/views/test_runs.py @@ -135,7 +135,7 @@ def arg_value_input(self) -> tuple[bool, list[typing.Any], dict[str, typing.Any] display_column="test_suite", required=True, ) - return bool(ts_id), [], {"test_suite_id": ts_id, "source": "scheduler"} + return bool(ts_id), [], {"test_suite_id": ts_id} def render_empty_state(project_code: str, user_can_run: bool) -> bool: From cce9a8a1dc939a0223516907b3bc9af4a8e2003c Mon Sep 17 
00:00:00 2001 From: Aarthy Adityan Date: Thu, 8 May 2025 15:23:47 -0400 Subject: [PATCH 19/34] fix(jwt): display jwt env variable error to user --- README.md | 3 +++ docs/local_development.md | 1 + testgen/ui/services/user_session_service.py | 9 +++++---- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 5b8b0947..ec03ed65 100644 --- a/README.md +++ b/README.md @@ -135,6 +135,9 @@ export TG_DECRYPT_SALT= export TESTGEN_USERNAME= export TESTGEN_PASSWORD= +# Set an arbitrary base64-encoded string to be used for signing authentication tokens +export TG_JWT_HASHING_KEY= + # Set an accessible path for storing application logs export TESTGEN_LOG_FILE_PATH= ``` diff --git a/docs/local_development.md b/docs/local_development.md index 22ede854..338070ce 100644 --- a/docs/local_development.md +++ b/docs/local_development.md @@ -64,6 +64,7 @@ Create a `local.env` file with the following environment variables, replacing th export TESTGEN_DEBUG=yes export TESTGEN_LOG_TO_FILE=no export TG_ANALYTICS=no +export TG_JWT_HASHING_KEY= export TESTGEN_USERNAME= export TESTGEN_PASSWORD= export TG_DECRYPT_SALT= diff --git a/testgen/ui/services/user_session_service.py b/testgen/ui/services/user_session_service.py index a6741d20..597bc45e 100644 --- a/testgen/ui/services/user_session_service.py +++ b/testgen/ui/services/user_session_service.py @@ -24,10 +24,11 @@ def _get_jwt_hashing_key() -> bytes: try: return base64.b64decode(settings.JWT_HASHING_KEY_B64.encode("ascii")) except Exception as e: - raise ValueError( - "Error reading the JWT signing key from settings. Make sure you have a valid base 64 " - "string assigned to the TG_JWT_HASHING_KEY environment variable." - ) from e + st.error( + "Error reading the JWT signing key from settings.\n\n Make sure you have a valid " + "base64 string assigned to the TG_JWT_HASHING_KEY environment variable." 
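For reference, a minimal sketch of producing a value that passes this check — assuming, as the README note above says, that any base64-encoded string is acceptable for TG_JWT_HASHING_KEY (the 32-byte length below is illustrative, not a requirement stated by the patch):

```python
import base64
import os

# Generate an arbitrary base64-encoded secret for TG_JWT_HASHING_KEY.
# 32 random bytes is an illustrative choice; the check only requires valid base64.
key_b64 = base64.b64encode(os.urandom(32)).decode("ascii")
print(f"export TG_JWT_HASHING_KEY={key_b64}")

# The service decodes it the same way _get_jwt_hashing_key does above; with this
# patch an invalid value surfaces as an st.error message instead of a raised exception.
decoded_key = base64.b64decode(key_b64.encode("ascii"))
assert isinstance(decoded_key, bytes)
```
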
+ ) + st.stop() def load_user_session() -> None: From 069f4a70deb703a868c820feedabb9ca2c087244 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Wed, 14 May 2025 00:46:37 -0400 Subject: [PATCH 20/34] fix(navigation): missing project_code in links --- testgen/ui/components/frontend/js/components/score_issues.js | 1 + testgen/ui/views/dialogs/run_profiling_dialog.py | 2 +- testgen/ui/views/dialogs/run_tests_dialog.py | 2 +- testgen/ui/views/score_details.py | 2 +- testgen/ui/views/table_groups/page.py | 2 +- 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/testgen/ui/components/frontend/js/components/score_issues.js b/testgen/ui/components/frontend/js/components/score_issues.js index cd37e0e1..2c319249 100644 --- a/testgen/ui/components/frontend/js/components/score_issues.js +++ b/testgen/ui/components/frontend/js/components/score_issues.js @@ -311,6 +311,7 @@ const TimeCell = (value, row) => { run_id: row.run_id, table_name: row.table, column_name: row.column, + selected: row.id, }, }), ); diff --git a/testgen/ui/views/dialogs/run_profiling_dialog.py b/testgen/ui/views/dialogs/run_profiling_dialog.py index b1077f8f..4250f1e7 100644 --- a/testgen/ui/views/dialogs/run_profiling_dialog.py +++ b/testgen/ui/views/dialogs/run_profiling_dialog.py @@ -65,7 +65,7 @@ def run_profiling_dialog(project_code: str, table_group: pd.Series | None = None testgen.link( label="Go to Profiling Runs", href=LINK_HREF, - params={ "table_group": table_group_id }, + params={ "project_code": project_code, "table_group": table_group_id }, right_icon="chevron_right", underline=False, height=40, diff --git a/testgen/ui/views/dialogs/run_tests_dialog.py b/testgen/ui/views/dialogs/run_tests_dialog.py index a5b9eb6b..d0cb0ada 100644 --- a/testgen/ui/views/dialogs/run_tests_dialog.py +++ b/testgen/ui/views/dialogs/run_tests_dialog.py @@ -69,7 +69,7 @@ def run_tests_dialog(project_code: str, test_suite: pd.Series | None = None, def testgen.link( label="Go to Test Runs", href=LINK_HREF, - params={ "test_suite": test_suite_id }, + params={ "project_code": project_code, "test_suite": test_suite_id }, right_icon="chevron_right", underline=False, height=40, diff --git a/testgen/ui/views/score_details.py b/testgen/ui/views/score_details.py index 66a9408f..877c77a1 100644 --- a/testgen/ui/views/score_details.py +++ b/testgen/ui/views/score_details.py @@ -187,7 +187,7 @@ def delete_score_card(definition_id: str) -> None: if delete_clicked(): score_definition.delete() get_all_score_cards.clear() - Router().navigate("quality-dashboard") + Router().navigate("quality-dashboard", { "project_code": score_definition.project_code }) def recalculate_score_history(definition_id: str) -> None: diff --git a/testgen/ui/views/table_groups/page.py b/testgen/ui/views/table_groups/page.py index d557e33c..5614f141 100644 --- a/testgen/ui/views/table_groups/page.py +++ b/testgen/ui/views/table_groups/page.py @@ -98,7 +98,7 @@ def render(self, connection_id: str, **_kwargs) -> None: testgen.link( label="Test Suites", href="test-suites", - params={"table_group_id": table_group["id"]}, + params={ "project_code": project_code, "table_group_id": table_group["id"] }, right_icon="chevron_right", key=f"tablegroups:keys:go-to-tsuites:{table_group['id']}", ) From 03b0eec29a3fb1a17e2c64ab2edabddbfba8d2c8 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Wed, 14 May 2025 00:47:04 -0400 Subject: [PATCH 21/34] fix(test-definitions): broken add/edit form --- testgen/ui/views/test_definitions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
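The pattern applied in the navigation fixes above is to thread the active project_code into every cross-page link and navigate call. A hypothetical helper sketching the shape of those params dicts (with_project is not part of the codebase; testgen.link and Router().navigate are used exactly as shown in the diffs):

```python
# Hypothetical convenience wrapper: merge the active project into link params so
# that project-scoped pages (test-runs, profiling-runs, quality-dashboard, ...)
# keep their project context after navigation.
def with_project(project_code: str, **params) -> dict:
    return {"project_code": project_code, **params}

# Usage mirroring the diffs above (sketch only):
#   testgen.link(label="Go to Test Runs", href="test-runs",
#                params=with_project(project_code, test_suite=test_suite_id))
#   Router().navigate("quality-dashboard", with_project(score_definition.project_code))
```
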
diff --git a/testgen/ui/views/test_definitions.py b/testgen/ui/views/test_definitions.py index d14d8a27..d50318f8 100644 --- a/testgen/ui/views/test_definitions.py +++ b/testgen/ui/views/test_definitions.py @@ -372,7 +372,7 @@ def show_test_form( "test_description": left_column.text_area( label="Test Description Override", max_chars=1000, - height=3, + height=114, placeholder=test_description_placeholder, value=test_description, help=test_description_help, From fb630b661142f8b81bebc76831b267dadc295ee4 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Wed, 14 May 2025 00:47:44 -0400 Subject: [PATCH 22/34] fix(sql): grant permissions for new job_schedules table --- testgen/template/dbsetup/075_grant_role_rights.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/testgen/template/dbsetup/075_grant_role_rights.sql b/testgen/template/dbsetup/075_grant_role_rights.sql index 33e1f98b..f5bd1013 100644 --- a/testgen/template/dbsetup/075_grant_role_rights.sql +++ b/testgen/template/dbsetup/075_grant_role_rights.sql @@ -37,7 +37,8 @@ GRANT SELECT, INSERT, DELETE, UPDATE ON {SCHEMA_NAME}.score_definition_results, {SCHEMA_NAME}.score_definition_results_breakdown, {SCHEMA_NAME}.score_definition_results_history, - {SCHEMA_NAME}.score_history_latest_runs + {SCHEMA_NAME}.score_history_latest_runs, + {SCHEMA_NAME}.job_schedules TO testgen_execute_role; From e04ca59256b4802ee13218d472b49e5e00ba3ad1 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Wed, 14 May 2025 00:49:30 -0400 Subject: [PATCH 23/34] fix(score-breakdown): display line for NULL grouping value --- testgen/common/models/scores.py | 8 +++++++- .../score_cards/get_score_card_breakdown_by_column.sql | 6 ++---- .../score_cards/get_score_card_breakdown_by_dimension.sql | 6 ++---- .../components/frontend/js/components/score_breakdown.js | 2 +- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/testgen/common/models/scores.py b/testgen/common/models/scores.py index 814dc233..25105e33 100644 --- a/testgen/common/models/scores.py +++ b/testgen/common/models/scores.py @@ -264,7 +264,13 @@ def get_score_card_breakdown( "column_name": ["table_groups_id", "table_name", "column_name"], }.get(group_by, [group_by]) filters = " AND ".join(self._get_raw_query_filters(cde_only=score_type == "cde_score")) - join_condition = " AND ".join([f"test_records.{column} = profiling_records.{column}" for column in columns]) + + if group_by in ["table_groups_name", "table_name", "column_name"]: + join_condition = " AND ".join([f"test_records.{column} = profiling_records.{column}" for column in columns]) + else: + join_condition = f"""(test_records.{group_by} = profiling_records.{group_by} + OR (test_records.{group_by} IS NULL + AND profiling_records.{group_by} IS NULL))""" profile_records_filters = self._get_raw_query_filters( cde_only=score_type == "cde_score", diff --git a/testgen/template/score_cards/get_score_card_breakdown_by_column.sql b/testgen/template/score_cards/get_score_card_breakdown_by_column.sql index 1e255fc1..51af2a07 100644 --- a/testgen/template/score_cards/get_score_card_breakdown_by_column.sql +++ b/testgen/template/score_cards/get_score_card_breakdown_by_column.sql @@ -7,8 +7,7 @@ profiling_records AS ( SUM(record_ct) AS data_point_ct, SUM(record_ct * good_data_pct) / NULLIF(SUM(record_ct), 0) AS score FROM v_dq_profile_scoring_latest_by_column - WHERE NULLIF({group_by}, '') IS NOT NULL - AND {filters} + WHERE {filters} GROUP BY project_code, {columns} ), test_records AS ( @@ -19,8 +18,7 @@ test_records AS ( 
SUM(dq_record_ct) AS data_point_ct, SUM(dq_record_ct * good_data_pct) / NULLIF(SUM(dq_record_ct), 0) AS score FROM v_dq_test_scoring_latest_by_column - WHERE NULLIF({group_by}, '') IS NOT NULL - AND {filters} + WHERE {filters} GROUP BY project_code, {columns} ), parent AS ( diff --git a/testgen/template/score_cards/get_score_card_breakdown_by_dimension.sql b/testgen/template/score_cards/get_score_card_breakdown_by_dimension.sql index 9b3563a4..436e7ff7 100644 --- a/testgen/template/score_cards/get_score_card_breakdown_by_dimension.sql +++ b/testgen/template/score_cards/get_score_card_breakdown_by_dimension.sql @@ -7,8 +7,7 @@ profiling_records AS ( SUM(record_ct) AS data_point_ct, SUM(record_ct * good_data_pct) / NULLIF(SUM(record_ct), 0) AS score FROM v_dq_profile_scoring_latest_by_dimension - WHERE NULLIF({group_by}, '') IS NOT NULL - AND {filters} + WHERE {filters} GROUP BY project_code, {columns} ), test_records AS ( @@ -19,8 +18,7 @@ test_records AS ( SUM(dq_record_ct) AS data_point_ct, SUM(dq_record_ct * good_data_pct) / NULLIF(SUM(dq_record_ct), 0) AS score FROM v_dq_test_scoring_latest_by_dimension - WHERE NULLIF({group_by}, '') IS NOT NULL - AND {filters} + WHERE {filters} GROUP BY project_code, {columns} ), parent AS ( diff --git a/testgen/ui/components/frontend/js/components/score_breakdown.js b/testgen/ui/components/frontend/js/components/score_breakdown.js index e75f79b8..be530235 100644 --- a/testgen/ui/components/frontend/js/components/score_breakdown.js +++ b/testgen/ui/components/frontend/js/components/score_breakdown.js @@ -111,7 +111,7 @@ const TableCell = (row, column, score=undefined, category=undefined, scoreType=u const size = BREAKDOWN_COLUMNS_SIZES[column] ?? COLUMN_DEFAULT_SIZE; return div( { style: `flex: ${size}; max-width: ${size}; word-wrap: break-word;`, 'data-testid': 'score-breakdown-cell' }, - span(row[column]), + span(row[column] ?? '-'), ); }; From 656b7a66037e1e50dacc24fec120e23129b970b1 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Wed, 14 May 2025 17:57:13 -0400 Subject: [PATCH 24/34] fix(router): cookies are detected late on deployed instances --- testgen/ui/navigation/router.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/testgen/ui/navigation/router.py b/testgen/ui/navigation/router.py index 7729672b..4aae9eee 100644 --- a/testgen/ui/navigation/router.py +++ b/testgen/ui/navigation/router.py @@ -59,7 +59,11 @@ def run(self) -> None: # This hack is needed because the auth cookie is not retrieved on the first run # We have to store the page and wait until cookies are ready session.page_pending_cookies = current_page - st.rerun() + + # Don't use st.rerun() here! 
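Zooming out on the score-breakdown change above: dimension-style grouping values can be NULL on either side of the profiling/test join, and plain equality silently drops those rows because NULL = NULL does not evaluate to true in SQL. A condensed sketch of the condition now built in scores.py (same logic as the diff, standalone form):

```python
# NULL-safe join predicate for score-card breakdowns: column-style groupings keep
# plain equality, while other groupings also match when both sides are NULL.
def breakdown_join_condition(group_by: str, columns: list[str]) -> str:
    if group_by in ("table_groups_name", "table_name", "column_name"):
        return " AND ".join(
            f"test_records.{column} = profiling_records.{column}" for column in columns
        )
    return (
        f"(test_records.{group_by} = profiling_records.{group_by} "
        f"OR (test_records.{group_by} IS NULL AND profiling_records.{group_by} IS NULL))"
    )
```
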
+ # It will work fine locally, but cause a long initial load on deployed instances + # The time.sleep somehow causes the cookie to be detected quicker + time.sleep(0.3) def queue_navigation(self, /, to: str, with_args: dict | None = None) -> None: self._pending_navigation = {"to": to, "with_args": with_args or {}} From 8cefbce7fe518d15a8ae72576141f65ba7c03760 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Wed, 14 May 2025 17:57:40 -0400 Subject: [PATCH 25/34] fix(connection): bug in snowflake connection form --- testgen/ui/views/connections/page.py | 1 + 1 file changed, 1 insertion(+) diff --git a/testgen/ui/views/connections/page.py b/testgen/ui/views/connections/page.py index 92b87ce9..a8dd1787 100644 --- a/testgen/ui/views/connections/page.py +++ b/testgen/ui/views/connections/page.py @@ -98,6 +98,7 @@ def show_connection_form(self, selected_connection: dict, _mode: str, project_co FlavorForm = BaseConnectionForm.for_flavor(sql_flavor) if connection: connection["password"] = connection["password"] or "" + connection["private_key"] = connection["private_key"] or "" form_kwargs = connection or {"sql_flavor": sql_flavor, "connection_id": connection_id, "connection_name": connection_name} form = FlavorForm(**form_kwargs) From 2832d0921f742921232afbae49ba101805e18756 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Thu, 15 May 2025 01:34:17 -0400 Subject: [PATCH 26/34] fix(sidebar): logout cookies and case insensitive usernames --- testgen/ui/assets/style.css | 4 ++++ testgen/ui/components/frontend/js/components/sidebar.js | 2 +- testgen/ui/components/widgets/sidebar.py | 4 ++++ testgen/ui/services/user_session_service.py | 2 +- 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/testgen/ui/assets/style.css b/testgen/ui/assets/style.css index 25802d26..49a2f37f 100644 --- a/testgen/ui/assets/style.css +++ b/testgen/ui/assets/style.css @@ -47,6 +47,10 @@ header { /* ... 
*/ /* Sidebar */ +[data-testid="stSidebarHeader"] { + padding: 16px 20px; +} + section[data-testid="stSidebar"] { width: 250px; z-index: 999; diff --git a/testgen/ui/components/frontend/js/components/sidebar.js b/testgen/ui/components/frontend/js/components/sidebar.js index 6e83f3f8..b2da6405 100644 --- a/testgen/ui/components/frontend/js/components/sidebar.js +++ b/testgen/ui/components/frontend/js/components/sidebar.js @@ -251,7 +251,7 @@ stylesheet.replace(` display: flex; flex-direction: column; justify-content: space-between; - height: calc(100% - 78px); + height: calc(100% - 64px); } .menu .menu--project { diff --git a/testgen/ui/components/widgets/sidebar.py b/testgen/ui/components/widgets/sidebar.py index 44356750..9ff16734 100644 --- a/testgen/ui/components/widgets/sidebar.py +++ b/testgen/ui/components/widgets/sidebar.py @@ -1,4 +1,5 @@ import logging +import time from typing import Literal from testgen.ui.components.utils.component import component @@ -70,6 +71,9 @@ def on_change(): javascript_service.clear_component_states() user_session_service.end_user_session() Router().queue_navigation(to="") + # Without the time.sleep, cookies sometimes don't get cleared on deployed instances + # (even though it works fine locally) + time.sleep(0.3) else: Router().queue_navigation( to=event_data.get("path") or session.user_default_page, diff --git a/testgen/ui/services/user_session_service.py b/testgen/ui/services/user_session_service.py index 597bc45e..4d2dc840 100644 --- a/testgen/ui/services/user_session_service.py +++ b/testgen/ui/services/user_session_service.py @@ -78,7 +78,7 @@ def get_auth_data(): preauthorized_list = [] for item in auth_data.itertuples(): - usernames[item.username] = { + usernames[item.username.lower()] = { "email": item.email, "name": item.name, "password": item.password, From 290b2d2e423e2e93379ed3e35d23d19675b7398b Mon Sep 17 00:00:00 2001 From: Luis Date: Thu, 15 May 2025 12:54:08 -0400 Subject: [PATCH 27/34] fix: avoid cross-site scripting when rendering html --- testgen/ui/services/form_service.py | 2 +- testgen/ui/views/connections/forms.py | 5 ++--- .../ui/views/dialogs/application_logs_dialog.py | 2 +- testgen/ui/views/score_details.py | 5 +---- testgen/ui/views/table_groups/page.py | 14 +++++++++++--- testgen/ui/views/test_definitions.py | 7 +++---- testgen/ui/views/test_suites.py | 14 +++++++++++--- 7 files changed, 30 insertions(+), 19 deletions(-) diff --git a/testgen/ui/services/form_service.py b/testgen/ui/services/form_service.py index e350bc96..09981f80 100644 --- a/testgen/ui/services/form_service.py +++ b/testgen/ui/services/form_service.py @@ -507,7 +507,7 @@ def render_html_list(dct_row, lst_columns, str_section_header=None, int_data_wid str_markdown += f"""
{label}{dct_row[col]!s}
""" with st.container(): - st.markdown(str_markdown, unsafe_allow_html=True) + st.html(str_markdown) st.divider() diff --git a/testgen/ui/views/connections/forms.py b/testgen/ui/views/connections/forms.py index 23cf64b8..3ece7588 100644 --- a/testgen/ui/views/connections/forms.py +++ b/testgen/ui/views/connections/forms.py @@ -288,15 +288,14 @@ def render_extra( elif self._uploaded_file is None and (cached_file_upload := st.session_state.get(cached_file_upload_key)): self._uploaded_file = cached_file_upload file_size = f"{round(self._uploaded_file.size / 1024, 2)}KB" - container.markdown( + container.html( f"""
draft {self._uploaded_file.name} {file_size}
- """, - unsafe_allow_html=True, + """ ) def reset_cache(self) -> None: diff --git a/testgen/ui/views/dialogs/application_logs_dialog.py b/testgen/ui/views/dialogs/application_logs_dialog.py index de8abc80..72de5d44 100644 --- a/testgen/ui/views/dialogs/application_logs_dialog.py +++ b/testgen/ui/views/dialogs/application_logs_dialog.py @@ -65,7 +65,7 @@ def application_logs_dialog(): show_data = log_data # Refresh button - col3.markdown("
", unsafe_allow_html=True) + col3.html("
") if col3.button("Refresh"): # Clear cache to refresh the log data st.cache_data.clear() diff --git a/testgen/ui/views/score_details.py b/testgen/ui/views/score_details.py index 877c77a1..37f78cd3 100644 --- a/testgen/ui/views/score_details.py +++ b/testgen/ui/views/score_details.py @@ -169,10 +169,7 @@ def delete_score_card(definition_id: str) -> None: delete_clicked, set_delelte_clicked = temp_value( "score-details:confirm-delete-score-val" ) - st.markdown( - f"Are you sure you want to delete the scorecard {score_definition.name}?", - unsafe_allow_html=True, - ) + st.html(f"Are you sure you want to delete the scorecard {score_definition.name}?") _, button_column = st.columns([.85, .15]) with button_column: diff --git a/testgen/ui/views/table_groups/page.py b/testgen/ui/views/table_groups/page.py index 5614f141..5d4a50d7 100644 --- a/testgen/ui/views/table_groups/page.py +++ b/testgen/ui/views/table_groups/page.py @@ -166,9 +166,17 @@ def delete_table_group_dialog(self, table_group: pd.Series): ) if not can_be_deleted: - st.markdown( - ":orange[This Table Group has related data, which may include profiling, test definitions and test results. If you proceed, all related data will be permanently deleted.
Are you sure you want to proceed?]", - unsafe_allow_html=True, + st.html( + """ +
+ + This Table Group has related data, which may include profiling, test definitions and test results. + If you proceed, all related data will be permanently deleted. + +
+ Are you sure you want to proceed? +
+ """ ) accept_cascade_delete = st.toggle("I accept deletion of this Table Group and all related TestGen data.") diff --git a/testgen/ui/views/test_definitions.py b/testgen/ui/views/test_definitions.py index d50318f8..8c657609 100644 --- a/testgen/ui/views/test_definitions.py +++ b/testgen/ui/views/test_definitions.py @@ -343,13 +343,12 @@ def show_test_form( # Using the test_type, display the default description and usage_notes if selected_test_type_row["test_description"]: - st.markdown( + st.html( f"""
{selected_test_type_row['test_description']}

- """, - unsafe_allow_html=True, + """ ) if selected_test_type_row["usage_notes"]: @@ -854,7 +853,7 @@ def show_test_defs_grid( ) if dct_selected_row: - st.markdown("

 
", unsafe_allow_html=True) + st.html("

 
") selected_row = dct_selected_row[0] str_test_id = selected_row["id"] row_selected = df[df["id"] == str_test_id].iloc[0] diff --git a/testgen/ui/views/test_suites.py b/testgen/ui/views/test_suites.py index efe7c19d..fc40ae5a 100644 --- a/testgen/ui/views/test_suites.py +++ b/testgen/ui/views/test_suites.py @@ -258,9 +258,17 @@ def delete_test_suite_dialog(test_suite_id: str) -> None: ) if not can_be_deleted: - st.markdown( - ":orange[This Test Suite has related data, which includes test definitions and may include test results. If you proceed, all related data will be permanently deleted.
Are you sure you want to proceed?]", - unsafe_allow_html=True, + st.html( + """ +
+ + This Test Suite has related data, which includes test definitions and may + include test results. If you proceed, all related data will be permanently deleted. + +
+ Are you sure you want to proceed? +
+ """ ) accept_cascade_delete = st.toggle("I accept deletion of this Test Suite and all related TestGen data.") From b836299b60d50297f13b0a843e3e09107089ce02 Mon Sep 17 00:00:00 2001 From: Luis Date: Tue, 20 May 2025 10:57:34 -0400 Subject: [PATCH 28/34] fix(scoring): force history date into utc timezone --- testgen/common/models/scores.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/testgen/common/models/scores.py b/testgen/common/models/scores.py index 25105e33..bf0039bf 100644 --- a/testgen/common/models/scores.py +++ b/testgen/common/models/scores.py @@ -2,7 +2,7 @@ import uuid from collections import defaultdict from collections.abc import Iterable -from datetime import datetime +from datetime import UTC, datetime from typing import Literal, Self, TypedDict import pandas as pd @@ -237,7 +237,11 @@ def as_cached_score_card(self) -> "ScoreCard": for entry in self.history[-50:]: if entry.category in history_categories: - score_card["history"].append({"score": entry.score, "category": entry.category, "time": entry.last_run_time}) + score_card["history"].append({ + "score": entry.score, + "category": entry.category, + "time": entry.last_run_time.replace(tzinfo=UTC), + }) return score_card From 7416dc40e0e1692e21cfcd890fac2fe2246c1120 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Tue, 20 May 2025 21:42:43 -0400 Subject: [PATCH 29/34] fix(cli): print run id even if score refresh fails --- testgen/commands/run_execute_cat_tests.py | 24 +++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/testgen/commands/run_execute_cat_tests.py b/testgen/commands/run_execute_cat_tests.py index 28848bda..b76e7246 100644 --- a/testgen/commands/run_execute_cat_tests.py +++ b/testgen/commands/run_execute_cat_tests.py @@ -71,16 +71,20 @@ def FinalizeTestRun(clsCATExecute: CCATExecutionSQL): ]) end_time = datetime.now(UTC) - RunActionQueryList(("DKTG"), [ - clsCATExecute.CalcPrevalenceTestResultsSQL(), - clsCATExecute.TestScoringRollupRunSQL(), - clsCATExecute.TestScoringRollupTableGroupSQL(), - ]) - run_refresh_score_cards_results( - project_code=clsCATExecute.project_code, - add_history_entry=True, - refresh_date=date_service.parse_now(clsCATExecute.run_date), - ) + try: + RunActionQueryList(("DKTG"), [ + clsCATExecute.CalcPrevalenceTestResultsSQL(), + clsCATExecute.TestScoringRollupRunSQL(), + clsCATExecute.TestScoringRollupTableGroupSQL(), + ]) + run_refresh_score_cards_results( + project_code=clsCATExecute.project_code, + add_history_entry=True, + refresh_date=date_service.parse_now(clsCATExecute.run_date), + ) + except Exception: + LOG.exception("Error refreshing scores after test run") + pass MixpanelService().send_event( "run-tests", From 77cd25575651b09131fa08aa7f189d1b42858b66 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Thu, 15 May 2025 19:00:47 -0400 Subject: [PATCH 30/34] fix(test-results): handle non-existent test definitions --- testgen/ui/views/test_results.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py index e0307873..38410faa 100644 --- a/testgen/ui/views/test_results.py +++ b/testgen/ui/views/test_results.py @@ -381,6 +381,10 @@ def get_test_result_history(selected_row): def show_test_def_detail(str_test_def_id): + if not str_test_def_id: + st.warning("Test definition no longer exists.") + return + df = get_test_definition(str_test_def_id) specs = [] @@ -758,9 +762,10 @@ def source_data_dialog(selected_row): def 
view_edit_test(button_container, test_definition_id): - with button_container: - if st.button(":material/edit: Edit Test", help="Edit the Test Definition", use_container_width=True): - show_test_form_by_id(test_definition_id) + if test_definition_id: + with button_container: + if st.button(":material/edit: Edit Test", help="Edit the Test Definition", use_container_width=True): + show_test_form_by_id(test_definition_id) def get_report_file_data(update_progress, tr_data) -> FILE_DATA_TYPE: From 92d4456cb5f178e8c13d65e6509fae49f87e74a5 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Tue, 20 May 2025 21:45:30 -0400 Subject: [PATCH 31/34] fix(tests): skip column validation for aggregate tests --- testgen/template/validate_tests/ex_get_test_column_list_tg.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/testgen/template/validate_tests/ex_get_test_column_list_tg.sql b/testgen/template/validate_tests/ex_get_test_column_list_tg.sql index 8ad80c3d..6beec89c 100644 --- a/testgen/template/validate_tests/ex_get_test_column_list_tg.sql +++ b/testgen/template/validate_tests/ex_get_test_column_list_tg.sql @@ -24,6 +24,7 @@ WHERE test_suite_id = '{TEST_SUITE_ID}' AND COALESCE(test_active, 'Y') = 'Y' AND t.test_scope = 'referential' + AND t.test_type NOT LIKE 'Aggregate_%' UNION -- FROM: groupby_names (should be referential) SELECT cat_test_id, @@ -60,6 +61,7 @@ WHERE test_suite_id = '{TEST_SUITE_ID}' AND COALESCE(test_active, 'Y') = 'Y' AND t.test_scope = 'referential' + AND t.test_type NOT LIKE 'Aggregate_%' UNION -- FROM: match_groupby_names (referential) SELECT cat_test_id, From c950481beb9c2b1e1dd79ad3dce8815eaa2608c6 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Wed, 21 May 2025 13:09:47 -0400 Subject: [PATCH 32/34] fix(test-schedules): incorrect parameters passed to cli --- testgen/ui/views/test_runs.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/testgen/ui/views/test_runs.py b/testgen/ui/views/test_runs.py index 621df7ab..ec6c96cc 100644 --- a/testgen/ui/views/test_runs.py +++ b/testgen/ui/views/test_runs.py @@ -123,19 +123,17 @@ def init(self) -> None: self.test_suites = get_db_test_suite_choices(self.project_code) def get_arg_value(self, job): - return self.test_suites.loc[ - self.test_suites["id"] == job.kwargs["test_suite_id"], "test_suite" - ].iloc[0] + return job.kwargs["test_suite_key"] def arg_value_input(self) -> tuple[bool, list[typing.Any], dict[str, typing.Any]]: - ts_id = testgen.select( + ts_name = testgen.select( label="Test Suite", options=self.test_suites, - value_column="id", + value_column="test_suite", display_column="test_suite", required=True, ) - return bool(ts_id), [], {"test_suite_id": ts_id} + return bool(ts_name), [], {"project_code": self.project_code, "test_suite_key": ts_name} def render_empty_state(project_code: str, user_can_run: bool) -> bool: From 852ed1726e57303a25030a4dd327cf9ffb4825cc Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Wed, 21 May 2025 15:16:35 -0400 Subject: [PATCH 33/34] fix(data-preview): use top for sql server --- .../ui/views/dialogs/data_preview_dialog.py | 42 +++++++++++-------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/testgen/ui/views/dialogs/data_preview_dialog.py b/testgen/ui/views/dialogs/data_preview_dialog.py index a1d199ee..dd8f6195 100644 --- a/testgen/ui/views/dialogs/data_preview_dialog.py +++ b/testgen/ui/views/dialogs/data_preview_dialog.py @@ -63,29 +63,35 @@ def get_preview_data( connection_df = db.retrieve_data(connection_query).iloc[0] if not 
connection_df.empty: + use_top = connection_df["sql_flavor"] == "mssql" query = f""" SELECT + {"TOP 100" if use_top else ""} {column_name or "*"} FROM {schema_name}.{table_name} - LIMIT 100 + {"LIMIT 100" if not use_top else ""} """ - df = db.retrieve_target_db_df( - connection_df["sql_flavor"], - connection_df["project_host"], - connection_df["project_port"], - connection_df["project_db"], - connection_df["project_user"], - connection_df["project_pw_encrypted"], - query, - connection_df["url"], - connection_df["connect_by_url"], - connection_df["connect_by_key"], - connection_df["private_key"], - connection_df["private_key_passphrase"], - connection_df["http_path"], - ) - df.index = df.index + 1 - return df + try: + df = db.retrieve_target_db_df( + connection_df["sql_flavor"], + connection_df["project_host"], + connection_df["project_port"], + connection_df["project_db"], + connection_df["project_user"], + connection_df["project_pw_encrypted"], + query, + connection_df["url"], + connection_df["connect_by_url"], + connection_df["connect_by_key"], + connection_df["private_key"], + connection_df["private_key_passphrase"], + connection_df["http_path"], + ) + except: + return pd.DataFrame() + else: + df.index = df.index + 1 + return df else: return pd.DataFrame() From 0a521a6755757fce7e77a1606e06f6ec9305ed64 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Wed, 21 May 2025 15:59:58 -0400 Subject: [PATCH 34/34] release: 3.7.9 -> 4.0.9 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index dec0cfbf..066d31db 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ build-backend = "setuptools.build_meta" [project] name = "dataops-testgen" -version = "3.7.9" +version = "4.0.9" description = "DataKitchen's Data Quality DataOps TestGen" authors = [ { "name" = "DataKitchen, Inc.", "email" = "info@datakitchen.io" },
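
As a closing illustration of the data-preview change above: SQL Server has no LIMIT clause, so the preview query switches to TOP for the mssql flavor. A standalone sketch of that branch (build_preview_query is a hypothetical name, not the dialog's code):

```python
def build_preview_query(
    sql_flavor: str,
    schema_name: str,
    table_name: str,
    column_name: str | None = None,
    row_limit: int = 100,
) -> str:
    """Row-limited preview: TOP for SQL Server, LIMIT for the other flavors."""
    use_top = sql_flavor == "mssql"
    return f"""
        SELECT
        {("TOP " + str(row_limit)) if use_top else ""}
        {column_name or "*"}
        FROM {schema_name}.{table_name}
        {("LIMIT " + str(row_limit)) if not use_top else ""}
    """

# e.g. build_preview_query("mssql", "dbo", "customers") yields (modulo whitespace):
#   SELECT TOP 100 * FROM dbo.customers
# while build_preview_query("postgresql", "public", "orders", "order_id") yields:
#   SELECT order_id FROM public.orders LIMIT 100
```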