diff --git a/deploy/install_arrow.sh b/deploy/install_arrow.sh deleted file mode 100644 index f413c1da..00000000 --- a/deploy/install_arrow.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env bash - -export ARROW_VERSION=18.0.0 -export ARROW_SHA256=9c473f2c9914c59ab571761c9497cf0e5cfd3ea335f7782ccc6121f5cb99ae9b - -export ARROW_HOME=/dk -export PARQUET_HOME=/dk - -mkdir /arrow - -# Obtaining and expanding Arrow -wget -q https://github.com/apache/arrow/archive/apache-arrow-${ARROW_VERSION}.tar.gz -O /tmp/apache-arrow.tar.gz -echo "${ARROW_SHA256} *apache-arrow.tar.gz" | sha256sum /tmp/apache-arrow.tar.gz -tar -xvf /tmp/apache-arrow.tar.gz -C /arrow --strip-components 1 - -pushd /arrow/cpp - -# Configure the build using CMake -cmake --preset ninja-release-python - -# Configuring cmake for ARM only -if [ "$(uname -m)" = "arm64" ] || [ "$(uname -m)" = "aarch64" ]; then - cmake -DCMAKE_CXX_FLAGS="-march=armv8-a" -fi - -# Pre-fetch dependencies without building -cmake --build . --target re2_ep -- -j2 || true - -# Apply the patch to re2 after the dependencies are fetched but before the build -pushd re2_ep-prefix/src/re2_ep - -cat < -EOF - -popd - -# Finish processing dependencies after patch -cmake --build . --target re2_ep -- -j2 - -# Continue with the build and install Apache Arrow -cmake --build . --target install - -popd - -rm -rf /arrow /tmp/apache-arrow.tar.gz diff --git a/deploy/install_linuxodbc.sh b/deploy/install_linuxodbc.sh deleted file mode 100644 index b88b0fe5..00000000 --- a/deploy/install_linuxodbc.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env bash - -# From: https://learn.microsoft.com/en-us/sql/connect/odbc/linux-mac/installing-the-microsoft-odbc-driver-for-sql-server -# modifications: Added --non-interactive and --no-cache flags, removed sudo, added aarch64 as an alias for arm64 - -case $(uname -m) in - x86_64) architecture="amd64" ;; - arm64) architecture="arm64" ;; - aarch64) architecture="arm64" ;; - *) architecture="unsupported" ;; -esac -if [[ "unsupported" == "$architecture" ]]; -then - echo "Alpine architecture $(uname -m) is not currently supported."; - exit; -fi - -#Download the desired package(s) -curl -O https://download.microsoft.com/download/7/6/d/76de322a-d860-4894-9945-f0cc5d6a45f8/msodbcsql18_18.4.1.1-1_$architecture.apk -curl -O https://download.microsoft.com/download/7/6/d/76de322a-d860-4894-9945-f0cc5d6a45f8/mssql-tools18_18.4.1.1-1_$architecture.apk - -#(Optional) Verify signature, if 'gpg' is missing install it using 'apk add gnupg': -curl -O https://download.microsoft.com/download/7/6/d/76de322a-d860-4894-9945-f0cc5d6a45f8/msodbcsql18_18.4.1.1-1_$architecture.sig -curl -O https://download.microsoft.com/download/7/6/d/76de322a-d860-4894-9945-f0cc5d6a45f8/mssql-tools18_18.4.1.1-1_$architecture.sig - -curl https://packages.microsoft.com/keys/microsoft.asc | gpg --import - -gpg --verify msodbcsql18_18.4.1.1-1_$architecture.sig msodbcsql18_18.4.1.1-1_$architecture.apk -gpg --verify mssql-tools18_18.4.1.1-1_$architecture.sig mssql-tools18_18.4.1.1-1_$architecture.apk - -#Install the package(s) -apk add --no-cache --non-interactive --allow-untrusted msodbcsql18_18.4.1.1-1_$architecture.apk -apk add --no-cache --non-interactive --allow-untrusted mssql-tools18_18.4.1.1-1_$architecture.apk diff --git a/deploy/testgen-base.dockerfile b/deploy/testgen-base.dockerfile index 8233eab3..9b5f03f6 100644 --- a/deploy/testgen-base.dockerfile +++ b/deploy/testgen-base.dockerfile @@ -6,77 +6,41 @@ ENV PYTHONDONTWRITEBYTECODE=1 ENV PYTHONFAULTHANDLER=1 ENV ACCEPT_EULA=Y -RUN 
apk update && apk add --no-cache \ +RUN apk update && apk upgrade && apk add --no-cache \ + # Tools needed for building the python wheels gcc \ g++ \ - bash \ - libffi-dev \ - openssl-dev \ - cargo \ - musl-dev \ - postgresql-dev \ + make \ cmake \ - rust \ - linux-headers \ - libc-dev \ - libgcc \ - libstdc++ \ - ca-certificates \ - zlib-dev \ - bzip2-dev \ - xz-dev \ - lz4-dev \ - zstd-dev \ - snappy-dev \ - brotli-dev \ - build-base \ - autoconf \ - boost-dev \ - flex \ - libxml2-dev \ - libxslt-dev \ - libjpeg-turbo-dev \ - ninja \ - git \ - curl \ - unixodbc-dev \ - gpg \ - openssl=3.3.2-r1 \ + musl-dev \ gfortran \ - openblas-dev - -RUN mkdir /dk + linux-headers=6.6-r0 \ + # Additional libraries needed and their dev counterparts. We add both so that we can remove + # the *-dev later, keeping the libraries + openblas=0.3.28-r0 \ + openblas-dev=0.3.28-r0 \ + unixodbc=2.3.12-r0 \ + unixodbc-dev=2.3.12-r0 -COPY --chmod=775 ./deploy/install_linuxodbc.sh /tmp/dk/install_linuxodbc.sh -RUN /tmp/dk/install_linuxodbc.sh - -COPY --chmod=775 ./deploy/install_arrow.sh /tmp/dk/install_arrow.sh -RUN /tmp/dk/install_arrow.sh +RUN apk add --no-cache \ + --repository https://dl-cdn.alpinelinux.org/alpine/v3.21/community \ + --repository https://dl-cdn.alpinelinux.org/alpine/v3.21/main \ + libarrow=18.1.0-r0 \ + apache-arrow-dev=18.1.0-r0 # Install TestGen's main project empty pyproject.toml to install (and cache) the dependencies first COPY ./pyproject.toml /tmp/dk/pyproject.toml +RUN mkdir /dk RUN python3 -m pip install --prefix=/dk /tmp/dk RUN apk del \ gcc \ g++ \ - bash \ - libffi-dev \ - openssl-dev \ - cargo \ - musl-dev \ - postgresql-dev \ + make \ cmake \ - rust \ + musl-dev \ + gfortran \ linux-headers \ - libc-dev \ - build-base \ - autoconf \ - boost-dev \ - flex \ - ninja \ - curl \ + openblas-dev \ unixodbc-dev \ - gpg \ - ca-certificates \ - git + apache-arrow-dev diff --git a/deploy/testgen.dockerfile b/deploy/testgen.dockerfile index 054de771..840867a1 100644 --- a/deploy/testgen.dockerfile +++ b/deploy/testgen.dockerfile @@ -1,38 +1,30 @@ -ARG TESTGEN_BASE_LABEL=v1 +ARG TESTGEN_BASE_LABEL=v3 -FROM datakitchen/dataops-testgen-base:${TESTGEN_BASE_LABEL} AS build-image - -# Now install everything -COPY . /tmp/dk/ -RUN python3 -m pip install --prefix=/dk /tmp/dk - -FROM python:3.12.7-alpine3.20 AS release-image +FROM datakitchen/dataops-testgen-base:${TESTGEN_BASE_LABEL} AS release-image # Args have to be set in current build stage: https://github.com/moby/moby/issues/37345 ARG TESTGEN_VERSION ARG TESTGEN_DOCKER_HUB_REPO -RUN addgroup -S testgen && adduser -S testgen -G testgen +# Now install everything +COPY . 
/tmp/dk/ +RUN python3 -m pip install --prefix=/dk /tmp/dk +RUN rm -Rf /tmp/dk -COPY --from=build-image --chown=testgen:testgen /dk/ /dk -COPY --from=build-image /usr/local/lib/ /usr/local/lib -COPY --from=build-image /usr/lib/ /usr/lib -COPY --from=build-image /opt/microsoft/ /opt/microsoft -COPY --from=build-image /etc/odbcinst.ini /etc/odbcinst.ini +RUN addgroup -S testgen && adduser -S testgen -G testgen -# The OpenSSL upgrade is not carried from the build image, so we have to upgrade it again -#RUN apk add --no-cache openssl=3.3.2-r1 +# Streamlit has to be able to write to these dirs +RUN mkdir /var/lib/testgen +RUN chown -R testgen:testgen /var/lib/testgen /dk/lib/python3.12/site-packages/streamlit/static ENV PYTHONPATH=/dk/lib/python3.12/site-packages -ENV PATH="$PATH:/dk/bin:/opt/mssql-tools/bin/" +ENV PATH=$PATH:/dk/bin ENV TESTGEN_VERSION=${TESTGEN_VERSION} ENV TG_RELEASE_CHECK=docker ENV TESTGEN_DOCKER_HUB_REPO=${TESTGEN_DOCKER_HUB_REPO} ENV STREAMLIT_SERVER_MAX_UPLOAD_SIZE=200 -RUN mkdir /var/lib/testgen && chown testgen:testgen /var/lib/testgen - USER testgen WORKDIR /dk diff --git a/pyproject.toml b/pyproject.toml index f540f05d..43c18c60 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ build-backend = "setuptools.build_meta" [project] name = "dataops-testgen" -version = "3.1.2" +version = "3.7.1" description = "DataKitchen's Data Quality DataOps TestGen" authors = [ { "name" = "DataKitchen, Inc.", "email" = "info@datakitchen.io" }, @@ -31,6 +31,7 @@ dependencies = [ "PyYAML==6.0.1", "click==8.1.3", "sqlalchemy==1.4.46", + "databricks-sql-connector==2.9.3", "snowflake-sqlalchemy==1.6.1", "pyodbc==5.0.0", "psycopg2-binary==2.9.9", @@ -56,17 +57,19 @@ dependencies = [ "xlsxwriter==3.2.0", "psutil==5.9.8", "concurrent_log_handler==0.9.25", - "cryptography==43.0.1", + "cryptography==44.0.1", "validators==0.33.0", "reportlab==4.2.2", - "pydantic==1.10.11", + "pydantic==1.10.13", "streamlit-pydantic==0.6.0", # Pinned to match the manually compiled libs or for security - "pyarrow==18.0.0", - "snowflake-connector-python==3.12.3", + "pyarrow==18.1.0", + "snowflake-connector-python==3.13.1", "matplotlib==3.9.2", "scipy==1.14.1", + "tornado==6.4.2", + "jinja2==3.1.6", ] [project.optional-dependencies] diff --git a/testgen/commands/queries/execute_cat_tests_query.py b/testgen/commands/queries/execute_cat_tests_query.py index aece6e86..f3c94ff8 100644 --- a/testgen/commands/queries/execute_cat_tests_query.py +++ b/testgen/commands/queries/execute_cat_tests_query.py @@ -39,9 +39,9 @@ def __init__(self, strProjectCode, strTestSuiteId, strTestSuite, strSQLFlavor, m def _get_rollup_scores_sql(self) -> CRollupScoresSQL: if not self._rollup_scores_sql: self._rollup_scores_sql = CRollupScoresSQL(self.test_run_id, self.table_groups_id) - + return self._rollup_scores_sql - + def _ReplaceParms(self, strInputString): strInputString = strInputString.replace("{MAX_QUERY_CHARS}", str(self.max_query_chars)) strInputString = strInputString.replace("{TEST_RUN_ID}", self.test_run_id) @@ -51,6 +51,7 @@ def _ReplaceParms(self, strInputString): strInputString = strInputString.replace("{TABLE_GROUPS_ID}", self.table_groups_id) strInputString = strInputString.replace("{SQL_FLAVOR}", self.flavor) + strInputString = strInputString.replace("{ID_SEPARATOR}", "`" if self.flavor == "databricks" else '"') strInputString = strInputString.replace("{CONCAT_OPERATOR}", self.concat_operator) strInputString = strInputString.replace("{SCHEMA_NAME}", self.target_schema) @@ -71,8 +72,9 @@ def _ReplaceParms(self, 
strInputString): strInputString = replace_templated_functions(strInputString, self.flavor) - # Adding escape character where ':' is referenced - strInputString = strInputString.replace(":", "\\:") + if self.flavor != "databricks": + # Adding escape character where ':' is referenced + strInputString = strInputString.replace(":", "\\:") return strInputString @@ -110,12 +112,12 @@ def PushTestRunStatusUpdateSQL(self): def FinalizeTestSuiteUpdateSQL(self): strQ = self._ReplaceParms(read_template_sql_file("ex_update_test_suite.sql", "execution")) return strQ - + def CalcPrevalenceTestResultsSQL(self): return self._ReplaceParms(read_template_sql_file("ex_calc_prevalence_test_results.sql", "execution")) def TestScoringRollupRunSQL(self): return self._get_rollup_scores_sql().GetRollupScoresTestRunQuery() - + def TestScoringRollupTableGroupSQL(self): return self._get_rollup_scores_sql().GetRollupScoresTestTableGroupQuery() diff --git a/testgen/commands/queries/execute_tests_query.py b/testgen/commands/queries/execute_tests_query.py index 23204a1e..20b0cf2d 100644 --- a/testgen/commands/queries/execute_tests_query.py +++ b/testgen/commands/queries/execute_tests_query.py @@ -53,6 +53,7 @@ def _ReplaceParms(self, strInputString: str): strInputString = strInputString.replace("{EXCEPTION_MESSAGE}", self.exception_message) strInputString = strInputString.replace("{START_TIME}", self.today) strInputString = strInputString.replace("{PROCESS_ID}", str(self.process_id)) + strInputString = strInputString.replace("{VARCHAR_TYPE}", "STRING" if self.flavor == "databricks" else "VARCHAR") strInputString = strInputString.replace( "{NOW}", date_service.get_now_as_string_with_offset(self.minutes_offset) ) @@ -88,9 +89,9 @@ def _ReplaceParms(self, strInputString: str): if parm == "subset_condition": strInputString = strInputString.replace("{SUBSET_DISPLAY}", value.replace("'", "''") if value else "") - - # Adding escape character where ':' is referenced - strInputString = strInputString.replace(":", "\\:") + if self.flavor != "databricks": + # Adding escape character where ':' is referenced + strInputString = strInputString.replace(":", "\\:") return strInputString diff --git a/testgen/commands/run_execute_cat_tests.py b/testgen/commands/run_execute_cat_tests.py index a19a95ae..496e2d45 100644 --- a/testgen/commands/run_execute_cat_tests.py +++ b/testgen/commands/run_execute_cat_tests.py @@ -7,6 +7,7 @@ RunActionQueryList, RunThreadedRetrievalQueryList, WriteListToDB, + date_service, ) LOG = logging.getLogger("testgen") @@ -67,7 +68,11 @@ def FinalizeTestRun(clsCATExecute: CCATExecutionSQL): clsCATExecute.TestScoringRollupRunSQL(), clsCATExecute.TestScoringRollupTableGroupSQL()] RunActionQueryList(("DKTG"), lstQueries) - run_refresh_score_cards_results(project_code=clsCATExecute.project_code) + run_refresh_score_cards_results( + project_code=clsCATExecute.project_code, + add_history_entry=True, + refresh_date=date_service.parse_now(clsCATExecute.run_date), + ) def run_cat_test_queries( diff --git a/testgen/commands/run_execute_tests.py b/testgen/commands/run_execute_tests.py index e5afd1a2..2ee93ba9 100644 --- a/testgen/commands/run_execute_tests.py +++ b/testgen/commands/run_execute_tests.py @@ -144,6 +144,7 @@ def run_execution_steps(project_code: str, test_suite: str, minutes_offset: int= test_exec_params["connect_by_key"], test_exec_params["private_key"], test_exec_params["private_key_passphrase"], + test_exec_params["http_path"], "PROJECT", ) diff --git a/testgen/commands/run_generate_tests.py 
b/testgen/commands/run_generate_tests.py index f1ca058f..bac1bfac 100644 --- a/testgen/commands/run_generate_tests.py +++ b/testgen/commands/run_generate_tests.py @@ -34,6 +34,7 @@ def run_test_gen_queries(strTableGroupsID, strTestSuite, strGenerationSet=None): dctParms["connect_by_key"], dctParms["private_key"], dctParms["private_key_passphrase"], + dctParms["http_path"], "PROJECT", ) diff --git a/testgen/commands/run_launch_db_config.py b/testgen/commands/run_launch_db_config.py index 8005fb5f..2a50126a 100644 --- a/testgen/commands/run_launch_db_config.py +++ b/testgen/commands/run_launch_db_config.py @@ -6,6 +6,7 @@ from testgen.common.credentials import get_tg_db, get_tg_schema from testgen.common.database.database_service import get_queries_for_command from testgen.common.encrypt import EncryptText, encrypt_ui_password +from testgen.common.models import with_database_session from testgen.common.models.scores import ScoreDefinition from testgen.common.read_file import get_template_files @@ -68,6 +69,7 @@ def _get_params_mapping() -> dict: } +@with_database_session def run_launch_db_config(delete_db: bool) -> None: params_mapping = _get_params_mapping() diff --git a/testgen/commands/run_profiling_bridge.py b/testgen/commands/run_profiling_bridge.py index dadf58d0..3bafb1d4 100644 --- a/testgen/commands/run_profiling_bridge.py +++ b/testgen/commands/run_profiling_bridge.py @@ -17,6 +17,7 @@ RunActionQueryList, RunThreadedRetrievalQueryList, WriteListToDB, + date_service, ) from testgen.common.database.database_service import empty_cache @@ -273,6 +274,7 @@ def run_profiling_queries(strTableGroupsID, spinner=None): dctParms["connect_by_key"], dctParms["private_key"], dctParms["private_key_passphrase"], + dctParms["http_path"], "PROJECT", ) @@ -498,7 +500,11 @@ def run_profiling_queries(strTableGroupsID, spinner=None): clsProfiling.GetAnomalyScoringRollupTableGroupQuery(), ] RunActionQueryList("DKTG", lstProfileRunQuery) - run_refresh_score_cards_results(project_code=dctParms["project_code"]) + run_refresh_score_cards_results( + project_code=dctParms["project_code"], + add_history_entry=True, + refresh_date=date_service.parse_now(clsProfiling.run_date), + ) if booErrors: str_error_status = "with errors. Check log for details." 
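For context on the http_path value now threaded through AssignConnectParms here and in the other call sites: Databricks SQL warehouses are addressed by an HTTP path in addition to host and port, and the new DatabricksFlavorService introduced later in this diff appends it to the SQLAlchemy URL as a query parameter. A minimal sketch of the connection that results; every value below is made up:

    from urllib.parse import quote_plus

    from sqlalchemy import create_engine

    # Hypothetical values; in TestGen these come from the `connections` row,
    # including the new `http_path` column added in this change.
    host = "adb-1234567890123456.7.azuredatabricks.net"
    port = 443
    dbname = "default"
    username = "token"
    password = "dapi-example-token"  # hypothetical personal access token
    http_path = "/sql/1.0/warehouses/abc123"

    # Mirrors DatabricksFlavorService.get_connection_string_from_fields():
    # the warehouse HTTP path rides along as a URL query parameter.
    url = f"databricks://{username}:{quote_plus(password)}@{host}:{port}/{dbname}?http_path={http_path}"
    # Assumes the SQLAlchemy dialect registered by databricks-sql-connector
    # (pinned to 2.9.3 in pyproject.toml above) under the "databricks" scheme.
    engine = create_engine(url)

Callers that have no warehouse path, such as run_quick_start below, simply pass None for the new parameter.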
diff --git a/testgen/commands/run_quick_start.py b/testgen/commands/run_quick_start.py
index 487c47dc..1ab68f40 100644
--- a/testgen/commands/run_quick_start.py
+++ b/testgen/commands/run_quick_start.py
@@ -83,6 +83,7 @@ def _prepare_connection_to_target_database(params_mapping):
         False,
         None,
         None,
+        None,
         "PROJECT",
     )
 
diff --git a/testgen/commands/run_refresh_score_cards_results.py b/testgen/commands/run_refresh_score_cards_results.py
index 5d6d117b..dc4f93b2 100644
--- a/testgen/commands/run_refresh_score_cards_results.py
+++ b/testgen/commands/run_refresh_score_cards_results.py
@@ -1,13 +1,28 @@
+import datetime
 import logging
 import time
 
-from testgen.common.models.scores import ScoreCard, ScoreDefinition, ScoreDefinitionBreakdownItem, ScoreDefinitionResult
+from testgen.common.models import with_database_session
+from testgen.common.models.scores import (
+    ScoreCard,
+    ScoreDefinition,
+    ScoreDefinitionBreakdownItem,
+    ScoreDefinitionResult,
+    ScoreDefinitionResultHistoryEntry,
+)
 
 LOG = logging.getLogger("testgen")
 
 
-def run_refresh_score_cards_results(project_code: str | None = None, definition_id: str | None = None):
+@with_database_session
+def run_refresh_score_cards_results(
+    project_code: str | None = None,
+    definition_id: str | None = None,
+    add_history_entry: bool = False,
+    refresh_date: datetime.datetime | None = None,
+):
     start_time = time.time()
+    _refresh_date = refresh_date or datetime.datetime.now(datetime.UTC)
     LOG.info("CurrentStep: Initializing scorecards results refresh")
 
     try:
@@ -31,6 +46,24 @@ def run_refresh_score_cards_results(project_code: str | None = None, definition_
             fresh_score_card = definition.as_score_card()
             definition.results = _score_card_to_results(fresh_score_card)
             definition.breakdown = _score_definition_to_results_breakdown(definition)
+            if add_history_entry:
+                LOG.info(
+                    "CurrentStep: Adding history entry for scorecard %s in project %s",
+                    definition.name,
+                    definition.project_code,
+                )
+
+                historical_categories = ["score", "cde_score"]
+                for result in definition.results:
+                    if result.category in historical_categories:
+                        history_entry = ScoreDefinitionResultHistoryEntry(
+                            definition_id=result.definition_id,
+                            category=result.category,
+                            score=result.score,
+                            last_run_time=_refresh_date,
+                        )
+                        definition.history.append(history_entry)
+                        history_entry.add_as_cutoff()
             definition.save()
             LOG.info(
                 "CurrentStep: Done refreshing scorecard %s in project %s",
                 definition.name,
                 definition.project_code,
             )
         except Exception:
             LOG.exception(
-                "CurrentStep: Unexpected error refreshing scorecard %sin project %s",
+                "CurrentStep: Unexpected error refreshing scorecard %s in project %s",
                 definition.name,
                 definition.project_code,
             )
@@ -105,3 +138,25 @@ def _score_definition_to_results_breakdown(score_definition: ScoreDefinition) ->
     ])
 
     return all_breakdown_items
+
+
+@with_database_session
+def run_recalculate_score_card(*, project_code: str, definition_id: str):
+    LOG.info("Recalculating history for scorecard %s in project %s", definition_id, project_code)
+    start_time = time.time()
+
+    try:
+        definition = ScoreDefinition.get(str(definition_id))
+        definition.recalculate_scores_history()
+        definition.save()
+    except Exception:
+        LOG.exception("CurrentStep: Stopping history recalculation after unexpected error")
+        return
+
+    end_time = time.time()
+    LOG.info(
+        "Recalculating history for scorecard %s in project %s finished after %s seconds",
+        definition_id,
+        project_code,
+        round(end_time - start_time, 2),
+    )
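Both entry points above are decorated with @with_database_session, added to testgen/common/models/__init__.py later in this diff, so each opens a thread-local SQLAlchemy session on entry unless one is already active. A minimal usage sketch, with a hypothetical project code and definition id:

    import datetime

    from testgen.commands.run_refresh_score_cards_results import (
        run_recalculate_score_card,
        run_refresh_score_cards_results,
    )

    # Refresh every scorecard for a project and record a history data point,
    # as FinalizeTestRun and run_profiling_queries now do after each run.
    run_refresh_score_cards_results(
        project_code="DEFAULT",  # hypothetical project code
        add_history_entry=True,
        refresh_date=datetime.datetime(2025, 1, 15, tzinfo=datetime.UTC),
    )

    # Rebuild the stored score/cde_score history for a single definition.
    run_recalculate_score_card(
        project_code="DEFAULT",
        definition_id="00000000-0000-4000-8000-000000000000",  # hypothetical UUID
    )

When refresh_date is omitted, the current UTC time is used, matching the _refresh_date fallback above.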
diff --git a/testgen/commands/run_test_parameter_validation.py b/testgen/commands/run_test_parameter_validation.py index b60112fb..b2b98936 100644 --- a/testgen/commands/run_test_parameter_validation.py +++ b/testgen/commands/run_test_parameter_validation.py @@ -15,6 +15,7 @@ def run_parameter_validation_queries( dctParms, test_run_id="", test_time="", strTestSuite="" ): + LOG.info("CurrentStep: Initializing Test Parameter Validation") clsExecute = CTestParamValidationSQL(dctParms["sql_flavor"], dctParms["test_suite_id"]) clsExecute.run_date = test_time @@ -31,6 +32,7 @@ def run_parameter_validation_queries( if not test_columns: LOG.warning(f"No test columns are present to validate in Test Suite {strTestSuite}") missing_columns = [] + missing_tables = set() else: # Derive test schema list -- make CSV string from list of columns # to be used as criteria for retrieving data dictionary diff --git a/testgen/common/database/database_service.py b/testgen/common/database/database_service.py index feecc271..bf722c4b 100644 --- a/testgen/common/database/database_service.py +++ b/testgen/common/database/database_service.py @@ -42,6 +42,7 @@ class CConnectParms: private_key = "" private_key_passphrase = "" password = None + http_path = "" def __init__(self, connectname): self.connectname = connectname @@ -89,6 +90,7 @@ def AssignConnectParms( connect_by_key, private_key, private_key_passphrase, + http_path, connectname="PROJECT", password=None, ): @@ -109,6 +111,7 @@ def AssignConnectParms( clsConnectParms.connect_by_key = connect_by_key clsConnectParms.private_key = private_key clsConnectParms.private_key_passphrase = private_key_passphrase + clsConnectParms.http_path = http_path def _RetrieveProjectPW(strProjectCode, strConnID): @@ -181,6 +184,7 @@ def _GetDBCredentials(strCredentialSet): "connect_by_key": clsConnectParms.connect_by_key, "private_key": clsConnectParms.private_key, "private_key_passphrase": clsConnectParms.private_key_passphrase, + "http_path": clsConnectParms.http_path, } elif strCredentialSet == "DKTG": # Get credentials from functions in my_dk_credentials.py diff --git a/testgen/common/database/flavor/databricks_flavor_service.py b/testgen/common/database/flavor/databricks_flavor_service.py new file mode 100644 index 00000000..da451e9b --- /dev/null +++ b/testgen/common/database/flavor/databricks_flavor_service.py @@ -0,0 +1,25 @@ +from urllib.parse import quote_plus + +from testgen.common.database.flavor.flavor_service import FlavorService + + +class DatabricksFlavorService(FlavorService): + def __init__(self): + self.http_path = None + + def get_connection_string_head(self, strPW): + strConnect = f"{self.flavor}://{self.username}:{quote_plus(strPW)}@" + return strConnect + + def get_connection_string_from_fields(self, strPW, is_password_overwritten: bool = False): # NOQA ARG002 + strConnect = ( + f"{self.flavor}://{self.username}:{quote_plus(strPW)}@{self.host}:{self.port}/{self.dbname}" + f"?http_path={self.http_path}" + ) + return strConnect + + def get_pre_connection_queries(self): + return [] + + def get_connect_args(self, is_password_overwritten: bool = False): # NOQA ARG002 + return {} diff --git a/testgen/common/database/flavor/flavor_service.py b/testgen/common/database/flavor/flavor_service.py index 19288088..06c539ac 100644 --- a/testgen/common/database/flavor/flavor_service.py +++ b/testgen/common/database/flavor/flavor_service.py @@ -17,6 +17,7 @@ class FlavorService: connect_by_key = None private_key = None private_key_passphrase = None + http_path = None catalog = 
None def init(self, connection_params: dict): @@ -29,6 +30,7 @@ def init(self, connection_params: dict): self.flavor = connection_params.get("flavor") self.dbschema = connection_params.get("dbschema", None) self.connect_by_key = connection_params.get("connect_by_key", False) + self.http_path = connection_params.get("http_path", None) self.catalog = connection_params.get("catalog", None) private_key = connection_params.get("private_key", None) diff --git a/testgen/common/date_service.py b/testgen/common/date_service.py index e5e89c19..620e4143 100644 --- a/testgen/common/date_service.py +++ b/testgen/common/date_service.py @@ -1,4 +1,4 @@ -from datetime import datetime, timedelta +from datetime import UTC, datetime, timedelta import pandas as pd @@ -11,6 +11,10 @@ def get_now_as_string(): return datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S") +def parse_now(value: str) -> datetime: + return datetime.strptime(value, "%Y-%m-%d %H:%M:%S").replace(tzinfo=UTC) + + def get_now_as_string_with_offset(minutes_offset): ret = datetime.utcnow() if minutes_offset > 0: diff --git a/testgen/common/get_pipeline_parms.py b/testgen/common/get_pipeline_parms.py index af673ca1..cade94cf 100644 --- a/testgen/common/get_pipeline_parms.py +++ b/testgen/common/get_pipeline_parms.py @@ -12,19 +12,22 @@ def RetrieveProfilingParms(strTableGroupsID): if lstParms is None: raise ValueError("Project Connection Parameters not found") - elif ( - lstParms[0]["project_code"] == "" - or lstParms[0]["connection_id"] == "" - or lstParms[0]["sql_flavor"] == "" - or lstParms[0]["project_user"] == "" - or lstParms[0]["profile_use_sampling"] == "" - or lstParms[0]["profile_sample_percent"] == "" - or lstParms[0]["profile_sample_min_count"] == "" - or lstParms[0]["table_group_schema"] == "" - ): - raise ValueError("Project Connection parameters not correctly set") - else: - return lstParms[0] + + required_params = ( + "project_code", + "connection_id", + "sql_flavor", + "project_user", + "profile_use_sampling", + "profile_sample_percent", + "profile_sample_min_count", + "table_group_schema", + ) + + if missing := [param for param in required_params if not lstParms[0][param]]: + raise ValueError(f"Project Connection parameters are missing: {', '.join(missing)}.") + + return lstParms[0] def RetrieveTestGenParms(strTableGroupsID, strTestSuite): diff --git a/testgen/common/mixpanel_service.py b/testgen/common/mixpanel_service.py new file mode 100644 index 00000000..53adefd4 --- /dev/null +++ b/testgen/common/mixpanel_service.py @@ -0,0 +1,77 @@ +import json +import logging +import ssl +import uuid +from base64 import b64encode +from functools import cached_property, wraps +from hashlib import blake2b +from urllib.parse import urlencode +from urllib.request import Request, urlopen + +from testgen import settings +from testgen.ui.session import session +from testgen.utils.singleton import Singleton + +LOG = logging.getLogger("testgen") + + +def safe_method(method): + @wraps(method) + def wrapped(*args, **kwargs): + if settings.ANALYTICS_ENABLED: + try: + method(*args, **kwargs) + except Exception: + LOG.exception("Error processing analytics data") + + return wrapped + + +class MixpanelService(Singleton): + + @cached_property + def instance_id(self): + return settings.INSTANCE_ID or blake2b(uuid.getnode().to_bytes(8), digest_size=8).hexdigest() + + @cached_property + def distinct_id(self): + return self._hash_value(session.username) + + def _hash_value(self, value: bytes | str, digest_size: int = 8) -> str: + if isinstance(value, str): + 
value = value.encode()
+        return blake2b(value, salt=self.instance_id.encode(), digest_size=digest_size).hexdigest()
+
+    @safe_method
+    def send_event(self, event_name, **properties):
+        properties.setdefault("instance_id", self.instance_id)
+        properties.setdefault("version", settings.VERSION)
+        properties.setdefault("distinct_id", self.distinct_id)
+
+        track_payload = {
+            "event": event_name,
+            "properties": {
+                "token": settings.MIXPANEL_TOKEN,
+                **properties,
+            }
+        }
+        self.send_mp_request("track?ip=1", track_payload)
+
+    def get_ssl_context(self):
+        ssl_context = ssl.create_default_context()
+        ssl_context.check_hostname = False
+        ssl_context.verify_mode = ssl.CERT_NONE
+        return ssl_context
+
+    def send_mp_request(self, endpoint, payload):
+        try:
+            post_data = urlencode(
+                {"data": b64encode(json.dumps(payload).encode()).decode()}
+            ).encode()
+
+            req = Request(f"{settings.MIXPANEL_URL}/{endpoint}", data=post_data, method="POST")  # noqa: S310
+            req.add_header("Content-Type", "application/x-www-form-urlencoded")
+
+            urlopen(req, context=self.get_ssl_context(), timeout=settings.MIXPANEL_TIMEOUT)  # noqa: S310
+        except Exception:
+            LOG.exception("Failed to send analytics data")
diff --git a/testgen/common/models/__init__.py b/testgen/common/models/__init__.py
index cf536438..26b9b505 100644
--- a/testgen/common/models/__init__.py
+++ b/testgen/common/models/__init__.py
@@ -1,8 +1,11 @@
+import functools
 import platform
+import threading
 import urllib.parse
 
 from sqlalchemy import create_engine
 from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import Session as SQLAlchemySession
 from sqlalchemy.orm import sessionmaker
 
 from testgen import settings
@@ -24,3 +27,32 @@
     engine,
     expire_on_commit=False,
 )
+_current_session_wrapper = threading.local()
+_current_session_wrapper.value = None
+
+
+def with_database_session(func):
+    """
+    Set up a thread-global SQLAlchemy session that can be accessed
+    by calling `get_current_session()` from anywhere.
+
+    NOTE: Call once on the main entry point.
+ """ + + @functools.wraps(func) + def wrapper(*args, **kwargs): + try: + session = get_current_session() + if session: + return func(*args, **kwargs) + + with Session() as session: + _current_session_wrapper.value = session + return func(*args, **kwargs) + finally: + _current_session_wrapper.value = None + return wrapper + + +def get_current_session() -> SQLAlchemySession: + return getattr(_current_session_wrapper, "value", None) diff --git a/testgen/common/models/scores.py b/testgen/common/models/scores.py index 89965b16..1fd437bc 100644 --- a/testgen/common/models/scores.py +++ b/testgen/common/models/scores.py @@ -2,15 +2,16 @@ import uuid from collections import defaultdict from collections.abc import Iterable +from datetime import datetime from typing import Literal, Self, TypedDict import pandas as pd -from sqlalchemy import Boolean, Column, Enum, Float, ForeignKey, Integer, String, select, text +from sqlalchemy import Boolean, Column, DateTime, Enum, Float, ForeignKey, Integer, String, select, text from sqlalchemy.dialects.postgresql import UUID from sqlalchemy.orm import relationship from testgen.common import read_template_sql_file -from testgen.common.models import Base, Session, engine +from testgen.common.models import Base, engine, get_current_session from testgen.utils import is_uuid4 @@ -54,6 +55,13 @@ class ScoreDefinition(Base): order_by="ScoreDefinitionBreakdownItem.impact.desc()", lazy="joined", ) + history: Iterable["ScoreDefinitionResultHistoryEntry"] = relationship( + "ScoreDefinitionResultHistoryEntry", + order_by="ScoreDefinitionResultHistoryEntry.last_run_time.asc()", + cascade="all, delete-orphan", + lazy="select", + back_populates="definition", + ) @classmethod def from_table_group(cls, table_group: dict) -> Self: @@ -72,11 +80,11 @@ def from_table_group(cls, table_group: dict) -> Self: def get(cls, id_: str) -> "Self | None": if not is_uuid4(id_): return None - + definition = None - with Session() as db_session: - query = select(ScoreDefinition).where(ScoreDefinition.id == id_) - definition = db_session.scalars(query).first() + db_session = get_current_session() + query = select(ScoreDefinition).where(ScoreDefinition.id == id_) + definition = db_session.scalars(query).first() return definition @classmethod @@ -87,28 +95,28 @@ def all( sorted_by: str | None = "name", ) -> "Iterable[Self]": definitions = [] - with Session() as db_session: - query = select(ScoreDefinition) - if name_filter: - query = query.where(ScoreDefinition.name.ilike(f"%{name_filter}%")) - if project_code: - query = query.where(ScoreDefinition.project_code == project_code) - query = query.order_by(text(sorted_by)) - definitions = db_session.scalars(query).unique().all() + db_session = get_current_session() + query = select(ScoreDefinition) + if name_filter: + query = query.where(ScoreDefinition.name.ilike(f"%{name_filter}%")) + if project_code: + query = query.where(ScoreDefinition.project_code == project_code) + query = query.order_by(text(sorted_by)) + definitions = db_session.scalars(query).unique().all() return definitions def save(self) -> None: - with Session() as db_session: - db_session.add(self) - db_session.flush([self]) - db_session.commit() - db_session.refresh(self, ["id"]) + db_session = get_current_session() + db_session.add(self) + db_session.flush([self]) + db_session.commit() + db_session.refresh(self, ["id"]) def delete(self) -> None: - with Session() as db_session: - db_session.add(self) - db_session.delete(self) - db_session.commit() + db_session = get_current_session() + 
db_session.add(self) + db_session.delete(self) + db_session.commit() def as_score_card(self) -> "ScoreCard": """ @@ -130,6 +138,7 @@ def as_score_card(self) -> "ScoreCard": "profiling_score": None, "testing_score": None, "categories": [], + "history": [], "definition": self, } @@ -139,25 +148,24 @@ def as_score_card(self) -> "ScoreCard": categories_query_template_file = "get_category_scores_by_dimension.sql" filters = " AND ".join(self._get_raw_query_filters()) - overall_scores = pd.read_sql_query( + overall_scores = get_current_session().execute( read_template_sql_file( overall_score_query_template_file, sub_directory="score_cards", - ).replace("{filters}", filters), - engine, - ) - overall_scores = overall_scores.iloc[0].to_dict() if not overall_scores.empty else {} + ).replace("{filters}", filters) + ).mappings().first() or {} categories_scores = [] if (category := self.category): - categories_scores = pd.read_sql_query( - read_template_sql_file( - categories_query_template_file, - sub_directory="score_cards", - ).replace("{category}", category.value).replace("{filters}", filters), - engine, - ) - categories_scores = [category.to_dict() for _, category in categories_scores.iterrows()] + categories_scores = [ + dict(result) + for result in get_current_session().execute( + read_template_sql_file( + categories_query_template_file, + sub_directory="score_cards", + ).replace("{category}", category.value).replace("{filters}", filters) + ).mappings().all() + ] return { "id": self.id, @@ -168,9 +176,40 @@ def as_score_card(self) -> "ScoreCard": "profiling_score": overall_scores.get("profiling_score") if self.total_score else None, "testing_score": overall_scores.get("testing_score") if self.total_score else None, "categories": categories_scores, + "history": [], + "definition": self, + } + + def as_cached_score_card(self) -> "ScoreCard": + """Reads the cached values to build a scorecard""" + root_keys: list[str] = ["score", "profiling_score", "testing_score", "cde_score"] + score_card: ScoreCard = { + "id": self.id, + "project_code": self.project_code, + "name": self.name, + "categories": [], + "history": [], "definition": self, } + for result in sorted(self.results, key=lambda r: r.category): + if result.category in root_keys: + score_card[result.category] = result.score + continue + score_card["categories"].append({"label": result.category, "score": result.score}) + + history_categories: list[str] = [] + if self.total_score: + history_categories.append("score") + if self.cde_score: + history_categories.append("cde_score") + + for entry in self.history[-50:]: + if entry.category in history_categories: + score_card["history"].append({"score": entry.score, "category": entry.category, "time": entry.last_run_time}) + + return score_card + def get_score_card_breakdown( self, score_type: Literal["score", "cde_score"], @@ -265,6 +304,33 @@ def get_score_card_issues( results = pd.read_sql_query(query, engine) return [row.to_dict() for _, row in results.iterrows()] + def recalculate_scores_history(self) -> None: + """ + Executes a raw query to get the total score and cde score for + each history entry of this definition. 
+ + Query templates: + get_historical_overall_scores_by_column.sql + """ + template = "get_historical_overall_scores_by_column.sql" + query = ( + read_template_sql_file(template, sub_directory="score_cards") + .replace("{filters}", " AND ".join(self._get_raw_query_filters())) + .replace("{definition_id}", str(self.id)) + ) + overall_scores = get_current_session().execute(query).mappings().all() + current_history: dict[tuple[datetime, str, str], ScoreDefinitionResultHistoryEntry] = {} + for entry in self.history: + current_history[(entry.last_run_time, entry.category,)] = entry + + renewed_history: dict[tuple[datetime, str, str], float] = {} + for scores in overall_scores: + renewed_history[(scores["last_run_time"], "score",)] = scores["score"] + renewed_history[(scores["last_run_time"], "cde_score",)] = scores["cde_score"] + + for key, entry in current_history.items(): + entry.score = renewed_history[key] + def _get_raw_query_filters(self, cde_only: bool = False, prefix: str | None = None) -> list[str]: values_by_field = defaultdict(list) for filter_ in self.filters: @@ -338,13 +404,13 @@ def filter( score_type: Literal["score", "cde_score"], ) -> "Iterable[Self]": items = [] - with Session() as db_session: - query = select(ScoreDefinitionBreakdownItem).where( - ScoreDefinitionBreakdownItem.definition_id == definition_id, - ScoreDefinitionBreakdownItem.category == category, - ScoreDefinitionBreakdownItem.score_type == score_type, - ).order_by(ScoreDefinitionBreakdownItem.impact.desc()) - items = db_session.scalars(query).unique().all() + db_session = get_current_session() + query = select(ScoreDefinitionBreakdownItem).where( + ScoreDefinitionBreakdownItem.definition_id == definition_id, + ScoreDefinitionBreakdownItem.category == category, + ScoreDefinitionBreakdownItem.score_type == score_type, + ).order_by(ScoreDefinitionBreakdownItem.impact.desc()) + items = db_session.scalars(query).unique().all() return items def to_dict(self) -> dict: @@ -360,6 +426,40 @@ def to_dict(self) -> dict: } +class ScoreDefinitionResultHistoryEntry(Base): + __tablename__ = "score_definition_results_history" + + definition_id: str = Column( + UUID(as_uuid=True), + ForeignKey("score_definitions.id", ondelete="CASCADE"), + primary_key=True, + ) + category: str = Column(String, nullable=False, primary_key=True) + score: float = Column(Float, nullable=True) + last_run_time: datetime = Column(DateTime(timezone=False), nullable=False, primary_key=True) + + definition: ScoreDefinition = relationship("ScoreDefinition", back_populates="history") + + def add_as_cutoff(self): + """ + Insert new records into table 'score_history_latest_runs' + corresponding to the latest profiling and test runs as of + `self.last_run_time`. 
+ + Query templates: + add_latest_runs.sql + """ + # ruff: noqa: RUF027 + query = ( + read_template_sql_file("add_latest_runs.sql", sub_directory="score_cards") + .replace("{project_code}", self.definition.project_code) + .replace("{definition_id}", str(self.definition_id)) + .replace("{score_history_cutoff_time}", self.last_run_time.isoformat()) + ) + session = get_current_session() + session.execute(query) + + class ScoreCard(TypedDict): id: str project_code: str @@ -369,6 +469,7 @@ class ScoreCard(TypedDict): profiling_score: float testing_score: float categories: list["CategoryScore"] + history: list["HistoryEntry"] definition: ScoreDefinition | None @@ -380,3 +481,9 @@ class CategoryScore(TypedDict): class SelectedIssue(TypedDict): id: str issue_type: Literal["hygiene", "test"] + + +class HistoryEntry(TypedDict): + score: float + category: Literal["score", "cde_score"] + time: datetime diff --git a/testgen/settings.py b/testgen/settings.py index 2a708af5..5b79d9e4 100644 --- a/testgen/settings.py +++ b/testgen/settings.py @@ -466,3 +466,21 @@ File paths for SSL certificate and private key to support HTTPS. Both files must be provided. """ + + +MIXPANEL_URL: str = "https://api.mixpanel.com" +MIXPANEL_TIMEOUT: int = 3 +MIXPANEL_TOKEN: str = "973680ddf8c2b512e6f6d1f2959149eb" +""" +Mixpanel configuration +""" + +INSTANCE_ID: str | None = os.getenv("TG_INSTANCE_ID", None) +""" +Random ID that uniquely identifies the instance. +""" + +ANALYTICS_ENABLED: bool = os.getenv("TG_ANALYTICS", "yes").lower() in ("true", "yes") +""" +Disables sending usage data when set to any value except "true" and "yes". Defaults to "yes" +""" diff --git a/testgen/template/dbsetup/020_create_standard_functions_sprocs.sql b/testgen/template/dbsetup/020_create_standard_functions_sprocs.sql index 811670b8..40e7d585 100644 --- a/testgen/template/dbsetup/020_create_standard_functions_sprocs.sql +++ b/testgen/template/dbsetup/020_create_standard_functions_sprocs.sql @@ -53,38 +53,6 @@ $$ $$; -CREATE OR REPLACE FUNCTION {SCHEMA_NAME}.fn_PrepColumnName(value_to_check TEXT) - RETURNS TEXT AS -$$ -DECLARE - keyword_arr TEXT[] := ARRAY ['ALL', 'ALTER', 'ANALYSE', 'ANALYZE', 'AND', 'ANY', 'ARRAY', 'AS', 'ASC', 'ASYMMETRIC', - 'AUTHORIZATION', 'BINARY', 'BOTH', 'CASE', 'CAST', 'CHECK', 'COLLATE', 'COLLATION', - 'COLUMN', 'CONCURRENTLY', 'CONSTRAINT', 'CREATE', 'CROSS', 'CURRENT_CATALOG', - 'CURRENT_DATE', 'CURRENT_ROLE', 'CURRENT_SCHEMA', 'CURRENT_TIME', 'CURRENT_TIMESTAMP', - 'CURRENT_USER', 'CREDENTIALS', - 'DEFAULT', 'DEFERRABLE', 'DESC', 'DISTINCT', 'DO', 'DROP', - 'ELSE', 'END', 'EXCEPT', 'FALSE', 'FETCH', 'FOR', 'FOREIGN', 'FREEZE', 'FROM', 'FULL', - 'GRANT', 'GROUP', 'HAVING', 'ILIKE', 'IN', 'INITIALLY', 'INNER', 'INTERSECT', 'INTO', - 'IS', 'ISNULL', 'JOIN', 'LATERAL', 'LEADING', 'LEFT', 'LIKE', 'LIMIT', 'LOCALTIME', - 'LOCALTIMESTAMP', 'NATURAL', 'NOT', 'NOTNULL', 'NULL', 'OFFSET', 'ON', 'ONLY', 'OR', - 'ORDER', 'OUTER', 'OVERLAPS', 'PLACING', 'PRIMARY', 'REFERENCES', 'RETURNING', 'RIGHT', - 'SELECT', 'SESSION_USER', 'SIMILAR', 'SOME', 'SYMMETRIC', 'TABLE', 'TABLESAMPLE', - 'THEN', 'TIMESTAMP', 'TIMEZONE', 'TO', 'TRAILING', 'TRUE', 'UNION', 'UNIQUE', 'USER', 'USING', - 'VARIADIC', 'VERBOSE', 'WHEN', 'WHERE', 'WINDOW', 'WITH']; -- Add more keywords here -BEGIN - -- Check if the value matches any of the keywords (case-insensitive) - IF value_to_check ILIKE ANY (keyword_arr) THEN - RETURN '"' || value_to_check || '"'; - -- Check if the value contains a space or a comma or it starts with a number - ELSIF value_to_check 
!~ '^[a-zA-Z_][a-zA-Z0-9_]*$' THEN - RETURN '"' || value_to_check || '"'; - ELSE - RETURN value_to_check; - END IF; -END; -$$ LANGUAGE plpgsql; - - CREATE OR REPLACE FUNCTION {SCHEMA_NAME}.fn_pct(numerator NUMERIC, denominator NUMERIC, decs INTEGER DEFAULT 0) returns NUMERIC language plpgsql as @@ -104,12 +72,14 @@ DECLARE BEGIN lower_case_sql_flavor := LOWER(sql_flavor); - IF lower_case_sql_flavor = 'postgres' OR lower_case_sql_flavor = 'postgresql' THEN + IF lower_case_sql_flavor IN ('postgres', 'postgresql') THEN escaped_value := QUOTE_LITERAL(var_value); - ELSIF lower_case_sql_flavor = 'redshift' OR lower_case_sql_flavor = 'snowflake' THEN + ELSIF lower_case_sql_flavor IN ('redshift', 'snowflake') THEN escaped_value := TRIM(LEADING 'E' FROM QUOTE_LITERAL(var_value)); ELSIF lower_case_sql_flavor = 'mssql' THEN escaped_value := '''' || REPLACE(var_value, '''', '''''') || ''''; + ELSIF lower_case_sql_flavor = 'databricks' THEN + escaped_value := '''' || REPLACE(REPLACE(var_value, '\', '\\'), '''', '\''') || ''''; ELSE RAISE EXCEPTION 'Invalid sql_flavor name: %', sql_flavor; END IF; @@ -196,7 +166,7 @@ $$ The approximation formula uses a series expansion to estimate the CDF, which is accurate for most practical purposes. - + To estimate the count of observations that fall outside a certain Z-score (both above and below), you can use the `normal_cdf()` function. For a total number of observations N, the proportion of values outside the Z-score @@ -336,4 +306,3 @@ CREATE AGGREGATE {SCHEMA_NAME}.sum_ln (double precision) ( FINALFUNC = sum_ln_agg_final, INITCOND = '0' ); - diff --git a/testgen/template/dbsetup/030_initialize_new_schema_structure.sql b/testgen/template/dbsetup/030_initialize_new_schema_structure.sql index ce01983f..f44b4276 100644 --- a/testgen/template/dbsetup/030_initialize_new_schema_structure.sql +++ b/testgen/template/dbsetup/030_initialize_new_schema_structure.sql @@ -77,7 +77,8 @@ CREATE TABLE connections ( connect_by_url BOOLEAN default FALSE, connect_by_key BOOLEAN DEFAULT FALSE, private_key BYTEA, - private_key_passphrase BYTEA + private_key_passphrase BYTEA, + http_path VARCHAR(200) ); CREATE TABLE table_groups @@ -831,5 +832,35 @@ CREATE INDEX cix_tr_pc_ts ON test_results(test_suite_id) WHERE observability_status = 'Queued'; +CREATE TABLE IF NOT EXISTS score_definition_results_history ( + definition_id UUID CONSTRAINT score_definitions_filters_score_definitions_definition_id_fk + REFERENCES score_definitions (id) + ON DELETE CASCADE, + category TEXT NOT NULL, + score DOUBLE PRECISION DEFAULT NULL, + last_run_time TIMESTAMP NOT NULL +); + +CREATE INDEX sdrh_def_last_run + ON score_definition_results_history(definition_id, last_run_time); + +CREATE TABLE score_history_latest_runs ( + definition_id UUID, + score_history_cutoff_time TIMESTAMP, + table_groups_id UUID, + last_profiling_run_id UUID, + test_suite_id UUID, + last_test_run_id UUID +); + +CREATE INDEX shlast_runs_def_cutoff + ON score_history_latest_runs(definition_id, score_history_cutoff_time); + +CREATE INDEX shlast_runs_pro_run + ON score_history_latest_runs(last_profiling_run_id); + +CREATE INDEX shlast_runs_tst_run + ON score_history_latest_runs(last_test_run_id); + INSERT INTO tg_revision (component, revision) VALUES ('metadata_db', 0); diff --git a/testgen/template/dbsetup/040_populate_new_schema_project.sql b/testgen/template/dbsetup/040_populate_new_schema_project.sql index 8ac7fdc6..7e6672c2 100644 --- a/testgen/template/dbsetup/040_populate_new_schema_project.sql +++ 
b/testgen/template/dbsetup/040_populate_new_schema_project.sql @@ -11,7 +11,7 @@ SELECT '{PROJECT_CODE}' as project_code, INSERT INTO connections (project_code, sql_flavor, project_host, project_port, project_user, project_db, - connection_name, project_pw_encrypted, max_threads, max_query_chars) + connection_name, project_pw_encrypted, http_path, max_threads, max_query_chars) SELECT '{PROJECT_CODE}' as project_code, '{SQL_FLAVOR}' as sql_flavor, '{PROJECT_HOST}' as project_host, @@ -20,6 +20,7 @@ SELECT '{PROJECT_CODE}' as project_code, '{PROJECT_DB}' as project_db, '{CONNECTION_NAME}' as connection_name, '{PROJECT_PW_ENCRYPTED}' as project_pw_encrypted, + '{PROJECT_HTTP_PATH}' as http_path, '{MAX_THREADS}'::INTEGER as max_threads, '{MAX_QUERY_CHARS}'::INTEGER as max_query_chars; diff --git a/testgen/template/dbsetup/050_populate_new_schema_metadata.sql b/testgen/template/dbsetup/050_populate_new_schema_metadata.sql index e9a2060d..f7dfed09 100644 --- a/testgen/template/dbsetup/050_populate_new_schema_metadata.sql +++ b/testgen/template/dbsetup/050_populate_new_schema_metadata.sql @@ -198,7 +198,16 @@ VALUES ('2001', 'Combo_Match', 'redshift', 'ex_data_match_generic.sql'), ('2306', 'Aggregate_Balance', 'postgresql', 'ex_aggregate_match_same_generic.sql'), ('2307', 'Timeframe_Combo_Gain', 'postgresql', 'ex_window_match_no_drops_postgresql.sql'), ('2308', 'Timeframe_Combo_Match', 'postgresql', 'ex_window_match_same_postgresql.sql'), - ('2309', 'Aggregate_Increase', 'postgresql', 'ex_aggregate_match_num_incr_generic.sql'); + ('2309', 'Aggregate_Increase', 'postgresql', 'ex_aggregate_match_num_incr_generic.sql'), + + ('2401', 'Combo_Match', 'databricks', 'ex_data_match_generic.sql'), + ('2402', 'Aggregate_Minimum', 'databricks', 'ex_aggregate_match_no_drops_generic.sql'), + ('2403', 'Distribution_Shift', 'databricks', 'ex_relative_entropy_generic.sql'), + ('2404', 'CUSTOM', 'databricks', 'ex_custom_query_generic.sql'), + ('2406', 'Aggregate_Balance', 'databricks', 'ex_aggregate_match_same_generic.sql'), + ('2407', 'Timeframe_Combo_Gain', 'databricks', 'ex_window_match_no_drops_databricks.sql'), + ('2408', 'Timeframe_Combo_Match', 'databricks', 'ex_window_match_same_databricks.sql'), + ('2409', 'Aggregate_Increase', 'databricks', 'ex_aggregate_match_num_incr_generic.sql'); TRUNCATE TABLE cat_test_conditions; @@ -323,6 +332,7 @@ VALUES ('1001', 'Alpha_Trunc', 'redshift', 'MAX(LENGTH({COLUMN_NAME}))', '<', ' ('4028', 'Unique', 'postgresql', 'COUNT(*) - COUNT(DISTINCT {COLUMN_NAME})', '>', '{THRESHOLD_VALUE}'), ('4029', 'Unique_Pct', 'postgresql', 'ABS( 2.0 * ASIN( SQRT({BASELINE_UNIQUE_CT}::FLOAT / {BASELINE_VALUE_CT}::FLOAT ) ) - 2 * ASIN( SQRT( COUNT( DISTINCT {COLUMN_NAME} )::FLOAT / NULLIF(COUNT( {COLUMN_NAME} ), 0)::FLOAT )) )', '>=', '{THRESHOLD_VALUE}'), ('4030', 'Weekly_Rec_Ct', 'postgresql', 'MAX(<%DATEDIFF_WEEK;''1800-01-01''::DATE;{COLUMN_NAME}%>) - MIN(<%DATEDIFF_WEEK;''1800-01-01''::DATE;{COLUMN_NAME}%>)+1 - COUNT(DISTINCT <%DATEDIFF_WEEK;''1800-01-01''::DATE;{COLUMN_NAME}%>)', '>', '{THRESHOLD_VALUE}'), + ('1031', 'Variability_Increase', 'redshift', '100.0*STDDEV(CAST({COLUMN_NAME} AS FLOAT))/{BASELINE_SD}', '>', '{THRESHOLD_VALUE}'), ('1032', 'Variability_Decrease', 'redshift', '100.0*STDDEV(CAST({COLUMN_NAME} AS FLOAT))/{BASELINE_SD}', '<', '{THRESHOLD_VALUE}'), ('2031', 'Variability_Increase', 'snowflake', '100.0*STDDEV(CAST({COLUMN_NAME} AS FLOAT))/{BASELINE_SD}', '>', '{THRESHOLD_VALUE}'), @@ -331,6 +341,8 @@ VALUES ('1001', 'Alpha_Trunc', 'redshift', 
'MAX(LENGTH({COLUMN_NAME}))', '<', ' ('3032', 'Variability_Decrease', 'mssql', '100.0*STDEV(CAST({COLUMN_NAME} AS FLOAT))/{BASELINE_SD}', '<', '{THRESHOLD_VALUE}'), ('4031', 'Variability_Increase', 'postgresql', '100.0*STDDEV(CAST({COLUMN_NAME} AS FLOAT))/{BASELINE_SD}', '>', '{THRESHOLD_VALUE}'), ('4032', 'Variability_Decrease', 'postgresql', '100.0*STDDEV(CAST({COLUMN_NAME} AS FLOAT))/{BASELINE_SD}', '<', '{THRESHOLD_VALUE}'), + ('6031', 'Variability_Increase', 'databricks', '100.0*STDDEV_SAMP(CAST({COLUMN_NAME} AS FLOAT))/{BASELINE_SD}', '>', '{THRESHOLD_VALUE}'), + ('6032', 'Variability_Decrease', 'databricks', '100.0*STDDEV_SAMP(CAST({COLUMN_NAME} AS FLOAT))/{BASELINE_SD}', '<', '{THRESHOLD_VALUE}'), ('5001', 'Alpha_Trunc', 'trino', 'MAX(LENGTH({COLUMN_NAME}))', '<', '{THRESHOLD_VALUE}'), ('5002', 'Avg_Shift', 'trino', 'ABS( (CAST(AVG({COLUMN_NAME} AS REAL)) - {BASELINE_AVG}) / SQRT(((CAST(COUNT({COLUMN_NAME}) AS REAL)-1)*STDDEV({COLUMN_NAME})^2 + (CAST({BASELINE_VALUE_CT} AS REAL)-1) * CAST({BASELINE_SD} AS REAL)^2) /NULLIF(CAST(COUNT({COLUMN_NAME}) AS REAL) + CAST({BASELINE_VALUE_CT} AS REAL), 0) ))', '>=', '{THRESHOLD_VALUE}'), @@ -365,30 +377,64 @@ VALUES ('1001', 'Alpha_Trunc', 'redshift', 'MAX(LENGTH({COLUMN_NAME}))', '<', ' ('5031', 'Variability_Increase', 'trino', '100.0*STDDEV(CAST({COLUMN_NAME} AS REAL))/{BASELINE_SD}', '>', '{THRESHOLD_VALUE}'), ('5032', 'Variability_Decrease', 'trino', '100.0*STDDEV(CAST({COLUMN_NAME} AS REAL))/{BASELINE_SD}', '<', '{THRESHOLD_VALUE}'), + ('6001', 'Alpha_Trunc', 'databricks', 'MAX(LENGTH({COLUMN_NAME}))', '<', '{THRESHOLD_VALUE}'), + ('6002', 'Avg_Shift', 'databricks', 'ABS( (AVG({COLUMN_NAME}::FLOAT) - {BASELINE_AVG}) / SQRT(((COUNT({COLUMN_NAME})::FLOAT-1)*POWER(STDDEV_SAMP({COLUMN_NAME}),2) + ({BASELINE_VALUE_CT}::FLOAT-1) * POWER({BASELINE_SD}::FLOAT,2)) /NULLIF(COUNT({COLUMN_NAME})::FLOAT + {BASELINE_VALUE_CT}::FLOAT, 0) ))', '>=', '{THRESHOLD_VALUE}'), + ('6003', 'Condition_Flag', 'databricks', 'SUM(CASE WHEN {CUSTOM_QUERY} THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), + ('6004', 'Constant', 'databricks', 'SUM(CASE WHEN {COLUMN_NAME} <> {BASELINE_VALUE} THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), + ('6005', 'Daily_Record_Ct', 'databricks', '<%DATEDIFF_DAY;MIN({COLUMN_NAME});MAX({COLUMN_NAME})%>+1-COUNT(DISTINCT {COLUMN_NAME})', '<', '{THRESHOLD_VALUE}'), + ('6006', 'Dec_Trunc', 'databricks', 'ROUND(SUM(ABS({COLUMN_NAME})::DECIMAL(18,4) % 1), 0)', '<', '{THRESHOLD_VALUE}'), + ('6007', 'Distinct_Date_Ct', 'databricks', 'COUNT(DISTINCT {COLUMN_NAME})', '<', '{THRESHOLD_VALUE}'), + ('6008', 'Distinct_Value_Ct', 'databricks', 'COUNT(DISTINCT {COLUMN_NAME})', '<>', '{THRESHOLD_VALUE}'), + ('6009', 'Email_Format', 'databricks', 'SUM(CASE WHEN NOT REGEXP_LIKE({COLUMN_NAME}::STRING, ''^[A-Za-z0-9._''''%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$'') THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), + ('6010', 'Future_Date', 'databricks', 'SUM(GREATEST(0, SIGN({COLUMN_NAME}::DATE - ''{RUN_DATE}''::DATE)))', '>', '{THRESHOLD_VALUE}'), + ('6011', 'Future_Date_1Y', 'databricks', 'SUM(GREATEST(0, SIGN({COLUMN_NAME}::DATE - (''{RUN_DATE}''::DATE+365))))', '>', '{THRESHOLD_VALUE}'), + ('6012', 'Incr_Avg_Shift', 'databricks', 'COALESCE(ABS( ({BASELINE_AVG} - (SUM({COLUMN_NAME}) - {BASELINE_SUM}) / NULLIF(COUNT({COLUMN_NAME})::FLOAT - {BASELINE_VALUE_CT}, 0)) / {BASELINE_SD} ), 0)', '>=', '{THRESHOLD_VALUE}'), + ('6013', 'LOV_All', 'databricks', 'STRING_AGG(DISTINCT {COLUMN_NAME}, ''|'') WITHIN GROUP (ORDER BY {COLUMN_NAME})', '<>', '{THRESHOLD_VALUE}'), + 
('6014', 'LOV_Match', 'databricks', 'SUM(CASE WHEN NULLIF({COLUMN_NAME}, '''') NOT IN {BASELINE_VALUE} THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), + ('6015', 'Min_Date', 'databricks', 'SUM(CASE WHEN {COLUMN_NAME} < ''{BASELINE_VALUE}'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), + ('6016', 'Min_Val', 'databricks', 'SUM(CASE WHEN {COLUMN_NAME} < {BASELINE_VALUE} THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), + ('6017', 'Missing_Pct', 'databricks', 'ABS( 2.0 * ASIN( SQRT( {BASELINE_VALUE_CT}::FLOAT / {BASELINE_CT}::FLOAT ) ) - 2 * ASIN( SQRT( COUNT({COLUMN_NAME})::FLOAT / NULLIF(COUNT(*), 0)::FLOAT )) )', '>=', '{THRESHOLD_VALUE}'), + ('6018', 'Monthly_Rec_Ct', 'databricks', '(MAX(<%DATEDIFF_MONTH;{COLUMN_NAME};''{RUN_DATE}''::DATE%>) - MIN(<%DATEDIFF_MONTH;{COLUMN_NAME};''{RUN_DATE}''::DATE%>) + 1) - COUNT(DISTINCT <%DATEDIFF_MONTH;{COLUMN_NAME};''{RUN_DATE}''::DATE%>)', '>', '{THRESHOLD_VALUE}'), + ('6019', 'Outlier_Pct_Above', 'databricks', 'SUM(CASE WHEN {COLUMN_NAME}::FLOAT > {BASELINE_AVG}+(2.0*{BASELINE_SD}) THEN 1 ELSE 0 END)::FLOAT / NULLIF(COUNT({COLUMN_NAME}), 0)::FLOAT', '>', '{THRESHOLD_VALUE}'), + ('6020', 'Outlier_Pct_Below', 'databricks', 'SUM(CASE WHEN {COLUMN_NAME}::FLOAT < {BASELINE_AVG}-(2.0*{BASELINE_SD}) THEN 1 ELSE 0 END)::FLOAT / NULLIF(COUNT({COLUMN_NAME}), 0)::FLOAT', '>', '{THRESHOLD_VALUE}'), + ('6021', 'Pattern_Match', 'databricks', 'COUNT(NULLIF({COLUMN_NAME}, '''')) - SUM(REGEXP_LIKE(NULLIF({COLUMN_NAME}::STRING, ''''), ''{BASELINE_VALUE}'')::BIGINT)', '>', '{THRESHOLD_VALUE}'), + ('6022', 'Recency', 'databricks', '<%DATEDIFF_DAY;MAX({COLUMN_NAME});''{RUN_DATE}''::DATE%>', '>', '{THRESHOLD_VALUE}'), + ('6023', 'Required', 'databricks', 'COUNT(*) - COUNT( {COLUMN_NAME} )', '>', '{THRESHOLD_VALUE}'), + ('6024', 'Row_Ct', 'databricks', 'COUNT(*)', '<', '{THRESHOLD_VALUE}'), + ('6025', 'Row_Ct_Pct', 'databricks', 'ABS(ROUND(100.0 * (COUNT(*) - {BASELINE_CT})::FLOAT / {BASELINE_CT}::FLOAT, 2))', '>', '{THRESHOLD_VALUE}'), + ('6026', 'Street_Addr_Pattern', 'databricks', '100.0*SUM((regexp_like({COLUMN_NAME}::STRING, ''^[0-9]{1,5}[a-zA-Z]?\\s\\w{1,5}\\.?\\s?\\w*\\s?\\w*\\s[a-zA-Z]{1,6}\\.?\\s?[0-9]{0,5}[A-Z]{0,1}$''))::BIGINT)::FLOAT / NULLIF(COUNT({COLUMN_NAME}), 0)::FLOAT', '<', '{THRESHOLD_VALUE}'), + ('6027', 'US_State', 'databricks', 'SUM(CASE WHEN {COLUMN_NAME} NOT IN ('''',''AL'',''AK'',''AS'',''AZ'',''AR'',''CA'',''CO'',''CT'',''DE'',''DC'',''FM'',''FL'',''GA'',''GU'',''HI'',''ID'',''IL'',''IN'',''IA'',''KS'',''KY'',''LA'',''ME'',''MH'',''MD'',''MA'',''MI'',''MN'',''MS'',''MO'',''MT'',''NE'',''NV'',''NH'',''NJ'',''NM'',''NY'',''NC'',''ND'',''MP'',''OH'',''OK'',''OR'',''PW'',''PA'',''PR'',''RI'',''SC'',''SD'',''TN'',''TX'',''UT'',''VT'',''VI'',''VA'',''WA'',''WV'',''WI'',''WY'',''AE'',''AP'',''AA'') THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), + ('6028', 'Unique', 'databricks', 'COUNT(*) - COUNT(DISTINCT {COLUMN_NAME})', '>', '{THRESHOLD_VALUE}'), + ('6029', 'Unique_Pct', 'databricks', 'ABS( 2.0 * ASIN( SQRT({BASELINE_UNIQUE_CT}::FLOAT / {BASELINE_VALUE_CT}::FLOAT ) ) - 2 * ASIN( SQRT( COUNT( DISTINCT {COLUMN_NAME} )::FLOAT / NULLIF(COUNT( {COLUMN_NAME} ), 0)::FLOAT )) )', '>=', '{THRESHOLD_VALUE}'), + ('6030', 'Weekly_Rec_Ct', 'databricks', 'CAST(<%DATEDIFF_WEEK;MIN({COLUMN_NAME});MAX({COLUMN_NAME})%> + 1 - COUNT(DISTINCT DATE_TRUNC(''week'', {COLUMN_NAME})) AS INT)', '>', '{THRESHOLD_VALUE}'), + ('1033', 'Valid_Month', 'redshift', 'SUM(CASE WHEN NULLIF({COLUMN_NAME}, '''') NOT IN ({BASELINE_VALUE}) THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), 
('2033', 'Valid_Month', 'snowflake', 'SUM(CASE WHEN NULLIF({COLUMN_NAME}, '''') NOT IN ({BASELINE_VALUE}) THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), ('3033', 'Valid_Month', 'mssql', 'SUM(CASE WHEN NULLIF({COLUMN_NAME}, '''') NOT IN ({BASELINE_VALUE}) THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), ('4033', 'Valid_Month', 'postgresql', 'SUM(CASE WHEN NULLIF({COLUMN_NAME}, '''') NOT IN ({BASELINE_VALUE}) THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), ('5033', 'Valid_Month', 'trino', 'SUM(CASE WHEN NULLIF({COLUMN_NAME}, '''') NOT IN ({BASELINE_VALUE}) THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), + ('6033', 'Valid_Month', 'databricks', 'SUM(CASE WHEN NULLIF({COLUMN_NAME}, '''') NOT IN ({BASELINE_VALUE}) THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), ('1034', 'Valid_US_Zip', 'redshift', 'SUM(CASE WHEN TRANSLATE({COLUMN_NAME},''012345678'',''999999999'') NOT IN (''99999'', ''999999999'', ''99999-9999'') THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), ('4034', 'Valid_US_Zip', 'postgresql', 'SUM(CASE WHEN TRANSLATE({COLUMN_NAME},''012345678'',''999999999'') NOT IN (''99999'', ''999999999'', ''99999-9999'') THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), ('2034', 'Valid_US_Zip', 'snowflake', 'SUM(CASE WHEN TRANSLATE({COLUMN_NAME},''012345678'',''999999999'') NOT IN (''99999'', ''999999999'', ''99999-9999'') THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), ('5034', 'Valid_US_Zip', 'trino', 'SUM(CASE WHEN TRANSLATE({COLUMN_NAME},''012345678'',''999999999'') NOT IN (''99999'', ''999999999'', ''99999-9999'') THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), ('3034', 'Valid_US_Zip', 'mssql', 'SUM(CASE WHEN TRANSLATE({COLUMN_NAME},''012345678'',''999999999'') NOT IN (''99999'', ''999999999'', ''99999-9999'') THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), + ('6034', 'Valid_US_Zip', 'databricks', 'SUM(CASE WHEN TRANSLATE({COLUMN_NAME},''012345678'',''999999999'') NOT IN (''99999'', ''999999999'', ''99999-9999'') THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), ('1035', 'Valid_US_Zip3', 'redshift', 'SUM(CASE WHEN TRANSLATE({COLUMN_NAME},''012345678'',''999999999'') <> ''999'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), ('4035', 'Valid_US_Zip3', 'postgresql', 'SUM(CASE WHEN TRANSLATE({COLUMN_NAME},''012345678'',''999999999'') <> ''999'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), ('2035', 'Valid_US_Zip3', 'snowflake', 'SUM(CASE WHEN TRANSLATE({COLUMN_NAME},''012345678'',''999999999'') <> ''999'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), ('5035', 'Valid_US_Zip3', 'trino', 'SUM(CASE WHEN TRANSLATE({COLUMN_NAME},''012345678'',''999999999'') <> ''999'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), ('3035', 'Valid_US_Zip3', 'mssql', 'SUM(CASE WHEN TRANSLATE({COLUMN_NAME},''012345678'',''999999999'') <> ''999'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), + ('6035', 'Valid_US_Zip3', 'databricks', 'SUM(CASE WHEN TRANSLATE({COLUMN_NAME},''012345678'',''999999999'') <> ''999'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), ('1036', 'Valid_Characters', 'redshift', 'SUM(CASE WHEN TRANSLATE({COLUMN_NAME}, CHR(160) || CHR(8203) || CHR(65279) || CHR(8239) || CHR(8201) || CHR(12288) || CHR(8204), ''XXXXXXX'') <> {COLUMN_NAME} OR {COLUMN_NAME} LIKE '' %'' OR {COLUMN_NAME} LIKE ''''''%'''''' OR {COLUMN_NAME} LIKE ''"%"'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), ('4036', 'Valid_Characters', 'postgresql', 'SUM(CASE WHEN TRANSLATE({COLUMN_NAME}, CHR(160) || CHR(8203) || CHR(65279) || CHR(8239) || CHR(8201) || CHR(12288) || CHR(8204), ''XXXXXXX'') <> {COLUMN_NAME} OR {COLUMN_NAME} LIKE '' %'' OR 
{COLUMN_NAME} LIKE ''''''%'''''' OR {COLUMN_NAME} LIKE ''"%"'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), ('2036', 'Valid_Characters', 'snowflake', 'SUM(CASE WHEN TRANSLATE({COLUMN_NAME}, CHAR(160) || CHAR(8203) || CHAR(65279) || CHAR(8239) || CHAR(8201) || CHAR(12288) || CHAR(8204), ''XXXXXXX'') <> {COLUMN_NAME} OR {COLUMN_NAME} LIKE '' %'' OR {COLUMN_NAME} LIKE ''''''%'''''' OR {COLUMN_NAME} LIKE ''"%"'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), ('5036', 'Valid_Characters', 'trino', 'SUM(CASE WHEN TRANSLATE({COLUMN_NAME}, CHR(160) || CHR(8203) || CHR(65279) || CHR(8239) || CHR(8201) || CHR(12288) || CHR(8204), ''XXXXXXX'') <> {COLUMN_NAME} OR {COLUMN_NAME} LIKE '' %'' OR {COLUMN_NAME} LIKE ''''''%'''''' OR {COLUMN_NAME} LIKE ''"%"'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), - ('3036', 'Valid_Characters', 'mssql', 'SUM(CASE WHEN TRANSLATE({COLUMN_NAME}, NCHAR(160) || NCHAR(8203) || NCHAR(65279) || NCHAR(8239) || NCHAR(8201) || NCHAR(12288) || NCHAR(8204), ''XXXXXXX'') <> {COLUMN_NAME} OR {COLUMN_NAME} LIKE '' %'' OR {COLUMN_NAME} LIKE ''''''%'''''' OR {COLUMN_NAME} LIKE ''"%"'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'); - + ('3036', 'Valid_Characters', 'mssql', 'SUM(CASE WHEN TRANSLATE({COLUMN_NAME}, NCHAR(160) || NCHAR(8203) || NCHAR(65279) || NCHAR(8239) || NCHAR(8201) || NCHAR(12288) || NCHAR(8204), ''XXXXXXX'') <> {COLUMN_NAME} OR {COLUMN_NAME} LIKE '' %'' OR {COLUMN_NAME} LIKE ''''''%'''''' OR {COLUMN_NAME} LIKE ''"%"'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), + ('6036', 'Valid_Characters', 'databricks', 'SUM(CASE WHEN TRANSLATE({COLUMN_NAME}, CHR(160) || CHR(8203) || CHR(65279) || CHR(8239) || CHR(8201) || CHR(12288) || CHR(8204), ''XXXXXXX'') <> {COLUMN_NAME} OR {COLUMN_NAME} LIKE '' %'' OR {COLUMN_NAME} LIKE ''''''%'''''' OR {COLUMN_NAME} LIKE ''"%"'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'); TRUNCATE TABLE target_data_lookups; @@ -1192,7 +1238,167 @@ WHERE {SUBSET_CONDITION} ('1269', '1100', 'Profile Anomaly', 'Potential_PII', 'redshift', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT 500;'), ('1270', '1100', 'Profile Anomaly', 'Potential_PII', 'snowflake', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT 500;'), ('1271', '1100', 'Profile Anomaly', 'Potential_PII', 'mssql', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT 500;'), - ('1272', '1100', 'Profile Anomaly', 'Potential_PII', 'postgresql', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT 500;') + ('1272', '1100', 'Profile Anomaly', 'Potential_PII', 'postgresql', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT 500;'), + + ('1273', '1001', 'Profile Anomaly' , 'Suggested_Type', 'databricks', NULL, 'SELECT `{COLUMN_NAME}`, COUNT(*) AS record_ct FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY `{COLUMN_NAME}` ORDER BY record_ct DESC LIMIT 20;'), + ('1274', '1002', 'Profile Anomaly' , 'Non_Standard_Blanks', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE CASE WHEN `{COLUMN_NAME}` IN (''.'', ''?'', '' '') THEN 1 WHEN LOWER(`{COLUMN_NAME}`::STRING) REGEXP ''-{2,}'' OR 
LOWER(`{COLUMN_NAME}`::STRING) REGEXP ''0{2,}'' OR LOWER(`{COLUMN_NAME}`::STRING) REGEXP ''9{2,}'' OR LOWER(`{COLUMN_NAME}`::STRING) REGEXP ''x{2,}'' OR LOWER(`{COLUMN_NAME}`::STRING) REGEXP ''z{2,}'' THEN 1 WHEN LOWER(`{COLUMN_NAME}`) IN (''blank'',''error'',''missing'',''tbd'', ''n/a'',''#na'',''none'',''null'',''unknown'') THEN 1 WHEN LOWER(`{COLUMN_NAME}`) IN (''(blank)'',''(error)'',''(missing)'',''(tbd)'', ''(n/a)'',''(#na)'',''(none)'',''(null)'',''(unknown)'') THEN 1 WHEN LOWER(`{COLUMN_NAME}`) IN (''[blank]'',''[error]'',''[missing]'',''[tbd]'', ''[n/a]'',''[#na]'',''[none]'',''[null]'',''[unknown]'') THEN 1 WHEN `{COLUMN_NAME}` = '''' THEN 1 WHEN `{COLUMN_NAME}` IS NULL THEN 1 ELSE 0 END = 1 GROUP BY `{COLUMN_NAME}` ORDER BY `{COLUMN_NAME}`;'), + ('1275', '1003', 'Profile Anomaly' , 'Invalid_Zip_USA', 'databricks', NULL, 'SELECT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE TRANSLATE(`{COLUMN_NAME}`,''012345678'',''999999999'') NOT IN (''99999'', ''999999999'', ''99999-9999'') GROUP BY `{COLUMN_NAME}` ORDER BY `{COLUMN_NAME}` LIMIT 500;'), + ('1276', '1004', 'Profile Anomaly' , 'Multiple_Types_Minor', 'databricks', NULL, 'SELECT DISTINCT column_name, columns.table_name, CASE WHEN data_type ILIKE ''timestamp%'' THEN lower(data_type) WHEN data_type ILIKE ''date'' THEN lower(data_type) WHEN data_type ILIKE ''boolean'' THEN ''boolean'' WHEN data_type = ''TEXT'' THEN ''varchar('' || CAST(character_maximum_length AS STRING) || '')'' WHEN data_type ILIKE ''char%'' THEN ''char('' || CAST(character_maximum_length AS STRING) || '')'' WHEN data_type = ''NUMBER'' AND numeric_precision = 38 AND numeric_scale = 0 THEN ''bigint'' WHEN data_type ILIKE ''num%'' THEN ''numeric('' || CAST(numeric_precision AS STRING) || '','' || CAST(numeric_scale AS STRING) || '')'' ELSE data_type END AS data_type FROM information_schema.columns JOIN information_schema.tables ON columns.table_name = tables.table_name AND columns.table_schema = tables.table_schema WHERE columns.table_schema = ''{TARGET_SCHEMA}'' AND columns.column_name = ''{COLUMN_NAME}'' AND tables.table_type = ''BASE TABLE'' ORDER BY data_type, table_name;'), + ('1277', '1005', 'Profile Anomaly' , 'Multiple_Types_Major', 'databricks', NULL, 'SELECT DISTINCT column_name, columns.table_name, CASE WHEN data_type ILIKE ''timestamp%'' THEN lower(data_type) WHEN data_type ILIKE ''date'' THEN lower(data_type) WHEN data_type ILIKE ''boolean'' THEN ''boolean'' WHEN data_type = ''TEXT'' THEN ''varchar('' || CAST(character_maximum_length AS STRING) || '')'' WHEN data_type ILIKE ''char%'' THEN ''char('' || CAST(character_maximum_length AS STRING) || '')'' WHEN data_type = ''NUMBER'' AND numeric_precision = 38 AND numeric_scale = 0 THEN ''bigint'' WHEN data_type ILIKE ''num%'' THEN ''numeric('' || CAST(numeric_precision AS STRING) || '','' || CAST(numeric_scale AS STRING) || '')'' ELSE data_type END AS data_type FROM information_schema.columns JOIN information_schema.tables ON columns.table_name = tables.table_name AND columns.table_schema = tables.table_schema WHERE columns.table_schema = ''{TARGET_SCHEMA}'' AND columns.column_name = ''{COLUMN_NAME}'' AND tables.table_type = ''BASE TABLE'' ORDER BY data_type, table_name;'), + ('1278', '1006', 'Profile Anomaly' , 'No_Values', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY `{COLUMN_NAME}` ORDER BY `{COLUMN_NAME}`;' ), + ('1279', '1007', 'Profile Anomaly' , 'Column_Pattern_Mismatch', 'databricks', NULL, 'SELECT A.* 
FROM (SELECT DISTINCT b.top_pattern, `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}, (SELECT trim(split_part(''{DETAIL_EXPRESSION}'', ''|'', 4)) AS top_pattern) b WHERE REGEXP_REPLACE(REGEXP_REPLACE( REGEXP_REPLACE( `{COLUMN_NAME}`::STRING, ''[a-z]'', ''a''), ''[A-Z]'', ''A''), ''[0-9]'', ''N'') = b.top_pattern GROUP BY b.top_pattern, `{COLUMN_NAME}` ORDER BY count DESC LIMIT 5) A UNION ALL SELECT B.* FROM (SELECT DISTINCT b.top_pattern, `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}, (SELECT trim(split_part(''{DETAIL_EXPRESSION}'', ''|'', 6)) AS top_pattern) b WHERE REGEXP_REPLACE(REGEXP_REPLACE( REGEXP_REPLACE( `{COLUMN_NAME}`::STRING, ''[a-z]'', ''a''), ''[A-Z]'', ''A''), ''[0-9]'', ''N'') = b.top_pattern GROUP BY b.top_pattern, `{COLUMN_NAME}` ORDER BY count DESC LIMIT 5) B UNION ALL SELECT C.* FROM (SELECT DISTINCT b.top_pattern, `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}, (SELECT trim(split_part(''{DETAIL_EXPRESSION}'', ''|'', 8)) AS top_pattern) b WHERE REGEXP_REPLACE(REGEXP_REPLACE( REGEXP_REPLACE( `{COLUMN_NAME}`::STRING, ''[a-z]'', ''a''), ''[A-Z]'', ''A''), ''[0-9]'', ''N'') = b.top_pattern GROUP BY b.top_pattern, `{COLUMN_NAME}` ORDER BY count DESC LIMIT 5) C UNION ALL SELECT D.* FROM (SELECT DISTINCT b.top_pattern, `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}, (SELECT trim(split_part(''{DETAIL_EXPRESSION}'', ''|'', 10)) AS top_pattern) b WHERE REGEXP_REPLACE(REGEXP_REPLACE( REGEXP_REPLACE( `{COLUMN_NAME}`::STRING, ''[a-z]'', ''a''), ''[A-Z]'', ''A''), ''[0-9]'', ''N'') = b.top_pattern GROUP BY b.top_pattern, `{COLUMN_NAME}` ORDER BY count DESC LIMIT 5) D ORDER BY top_pattern DESC, count DESC;' ), + ('1280', '1008', 'Profile Anomaly' , 'Table_Pattern_Mismatch', 'databricks', NULL, 'SELECT DISTINCT column_name, columns.table_name FROM information_schema.columns JOIN information_schema.tables ON columns.table_name = tables.table_name AND columns.table_schema = tables.table_schema WHERE columns.table_schema = ''{TARGET_SCHEMA}'' AND columns.column_name = ''{COLUMN_NAME}'' AND UPPER(tables.table_type) = ''BASE TABLE'' ORDER BY table_name; ' ), + ('1281', '1009', 'Profile Anomaly' , 'Leading_Spaces', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE (CASE WHEN `{COLUMN_NAME}` BETWEEN '' !'' AND ''!'' THEN 1 ELSE 0 END) = 1 GROUP BY `{COLUMN_NAME}` ORDER BY `{COLUMN_NAME}`;' ), + ('1282', '1010', 'Profile Anomaly' , 'Quoted_Values', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE (CASE WHEN `{COLUMN_NAME}` ILIKE ''"%"'' OR `{COLUMN_NAME}` ILIKE ''''''%'''''' THEN 1 ELSE 0 END) = 1 GROUP BY `{COLUMN_NAME}` ORDER BY `{COLUMN_NAME}`;' ), + ('1283', '1011', 'Profile Anomaly' , 'Char_Column_Number_Values', 'databricks', NULL, 'SELECT A.* FROM (SELECT DISTINCT ''Numeric'' as data_type, `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE <%IS_NUM;`{COLUMN_NAME}`%> = 1 GROUP BY `{COLUMN_NAME}` ORDER BY count DESC LIMIT 10) AS A UNION ALL SELECT B.* FROM (SELECT DISTINCT ''Non-Numeric'' as data_type, `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE <%IS_NUM;`{COLUMN_NAME}`%> != 1 GROUP BY `{COLUMN_NAME}` ORDER BY count DESC) AS B ORDER BY data_type, count DESC LIMIT 10;' ), + ('1284', '1012', 'Profile Anomaly' , 'Char_Column_Date_Values', 'databricks', NULL, 'SELECT A.* FROM (SELECT DISTINCT ''Date'' as data_type, 
`{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE <%IS_DATE;`{COLUMN_NAME}`%> = 1 GROUP BY `{COLUMN_NAME}` ORDER BY count DESC LIMIT 10) AS A UNION ALL SELECT B.* FROM (SELECT DISTINCT ''Non-Date'' as data_type, `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE <%IS_DATE;`{COLUMN_NAME}`%> != 1 GROUP BY `{COLUMN_NAME}` ORDER BY count DESC) AS B ORDER BY data_type, count DESC LIMIT 10;' ), + ('1285', '1013', 'Profile Anomaly' , 'Small Missing Value Ct', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE (CASE WHEN `{COLUMN_NAME}` IN (''.'', ''?'', '' '') THEN 1 WHEN LOWER(`{COLUMN_NAME}`::STRING) REGEXP ''-{2,}'' OR LOWER(`{COLUMN_NAME}`::STRING) REGEXP ''0{2,}'' OR LOWER(`{COLUMN_NAME}`::STRING) REGEXP ''9{2,}'' OR LOWER(`{COLUMN_NAME}`::STRING) REGEXP ''x{2,}'' OR LOWER(`{COLUMN_NAME}`::STRING) REGEXP ''z{2,}'' THEN 1 WHEN LOWER(`{COLUMN_NAME}`) IN (''blank'',''error'',''missing'',''tbd'', ''n/a'',''#na'',''none'',''null'',''unknown'') THEN 1 WHEN LOWER(`{COLUMN_NAME}`) IN (''(blank)'',''(error)'',''(missing)'',''(tbd)'', ''(n/a)'',''(#na)'',''(none)'',''(null)'',''(unknown)'') THEN 1 WHEN LOWER(`{COLUMN_NAME}`) IN (''[blank]'',''[error]'',''[missing]'',''[tbd]'', ''[n/a]'',''[#na]'',''[none]'',''[null]'',''[unknown]'') THEN 1 WHEN `{COLUMN_NAME}` = '''' THEN 1 WHEN `{COLUMN_NAME}` IS NULL THEN 1 ELSE 0 END) = 1 GROUP BY `{COLUMN_NAME}` ORDER BY `{COLUMN_NAME}`;' ), + ('1286', '1014', 'Profile Anomaly' , 'Small Divergent Value Ct', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY `{COLUMN_NAME}` ORDER BY count DESC;' ), + ('1287', '1015', 'Profile Anomaly' , 'Boolean_Value_Mismatch', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY `{COLUMN_NAME}` ORDER BY count DESC;' ), + ('1288', '1016', 'Profile Anomaly' , 'Potential_Duplicates', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY `{COLUMN_NAME}` HAVING count > 1 ORDER BY count DESC LIMIT 500;' ), + ('1289', '1017', 'Profile Anomaly' , 'Standardized_Value_Matches', 'databricks', NULL, 'WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE(`{COLUMN_NAME}`, '' '''',.-'', '''')) as possible_standard_value, COUNT(DISTINCT `{COLUMN_NAME}`) FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY UPPER(TRANSLATE(`{COLUMN_NAME}`, '' '''',.-'', '''')) HAVING COUNT(DISTINCT `{COLUMN_NAME}`) > 1 ) SELECT DISTINCT a.`{COLUMN_NAME}`, possible_standard_value, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} a, cte b WHERE UPPER(TRANSLATE(a.`{COLUMN_NAME}`, '' '''',.-'', '''')) = b.possible_standard_value GROUP BY a.`{COLUMN_NAME}`, possible_standard_value ORDER BY possible_standard_value ASC, count DESC LIMIT 500;' ), + ('1290', '1018', 'Profile Anomaly' , 'Unlikely_Date_Values', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, ''{PROFILE_RUN_DATE}'' :: DATE AS profile_run_date, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} a WHERE (`{COLUMN_NAME}` < ''1900-01-01''::DATE) OR (`{COLUMN_NAME}` > ''{PROFILE_RUN_DATE}'' :: DATE + INTERVAL ''30 year'' ) GROUP BY `{COLUMN_NAME}` ORDER BY `{COLUMN_NAME}` DESC LIMIT 500;' ), + ('1291', '1019', 'Profile Anomaly' , 'Recency_One_Year', 'databricks', NULL, 'created_in_ui' ), + ('1292', '1020', 'Profile Anomaly' , 'Recency_Six_Months', 'databricks', NULL, 'created_in_ui' ), + ('1293', '1021', 'Profile Anomaly' , 
'Unexpected US States', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY `{COLUMN_NAME}` ORDER BY `{COLUMN_NAME}` DESC LIMIT 500;' ), + ('1294', '1022', 'Profile Anomaly' , 'Unexpected Emails', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY `{COLUMN_NAME}` ORDER BY `{COLUMN_NAME}` DESC LIMIT 500;' ), + ('1295', '1023', 'Profile Anomaly' , 'Small_Numeric_Value_Ct', 'databricks', NULL, 'SELECT A.* FROM (SELECT DISTINCT ''Numeric'' as data_type, `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE <%IS_NUM;`{COLUMN_NAME}`%> = 1 GROUP BY `{COLUMN_NAME}` ORDER BY count DESC LIMIT 10) AS A UNION ALL SELECT B.* FROM (SELECT DISTINCT ''Non-Numeric'' as data_type, `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE <%IS_NUM;`{COLUMN_NAME}`%> != 1 GROUP BY `{COLUMN_NAME}` ORDER BY count DESC) AS B ORDER BY data_type, count DESC LIMIT 10;' ), + ('1296', '1024', 'Profile Anomaly' , 'Invalid_Zip3_USA', 'databricks', NULL, 'SELECT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE TRANSLATE(`{COLUMN_NAME}`,''012345678'',''999999999'') <> ''999'' GROUP BY `{COLUMN_NAME}` ORDER BY count DESC, `{COLUMN_NAME}` LIMIT 500;'), + ('1297', '1025', 'Profile Anomaly' , 'Delimited_Data_Embedded', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE REGEXP_LIKE(`{COLUMN_NAME}`::STRING, ''^([^,|\t]{1,20}[,|\t]){2,}[^,|\t]{0,20}([,|\t]{0,1}[^,|\t]{0,20})*$'') AND NOT REGEXP_LIKE(`{COLUMN_NAME}`::STRING, ''.*\\s(and|but|or|yet)\\s.*'') GROUP BY `{COLUMN_NAME}` ORDER BY count DESC LIMIT 500;' ), + + ('1298', '1004', 'Test Results', 'Alpha_Trunc', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}` , LEN(`{COLUMN_NAME}`) as current_max_length, {THRESHOLD_VALUE} as previous_max_length FROM {TARGET_SCHEMA}.{TABLE_NAME}, (SELECT MAX(LEN(`{COLUMN_NAME}`)) as max_length FROM {TARGET_SCHEMA}.{TABLE_NAME}) a WHERE LEN(`{COLUMN_NAME}`) = a.max_length AND a.max_length < {THRESHOLD_VALUE} LIMIT 500;'), + ('1299', '1005', 'Test Results', 'Avg_Shift', 'databricks', NULL, 'SELECT AVG(`{COLUMN_NAME}` :: FLOAT) AS current_average FROM {TARGET_SCHEMA}.{TABLE_NAME};'), + ('1300', '1006', 'Test Results', 'Condition_Flag', 'databricks', NULL, 'SELECT * FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {CUSTOM_QUERY} LIMIT 500;'), + ('1301', '1007', 'Test Results', 'Constant', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE `{COLUMN_NAME}` <> {BASELINE_VALUE} GROUP BY `{COLUMN_NAME}` LIMIT 500;'), + ('1302', '1009', 'Test Results', 'Daily_Record_Ct', 'databricks', NULL, 'WITH date_bounds AS( SELECT MIN(`{COLUMN_NAME}`) AS min_date, MAX(`{COLUMN_NAME}`) AS max_date FROM {TARGET_SCHEMA}.{TABLE_NAME}), all_dates AS ( SELECT EXPLODE(SEQUENCE(min_date, max_date, INTERVAL 1 DAY)) AS all_dates FROM date_bounds ), existing_periods AS ( SELECT DISTINCT CAST(`{COLUMN_NAME}` AS DATE) AS period, COUNT(1) AS period_count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY CAST(`{COLUMN_NAME}` AS DATE) ), missing_dates AS ( SELECT d.all_dates AS missing_period FROM all_dates d LEFT JOIN existing_periods e ON d.all_dates = e.period WHERE e.period IS NULL ) SELECT m.missing_period, MAX(e1.period) AS prior_available_date, MAX(e1.period_count) AS prior_available_date_count, MIN(e2.period) AS next_available_date, MAX(e2.period_count) AS 
next_available_date_count FROM missing_dates m LEFT JOIN existing_periods e1 ON e1.period < m.missing_period LEFT JOIN existing_periods e2 ON e2.period > m.missing_period GROUP BY m.missing_period ORDER BY m.missing_period LIMIT 500;'), + ('1303', '1011', 'Test Results', 'Dec_Trunc', 'databricks', NULL, 'SELECT DISTINCT LENGTH(SPLIT_PART(`{COLUMN_NAME}`::STRING, ''.'', 2)) AS decimal_scale, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY decimal_scale LIMIT 500;'), + ('1304', '1012', 'Test Results', 'Distinct_Date_Ct', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE `{COLUMN_NAME}` IS NOT NULL GROUP BY `{COLUMN_NAME}` ORDER BY `{COLUMN_NAME}` DESC LIMIT 500;'), + ('1305', '1013', 'Test Results', 'Distinct_Value_Ct', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE `{COLUMN_NAME}` IS NOT NULL GROUP BY `{COLUMN_NAME}` ORDER BY `{COLUMN_NAME}` DESC LIMIT 500;'), + ('1306', '1014', 'Test Results', 'Email_Format', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE REGEXP_LIKE(`{COLUMN_NAME}`::STRING, ''^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}$'') != 1 GROUP BY `{COLUMN_NAME}` LIMIT 500;'), + ('1307', '1015', 'Test Results', 'Future_Date', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE GREATEST(0, SIGN(`{COLUMN_NAME}`::DATE - ''{TEST_DATE}''::DATE)) > {THRESHOLD_VALUE} GROUP BY `{COLUMN_NAME}` LIMIT 500;'), + ('1308', '1016', 'Test Results', 'Future_Date_1Y', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE GREATEST(0, SIGN(`{COLUMN_NAME}`::DATE - (''{TEST_DATE}''::DATE + 365))) > {THRESHOLD_VALUE} GROUP BY `{COLUMN_NAME}` LIMIT 500;'), + ('1309', '1017', 'Test Results', 'Incr_Avg_Shift', 'databricks', NULL, 'SELECT AVG(`{COLUMN_NAME}` :: FLOAT) AS current_average, SUM(`{COLUMN_NAME}` ::FLOAT) AS current_sum, NULLIF(COUNT(`{COLUMN_NAME}` )::FLOAT, 0) as current_value_count FROM {TARGET_SCHEMA}.{TABLE_NAME};'), + ('1310', '1018', 'Test Results', 'LOV_All', 'databricks', NULL, 'SELECT ARRAY_JOIN(ARRAY_SORT(COLLECT_SET(`{COLUMN_NAME}`)), ''|'') AS aggregated_values FROM {TARGET_SCHEMA}.{TABLE_NAME} HAVING ARRAY_JOIN(ARRAY_SORT(COLLECT_SET(`{COLUMN_NAME}`)), ''|'') <> ''{THRESHOLD_VALUE}'' LIMIT 500;'), + ('1311', '1019', 'Test Results', 'LOV_Match', 'databricks', NULL, 'SELECT DISTINCT NULLIF(`{COLUMN_NAME}`, '''') AS `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE NULLIF(`{COLUMN_NAME}`, '''') NOT IN {BASELINE_VALUE} GROUP BY `{COLUMN_NAME}` LIMIT 500;'), + ('1312', '1020', 'Test Results', 'Min_Date', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE `{COLUMN_NAME}` :: DATE < ''{BASELINE_VALUE}'' :: DATE GROUP BY `{COLUMN_NAME}` LIMIT 500;'), + ('1313', '1021', 'Test Results', 'Min_Val', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, (ABS(`{COLUMN_NAME}`) - ABS({BASELINE_VALUE})) AS difference_from_baseline FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE `{COLUMN_NAME}` < {BASELINE_VALUE} LIMIT 500;'), + ('1314', '1022', 'Test Results', 'Missing_Pct', 'databricks', NULL, 'SELECT * FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE `{COLUMN_NAME}` IS NULL OR `{COLUMN_NAME}` :: VARCHAR(255) = '''' LIMIT 10;'), + ('1315', '1023', 'Test Results', 'Monthly_Rec_Ct', 'databricks', NULL, 'WITH 
daterange AS( SELECT explode( sequence( date_trunc(''month'', (SELECT MIN(`{COLUMN_NAME}`) FROM {TARGET_SCHEMA}.{TABLE_NAME})), date_trunc(''month'', (SELECT MAX(`{COLUMN_NAME}`) FROM {TARGET_SCHEMA}.{TABLE_NAME})), interval 1 month) ) AS all_dates ), existing_periods AS ( SELECT DISTINCT date_trunc(''month'', `{COLUMN_NAME}`) AS period, COUNT(1) AS period_count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY date_trunc(''month'', `{COLUMN_NAME}`) ) SELECT p.missing_period, p.prior_available_month, e.period_count AS prior_available_month_count, p.next_available_month, f.period_count AS next_available_month_count FROM ( SELECT d.all_dates AS missing_period, MAX(b.period) AS prior_available_month, MIN(c.period) AS next_available_month FROM daterange d LEFT JOIN existing_periods a ON d.all_dates = a.period LEFT JOIN existing_periods b ON b.period < d.all_dates LEFT JOIN existing_periods c ON c.period > d.all_dates WHERE a.period IS NULL AND d.all_dates BETWEEN b.period AND c.period GROUP BY d.all_dates ) p LEFT JOIN existing_periods e ON p.prior_available_month = e.period LEFT JOIN existing_periods f ON p.next_available_month = f.period ORDER BY p.missing_period;'), + ('1316', '1024', 'Test Results', 'Outlier_Pct_Above', 'databricks', NULL, 'SELECT ({BASELINE_AVG} + (2*{BASELINE_SD})) AS outlier_threshold, `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE `{COLUMN_NAME}` :: FLOAT > ({BASELINE_AVG} + (2*{BASELINE_SD})) GROUP BY `{COLUMN_NAME}` ORDER BY `{COLUMN_NAME}` DESC;'), + ('1317', '1025', 'Test Results', 'Outlier_Pct_Below', 'databricks', NULL, 'SELECT ({BASELINE_AVG} - (2*{BASELINE_SD})) AS outlier_threshold, `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE `{COLUMN_NAME}` :: FLOAT < ({BASELINE_AVG} - (2*{BASELINE_SD})) GROUP BY `{COLUMN_NAME}` ORDER BY `{COLUMN_NAME}` DESC;'), + ('1318', '1026', 'Test Results', 'Pattern_Match', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE REGEXP_LIKE(NULLIF(`{COLUMN_NAME}`::STRING, ''''),''{BASELINE_VALUE}'') != 1 GROUP BY `{COLUMN_NAME}`;'), + ('1319', '1028', 'Test Results', 'Recency', 'databricks', NULL, 'SELECT DISTINCT col AS latest_date_available, ''{TEST_DATE}'' :: DATE as test_run_date FROM (SELECT MAX(`{COLUMN_NAME}`) AS col FROM {TARGET_SCHEMA}.{TABLE_NAME}) WHERE ABS(<%DATEDIFF_DAY;col;''{TEST_DATE}''::DATE%>) > {THRESHOLD_VALUE};'), + ('1320', '1030', 'Test Results', 'Required', 'databricks', NULL, 'SELECT * FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE `{COLUMN_NAME}` IS NULL LIMIT 500;'), + ('1321', '1031', 'Test Results', 'Row_Ct', 'databricks', NULL, 'WITH CTE AS (SELECT COUNT(*) AS current_count FROM {TARGET_SCHEMA}.{TABLE_NAME}) SELECT current_count, ABS(ROUND(100 *(current_count - {THRESHOLD_VALUE}) :: FLOAT / {THRESHOLD_VALUE} :: FLOAT,2)) AS row_count_pct_decrease FROM cte WHERE current_count < {THRESHOLD_VALUE};'), + ('1322', '1032', 'Test Results', 'Row_Ct_Pct', 'databricks', NULL, 'WITH CTE AS (SELECT COUNT(*) AS current_count FROM {TARGET_SCHEMA}.{TABLE_NAME}) SELECT current_count, {BASELINE_CT} AS baseline_count, ABS(ROUND(100 * (current_count - {BASELINE_CT}) :: FLOAT / {BASELINE_CT} :: FLOAT,2)) AS row_count_pct_difference FROM cte;'), + ('1323', '1033', 'Test Results', 'Street_Addr_Pattern', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE REGEXP_LIKE(`{COLUMN_NAME}`::STRING, 
''^[0-9]{1,5}[a-zA-Z]?\\s\\w{1,5}\\.?\\s?\\w*\\s?\\w*\\s[a-zA-Z]{1,6}\\.?\\s?[0-9]{0,5}[A-Z]{0,1}$'') != 1 GROUP BY `{COLUMN_NAME}` ORDER BY count DESC LIMIT 500;'), + ('1324', '1036', 'Test Results', 'US_State', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE NULLIF(`{COLUMN_NAME}`, '''') NOT IN (''AL'',''AK'',''AS'',''AZ'',''AR'',''CA'',''CO'',''CT'',''DE'',''DC'',''FM'',''FL'',''GA'',''GU'',''HI'',''ID'',''IL'',''IN'',''IA'',''KS'',''KY'',''LA'',''ME'',''MH'',''MD'',''MA'',''MI'',''MN'',''MS'',''MO'',''MT'',''NE'',''NV'',''NH'',''NJ'',''NM'',''NY'',''NC'',''ND'',''MP'',''OH'',''OK'',''OR'',''PW'',''PA'',''PR'',''RI'',''SC'',''SD'',''TN'',''TX'',''UT'',''VT'',''VI'',''VA'',''WA'',''WV'',''WI'',''WY'',''AE'',''AP'',''AA'') GROUP BY `{COLUMN_NAME}` LIMIT 500;'), + ('1325', '1034', 'Test Results', 'Unique', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY `{COLUMN_NAME}` HAVING count > 1 ORDER BY count DESC LIMIT 500;'), + ('1326', '1035', 'Test Results', 'Unique_Pct', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY `{COLUMN_NAME}` ORDER BY count DESC LIMIT 500;'), + ('1327', '1037', 'Test Results', 'Weekly_Rec_Ct', 'databricks', NULL, 'WITH daterange AS( SELECT explode(sequence( date_trunc(''week'', (SELECT min(`{COLUMN_NAME}`) FROM {TARGET_SCHEMA}.{TABLE_NAME})), date_trunc(''week'', (SELECT max(`{COLUMN_NAME}`) FROM {TARGET_SCHEMA}.{TABLE_NAME})), interval 1 week)) AS all_dates ), existing_periods AS ( SELECT DISTINCT date_trunc(''week'', `{COLUMN_NAME}`) AS period, COUNT(1) AS period_count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY date_trunc(''week'', `{COLUMN_NAME}`) ) SELECT p.missing_period, p.prior_available_week, e.period_count AS prior_available_week_count, p.next_available_week, f.period_count AS next_available_week_count FROM ( SELECT d.all_dates AS missing_period, MAX(b.period) AS prior_available_week, MIN(c.period) AS next_available_week FROM daterange d LEFT JOIN existing_periods a ON d.all_dates = a.period LEFT JOIN existing_periods b ON b.period < d.all_dates LEFT JOIN existing_periods c ON c.period > d.all_dates WHERE a.period IS NULL AND d.all_dates BETWEEN b.period AND c.period GROUP BY d.all_dates ) p LEFT JOIN existing_periods e ON p.prior_available_week = e.period LEFT JOIN existing_periods f ON p.next_available_week = f.period ORDER BY p.missing_period;'), + ('1328', '1040', 'Test Results', 'Variability_Increase', 'databricks', NULL, 'SELECT STDDEV(CAST(`{COLUMN_NAME}` AS FLOAT)) as current_standard_deviation FROM {TARGET_SCHEMA}.{TABLE_NAME};'), + ('1329', '1041', 'Test Results', 'Variability_Decrease', 'databricks', NULL, 'SELECT STDDEV(CAST(`{COLUMN_NAME}` AS FLOAT)) as current_standard_deviation FROM {TARGET_SCHEMA}.{TABLE_NAME};'), + + ('1230', '1027', 'Profile Anomaly' , 'Variant_Coded_Values', 'databricks', NULL, 'SELECT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE LOWER(`{COLUMN_NAME}`) IN (SELECT TRIM(value) FROM (SELECT EXPLODE(SPLIT(SUBSTRING(''{DETAIL_EXPRESSION}'', INSTR(''{DETAIL_EXPRESSION}'', '':'') + 2), ''\\|'')) AS value)) GROUP BY `{COLUMN_NAME}`;'), + ('1330', '1043', 'Test Results', 'Valid_Characters', 'databricks', NULL, 'SELECT `{COLUMN_NAME}`, COUNT(*) AS record_ct FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE REGEXP_LIKE(`{COLUMN_NAME}`, ''.*[[:cntrl:]].*'') OR `{COLUMN_NAME}`::STRING LIKE '' %'' OR `{COLUMN_NAME}`::STRING 
LIKE ''''''%'''''' OR `{COLUMN_NAME}`::STRING LIKE ''"%"'' GROUP BY `{COLUMN_NAME}` ORDER BY record_ct DESC LIMIT 20;'), + ('1331', '1044', 'Test Results', 'Valid_US_Zip', 'databricks', NULL, 'SELECT `{COLUMN_NAME}`, COUNT(*) AS record_ct FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE TRANSLATE(`{COLUMN_NAME}`,''012345678'',''999999999'') NOT IN (''99999'', ''999999999'', ''99999-9999'') GROUP BY `{COLUMN_NAME}` ORDER BY record_ct DESC LIMIT 20;'), + ('1332', '1045', 'Test Results', 'Valid_US_Zip3', 'databricks', NULL, 'SELECT `{COLUMN_NAME}`, COUNT(*) AS record_ct FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE TRANSLATE(`{COLUMN_NAME}`,''012345678'',''999999999'') <> ''999'' GROUP BY `{COLUMN_NAME}` ORDER BY record_ct DESC LIMIT 20;'), + + ('1333', '1500', 'Test Results', 'Aggregate_Balance', 'databricks', NULL, 'SELECT * + FROM ( SELECT {GROUPBY_NAMES}, SUM(TOTAL) AS total, SUM(MATCH_TOTAL) AS MATCH_TOTAL + FROM + ( SELECT {GROUPBY_NAMES}, {COLUMN_NAME_NO_QUOTES} AS total, NULL AS match_total + FROM {TARGET_SCHEMA}.{TABLE_NAME} + WHERE {SUBSET_CONDITION} + GROUP BY {GROUPBY_NAMES} + {HAVING_CONDITION} + UNION ALL + SELECT {MATCH_GROUPBY_NAMES}, NULL AS total, {MATCH_COLUMN_NAMES} AS match_total + FROM {MATCH_SCHEMA_NAME}.{MATCH_TABLE_NAME} + WHERE {MATCH_SUBSET_CONDITION} + GROUP BY {MATCH_GROUPBY_NAMES} + {MATCH_HAVING_CONDITION} ) a + GROUP BY {GROUPBY_NAMES} ) s + WHERE total <> match_total OR (total IS NOT NULL AND match_total IS NULL) OR (total IS NULL AND match_total IS NOT NULL) +ORDER BY {GROUPBY_NAMES};'), + ('1334', '1501', 'Test Results', 'Aggregate_Minimum', 'databricks', NULL, 'SELECT * +FROM ( SELECT {GROUPBY_NAMES}, SUM(TOTAL) as total, SUM(MATCH_TOTAL) as MATCH_TOTAL + FROM + ( SELECT {GROUPBY_NAMES}, {COLUMN_NAME_NO_QUOTES} as total, NULL as match_total + FROM {TARGET_SCHEMA}.{TABLE_NAME} + WHERE {SUBSET_CONDITION} + GROUP BY {GROUPBY_NAMES} + {HAVING_CONDITION} + UNION ALL + SELECT {MATCH_GROUPBY_NAMES}, NULL as total, {MATCH_COLUMN_NAMES} as match_total + FROM {MATCH_SCHEMA_NAME}.{MATCH_TABLE_NAME} + WHERE {MATCH_SUBSET_CONDITION} + GROUP BY {MATCH_GROUPBY_NAMES} + {MATCH_HAVING_CONDITION} ) a + GROUP BY {GROUPBY_NAMES} ) s + WHERE total < match_total OR (total IS NULL AND match_total IS NOT NULL) +ORDER BY {GROUPBY_NAMES};'), + ('1335', '1502', 'Test Results', 'Combo_Match', 'databricks', NULL, 'SELECT * + FROM ( SELECT {COLUMN_NAME_NO_QUOTES} + FROM {TARGET_SCHEMA}.{TABLE_NAME} + WHERE {SUBSET_CONDITION} + GROUP BY {COLUMN_NAME_NO_QUOTES} + {HAVING_CONDITION} + EXCEPT + SELECT {MATCH_GROUPBY_NAMES} + FROM {MATCH_SCHEMA_NAME}.{MATCH_TABLE_NAME} + WHERE {MATCH_SUBSET_CONDITION} + GROUP BY {MATCH_GROUPBY_NAMES} + {MATCH_HAVING_CONDITION} + ) test +ORDER BY {COLUMN_NAME_NO_QUOTES};'), + ('1336', '1503', 'Test Results', 'Distribution_Shift', 'databricks', NULL, 'WITH latest_ver + AS ( SELECT {CONCAT_COLUMNS} as category, + COUNT(*)::FLOAT / SUM(COUNT(*)) OVER ()::FLOAT AS pct_of_total + FROM {TARGET_SCHEMA}.{TABLE_NAME} v1 + WHERE {SUBSET_CONDITION} + GROUP BY {COLUMN_NAME_NO_QUOTES} ), +older_ver + AS ( SELECT {CONCAT_MATCH_GROUPBY} as category, + COUNT(*)::FLOAT / SUM(COUNT(*)) OVER ()::FLOAT AS pct_of_total + FROM {MATCH_SCHEMA_NAME}.{TABLE_NAME} v2 + WHERE {MATCH_SUBSET_CONDITION} + GROUP BY {MATCH_GROUPBY_NAMES} ) +SELECT COALESCE(l.category, o.category) AS category, + o.pct_of_total AS old_pct, + l.pct_of_total AS new_pct + FROM latest_ver l +FULL JOIN older_ver o + ON (l.category = o.category) +ORDER BY COALESCE(l.category, o.category)'), + 
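+ -- Illustrative note, not part of the seeded rows: the {TARGET_SCHEMA}, {TABLE_NAME} and
+ -- {COLUMN_NAME} placeholders are substituted before a lookup runs (see the REPLACE chain in
+ -- ex_cat_build_agg_table_tests.sql below). For a hypothetical column `zip` on a table
+ -- demo.customers, the Valid_US_Zip lookup above would execute as:
+ --   SELECT `zip`, COUNT(*) AS record_ct FROM demo.customers
+ --   WHERE TRANSLATE(`zip`,'012345678','999999999') NOT IN ('99999', '999999999', '99999-9999')
+ --   GROUP BY `zip` ORDER BY record_ct DESC LIMIT 20;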
('1337', '1509', 'Test Results', 'Timeframe_Combo_Match', 'databricks', NULL, ' ( +SELECT ''Prior Timeframe'' as missing_from, {COLUMN_NAME} +FROM {TARGET_SCHEMA}.{TABLE_NAME} +WHERE {SUBSET_CONDITION} + AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - {WINDOW_DAYS} +EXCEPT +SELECT ''Prior Timeframe'' as missing_from, {COLUMN_NAME} +FROM {TARGET_SCHEMA}.{TABLE_NAME} +WHERE {SUBSET_CONDITION} + AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - 2 * {WINDOW_DAYS} + AND {WINDOW_DATE_COLUMN} < (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - {WINDOW_DAYS} +) +UNION ALL +( +SELECT ''Latest Timeframe'' as missing_from, {COLUMN_NAME} +FROM {TARGET_SCHEMA}.{TABLE_NAME} +WHERE {SUBSET_CONDITION} + AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - 2 * {WINDOW_DAYS} + AND {WINDOW_DATE_COLUMN} < (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - {WINDOW_DAYS} + EXCEPT +SELECT ''Latest Timeframe'' as missing_from, {COLUMN_NAME} +FROM {TARGET_SCHEMA}.{TABLE_NAME} +WHERE {SUBSET_CONDITION} + AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - {WINDOW_DAYS} +)'), + ('1338', '1100', 'Profile Anomaly', 'Potential_PII', 'databricks', NULL, 'SELECT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY `{COLUMN_NAME}` ORDER BY `{COLUMN_NAME}` DESC LIMIT 500;') + + ; diff --git a/testgen/template/dbsetup/060_create_standard_views.sql b/testgen/template/dbsetup/060_create_standard_views.sql index b244e2c0..e93f4f77 100644 --- a/testgen/template/dbsetup/060_create_standard_views.sql +++ b/testgen/template/dbsetup/060_create_standard_views.sql @@ -284,7 +284,8 @@ INNER JOIN table_groups tg INNER JOIN data_table_chars dtc ON (dcc.table_id = dtc.table_id) INNER JOIN profiling_runs pr - ON (tg.last_complete_profile_run_id = pr.id); + ON (tg.last_complete_profile_run_id = pr.id) +WHERE dcc.drop_date IS NULL; DROP VIEW IF EXISTS v_dq_profile_scoring_latest_by_dimension; @@ -327,7 +328,8 @@ LEFT JOIN (profile_anomaly_results p ON (pr.profile_run_id = p.profile_run_id AND pr.column_name = p.column_name AND pr.table_name = p.table_name) -WHERE p.disposition = 'Confirmed' OR p.disposition IS NULL +WHERE (p.disposition = 'Confirmed' OR p.disposition IS NULL) + AND dcc.drop_date IS NULL GROUP BY pr.profile_run_id, pr.table_groups_id, pr.table_name, pr.column_name, tg.table_groups_name, tg.data_location, @@ -388,6 +390,7 @@ LEFT JOIN data_column_chars dcc WHERE r.dq_prevalence IS NOT NULL AND s.dq_score_exclude = FALSE AND (r.disposition IS NULL OR r.disposition = 'Confirmed') + AND dcc.drop_date IS NULL GROUP BY r.table_groups_id, r.table_name, r.column_names, r.test_suite_id, r.test_run_id, tg.table_groups_name, dcc.data_source, dtc.data_source, tg.data_source, tg.data_location, dcc.data_source, dtc.data_source, @@ -456,6 +459,7 @@ LEFT JOIN data_column_chars dcc ON (r.table_groups_id = dcc.table_groups_id AND r.table_name = dcc.table_name AND r.column_names = dcc.column_name) +WHERE dcc.drop_date IS NULL GROUP BY r.table_groups_id, r.test_run_id, r.test_suite_id, tg.table_groups_name, dcc.data_source, dtc.data_source, tg.data_source, tg.data_location, dcc.data_source, dtc.data_source, @@ -467,3 +471,123 @@ GROUP BY r.table_groups_id, r.test_run_id, r.test_suite_id, dcc.data_product, dtc.data_product, tg.data_product, dcc.functional_data_type, r.dq_dimension, 
r.test_time, r.table_name, dcc.column_name, tg.project_code; + + +-- ============================================================================== +-- | Scoring History Views +-- ============================================================================== +CREATE OR REPLACE VIEW v_dq_profile_scoring_history_by_column +AS +SELECT tg.project_code, + sr.definition_id, + sr.score_history_cutoff_time, + pr.table_groups_id, + pr.profile_run_id, + tg.table_groups_name, + tg.data_location, + COALESCE(dcc.data_source, dtc.data_source, tg.data_source) as data_source, + COALESCE(dcc.source_system, dtc.source_system, tg.source_system) as source_system, + COALESCE(dcc.source_process, dtc.source_process, tg.source_process) as source_process, + COALESCE(dcc.business_domain, dtc.business_domain, tg.business_domain) as business_domain, + COALESCE(dcc.stakeholder_group, dtc.stakeholder_group, tg.stakeholder_group) as stakeholder_group, + COALESCE(dcc.transform_level, dtc.transform_level, tg.transform_level) as transform_level, + COALESCE(dcc.critical_data_element, dtc.critical_data_element) as critical_data_element, + COALESCE(dcc.data_product, dtc.data_product, tg.data_product) as data_product, + dcc.functional_data_type as semantic_data_type, + pr.table_name, + pr.column_name, + pr.run_date, + MAX(pr.record_ct) as record_ct, + COUNT(p.anomaly_id) as issue_ct, + SUM_LN(COALESCE(p.dq_prevalence, 0.0)) as good_data_pct + FROM profile_results pr +INNER JOIN score_history_latest_runs sr + ON (pr.profile_run_id = sr.last_profiling_run_id) +INNER JOIN data_column_chars dcc + ON (pr.table_groups_id = dcc.table_groups_id + AND pr.table_name = dcc.table_name + AND pr.column_name = dcc.column_name) +INNER JOIN data_table_chars dtc + ON (dcc.table_id = dtc.table_id) +INNER JOIN table_groups tg + ON (pr.table_groups_id = tg.id) +LEFT JOIN (profile_anomaly_results p + INNER JOIN profile_anomaly_types t + ON p.anomaly_id = t.id) + ON (pr.profile_run_id = p.profile_run_id + AND pr.column_name = p.column_name + AND pr.table_name = p.table_name) +WHERE p.disposition = 'Confirmed' OR p.disposition IS NULL +GROUP BY pr.profile_run_id, + sr.definition_id, + sr.score_history_cutoff_time, + pr.table_groups_id, + pr.table_name, pr.column_name, + tg.table_groups_name, tg.data_location, + COALESCE(dcc.data_source, dtc.data_source, tg.data_source), + COALESCE(dcc.source_system, dtc.source_system, tg.source_system), + COALESCE(dcc.source_process, dtc.source_process, tg.source_process), + COALESCE(dcc.business_domain, dtc.business_domain, tg.business_domain), + COALESCE(dcc.stakeholder_group, dtc.stakeholder_group, tg.stakeholder_group), + COALESCE(dcc.transform_level, dtc.transform_level, tg.transform_level), + COALESCE(dcc.critical_data_element, dtc.critical_data_element), + COALESCE(dcc.data_product, dtc.data_product, tg.data_product), + dcc.functional_data_type, pr.run_date, + tg.project_code ; + +CREATE OR REPLACE VIEW v_dq_test_scoring_history_by_column +AS +SELECT + tg.project_code, + sr.definition_id, + sr.score_history_cutoff_time, + r.table_groups_id, + r.test_suite_id, + r.test_run_id, + tg.table_groups_name, + tg.data_location, + COALESCE(dcc.data_source, dtc.data_source, tg.data_source) as data_source, + COALESCE(dcc.source_system, dtc.source_system, tg.source_system) as source_system, + COALESCE(dcc.source_process, dtc.source_process, tg.source_process) as source_process, + COALESCE(dcc.business_domain, dtc.business_domain, tg.business_domain) as business_domain, + COALESCE(dcc.stakeholder_group, 
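+       -- each attribute here falls back column -> table -> table group: e.g. a column with
+       -- no stakeholder_group of its own inherits the table's value, then the table group's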
dtc.stakeholder_group, tg.stakeholder_group) as stakeholder_group, + COALESCE(dcc.transform_level, dtc.transform_level, tg.transform_level) as transform_level, + COALESCE(dcc.critical_data_element, dtc.critical_data_element) as critical_data_element, + COALESCE(dcc.data_product, dtc.data_product, tg.data_product) as data_product, + dcc.functional_data_type as semantic_data_type, + r.test_time, r.table_name, r.column_names as column_name, + COUNT(*) as test_ct, + SUM(r.result_code) as passed_ct, + SUM(1 - r.result_code) as issue_ct, + MAX(r.dq_record_ct) as dq_record_ct, + SUM_LN(COALESCE(r.dq_prevalence, 0.0)) as good_data_pct + FROM test_results r +INNER JOIN test_suites s + ON (r.test_suite_id = s.id) +INNER JOIN score_history_latest_runs sr + ON (r.test_run_id = sr.last_test_run_id) +INNER JOIN table_groups tg + ON r.table_groups_id = tg.id +LEFT JOIN data_table_chars dtc + ON (r.table_groups_id = dtc.table_groups_id + AND r.table_name = dtc.table_name) +LEFT JOIN data_column_chars dcc + ON (r.table_groups_id = dcc.table_groups_id + AND r.table_name = dcc.table_name + AND r.column_names = dcc.column_name) + WHERE r.dq_prevalence IS NOT NULL + AND s.dq_score_exclude = FALSE + AND (r.disposition IS NULL OR r.disposition = 'Confirmed') +GROUP BY sr.definition_id, + sr.score_history_cutoff_time, + r.table_groups_id, r.table_name, r.column_names, + r.test_suite_id, r.test_run_id, tg.table_groups_name, dcc.data_source, dtc.data_source, + tg.data_source, tg.data_location, dcc.data_source, dtc.data_source, + tg.data_source, dcc.source_system, dtc.source_system, tg.source_system, + dcc.source_process, dtc.source_process, tg.source_process, dcc.business_domain, + dtc.business_domain, tg.business_domain, dcc.stakeholder_group, dtc.stakeholder_group, + tg.stakeholder_group, dcc.transform_level, dtc.transform_level, tg.transform_level, + dcc.critical_data_element, dtc.critical_data_element, + dcc.data_product, dtc.data_product, tg.data_product, + dcc.functional_data_type, r.test_time, + tg.project_code; diff --git a/testgen/template/dbsetup/075_grant_role_rights.sql b/testgen/template/dbsetup/075_grant_role_rights.sql index 0bcc89a3..33e1f98b 100644 --- a/testgen/template/dbsetup/075_grant_role_rights.sql +++ b/testgen/template/dbsetup/075_grant_role_rights.sql @@ -35,7 +35,9 @@ GRANT SELECT, INSERT, DELETE, UPDATE ON {SCHEMA_NAME}.score_definitions, {SCHEMA_NAME}.score_definition_filters, {SCHEMA_NAME}.score_definition_results, - {SCHEMA_NAME}.score_definition_results_breakdown + {SCHEMA_NAME}.score_definition_results_breakdown, + {SCHEMA_NAME}.score_definition_results_history, + {SCHEMA_NAME}.score_history_latest_runs TO testgen_execute_role; diff --git a/testgen/template/dbupgrade/0128_incremental_upgrade.sql b/testgen/template/dbupgrade/0128_incremental_upgrade.sql new file mode 100644 index 00000000..6bde14a8 --- /dev/null +++ b/testgen/template/dbupgrade/0128_incremental_upgrade.sql @@ -0,0 +1,31 @@ +SET SEARCH_PATH TO {SCHEMA_NAME}; + +ALTER TABLE connections ADD COLUMN http_path VARCHAR(200); + +DROP FUNCTION IF EXISTS fn_PrepColumnName; + +CREATE OR REPLACE FUNCTION {SCHEMA_NAME}.fn_quote_literal_escape(var_value varchar, sql_flavor varchar) RETURNS varchar + LANGUAGE plpgsql +AS +$$ +DECLARE + escaped_value varchar; + lower_case_sql_flavor varchar; +BEGIN + lower_case_sql_flavor := LOWER(sql_flavor); + + IF lower_case_sql_flavor IN ('postgres', 'postgresql') THEN + escaped_value := QUOTE_LITERAL(var_value); + ELSIF lower_case_sql_flavor IN ('redshift', 'snowflake') THEN + escaped_value := 
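+        -- QUOTE_LITERAL can prepend an E marker (e.g. E'a\\b') when the value needs escapes;
+        -- Redshift and Snowflake expect a plain quoted literal, so the leading E is trimmed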
TRIM(LEADING 'E' FROM QUOTE_LITERAL(var_value)); + ELSIF lower_case_sql_flavor = 'mssql' THEN + escaped_value := '''' || REPLACE(var_value, '''', '''''') || ''''; + ELSIF lower_case_sql_flavor = 'databricks' THEN + escaped_value := '''' || REPLACE(REPLACE(var_value, '\', '\\'), '''', '\''') || ''''; + ELSE + RAISE EXCEPTION 'Invalid sql_flavor name: %', sql_flavor; + END IF; + + RETURN escaped_value; +END; +$$; diff --git a/testgen/template/dbupgrade/0129_incremental_upgrade.sql b/testgen/template/dbupgrade/0129_incremental_upgrade.sql new file mode 100644 index 00000000..20ebdef7 --- /dev/null +++ b/testgen/template/dbupgrade/0129_incremental_upgrade.sql @@ -0,0 +1,13 @@ +SET SEARCH_PATH TO {SCHEMA_NAME}; + +CREATE TABLE IF NOT EXISTS score_definition_results_history ( + definition_id UUID CONSTRAINT score_definitions_filters_score_definitions_definition_id_fk + REFERENCES score_definitions (id) + ON DELETE CASCADE, + category TEXT NOT NULL, + score DOUBLE PRECISION DEFAULT NULL, + last_run_time TIMESTAMP NOT NULL +); + +CREATE INDEX sdrh_def_last_run + ON score_definition_results_history(definition_id, last_run_time); diff --git a/testgen/template/dbupgrade/0130_incremental_upgrade.sql b/testgen/template/dbupgrade/0130_incremental_upgrade.sql new file mode 100644 index 00000000..03376608 --- /dev/null +++ b/testgen/template/dbupgrade/0130_incremental_upgrade.sql @@ -0,0 +1,32 @@ +SET SEARCH_PATH TO {SCHEMA_NAME}; + +CREATE OR REPLACE VIEW v_dq_profile_scoring_latest_by_column +AS +SELECT + tg.project_code, + dcc.table_groups_id, + tg.last_complete_profile_run_id as profile_run_id, + tg.table_groups_name, + tg.data_location, + COALESCE(dcc.data_source, dtc.data_source, tg.data_source) as data_source, + COALESCE(dcc.source_system, dtc.source_system, tg.source_system) as source_system, + COALESCE(dcc.source_process, dtc.source_process, tg.source_process) as source_process, + COALESCE(dcc.business_domain, dtc.business_domain, tg.business_domain) as business_domain, + COALESCE(dcc.stakeholder_group, dtc.stakeholder_group, tg.stakeholder_group) as stakeholder_group, + COALESCE(dcc.transform_level, dtc.transform_level, tg.transform_level) as transform_level, + COALESCE(dcc.critical_data_element, dtc.critical_data_element) as critical_data_element, + COALESCE(dcc.data_product, dtc.data_product, tg.data_product) as data_product, + dcc.functional_data_type as semantic_data_type, + dtc.table_name, dcc.column_name, + pr.profiling_starttime as profiling_run_date, + dcc.valid_profile_issue_ct as issue_ct, + dtc.last_profile_record_ct as record_ct, + dcc.dq_score_profiling AS good_data_pct + FROM data_column_chars dcc +INNER JOIN table_groups tg + ON (dcc.table_groups_id = tg.id) +INNER JOIN data_table_chars dtc + ON (dcc.table_id = dtc.table_id) +INNER JOIN profiling_runs pr + ON (tg.last_complete_profile_run_id = pr.id) +WHERE dcc.drop_date IS NULL; diff --git a/testgen/template/dbupgrade/0131_incremental_upgrade.sql b/testgen/template/dbupgrade/0131_incremental_upgrade.sql new file mode 100644 index 00000000..0932df0f --- /dev/null +++ b/testgen/template/dbupgrade/0131_incremental_upgrade.sql @@ -0,0 +1,20 @@ +SET SEARCH_PATH TO {SCHEMA_NAME}; + +CREATE TABLE score_history_latest_runs ( + definition_id UUID, + score_history_cutoff_time TIMESTAMP, + table_groups_id UUID, + last_profiling_run_id UUID, + test_suite_id UUID, + last_test_run_id UUID +); + +CREATE INDEX shlast_runs_def_cutoff + ON score_history_latest_runs(definition_id, score_history_cutoff_time); + +CREATE INDEX shlast_runs_pro_run + ON 
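+    -- (this index and shlast_runs_tst_run below back the scoring history views'
+    --  joins on last_profiling_run_id and last_test_run_id)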
score_history_latest_runs(last_profiling_run_id); + +CREATE INDEX shlast_runs_tst_run + ON score_history_latest_runs(last_test_run_id); + diff --git a/testgen/template/dbupgrade/0132_incremental_upgrade.sql b/testgen/template/dbupgrade/0132_incremental_upgrade.sql new file mode 100644 index 00000000..c62758e0 --- /dev/null +++ b/testgen/template/dbupgrade/0132_incremental_upgrade.sql @@ -0,0 +1,105 @@ +SET SEARCH_PATH TO {SCHEMA_NAME}; + +-- Pre-populate the score_history_latest_runs table with existing profiling and test runs +DO $$ +DECLARE + current_project VARCHAR(30); + current_definition UUID; + cutoff_time TIMESTAMP; +BEGIN + -- For each project + FOR current_project IN SELECT project_code FROM projects LOOP + -- and, for each score definition within this project + FOR current_definition IN SELECT id FROM score_definitions WHERE project_code = current_project LOOP + -- iterate over all existing profiling cutoff times for the project + FOR cutoff_time IN SELECT profiling_starttime AS time_ FROM profiling_runs WHERE project_code = current_project LOOP + -- delete existing cutoff times + DELETE FROM score_history_latest_runs + WHERE definition_id = current_definition + AND score_history_cutoff_time = cutoff_time; + + -- and insert the latest profiling runs + WITH ranked_profiling AS ( + SELECT + project_code, + table_groups_id, + id as profiling_run_id, + ROW_NUMBER() OVER (PARTITION BY table_groups_id ORDER BY profiling_starttime DESC) as rank + FROM profiling_runs r + WHERE project_code = current_project + AND profiling_starttime <= cutoff_time + AND r.status = 'Complete' + ) + INSERT INTO score_history_latest_runs + (definition_id, score_history_cutoff_time, table_groups_id, last_profiling_run_id) + SELECT current_definition as definition_id, cutoff_time as score_history_cutoff_time, table_groups_id, profiling_run_id + FROM ranked_profiling + WHERE rank = 1; + + -- and insert the latest test runs + WITH ranked_test_runs AS ( + SELECT + r.test_suite_id, + r.id as test_run_id, + ROW_NUMBER() OVER (PARTITION BY test_suite_id ORDER BY test_starttime DESC) as rank + FROM test_runs r + INNER JOIN test_suites s + ON (r.test_suite_id = s.id) + WHERE s.project_code = current_project + AND r.test_starttime <= cutoff_time + AND r.status = 'Complete' + ) + INSERT INTO score_history_latest_runs + (definition_id, score_history_cutoff_time, test_suite_id, last_test_run_id) + SELECT current_definition AS definition_id, cutoff_time AS score_history_cutoff_time, test_suite_id, test_run_id + FROM ranked_test_runs + WHERE rank = 1; + END LOOP; + + -- also, iterate over all existing tests cutoff times for the project + FOR cutoff_time IN SELECT test_starttime AS time_ FROM test_runs AS tr INNER JOIN test_suites AS ts ON (ts.id = tr.test_suite_id) WHERE ts.project_code = current_project LOOP + -- delete existing cutoff times + DELETE FROM score_history_latest_runs + WHERE definition_id = current_definition + AND score_history_cutoff_time = cutoff_time; + + -- and insert the latest profiling runs + WITH ranked_profiling AS ( + SELECT + project_code, + table_groups_id, + id as profiling_run_id, + ROW_NUMBER() OVER (PARTITION BY table_groups_id ORDER BY profiling_starttime DESC) as rank + FROM profiling_runs r + WHERE project_code = current_project + AND profiling_starttime <= cutoff_time + AND r.status = 'Complete' + ) + INSERT INTO score_history_latest_runs + (definition_id, score_history_cutoff_time, table_groups_id, last_profiling_run_id) + SELECT current_definition as definition_id, cutoff_time as 
score_history_cutoff_time, table_groups_id, profiling_run_id + FROM ranked_profiling + WHERE rank = 1; + + -- and insert the latest test runs + WITH ranked_test_runs AS ( + SELECT + r.test_suite_id, + r.id as test_run_id, + ROW_NUMBER() OVER (PARTITION BY test_suite_id ORDER BY test_starttime DESC) as rank + FROM test_runs r + INNER JOIN test_suites s + ON (r.test_suite_id = s.id) + WHERE s.project_code = current_project + AND r.test_starttime <= cutoff_time + AND r.status = 'Complete' + ) + INSERT INTO score_history_latest_runs + (definition_id, score_history_cutoff_time, test_suite_id, last_test_run_id) + SELECT current_definition AS definition_id, cutoff_time AS score_history_cutoff_time, test_suite_id, test_run_id + FROM ranked_test_runs + WHERE rank = 1; + END LOOP; + END LOOP; + END LOOP; +END $$; diff --git a/testgen/template/dbupgrade/0133_incremental_upgrade.sql b/testgen/template/dbupgrade/0133_incremental_upgrade.sql new file mode 100644 index 00000000..715c7160 --- /dev/null +++ b/testgen/template/dbupgrade/0133_incremental_upgrade.sql @@ -0,0 +1,9 @@ +SET SEARCH_PATH TO {SCHEMA_NAME}; + +UPDATE auth_users + SET role = 'data_quality' + WHERE role = 'edit'; + +UPDATE auth_users + SET role = 'analyst' + WHERE role = 'read'; diff --git a/testgen/template/dbupgrade/0134_incremental_upgrade.sql b/testgen/template/dbupgrade/0134_incremental_upgrade.sql new file mode 100644 index 00000000..47872b80 --- /dev/null +++ b/testgen/template/dbupgrade/0134_incremental_upgrade.sql @@ -0,0 +1,192 @@ +SET SEARCH_PATH TO {SCHEMA_NAME}; + +CREATE OR REPLACE VIEW v_dq_profile_scoring_history_by_column +AS +SELECT tg.project_code, + sr.definition_id, + sr.score_history_cutoff_time, + pr.table_groups_id, + pr.profile_run_id, + tg.table_groups_name, + tg.data_location, + COALESCE(dcc.data_source, dtc.data_source, tg.data_source) as data_source, + COALESCE(dcc.source_system, dtc.source_system, tg.source_system) as source_system, + COALESCE(dcc.source_process, dtc.source_process, tg.source_process) as source_process, + COALESCE(dcc.business_domain, dtc.business_domain, tg.business_domain) as business_domain, + COALESCE(dcc.stakeholder_group, dtc.stakeholder_group, tg.stakeholder_group) as stakeholder_group, + COALESCE(dcc.transform_level, dtc.transform_level, tg.transform_level) as transform_level, + COALESCE(dcc.critical_data_element, dtc.critical_data_element) as critical_data_element, + COALESCE(dcc.data_product, dtc.data_product, tg.data_product) as data_product, + dcc.functional_data_type as semantic_data_type, + pr.table_name, + pr.column_name, + pr.run_date, + MAX(pr.record_ct) as record_ct, + COUNT(p.anomaly_id) as issue_ct, + SUM_LN(COALESCE(p.dq_prevalence, 0.0)) as good_data_pct + FROM profile_results pr +INNER JOIN score_history_latest_runs sr + ON (pr.profile_run_id = sr.last_profiling_run_id) +INNER JOIN data_column_chars dcc + ON (pr.table_groups_id = dcc.table_groups_id + AND pr.table_name = dcc.table_name + AND pr.column_name = dcc.column_name) +INNER JOIN data_table_chars dtc + ON (dcc.table_id = dtc.table_id) +INNER JOIN table_groups tg + ON (pr.table_groups_id = tg.id) +LEFT JOIN (profile_anomaly_results p + INNER JOIN profile_anomaly_types t + ON p.anomaly_id = t.id) + ON (pr.profile_run_id = p.profile_run_id + AND pr.column_name = p.column_name + AND pr.table_name = p.table_name) +WHERE p.disposition = 'Confirmed' OR p.disposition IS NULL +GROUP BY pr.profile_run_id, + sr.definition_id, + sr.score_history_cutoff_time, + pr.table_groups_id, + pr.table_name, pr.column_name, + 
tg.table_groups_name, tg.data_location, + COALESCE(dcc.data_source, dtc.data_source, tg.data_source), + COALESCE(dcc.source_system, dtc.source_system, tg.source_system), + COALESCE(dcc.source_process, dtc.source_process, tg.source_process), + COALESCE(dcc.business_domain, dtc.business_domain, tg.business_domain), + COALESCE(dcc.stakeholder_group, dtc.stakeholder_group, tg.stakeholder_group), + COALESCE(dcc.transform_level, dtc.transform_level, tg.transform_level), + COALESCE(dcc.critical_data_element, dtc.critical_data_element), + COALESCE(dcc.data_product, dtc.data_product, tg.data_product), + dcc.functional_data_type, pr.run_date, + tg.project_code ; + +CREATE OR REPLACE VIEW v_dq_test_scoring_history_by_column +AS +SELECT + tg.project_code, + sr.definition_id, + sr.score_history_cutoff_time, + r.table_groups_id, + r.test_suite_id, + r.test_run_id, + tg.table_groups_name, + tg.data_location, + COALESCE(dcc.data_source, dtc.data_source, tg.data_source) as data_source, + COALESCE(dcc.source_system, dtc.source_system, tg.source_system) as source_system, + COALESCE(dcc.source_process, dtc.source_process, tg.source_process) as source_process, + COALESCE(dcc.business_domain, dtc.business_domain, tg.business_domain) as business_domain, + COALESCE(dcc.stakeholder_group, dtc.stakeholder_group, tg.stakeholder_group) as stakeholder_group, + COALESCE(dcc.transform_level, dtc.transform_level, tg.transform_level) as transform_level, + COALESCE(dcc.critical_data_element, dtc.critical_data_element) as critical_data_element, + COALESCE(dcc.data_product, dtc.data_product, tg.data_product) as data_product, + dcc.functional_data_type as semantic_data_type, + r.test_time, r.table_name, r.column_names as column_name, + COUNT(*) as test_ct, + SUM(r.result_code) as passed_ct, + SUM(1 - r.result_code) as issue_ct, + MAX(r.dq_record_ct) as dq_record_ct, + SUM_LN(COALESCE(r.dq_prevalence, 0.0)) as good_data_pct + FROM test_results r +INNER JOIN test_suites s + ON (r.test_suite_id = s.id) +INNER JOIN score_history_latest_runs sr + ON (r.test_run_id = sr.last_test_run_id) +INNER JOIN table_groups tg + ON r.table_groups_id = tg.id +LEFT JOIN data_table_chars dtc + ON (r.table_groups_id = dtc.table_groups_id + AND r.table_name = dtc.table_name) +LEFT JOIN data_column_chars dcc + ON (r.table_groups_id = dcc.table_groups_id + AND r.table_name = dcc.table_name + AND r.column_names = dcc.column_name) + WHERE r.dq_prevalence IS NOT NULL + AND s.dq_score_exclude = FALSE + AND (r.disposition IS NULL OR r.disposition = 'Confirmed') +GROUP BY sr.definition_id, + sr.score_history_cutoff_time, + r.table_groups_id, r.table_name, r.column_names, + r.test_suite_id, r.test_run_id, tg.table_groups_name, dcc.data_source, dtc.data_source, + tg.data_source, tg.data_location, dcc.data_source, dtc.data_source, + tg.data_source, dcc.source_system, dtc.source_system, tg.source_system, + dcc.source_process, dtc.source_process, tg.source_process, dcc.business_domain, + dtc.business_domain, tg.business_domain, dcc.stakeholder_group, dtc.stakeholder_group, + tg.stakeholder_group, dcc.transform_level, dtc.transform_level, tg.transform_level, + dcc.critical_data_element, dtc.critical_data_element, + dcc.data_product, dtc.data_product, tg.data_product, + dcc.functional_data_type, r.test_time, + tg.project_code; + +DO $$ +DECLARE + current_project VARCHAR(30); + current_definition UUID; + current_definition_filter RECORD; + where_condition TEXT; + existing_history_entries TIMESTAMP[]; + history_entry RECORD; +BEGIN + FOR current_project IN SELECT 
project_code FROM projects LOOP + FOR current_definition IN SELECT id FROM score_definitions WHERE project_code = current_project LOOP + + -- Build the where condition for the scores queries + where_condition := format('WHERE definition_id = %L AND project_code = %L', current_definition, current_project); + FOR current_definition_filter IN SELECT field, string_agg(quote_literal(value), ', ') AS values FROM score_definition_filters WHERE definition_id = current_definition GROUP BY field LOOP + where_condition := where_condition || format(' AND %I IN (%s)', current_definition_filter.field, current_definition_filter.values); + END LOOP; + + -- Get already existing history entries + SELECT ARRAY_AGG(last_run_time) INTO existing_history_entries FROM score_definition_results_history WHERE definition_id = current_definition; + + FOR history_entry IN EXECUTE format(' + SELECT DISTINCT ON (last_run_time) + COALESCE(profiling_scores.project_code, test_scores.project_code) AS project_code, + COALESCE(profiling_scores.definition_id, test_scores.definition_id) AS definition_id, + COALESCE(profiling_scores.last_run_time, test_scores.last_run_time) AS last_run_time, + (COALESCE(profiling_scores.score, 1) * COALESCE(test_scores.score, 1)) AS score, + (COALESCE(profiling_scores.cde_score, 1) * COALESCE(test_scores.cde_score, 1)) AS cde_score + FROM ( + SELECT + project_code, + definition_id, + score_history_cutoff_time AS last_run_time, + SUM(good_data_pct * record_ct) / NULLIF(SUM(record_ct), 0) AS score, + SUM(CASE critical_data_element WHEN true THEN (good_data_pct * record_ct) ELSE 0 END) + / NULLIF(SUM(CASE critical_data_element WHEN true THEN record_ct ELSE 0 END), 0) AS cde_score + FROM v_dq_profile_scoring_history_by_column + %s + GROUP BY project_code, definition_id, score_history_cutoff_time + ) AS profiling_scores + FULL OUTER JOIN ( + SELECT + project_code, + definition_id, + score_history_cutoff_time AS last_run_time, + SUM(good_data_pct * dq_record_ct) / NULLIF(SUM(dq_record_ct), 0) AS score, + SUM(CASE critical_data_element WHEN true THEN (good_data_pct * dq_record_ct) ELSE 0 END) + / NULLIF(SUM(CASE critical_data_element WHEN true THEN dq_record_ct ELSE 0 END), 0) AS cde_score + FROM v_dq_test_scoring_history_by_column + %s + GROUP BY project_code, definition_id, score_history_cutoff_time + ) AS test_scores + ON ( + test_scores.project_code = profiling_scores.project_code + AND test_scores.definition_id = profiling_scores.definition_id + AND test_scores.last_run_time = profiling_scores.last_run_time + ) + ', where_condition, where_condition) LOOP + -- If a history entry with this `last_run_time` does not exist + CONTINUE WHEN history_entry.last_run_time = ANY(existing_history_entries); + + -- insert it for both score and cde score + EXECUTE format(' + INSERT INTO score_definition_results_history (definition_id, category, score, last_run_time) + VALUES (%L, %L, %L, %L) + ', history_entry.definition_id, 'score', history_entry.score, history_entry.last_run_time); + EXECUTE format(' + INSERT INTO score_definition_results_history (definition_id, category, score, last_run_time) + VALUES (%L, %L, %L, %L) + ', history_entry.definition_id, 'cde_score', history_entry.cde_score, history_entry.last_run_time); + END LOOP; + END LOOP; + END LOOP; +END $$; diff --git a/testgen/template/exec_cat_tests/ex_cat_build_agg_table_tests.sql b/testgen/template/exec_cat_tests/ex_cat_build_agg_table_tests.sql index c5e5fb6e..2f821506 100644 --- a/testgen/template/exec_cat_tests/ex_cat_build_agg_table_tests.sql +++ 
b/testgen/template/exec_cat_tests/ex_cat_build_agg_table_tests.sql @@ -31,7 +31,7 @@ WITH test_detail -- Nested parm replacements - part of query, not Python parms REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE( c.measure, - '{COLUMN_NAME}', COALESCE(fn_PrepColumnName(t.column_name), '')), + '{COLUMN_NAME}', '{ID_SEPARATOR}' || COALESCE(t.column_name, '') || '{ID_SEPARATOR}'), '{BASELINE_CT}', COALESCE(t.baseline_ct, '')), '{BASELINE_UNIQUE_CT}', COALESCE(t.baseline_unique_ct, '')), '{BASELINE_VALUE}', COALESCE(t.baseline_value, '') ), @@ -49,7 +49,7 @@ WITH test_detail -- Nested parm replacements - standard REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE( c.measure || c.test_operator || c.test_condition, - '{COLUMN_NAME}', COALESCE(fn_PrepColumnName(t.column_name), '')), + '{COLUMN_NAME}', '{ID_SEPARATOR}' || COALESCE(t.column_name, '') || '{ID_SEPARATOR}'), '{BASELINE_CT}', COALESCE(t.baseline_ct, '')), '{BASELINE_UNIQUE_CT}', COALESCE(t.baseline_unique_ct, '')), '{BASELINE_VALUE}', COALESCE(t.baseline_value, '') ), diff --git a/testgen/template/flavors/databricks/data_chars/schema_ddf_query_databricks.sql b/testgen/template/flavors/databricks/data_chars/schema_ddf_query_databricks.sql new file mode 100644 index 00000000..c486d94a --- /dev/null +++ b/testgen/template/flavors/databricks/data_chars/schema_ddf_query_databricks.sql @@ -0,0 +1,29 @@ +SELECT '{PROJECT_CODE}' AS project_code, + CURRENT_TIMESTAMP AS refresh_timestamp, + c.table_schema, + c.table_name, + c.column_name, + CASE + WHEN lower(c.full_data_type) = 'timestamp' THEN 'timestamp_ntz' + WHEN lower(c.full_data_type) = 'string' THEN 'varchar' + WHEN lower(c.full_data_type) IN ('double', 'float') THEN 'numeric' + WHEN lower(c.full_data_type) LIKE 'decimal%' THEN 'numeric(' || c.numeric_precision || ',' || c.numeric_scale || ')' + ELSE lower(c.full_data_type) + END AS data_type, + c.character_maximum_length, + c.ordinal_position, + CASE + WHEN lower(c.data_type) RLIKE '(string|char|varchar|text)' THEN 'A' + WHEN lower(c.data_type) = 'boolean' THEN 'B' + WHEN lower(c.data_type) IN ('date', 'timestamp') THEN 'D' + WHEN lower(c.data_type) IN ('byte', 'short', 'int', 'integer', 'long', 'bigint', 'float', 'double') THEN 'N' + WHEN lower(c.data_type) LIKE 'decimal%' THEN 'N' + ELSE 'X' + END AS general_type, + CASE + WHEN c.numeric_scale > 0 THEN 1 + ELSE 0 + END AS is_decimal +FROM information_schema.columns c +WHERE c.table_schema = '{DATA_SCHEMA}' {TABLE_CRITERIA} +ORDER BY c.table_schema, c.table_name, c.ordinal_position; diff --git a/testgen/template/flavors/databricks/exec_query_tests/ex_window_match_no_drops_databricks.sql b/testgen/template/flavors/databricks/exec_query_tests/ex_window_match_no_drops_databricks.sql new file mode 100644 index 00000000..5ca11540 --- /dev/null +++ b/testgen/template/flavors/databricks/exec_query_tests/ex_window_match_no_drops_databricks.sql @@ -0,0 +1,45 @@ +SELECT '{TEST_TYPE}' as test_type, + '{TEST_DEFINITION_ID}' as test_definition_id, + '{TEST_SUITE_ID}' as test_suite_id, + '{TEST_RUN_ID}' as test_run_id, + '{RUN_DATE}' as test_time, + '{START_TIME}' as starttime, + CURRENT_TIMESTAMP as endtime, + '{SCHEMA_NAME}' as schema_name, + '{TABLE_NAME}' as table_name, + '{COLUMN_NAME_NO_QUOTES}' as column_names, + '{SKIP_ERRORS}' as threshold_value, + {SKIP_ERRORS} as skip_errors, + '{INPUT_PARAMETERS}' as input_parameters, + CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code, + CASE + WHEN COUNT(*) > 0 THEN + 
CONCAT( + CONCAT( CAST(COUNT(*) AS {VARCHAR_TYPE}), ' error(s) identified, ' ), + CONCAT( + CASE + WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of ' + ELSE 'within limit of ' + END, + '{SKIP_ERRORS}.' + ) + ) + ELSE 'No errors found.' + END AS result_message, + COUNT(*) as result_measure, + '{SUBSET_DISPLAY}' as subset_condition, + NULL as result_query +FROM ( + SELECT {COLUMN_NAME_NO_QUOTES} + FROM {SCHEMA_NAME}.{TABLE_NAME} + WHERE {SUBSET_CONDITION} + AND {WINDOW_DATE_COLUMN} >= DATEADD(day, - 2 * {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME})) + AND {WINDOW_DATE_COLUMN} < DATEADD(day, - {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME})) + GROUP BY {COLUMN_NAME_NO_QUOTES} + EXCEPT + SELECT {COLUMN_NAME_NO_QUOTES} + FROM {SCHEMA_NAME}.{TABLE_NAME} + WHERE {SUBSET_CONDITION} + AND {WINDOW_DATE_COLUMN} >= DATEADD(day, - {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME})) + GROUP BY {COLUMN_NAME_NO_QUOTES} + ) test; diff --git a/testgen/template/flavors/databricks/exec_query_tests/ex_window_match_same_databricks.sql b/testgen/template/flavors/databricks/exec_query_tests/ex_window_match_same_databricks.sql new file mode 100644 index 00000000..80953b2c --- /dev/null +++ b/testgen/template/flavors/databricks/exec_query_tests/ex_window_match_same_databricks.sql @@ -0,0 +1,58 @@ +SELECT '{TEST_TYPE}' as test_type, + '{TEST_DEFINITION_ID}' as test_definition_id, + '{TEST_SUITE_ID}' as test_suite_id, + '{TEST_RUN_ID}' as test_run_id, + '{RUN_DATE}' as test_time, + '{START_TIME}' as starttime, + CURRENT_TIMESTAMP as endtime, + '{SCHEMA_NAME}' as schema_name, + '{TABLE_NAME}' as table_name, + '{COLUMN_NAME_NO_QUOTES}' as column_names, + '{SKIP_ERRORS}' as threshold_value, + {SKIP_ERRORS} as skip_errors, + '{INPUT_PARAMETERS}' as input_parameters, + CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code, + CASE + WHEN COUNT(*) > 0 THEN + CONCAT( + CONCAT( CAST(COUNT(*) AS {VARCHAR_TYPE}), ' error(s) identified, ' ), + CONCAT( + CASE + WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of ' + ELSE 'within limit of ' + END, + '{SKIP_ERRORS}.' + ) + ) + ELSE 'No errors found.' 
+ END AS result_message, + COUNT(*) as result_measure, + '{SUBSET_DISPLAY}' as subset_condition, + NULL as result_query + FROM ( + ( +SELECT 'Prior Timeframe' as missing_from, {COLUMN_NAME} +FROM {SCHEMA_NAME}.{TABLE_NAME} +WHERE {SUBSET_CONDITION} + AND {WINDOW_DATE_COLUMN} >= DATEADD(day, - {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME})) +EXCEPT +SELECT 'Prior Timeframe' as missing_from, {COLUMN_NAME} +FROM {SCHEMA_NAME}.{TABLE_NAME} +WHERE {SUBSET_CONDITION} + AND {WINDOW_DATE_COLUMN} >= DATEADD(day, - 2 * {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME})) + AND {WINDOW_DATE_COLUMN} < DATEADD(day, - {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME})) +) +UNION ALL +( +SELECT 'Latest Timeframe' as missing_from, {COLUMN_NAME} +FROM {SCHEMA_NAME}.{TABLE_NAME} +WHERE {SUBSET_CONDITION} + AND {WINDOW_DATE_COLUMN} >= DATEADD(day, - 2 * {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME})) + AND {WINDOW_DATE_COLUMN} < DATEADD(day, - {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME})) + EXCEPT +SELECT 'Latest Timeframe' as missing_from, {COLUMN_NAME} +FROM {SCHEMA_NAME}.{TABLE_NAME} +WHERE {SUBSET_CONDITION} + AND {WINDOW_DATE_COLUMN} >= DATEADD(day, - {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME})) +) + ) test; diff --git a/testgen/template/flavors/databricks/profiling/project_profiling_query_databricks.yaml b/testgen/template/flavors/databricks/profiling/project_profiling_query_databricks.yaml new file mode 100644 index 00000000..18c24243 --- /dev/null +++ b/testgen/template/flavors/databricks/profiling/project_profiling_query_databricks.yaml @@ -0,0 +1,276 @@ +--- +strTemplate01_sampling: "SELECT " +strTemplate01_else: "SELECT " +strTemplate02_all: | + {CONNECTION_ID} as connection_id, + '{PROJECT_CODE}' as project_code, + '{TABLE_GROUPS_ID}' as table_groups_id, + '{DATA_SCHEMA}' AS schema_name, + '{RUN_DATE}' AS run_date, + '{DATA_TABLE}' AS table_name, + {COL_POS} AS position, + '{COL_NAME_SANITIZED}' AS column_name, + '{COL_TYPE}' AS column_type, + '{COL_GEN_TYPE}' AS general_type, + COUNT(*) AS record_ct, + COUNT(`{COL_NAME}`) AS value_ct, + COUNT(DISTINCT `{COL_NAME}`) AS distinct_value_ct, + SUM(CASE WHEN `{COL_NAME}` IS NULL THEN 1 ELSE 0 END) AS null_value_ct, +strTemplate03_ADN: MIN(LEN(`{COL_NAME}`)) AS min_length, + MAX(LEN(`{COL_NAME}`)) AS max_length, + AVG(CAST(NULLIF(LEN(`{COL_NAME}`), 0) AS FLOAT)) AS avg_length, +strTemplate03_else: NULL as min_length, + NULL as max_length, + NULL as avg_length, +strTemplate04_A: SUM(CASE + WHEN LTRIM(RTRIM(`{COL_NAME}`)) RLIKE '0([.]0*)' THEN 1 ELSE 0 + END) AS zero_value_ct, +strTemplate04_N: CAST(SUM( 1 - ABS(SIGN(`{COL_NAME}`)))AS BIGINT ) AS zero_value_ct, +strTemplate04_else: NULL as zero_value_ct, +strTemplate05_A: COUNT(DISTINCT UPPER(REPLACE(TRANSLATE(`{COL_NAME}`,' '''',.-',REPEAT(' ', LEN(' '''',.-'))),' ',''))) as distinct_std_value_ct, + SUM(CASE + WHEN `{COL_NAME}` = '' THEN 1 + ELSE 0 + END) AS zero_length_ct, + SUM( CASE + WHEN `{COL_NAME}` BETWEEN ' !' AND '!' 
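+           /* heuristic: under ASCII ordering, values sorting in this range begin with a leading space */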
THEN 1 + ELSE 0 + END ) AS lead_space_ct, + SUM( CASE WHEN `{COL_NAME}` LIKE '"%"' OR `{COL_NAME}` LIKE '\'%\'' THEN 1 ELSE 0 END ) as quoted_value_ct, + SUM( CASE WHEN `{COL_NAME}` RLIKE '[0-9]' THEN 1 ELSE 0 END ) as includes_digit_ct, + SUM( CASE + WHEN `{COL_NAME}` IN ('.', '?') OR `{COL_NAME}` RLIKE '^\s+$' THEN 1 + WHEN LEN(`{COL_NAME}`) > 1 + AND ( LOWER(`{COL_NAME}`) LIKE '%..%' OR LOWER(`{COL_NAME}`) RLIKE '--' + OR (LEN(REPLACE(`{COL_NAME}`, '0', ''))= 0 ) + OR (LEN(REPLACE(`{COL_NAME}`, '9', ''))= 0 ) + OR (LEN(REPLACE(LOWER(`{COL_NAME}`), 'x', ''))= 0 ) + OR (LEN(REPLACE(LOWER(`{COL_NAME}`), 'z', ''))= 0 ) + ) THEN 1 + WHEN LOWER(`{COL_NAME}`) IN ('blank','error','missing','tbd', + 'n/a','#na','none','null','unknown') THEN 1 + WHEN LOWER(`{COL_NAME}`) IN ('(blank)','(error)','(missing)','(tbd)', + '(n/a)','(#na)','(none)','(null)','(unknown)') THEN 1 + WHEN LOWER(`{COL_NAME}`) IN ('[blank]','[error]','[missing]','[tbd]', + '[n/a]','[#na]','[none]','[null]','[unknown]') THEN 1 + ELSE 0 + END ) AS filled_value_ct, + LEFT(MIN(NULLIF(`{COL_NAME}`, '')), 100) AS min_text, + LEFT(MAX(NULLIF(`{COL_NAME}`, '')), 100) AS max_text, + SUM(CASE + WHEN TRANSLATE(`{COL_NAME}`, 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', ' ') = `{COL_NAME}` THEN 0 + WHEN TRANSLATE(`{COL_NAME}`, 'abcdefghijklmnopqrstuvwxyz', ' ') = `{COL_NAME}` THEN 1 + ELSE 0 + END) AS upper_case_ct, + SUM(CASE + WHEN TRANSLATE(`{COL_NAME}`, 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', ' ') = `{COL_NAME}` THEN 0 + WHEN TRANSLATE(`{COL_NAME}`, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', ' ') = `{COL_NAME}` THEN 1 + ELSE 0 + END) AS lower_case_ct, + SUM(CASE + WHEN TRANSLATE(`{COL_NAME}`, 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', ' ') = `{COL_NAME}` THEN 1 + ELSE 0 + END) AS non_alpha_ct, + SUM(<%IS_NUM;LEFT(`{COL_NAME}`, 31)%>) AS numeric_ct, + SUM(<%IS_DATE;LEFT(`{COL_NAME}`, 26)%>) AS date_ct, + CASE + WHEN CAST(SUM( CASE WHEN UPPER(`{COL_NAME}`) RLIKE '[1-9]{1,5} [A-Z]+ .*' + THEN 1 END ) as FLOAT) /CAST(COUNT(`{COL_NAME}`) AS FLOAT) > 0.8 THEN 'STREET_ADDR' + WHEN CAST(SUM(CASE WHEN `{COL_NAME}` IN ('AL','AK','AS','AZ','AR','CA','CO','CT','DE','DC','FM','FL','GA','GU','HI','ID','IL','IN','IA','KS','KY','LA','ME','MH','MD','MA','MI','MN','MS','MO','MT','NE','NV','NH','NJ','NM','NY','NC','ND','MP','OH','OK','OR','PW','PA','PR','RI','SC','SD','TN','TX','UT','VT','VI','VA','WA','WV','WI','WY','AE','AP','AA') + THEN 1 END) AS FLOAT)/CAST(COUNT(`{COL_NAME}`) AS FLOAT) > 0.9 THEN 'STATE_USA' + WHEN CAST(SUM( CASE WHEN `{COL_NAME}` RLIKE '\\+1\\s*\\(?\\d{3}\\)?[-. ]*\\d{3}[-. 
]*\\d{4}' + THEN 1 END) AS FLOAT)/CAST(COUNT(`{COL_NAME}`) AS FLOAT) > 0.9 THEN 'PHONE_USA' + WHEN CAST(SUM( CASE WHEN `{COL_NAME}` RLIKE '[_a-zA-Z0-9.-]+@[a-zA-Z0-9.-]+.[a-zA-Z][a-zA-Z]+' + THEN 1 END) AS FLOAT)/CAST(COUNT(`{COL_NAME}`) AS FLOAT) > 0.9 THEN 'EMAIL' + WHEN CAST(SUM( CASE WHEN TRANSLATE(`{COL_NAME}`,'012345678','999999999') IN ('99999', '999999999', '99999-9999') + THEN 1 END) AS FLOAT)/CAST(COUNT(`{COL_NAME}`) AS FLOAT) > 0.9 THEN 'ZIP_USA' + WHEN CAST(SUM( CASE WHEN `{COL_NAME}` NOT LIKE ' %' + AND `{COL_NAME}` RLIKE '[a-z0-9 _-]%' + AND (`{COL_NAME}` LIKE '%.txt' + OR `{COL_NAME}` LIKE '%.csv' + OR `{COL_NAME}` LIKE '%.tsv' + OR `{COL_NAME}` LIKE '%.dat' + OR `{COL_NAME}` LIKE '%.doc' + OR `{COL_NAME}` LIKE '%.pdf' + OR `{COL_NAME}` LIKE '%.xlsx') + THEN 1 END) AS FLOAT)/CAST(COUNT(`{COL_NAME}`) AS FLOAT) > 0.9 THEN 'FILE_NAME' + WHEN CAST(SUM( CASE WHEN `{COL_NAME}` RLIKE '[0-9][0-9][0-9][0-9][- ][0-9][0-9][0-9][0-9][- ][0-9][0-9][0-9][0-9][- ][0-9][0-9][0-9][0-9]' + THEN 1 END) AS FLOAT)/CAST(COUNT(`{COL_NAME}`) AS FLOAT) > 0.8 THEN 'CREDIT_CARD' + WHEN CAST(SUM( CASE WHEN ( `{COL_NAME}` LIKE '%,%,%,%' + OR `{COL_NAME}` LIKE '%|%|%|%' + OR `{COL_NAME}` LIKE '%^%^%^%' + OR `{COL_NAME}` LIKE '%' || CHAR(9) || '%' || CHAR(9) || '%' || CHAR(9) || '%' ) + AND NOT ( `{COL_NAME}` LIKE '% and %' + OR `{COL_NAME}` LIKE '% but %' + OR `{COL_NAME}` LIKE '% or %' + OR `{COL_NAME}` LIKE '% yet %' ) + AND COALESCE(CAST(LEN(`{COL_NAME}`) - LEN(REPLACE(`{COL_NAME}`, ',', '')) as FLOAT) + / CAST(NULLIF(LEN(`{COL_NAME}`) - LEN(REPLACE(`{COL_NAME}`, ' ', '')), 0) as FLOAT), 1) > 0.6 + THEN 1 END) AS FLOAT)/CAST(COUNT(`{COL_NAME}`) AS FLOAT) > 0.8 THEN 'DELIMITED_DATA' + WHEN CAST(SUM ( CASE WHEN `{COL_NAME}` RLIKE '[0-8][0-9][0-9][- ][0-9][0-9][- ][0-9][0-9][0-9][0-9]' + AND LEFT(`{COL_NAME}`, 3) NOT BETWEEN '734' AND '749' + AND LEFT(`{COL_NAME}`, 3) <> '666' THEN 1 END) AS FLOAT)/CAST(COUNT(`{COL_NAME}`) AS FLOAT) > 0.9 THEN 'SSN' + END as std_pattern_match, +strTemplate05_else: NULL as distinct_std_value_ct, + NULL as zero_length_ct, + NULL as lead_space_ct, + NULL as quoted_value_ct, + NULL as includes_digit_ct, + NULL as filled_value_ct, + NULL as min_text, + NULL as max_text, + NULL as upper_case_ct, + NULL as lower_case_ct, + NULL as non_alpha_ct, + NULL as numeric_ct, + NULL as date_ct, + NULL as std_pattern_match, +strTemplate06_A_patterns: (SELECT CONCAT_WS(' | ', collect_list(ct_pattern)) + FROM ( + SELECT + TRANSLATE( + `{COL_NAME}`, 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', 'aaaaaaaaaaaaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAAAAAAAAAAAANNNNNNNNNN' + ) AS pattern, + COUNT(*) AS ct, + ct || ' | ' || pattern AS ct_pattern + FROM `{DATA_SCHEMA}`.`{DATA_TABLE}` + WHERE + trim(`{COL_NAME}`) != '' AND + ( + (SELECT MAX(LEN(`{COL_NAME}`)) FROM `{DATA_SCHEMA}`.`{DATA_TABLE}`) BETWEEN 3 and 25 + ) + GROUP BY pattern + HAVING len(pattern) > 0 + ORDER BY ct DESC + LIMIT 5 + )) AS top_patterns, +strTemplate06_else: NULL as top_patterns, +strTemplate07_A_freq: ( SELECT LEFT(CONCAT_WS(' | ', collect_list(val)), 1000) as concat_vals + FROM ( + SELECT CAST(COUNT(*) as VARCHAR(10)) || ' | ' || `{COL_NAME}` as val, + COUNT(*) as ct + FROM {DATA_SCHEMA}.{DATA_TABLE} + WHERE `{COL_NAME}` > ' ' + GROUP BY `{COL_NAME}` + HAVING `{COL_NAME}` > ' ' + ORDER BY COUNT(*) DESC, val ASC + LIMIT 10 + ) ps + ) AS top_freq_values, +strTemplate07_else: NULL as top_freq_values, +strTemplate08_N: MIN(`{COL_NAME}`) AS min_value, + MIN(CASE WHEN `{COL_NAME}` > 0 THEN `{COL_NAME}` ELSE NULL END) AS 
min_value_over_0, + MAX(`{COL_NAME}`) AS max_value, + AVG(CAST(`{COL_NAME}` AS FLOAT)) AS avg_value, + STDDEV_SAMP(CAST(`{COL_NAME}` AS FLOAT)) AS stdev_value, + MIN(pct_25) as percentile_25, + MIN(pct_50) as percentile_50, + MIN(pct_75) as percentile_75, +strTemplate08_else: NULL as min_value, + NULL as min_value_over_0, + NULL as max_value, + NULL as avg_value, + NULL as stdev_value, + NULL as percentile_25, + NULL as percentile_50, + NULL as percentile_75, +strTemplate10_N_dec: SUM(ROUND((`{COL_NAME}` % 1), 5)) as fractional_sum, + +strTemplate10_else: NULL as fractional_sum, + +strTemplate11_D: CASE + WHEN MIN(`{COL_NAME}`) IS NULL THEN NULL + ELSE CASE WHEN MIN(`{COL_NAME}`) >= CAST('0001-01-01' as date) THEN MIN(`{COL_NAME}`) ELSE CAST('0001-01-01' as date) END + END as min_date, + MAX(`{COL_NAME}`) as max_date, + SUM(CASE + WHEN <%DATEDIFF_MONTH; `{COL_NAME}`; '{RUN_DATE}'::TIMESTAMP%> > 12 THEN 1 + ELSE 0 + END) AS before_1yr_date_ct, + SUM(CASE + WHEN <%DATEDIFF_MONTH; `{COL_NAME}`; '{RUN_DATE}'::TIMESTAMP%> > 60 THEN 1 + ELSE 0 + END) AS before_5yr_date_ct, + SUM(CASE + WHEN <%DATEDIFF_MONTH; `{COL_NAME}`; '{RUN_DATE}'::TIMESTAMP%> > 240 THEN 1 + ELSE 0 + END) AS before_20yr_date_ct, + SUM(CASE + WHEN <%DATEDIFF_MONTH; `{COL_NAME}`; '{RUN_DATE}'::TIMESTAMP%> > 1200 THEN 1 + ELSE 0 + END) AS before_100yr_date_ct, + SUM(CASE + WHEN <%DATEDIFF_DAY; `{COL_NAME}`; '{RUN_DATE}'::TIMESTAMP%> BETWEEN 0 AND 365 THEN 1 + ELSE 0 + END) AS within_1yr_date_ct, + SUM(CASE + WHEN <%DATEDIFF_DAY; `{COL_NAME}`; '{RUN_DATE}'::TIMESTAMP%> BETWEEN 0 AND 30 THEN 1 + ELSE 0 + END) AS within_1mo_date_ct, + SUM(CASE + WHEN `{COL_NAME}` > '{RUN_DATE}' THEN 1 ELSE 0 + END) AS future_date_ct, + SUM(CASE + WHEN <%DATEDIFF_MONTH; `{COL_NAME}`; '{RUN_DATE}'::TIMESTAMP%> > 240 THEN 1 + ELSE 0 + END) AS distant_future_date_ct, + COUNT(DISTINCT <%DATEDIFF_DAY; `{COL_NAME}`; '{RUN_DATE}'::DATE%>) as date_days_present, + COUNT(DISTINCT <%DATEDIFF_WEEK; `{COL_NAME}`; '{RUN_DATE}'::DATE%>) as date_weeks_present, + COUNT(DISTINCT <%DATEDIFF_MONTH; `{COL_NAME}`; '{RUN_DATE}'::DATE%>) as date_months_present, + +strTemplate11_else: NULL as min_date, + NULL as max_date, + NULL as before_1yr_date_ct, + NULL as before_5yr_date_ct, + NULL as before_20yr_date_ct, + NULL AS before_100yr_date_ct, + NULL as within_1yr_date_ct, + NULL as within_1mo_date_ct, + NULL as future_date_ct, + NULL as distant_future_date_ct, + NULL as date_days_present, + NULL as date_weeks_present, + NULL as date_months_present, + +strTemplate12_B: SUM(CAST(`{COL_NAME}` AS INTEGER)) AS boolean_true_ct, + +strTemplate12_else: NULL as boolean_true_ct, + +strTemplate13_ALL: NULL AS datatype_suggestion, +strTemplate14_A_do_patterns: ( SELECT COUNT(DISTINCT TRANSLATE(`{COL_NAME}`, + 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', + 'aaaaaaaaaaaaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAAAAAAAAAAAANNNNNNNNNN' + ) + ) AS pattern_ct + FROM {DATA_SCHEMA}.{DATA_TABLE} + WHERE `{COL_NAME}` > ' ' ) AS distinct_pattern_ct, + SUM(CAST(SIGN(LEN(TRIM(`{COL_NAME}`)) - LEN(REPLACE(TRIM(`{COL_NAME}`),' ',''))) AS BIGINT)) AS embedded_space_ct, + AVG(CAST(LEN(TRIM(`{COL_NAME}`)) - LEN(REPLACE(TRIM(`{COL_NAME}`),' ','')) AS FLOAT)) AS avg_embedded_spaces, + +strTemplate14_A_no_patterns: NULL as distinct_pattern_ct, + SUM(CAST(SIGN(LEN(RTRIM(LTRIM(`{COL_NAME}`))) - LEN(REPLACE(RTRIM(LTRIM(`{COL_NAME}`)),' ',''))) AS BIGINT)) AS embedded_space_ct, + AVG(CAST(LEN(RTRIM(LTRIM(`{COL_NAME}`))) - LEN(REPLACE(RTRIM(LTRIM(`{COL_NAME}`)),' ','')) AS FLOAT)) AS 
avg_embedded_spaces, + +strTemplate14_else: NULL as distinct_pattern_ct, + NULL as embedded_space_ct, + NULL as avg_embedded_spaces, + +strTemplate15_ALL: NULL as functional_data_type, + NULL as functional_table_type, + +strTemplate16_ALL: " '{PROFILE_RUN_ID}' as profile_run_id" + +strTemplate98_sampling: ' FROM {DATA_SCHEMA}.{DATA_TABLE} LIMIT {SAMPLE_SIZE}' + +strTemplate98_else: ' FROM {DATA_SCHEMA}.{DATA_TABLE}' + +strTemplate99_N: | + , (SELECT + PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY `{COL_NAME}`) OVER () AS pct_25, + PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY `{COL_NAME}`) OVER () AS pct_50, + PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY `{COL_NAME}`) OVER () AS pct_75 + FROM {DATA_SCHEMA}.{DATA_TABLE} LIMIT 1) pctile + +strTemplate99_else: ' ' + +strTemplate100_sampling: ' ORDER BY RAND()' diff --git a/testgen/template/flavors/databricks/profiling/project_secondary_profiling_query_databricks.sql b/testgen/template/flavors/databricks/profiling/project_secondary_profiling_query_databricks.sql new file mode 100644 index 00000000..483fb373 --- /dev/null +++ b/testgen/template/flavors/databricks/profiling/project_secondary_profiling_query_databricks.sql @@ -0,0 +1,35 @@ +-- Get Freqs for selected columns +WITH ranked_vals +AS + (SELECT `{COL_NAME}`, + COUNT(*) AS ct, + ROW_NUMBER() OVER (ORDER BY COUNT(*) DESC) AS rn + FROM {DATA_SCHEMA}.{DATA_TABLE} + WHERE `{COL_NAME}` > ' ' + GROUP BY `{COL_NAME}` + ), +consol_vals +AS ( + SELECT COALESCE ( + CASE WHEN rn <= 10 THEN '| ' || `{COL_NAME}` || ' | ' || ct ELSE NULL END, + '| Other Values (' || COUNT(DISTINCT CAST(`{COL_NAME}` as STRING)) || ') | ' || SUM(ct) + ) AS val, + MIN (rn) as min_rn + FROM ranked_vals + GROUP BY CASE WHEN rn <= 10 THEN '| ' || `{COL_NAME}` || ' | ' || ct ELSE NULL + END + ) +SELECT '{PROJECT_CODE}' as project_code, + '{DATA_SCHEMA}' as schema_name, + '{RUN_DATE}' as run_date, + '{DATA_TABLE}' as table_name, + '{COL_NAME}' as column_name, + REPLACE(CONCAT_WS('^#^', ARRAY_SORT( + COLLECT_LIST(val), + (left, right) -> CASE WHEN CAST(SPLIT(left, '\\|')[0] AS INT) < CAST(SPLIT(right, '\\|')[0] AS INT) THEN -1 ELSE 1 END + )), '^#^', '\n') AS top_freq_values, + (SELECT MD5(CONCAT_WS('|', ARRAY_SORT(COLLECT_LIST(NULLIF(dist_col_name,''))))) as dvh + FROM (SELECT DISTINCT `{COL_NAME}` as dist_col_name + FROM {DATA_SCHEMA}.{DATA_TABLE}) a + ) as distinct_value_hash +FROM consol_vals; diff --git a/testgen/template/flavors/databricks/profiling/templated_functions.yaml b/testgen/template/flavors/databricks/profiling/templated_functions.yaml new file mode 100644 index 00000000..a7706e26 --- /dev/null +++ b/testgen/template/flavors/databricks/profiling/templated_functions.yaml @@ -0,0 +1,23 @@ +IS_NUM: CASE + WHEN {$1} RLIKE '^\\s*[+-]?\\$?\\s*[0-9]+(,[0-9]{3})*(\\.[0-9]*)?[\\%]?\\s*$' THEN 1 + ELSE 0 + END + +IS_DATE: CASE + WHEN to_date({$1}, 'yyyyMMdd') IS NOT NULL THEN 1 + WHEN to_date({$1}, 'yyyy-MM-dd') IS NOT NULL THEN 1 + WHEN to_date({$1}, 'MM/dd/yyyy') IS NOT NULL THEN 1 + WHEN to_date({$1}, 'MM-dd-yyyy') IS NOT NULL THEN 1 + WHEN to_date({$1}, 'MM-dd-yy') IS NOT NULL THEN 1 + WHEN to_date({$1}, 'dd LLL yyyy') IS NOT NULL AND RIGHT({$1}, 4)::INT BETWEEN 1800 AND 2200 THEN 1 + WHEN to_date({$1}, 'dd LLLL yyyy') IS NOT NULL AND RIGHT({$1}, 4)::INT BETWEEN 1800 AND 2200 THEN 1 + WHEN to_date({$1}, 'yyyy-MM-dd HH:mm:ss SSSSSS') IS NOT NULL AND LEFT({$1}, 4)::INT BETWEEN 1800 AND 2200 THEN 1 + WHEN to_date({$1}, 'yyyy-MM-dd HH:mm:ss') IS NOT NULL AND LEFT({$1}, 4)::INT BETWEEN 1800 AND 2200 THEN 1 + ELSE 0 + END + 
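+# {$1} and {$2} below are positional arguments, filled from the <%NAME; arg1; arg2%>
+# tokens used in the profiling templates: e.g. <%DATEDIFF_DAY; `some_col`; '{RUN_DATE}'::DATE%>
+# (column name illustrative) expands DATEDIFF_DAY to
+# EXTRACT(DAY FROM DATE('{RUN_DATE}'::DATE) - DATE(`some_col`)).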
+DATEDIFF_MONTH: (YEAR({$2}) * 12 + MONTH({$2}) - YEAR({$1}) * 12 - MONTH({$1})) + +DATEDIFF_WEEK: CAST(DATEDIFF(DATE_TRUNC('week', {$2} + INTERVAL 1 DAY), DATE_TRUNC('week', {$1} + INTERVAL 1 DAY)) / 7 AS INT) + +DATEDIFF_DAY: EXTRACT(DAY FROM DATE({$2}) - DATE({$1})) diff --git a/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_no_drops_generic.sql b/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_no_drops_generic.sql index 7a992b4e..e376ef71 100644 --- a/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_no_drops_generic.sql +++ b/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_no_drops_generic.sql @@ -15,7 +15,7 @@ SELECT '{TEST_TYPE}' as test_type, CASE WHEN COUNT(*) > 0 THEN CONCAT( - CONCAT( CAST(COUNT(*) AS VARCHAR), ' error(s) identified, ' ), + CONCAT( CAST(COUNT(*) AS {VARCHAR_TYPE}), ' error(s) identified, ' ), CONCAT( CASE WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of ' diff --git a/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_num_incr_generic.sql b/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_num_incr_generic.sql index 7152b464..c9660494 100644 --- a/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_num_incr_generic.sql +++ b/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_num_incr_generic.sql @@ -8,9 +8,9 @@ SELECT '{TEST_TYPE}' as test_type, as input_parameters, CASE WHEN COUNT(*) > COALESCE(skip_errors, 0) THEN 0 ELSE 1 END as result_code, CONCAT( - CONCAT( 'Mismatched measures: ', CAST( COALESCE(COUNT(*), 0) AS VARCHAR) ), + CONCAT( 'Mismatched measures: ', CAST( COALESCE(COUNT(*), 0) AS {VARCHAR_TYPE}) ), CONCAT( ', Threshold: ', - CONCAT( CAST(COALESCE(skip_errors, 0) AS VARCHAR), '.') + CONCAT( CAST(COALESCE(skip_errors, 0) AS {VARCHAR_TYPE}), '.') ) ) AS result_message, COUNT(*) as result_measure, diff --git a/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_above_generic.sql b/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_above_generic.sql index 755d27d8..6e20b995 100644 --- a/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_above_generic.sql +++ b/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_above_generic.sql @@ -15,7 +15,7 @@ SELECT '{TEST_TYPE}' as test_type, CASE WHEN COUNT(*) > 0 THEN CONCAT( - CONCAT( CAST(COUNT(*) AS VARCHAR), ' error(s) identified, ' ), + CONCAT( CAST(COUNT(*) AS {VARCHAR_TYPE}), ' error(s) identified, ' ), CONCAT( CASE WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of ' diff --git a/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_within_generic.sql b/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_within_generic.sql index fcf30705..78864287 100644 --- a/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_within_generic.sql +++ b/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_within_generic.sql @@ -15,7 +15,7 @@ SELECT '{TEST_TYPE}' as test_type, CASE WHEN COUNT(*) > 0 THEN CONCAT( - CONCAT( CAST(COUNT(*) AS VARCHAR), ' error(s) identified, ' ), + CONCAT( CAST(COUNT(*) AS {VARCHAR_TYPE}), ' error(s) identified, ' ), CONCAT( CASE WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of ' diff --git a/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_same_generic.sql 
b/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_same_generic.sql index 6923e221..e9790a55 100644 --- a/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_same_generic.sql +++ b/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_same_generic.sql @@ -15,7 +15,7 @@ SELECT '{TEST_TYPE}' as test_type, CASE WHEN COUNT(*) > 0 THEN CONCAT( - CONCAT( CAST(COUNT(*) AS VARCHAR), ' error(s) identified, ' ), + CONCAT( CAST(COUNT(*) AS {VARCHAR_TYPE}), ' error(s) identified, ' ), CONCAT( CASE WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of ' diff --git a/testgen/template/flavors/generic/exec_query_tests/ex_custom_query_generic.sql b/testgen/template/flavors/generic/exec_query_tests/ex_custom_query_generic.sql index e12d926a..096dc351 100644 --- a/testgen/template/flavors/generic/exec_query_tests/ex_custom_query_generic.sql +++ b/testgen/template/flavors/generic/exec_query_tests/ex_custom_query_generic.sql @@ -19,7 +19,7 @@ SELECT '{TEST_TYPE}' as test_type, CASE WHEN COUNT(*) > 0 THEN CONCAT( - CONCAT( CAST(COUNT(*) AS VARCHAR), ' error(s) identified, ' ), + CONCAT( CAST(COUNT(*) AS {VARCHAR_TYPE}), ' error(s) identified, ' ), CONCAT( CASE WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of ' diff --git a/testgen/template/flavors/generic/exec_query_tests/ex_data_match_2way_generic.sql b/testgen/template/flavors/generic/exec_query_tests/ex_data_match_2way_generic.sql index 256db878..a52f4a36 100644 --- a/testgen/template/flavors/generic/exec_query_tests/ex_data_match_2way_generic.sql +++ b/testgen/template/flavors/generic/exec_query_tests/ex_data_match_2way_generic.sql @@ -15,7 +15,7 @@ SELECT '{TEST_TYPE}' as test_type, CASE WHEN COUNT(*) > 0 THEN CONCAT( - CONCAT( CAST(COUNT(*) AS VARCHAR), ' error(s) identified, ' ), + CONCAT( CAST(COUNT(*) AS {VARCHAR_TYPE}), ' error(s) identified, ' ), CONCAT( CASE WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of ' diff --git a/testgen/template/flavors/generic/exec_query_tests/ex_data_match_generic.sql b/testgen/template/flavors/generic/exec_query_tests/ex_data_match_generic.sql index 7c7adc0e..9d6702b5 100644 --- a/testgen/template/flavors/generic/exec_query_tests/ex_data_match_generic.sql +++ b/testgen/template/flavors/generic/exec_query_tests/ex_data_match_generic.sql @@ -15,7 +15,7 @@ SELECT '{TEST_TYPE}' as test_type, CASE WHEN COUNT(*) > 0 THEN CONCAT( - CONCAT( CAST(COUNT(*) AS VARCHAR), ' error(s) identified, ' ), + CONCAT( CAST(COUNT(*) AS {VARCHAR_TYPE}), ' error(s) identified, ' ), CONCAT( CASE WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of ' diff --git a/testgen/template/flavors/generic/exec_query_tests/ex_prior_match_generic.sql b/testgen/template/flavors/generic/exec_query_tests/ex_prior_match_generic.sql index b4c5fd13..654f7a1a 100644 --- a/testgen/template/flavors/generic/exec_query_tests/ex_prior_match_generic.sql +++ b/testgen/template/flavors/generic/exec_query_tests/ex_prior_match_generic.sql @@ -8,9 +8,9 @@ SELECT '{TEST_TYPE}' as test_type, as input_parameters, CASE WHEN COUNT(*) > COALESCE(skip_errors, 0) THEN 0 ELSE 1 END as result_code, CONCAT( - CONCAT( 'Mismatched measures: ', CAST( COALESCE(COUNT(*), 0) AS VARCHAR) ), + CONCAT( 'Mismatched measures: ', CAST( COALESCE(COUNT(*), 0) AS {VARCHAR_TYPE}) ), CONCAT( ', Threshold: ', - CONCAT( CAST(COALESCE(skip_errors, 0) AS VARCHAR), '.') + CONCAT( CAST(COALESCE(skip_errors, 0) AS {VARCHAR_TYPE}), '.') ) ) AS result_message, COUNT(*) as result_measure, diff --git 
a/testgen/template/flavors/generic/exec_query_tests/ex_relative_entropy_generic.sql b/testgen/template/flavors/generic/exec_query_tests/ex_relative_entropy_generic.sql index b6e340fc..9a5e5d79 100644 --- a/testgen/template/flavors/generic/exec_query_tests/ex_relative_entropy_generic.sql +++ b/testgen/template/flavors/generic/exec_query_tests/ex_relative_entropy_generic.sql @@ -41,7 +41,7 @@ SELECT '{TEST_TYPE}' as test_type, '{INPUT_PARAMETERS}' as input_parameters, CASE WHEN js_divergence > {THRESHOLD_VALUE} THEN 0 ELSE 1 END as result_code, CONCAT('Divergence Level: ', - CONCAT(CAST(js_divergence AS VARCHAR), + CONCAT(CAST(js_divergence AS {VARCHAR_TYPE}), ', Threshold: {THRESHOLD_VALUE}.')) as result_message, js_divergence as result_measure, '{SUBSET_DISPLAY}' as subset_condition, diff --git a/testgen/template/flavors/generic/exec_query_tests/ex_window_match_no_drops_generic.sql b/testgen/template/flavors/generic/exec_query_tests/ex_window_match_no_drops_generic.sql index f1f07eb8..19ccf9b8 100644 --- a/testgen/template/flavors/generic/exec_query_tests/ex_window_match_no_drops_generic.sql +++ b/testgen/template/flavors/generic/exec_query_tests/ex_window_match_no_drops_generic.sql @@ -15,7 +15,7 @@ SELECT '{TEST_TYPE}' as test_type, CASE WHEN COUNT(*) > 0 THEN CONCAT( - CONCAT( CAST(COUNT(*) AS VARCHAR), ' error(s) identified, ' ), + CONCAT( CAST(COUNT(*) AS {VARCHAR_TYPE}), ' error(s) identified, ' ), CONCAT( CASE WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of ' diff --git a/testgen/template/flavors/generic/exec_query_tests/ex_window_match_same_generic.sql b/testgen/template/flavors/generic/exec_query_tests/ex_window_match_same_generic.sql index 3e60ba27..3825f8b7 100644 --- a/testgen/template/flavors/generic/exec_query_tests/ex_window_match_same_generic.sql +++ b/testgen/template/flavors/generic/exec_query_tests/ex_window_match_same_generic.sql @@ -15,7 +15,7 @@ SELECT '{TEST_TYPE}' as test_type, CASE WHEN COUNT(*) > 0 THEN CONCAT( - CONCAT( CAST(COUNT(*) AS VARCHAR), ' error(s) identified, ' ), + CONCAT( CAST(COUNT(*) AS {VARCHAR_TYPE}), ' error(s) identified, ' ), CONCAT( CASE WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of ' diff --git a/testgen/template/parms/parms_profiling.sql b/testgen/template/parms/parms_profiling.sql index 245c5211..fb786ebc 100644 --- a/testgen/template/parms/parms_profiling.sql +++ b/testgen/template/parms/parms_profiling.sql @@ -10,6 +10,7 @@ SELECT cc.project_code, cc.project_port, cc.project_user, cc.project_db, + cc.http_path, tg.id::VARCHAR(50) as table_groups_id, tg.table_group_schema, CASE diff --git a/testgen/template/parms/parms_test_execution.sql b/testgen/template/parms/parms_test_execution.sql index 101d5e23..85fe7fe0 100644 --- a/testgen/template/parms/parms_test_execution.sql +++ b/testgen/template/parms/parms_test_execution.sql @@ -20,7 +20,8 @@ SELECT ts.project_code, cc.max_threads, cc.max_query_chars, cc.url, - cc.connect_by_url + cc.connect_by_url, + cc.http_path FROM test_suites ts JOIN connections cc ON (ts.connection_id = cc.connection_id) JOIN table_groups tg ON (ts.table_groups_id = tg.id) diff --git a/testgen/template/parms/parms_test_gen.sql b/testgen/template/parms/parms_test_gen.sql index 13dd4a1a..b9730edd 100644 --- a/testgen/template/parms/parms_test_gen.sql +++ b/testgen/template/parms/parms_test_gen.sql @@ -13,6 +13,7 @@ ts.id::VARCHAR as test_suite_id, cc.url, cc.connect_by_url, + cc.http_path, CURRENT_TIMESTAMP AT TIME ZONE 'UTC' - CAST(tg.profiling_delay_days AS integer) * INTERVAL '1 day' as 
profiling_as_of_date
 FROM table_groups tg
diff --git a/testgen/template/score_cards/add_latest_runs.sql b/testgen/template/score_cards/add_latest_runs.sql
new file mode 100644
index 00000000..f06abad0
--- /dev/null
+++ b/testgen/template/score_cards/add_latest_runs.sql
@@ -0,0 +1,30 @@
+-- Insert latest profiling runs as of cutoff
+WITH ranked_profiling
+   AS (SELECT project_code, table_groups_id, id as profiling_run_id,
+              ROW_NUMBER() OVER (PARTITION BY table_groups_id ORDER BY profiling_starttime DESC) as rank
+         FROM profiling_runs r
+        WHERE project_code = '{project_code}'
+          AND profiling_starttime <= '{score_history_cutoff_time}'
+          AND r.status = 'Complete')
+INSERT INTO score_history_latest_runs
+   (definition_id, score_history_cutoff_time, table_groups_id, last_profiling_run_id)
+SELECT '{definition_id}' as definition_id, '{score_history_cutoff_time}' as score_history_cutoff_time, table_groups_id, profiling_run_id
+  FROM ranked_profiling
+ WHERE rank = 1;
+
+-- Insert latest test runs as of cutoff
+WITH ranked_test_runs
+   AS (SELECT r.test_suite_id,
+              r.id as test_run_id,
+              ROW_NUMBER() OVER (PARTITION BY test_suite_id ORDER BY test_starttime DESC) as rank
+         FROM test_runs r
+        INNER JOIN test_suites s
+           ON (r.test_suite_id = s.id)
+        WHERE s.project_code = '{project_code}'
+          AND r.test_starttime <= '{score_history_cutoff_time}'
+          AND r.status = 'Complete')
+INSERT INTO score_history_latest_runs
+   (definition_id, score_history_cutoff_time, test_suite_id, last_test_run_id)
+SELECT '{definition_id}' as definition_id, '{score_history_cutoff_time}' as score_history_cutoff_time, test_suite_id, test_run_id
+  FROM ranked_test_runs
+ WHERE rank = 1;
diff --git a/testgen/template/score_cards/get_historical_overall_scores_by_column.sql b/testgen/template/score_cards/get_historical_overall_scores_by_column.sql
new file mode 100644
index 00000000..0c9b4596
--- /dev/null
+++ b/testgen/template/score_cards/get_historical_overall_scores_by_column.sql
@@ -0,0 +1,51 @@
+SELECT DISTINCT ON (last_run_time)
+    COALESCE(profiling_scores.project_code, test_scores.project_code) AS project_code,
+    COALESCE(profiling_scores.definition_id, test_scores.definition_id) AS definition_id,
+    COALESCE(profiling_scores.last_run_time, test_scores.last_run_time) AS last_run_time,
+    (COALESCE(profiling_scores.score, 1) * COALESCE(test_scores.score, 1)) AS score,
+    (COALESCE(profiling_scores.cde_score, 1) * COALESCE(test_scores.cde_score, 1)) AS cde_score
+FROM (
+    SELECT
+        project_code,
+        history.definition_id,
+        history.last_run_time,
+        -- score: record-count-weighted mean of per-column good-data percentages
+        SUM(good_data_pct * record_ct) / NULLIF(SUM(record_ct), 0) AS score,
+        SUM(CASE critical_data_element WHEN true THEN (good_data_pct * record_ct) ELSE 0 END)
+            / NULLIF(SUM(CASE critical_data_element WHEN true THEN record_ct ELSE 0 END), 0) AS cde_score
+    FROM v_dq_profile_scoring_history_by_column
+    INNER JOIN score_definition_results_history AS history
+        ON (
+            history.definition_id = v_dq_profile_scoring_history_by_column.definition_id
+            AND history.last_run_time = v_dq_profile_scoring_history_by_column.score_history_cutoff_time
+        )
+    WHERE {filters}
+        AND history.definition_id = '{definition_id}'
+    GROUP BY project_code,
+        history.definition_id,
+        history.last_run_time
+) AS profiling_scores
+FULL OUTER JOIN (
+    SELECT
+        project_code,
+        history.definition_id,
+        history.last_run_time,
+        SUM(good_data_pct * dq_record_ct) / NULLIF(SUM(dq_record_ct), 0) AS score,
+        SUM(CASE critical_data_element WHEN true THEN (good_data_pct * dq_record_ct) ELSE 0 END)
+            / NULLIF(SUM(CASE critical_data_element
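+            -- cde_score: the same record-weighted mean, restricted to critical data elements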
WHEN true THEN dq_record_ct ELSE 0 END), 0) AS cde_score + FROM v_dq_test_scoring_history_by_column + INNER JOIN score_definition_results_history AS history + ON ( + history.definition_id = v_dq_test_scoring_history_by_column.definition_id + AND history.last_run_time = v_dq_test_scoring_history_by_column.score_history_cutoff_time + ) + WHERE {filters} + AND history.definition_id = '{definition_id}' + GROUP BY project_code, + history.definition_id, + history.last_run_time +) AS test_scores + ON ( + test_scores.project_code = profiling_scores.project_code + AND test_scores.definition_id = profiling_scores.definition_id + AND test_scores.last_run_time = profiling_scores.last_run_time + ) diff --git a/testgen/ui/app.py b/testgen/ui/app.py index 71cf18f3..8dd99e55 100644 --- a/testgen/ui/app.py +++ b/testgen/ui/app.py @@ -5,6 +5,7 @@ from testgen import settings from testgen.common.docker_service import check_basic_configuration +from testgen.common.models import with_database_session from testgen.ui import bootstrap from testgen.ui.assets import get_asset_path from testgen.ui.components import widgets as testgen @@ -13,11 +14,13 @@ from testgen.ui.session import session +@with_database_session def render(log_level: int = logging.INFO): st.set_page_config( page_title="TestGen", page_icon=get_asset_path("favicon.ico"), layout="wide", + initial_sidebar_state="collapsed" if user_session_service.user_has_catalog_role() else "auto" ) application = get_application(log_level=log_level) @@ -33,6 +36,8 @@ def render(log_level: int = logging.INFO): session.dbschema = db.get_schema() projects = project_service.get_projects() + if not session.project: + session.project = st.query_params.get("project_code") if not session.project and len(projects) > 0: project_service.set_current_project(projects[0]["code"]) @@ -45,7 +50,8 @@ def render(log_level: int = logging.INFO): if not hide_sidebar: with st.sidebar: testgen.sidebar( - project=project_service.get_project_by_code(session.project)["project_name"], + projects=projects, + current_project=session.project, menu=application.menu.update_version(application.get_version()), username=session.username, current_page=session.current_page, diff --git a/testgen/ui/assets/flavors/databricks.svg b/testgen/ui/assets/flavors/databricks.svg new file mode 100644 index 00000000..8bc00ee1 --- /dev/null +++ b/testgen/ui/assets/flavors/databricks.svg @@ -0,0 +1,47 @@ + + + + + + + diff --git a/testgen/ui/assets/style.css b/testgen/ui/assets/style.css index f631cc77..b444185b 100644 --- a/testgen/ui/assets/style.css +++ b/testgen/ui/assets/style.css @@ -16,7 +16,7 @@ body { --secondary-text-color: #0000008a; --disabled-text-color: #00000042; --caption-text-color: rgba(49, 51, 63, 0.6); /* Match Streamlit's caption color */ - --border-color: rgba(0, 0, 0, .12); + --border-color: rgba(0, 0, 0, .12); --sidebar-background-color: white; --sidebar-item-hover-color: #f5f5f5; @@ -29,6 +29,10 @@ body { --dk-card-background: #fff; --dk-tooltip-background: rgb(255, 255, 255); + + --portal-background: white; + --portal-box-shadow: rgba(0, 0, 0, 0.16) 0px 4px 16px; + --select-hover-background: rgb(240, 242, 246); } img.dk-logo-img { @@ -91,6 +95,15 @@ div[data-testid="stDialog"] div[role="dialog"] { } /* */ + +div[data-testid="stDialog"] div[role="dialog"]:has(i.s-dialog) { + width: calc(35rem); +} + +div[data-testid="stDialog"] div[role="dialog"]:has(i.xl-dialog) { + width: calc(95rem); +} + div[data-testid="stSpinner"] { background: transparent; } @@ -360,6 +373,10 @@ 
div[data-testid="stVerticalBlockBorderWrapper"]:has(> div > div[data-testid="stV --dk-text-value-background: unset; --dk-card-background: #14181f; --dk-tooltip-background: rgb(14, 17, 23); + + --portal-background: #14181f; + --portal-box-shadow: rgba(0, 0, 0, 0.95) 0px 4px 16px; + --select-hover-background: rgba(255, 255, 255, .32); } /* Main content */ diff --git a/testgen/ui/components/frontend/css/shared.css b/testgen/ui/components/frontend/css/shared.css index a60f368b..8100174a 100644 --- a/testgen/ui/components/frontend/css/shared.css +++ b/testgen/ui/components/frontend/css/shared.css @@ -28,7 +28,7 @@ body { --disabled-text-color: #00000042; --caption-text-color: rgba(49, 51, 63, 0.6); /* Match Streamlit's caption color */ --form-field-color: rgb(240, 242, 246); /* Match Streamlit's form field color */ - --border-color: rgba(0, 0, 0, .12); + --border-color: rgba(0, 0, 0, .12); --tooltip-color: #333d; --dk-card-background: #fff; @@ -40,7 +40,7 @@ body { --field-underline-color: #9e9e9e; --button-hover-state-opacity: 0.12; - --button-generic-background-color: #ffffff; + --button-generic-background-color: #ffffff; --button-basic-background: transparent; --button-basic-text-color: rgba(0, 0, 0, .87); @@ -73,7 +73,8 @@ body { --button-warn-stroked-text-color: var(--red); --button-warn-stroked-background: transparent; - --select-portal-background: white; + --portal-background: white; + --portal-box-shadow: rgba(0, 0, 0, 0.16) 0px 4px 16px; --select-hover-background: rgb(240, 242, 246); } @@ -101,16 +102,17 @@ body { --button-basic-background: transparent; --button-basic-text-color: rgba(255, 255, 255); --button-basic-hover-state-background: rgba(255, 255, 255, .54); - + --button-basic-flat-text-color: rgba(255, 255, 255); --button-basic-flat-background: rgba(255, 255, 255, .54); - + --button-basic-stroked-text-color: rgba(255, 255, 255, .87); --button-basic-stroked-background: transparent; --button-stroked-border: 1px solid var(--border-color); - --select-portal-background: rgb(38, 39, 48); + --portal-background: #14181f; + --portal-box-shadow: rgba(0, 0, 0, 0.95) 0px 4px 16px; --select-hover-background: rgba(255, 255, 255, .32); } } diff --git a/testgen/ui/components/frontend/js/axis_utils.js b/testgen/ui/components/frontend/js/axis_utils.js index 6c7e8352..1822092e 100644 --- a/testgen/ui/components/frontend/js/axis_utils.js +++ b/testgen/ui/components/frontend/js/axis_utils.js @@ -51,4 +51,26 @@ function niceBounds(axisStart, axisEnd, tickCount = 4) { }; } -export { niceBounds }; +/** + * + * @typedef Range + * @type {object} + * @property {number} max + * @property {number} min + * + * @param {number} value + * @param {({new: Range, old: Range})} ranges + * @property {number?} zero + */ +function scale(value, ranges, zero=0) { + const oldRange = (ranges.old.max - ranges.old.min); + const newRange = (ranges.new.max - ranges.new.min); + + if (oldRange === 0) { + return zero; + } + + return ((value - ranges.old.min) * newRange / oldRange) + ranges.new.min; +} + +export { niceBounds, scale }; diff --git a/testgen/ui/components/frontend/js/components/attribute.js b/testgen/ui/components/frontend/js/components/attribute.js index 8cfc1378..106bd7e0 100644 --- a/testgen/ui/components/frontend/js/components/attribute.js +++ b/testgen/ui/components/frontend/js/components/attribute.js @@ -19,7 +19,7 @@ const Attribute = (/** @type Properties */ props) => { return div( { style: () => `width: ${props.width ? 
getValue(props.width) + 'px' : 'auto'}` }, div( - { class: 'flex-row fx-gap-1 text-caption text-capitalize mb-1' }, + { class: 'flex-row fx-gap-1 text-caption mb-1' }, props.label, () => getValue(props.help) ? withTooltip( diff --git a/testgen/ui/components/frontend/js/components/button.js b/testgen/ui/components/frontend/js/components/button.js index fbdd90b0..9c08d1ce 100644 --- a/testgen/ui/components/frontend/js/components/button.js +++ b/testgen/ui/components/frontend/js/components/button.js @@ -3,6 +3,7 @@ * @type {object} * @property {(string)} type * @property {(string|null)} color + * @property {(string|null)} width * @property {(string|null)} label * @property {(string|null)} icon * @property {(int|null)} iconSize @@ -36,10 +37,9 @@ const DEFAULT_ICON_SIZE = 18; const Button = (/** @type Properties */ props) => { loadStylesheet('button', stylesheet); - const buttonType = getValue(props.type); const width = getValue(props.width); - const isIconOnly = buttonType === BUTTON_TYPE.ICON || (getValue(props.icon) && !getValue(props.label)); - + const isIconOnly = getValue(props.type) === BUTTON_TYPE.ICON || (getValue(props.icon) && !getValue(props.label)); + if (!window.testgen.isPage) { Streamlit.setFrameHeight(40); if (isIconOnly) { // Force a 40px width for the parent iframe & handle window resizing @@ -61,14 +61,14 @@ const Button = (/** @type Properties */ props) => { return button( { id: getValue(props.id) ?? undefined, - class: `tg-button tg-${buttonType}-button tg-${getValue(props.color) ?? 'basic'}-button ${buttonType !== 'icon' && isIconOnly ? 'tg-icon-button' : ''}`, + class: () => `tg-button tg-${getValue(props.type)}-button tg-${getValue(props.color) ?? 'basic'}-button ${getValue(props.type) !== 'icon' && isIconOnly ? 'tg-icon-button' : ''}`, style: () => `width: ${isIconOnly ? '' : (width ?? '100%')}; ${getValue(props.style)}`, onclick: onClickHandler, disabled: props.disabled, onmouseenter: props.tooltip ? (() => showTooltip.val = true) : undefined, onmouseleave: props.tooltip ? (() => showTooltip.val = false) : undefined, }, - () => getValue(props.tooltip) ? Tooltip({ + () => window.testgen.isPage && getValue(props.tooltip) ? Tooltip({ text: props.tooltip, show: showTooltip, position: props.tooltipPosition, diff --git a/testgen/ui/components/frontend/js/components/checkbox.js b/testgen/ui/components/frontend/js/components/checkbox.js index bb01ba9a..d41d3039 100644 --- a/testgen/ui/components/frontend/js/components/checkbox.js +++ b/testgen/ui/components/frontend/js/components/checkbox.js @@ -4,7 +4,7 @@ * @property {string} label * @property {boolean?} checked * @property {boolean?} indeterminate - * @property {function?} onChange + * @property {function(boolean, Event)?} onChange * @property {number?} width */ import van from '../van.min.js'; @@ -27,7 +27,7 @@ const Checkbox = (/** @type Properties */ props) => { indeterminate: props.indeterminate, onchange: van.derive(() => { const onChange = props.onChange?.val ?? props.onChange; - return onChange ? (event) => onChange(event.target.checked) : null; + return onChange ? 
(/** @type Event */ event) => onChange(event.target.checked, event) : null; }), }), props.label, @@ -47,7 +47,7 @@ stylesheet.replace(` border-radius: 4px; position: relative; transition-property: border-color, background-color; - transition-duration: 0.3s; + transition-duration: 0.3s; } .tg-checkbox--input:focus, diff --git a/testgen/ui/components/frontend/js/components/icon.js b/testgen/ui/components/frontend/js/components/icon.js index 930c4bbe..59ad154c 100644 --- a/testgen/ui/components/frontend/js/components/icon.js +++ b/testgen/ui/components/frontend/js/components/icon.js @@ -16,6 +16,7 @@ const Icon = (/** @type Properties */ props, /** @type string */ icon) => { { class: () => `material-symbols-rounded tg-icon text-secondary ${getValue(props.classes)}`, style: () => `font-size: ${getValue(props.size) || DEFAULT_SIZE}px;`, + ...props, }, icon, ); diff --git a/testgen/ui/components/frontend/js/components/input.js b/testgen/ui/components/frontend/js/components/input.js index 2a115e95..009a8246 100644 --- a/testgen/ui/components/frontend/js/components/input.js +++ b/testgen/ui/components/frontend/js/components/input.js @@ -1,21 +1,24 @@ /** * @typedef Properties * @type {object} + * @property {string?} id * @property {string?} label * @property {string?} help * @property {(string | number)?} value * @property {string?} placeholder + * @property {string[]?} autocompleteOptions * @property {string?} icon * @property {boolean?} clearable - * @property {function?} onChange + * @property {function(string)?} onChange * @property {number?} width * @property {number?} height * @property {string?} style */ import van from '../van.min.js'; -import { debounce, getValue, loadStylesheet } from '../utils.js'; +import { debounce, getValue, loadStylesheet, getRandomId } from '../utils.js'; import { Icon } from './icon.js'; import { withTooltip } from './tooltip.js'; +import { Portal } from './portal.js'; const { div,input, label, i } = van.tags; const defaultHeight = 32; @@ -25,14 +28,31 @@ const clearIconSize = 20; const Input = (/** @type Properties */ props) => { loadStylesheet('input', stylesheet); + const domId = van.derive(() => getValue(props.id) ?? getRandomId()); const value = van.derive(() => getValue(props.value) ?? ''); van.derive(() => { const onChange = props.onChange?.val ?? props.onChange; - onChange?.(value.val); + if (value.val !== value.oldVal) { + onChange(value.val); + } }); + const autocompleteOpened = van.state(false); + const autocompleteOptions = van.derive(() => { + const filtered = getValue(props.autocompleteOptions)?.filter(option => option.toLowerCase().includes(value.val.toLowerCase())); + if (!filtered?.length) { + autocompleteOpened.val = false; + } + return filtered; + }); + const onAutocomplete = (/** @type string */ option) => { + autocompleteOpened.val = false; + value.val = option; + }; + return label( { + id: domId, class: 'flex-column fx-gap-1 tg-input--label', style: () => `width: ${props.width ? getValue(props.width) + 'px' : 'auto'}; ${getValue(props.style)}`, }, @@ -66,8 +86,32 @@ const Input = (/** @type Properties */ props) => { style: () => `height: ${getValue(props.height) || defaultHeight}px;`, value, placeholder: () => getValue(props.placeholder) ?? '', - oninput: debounce(event => value.val = event.target.value, 300), + oninput: debounce((/** @type Event */ event) => value.val = event.target.value, 300), + onclick: van.derive(() => autocompleteOptions.val?.length + ? 
() => autocompleteOpened.val = true + : null + ), }), + Portal( + { target: domId.val, targetRelative: true, opened: autocompleteOpened }, + () => div( + { class: 'tg-input--options-wrapper' }, + autocompleteOptions.val?.map(option => + div( + { + class: 'tg-input--option', + onclick: (/** @type Event */ event) => { + // https://stackoverflow.com/questions/61273446/stop-click-event-propagation-on-a-label + event.preventDefault(); + event.stopPropagation(); + onAutocomplete(option); + }, + }, + option, + ) + ), + ), + ), ); }; @@ -110,6 +154,7 @@ stylesheet.replace(` } .tg-input--field::placeholder { + font-style: italic; color: var(--disabled-text-color); } @@ -118,6 +163,39 @@ stylesheet.replace(` outline: none; border-color: var(--primary-color); } + +.tg-input--options-wrapper { + border-radius: 8px; + background: var(--portal-background); + box-shadow: var(--portal-box-shadow); + min-height: 40px; + max-height: 400px; + overflow: auto; + z-index: 99; +} + +.tg-input--options-wrapper > .tg-input--option:first-child { + border-top-left-radius: 8px; + border-top-right-radius: 8px; +} + +.tg-input--options-wrapper > .tg-input--option:last-child { + border-bottom-left-radius: 8px; + border-bottom-right-radius: 8px; +} + +.tg-input--option { + display: flex; + align-items: center; + height: 32px; + padding: 0px 8px; + cursor: pointer; + font-size: 14px; + color: var(--primary-text-color); +} +.tg-input--option:hover { + background: var(--select-hover-background); +} `); export { Input }; diff --git a/testgen/ui/components/frontend/js/components/line_chart.js b/testgen/ui/components/frontend/js/components/line_chart.js new file mode 100644 index 00000000..3eb7acdb --- /dev/null +++ b/testgen/ui/components/frontend/js/components/line_chart.js @@ -0,0 +1,315 @@ +/** + * @import { Point } from './spark_line.js'; + * + * @typedef TrendChartOptions + * @type {object} + * @property {number?} width + * @property {number?} height + * @property {Ticks?} ticks + * @property {number?} xMinSpanBetweenTicks + * @property {number?} yMinSpanBetweenTicks + * @property {number?} padding + * @property {number?} xAxisLeftPadding + * @property {number?} xAxisRightPadding + * @property {number?} yAxisTopPadding + * @property {number?} yAxisBottomPadding + * @property {string?} axisColor + * @property {number?} axisWidth + * @property {number?} tooltipOffsetX + * @property {number?} tooltipOffsetY + * @property {TrendChartFormatters?} formatters + * @property {TrendChartValueGetters?} getters + * @property {Function?} lineDiscriminator + * @property {Function?} lineColor + * @property {Function?} onShowPointTooltip + * @property {Function?} onRefreshClicked + * + * @typedef Ticks + * @type {object} + * @property {Array} x + * @property {Array} y + * + * @typedef TrendChartValueGetters + * @type {object} + * @property {(item: any) => number} x + * @property {(item: any) => number} y + * + * @typedef TrendChartFormatters + * @type {object} + * @property {(tick: number) => string} x + * @property {(tick: number) => string} y + * + * @typedef TrendLegendOptions + * @type {object} + * @property {Point} origin + * @property {Point} end + * @property {string?} refreshTooltip + * @property {() => void} onRefreshClicked + * @property {(lineId: string) => void} onLineClicked + * @property {(lineId: string) => void} onLineMouseEnter + * @property {(lineId: string) => void} onLineMouseLeave + */ +import van from '../van.min.js'; +import { getValue } from '../utils.js'; +import { colorMap } from '../display_utils.js'; +import 
{ Tooltip } from './tooltip.js'; +import { SparkLine } from './spark_line.js'; +import { Button } from './button.js'; +import { scale } from '../axis_utils.js'; + +const { div, i, span } = van.tags(); +const { circle, foreignObject, g, line, polyline, svg, text } = van.tags("http://www.w3.org/2000/svg"); + +/** + * Draws 2D coordinate system and sparklines inside. + * + * @param {TrendChartOptions} options + * @param {Array | Array} values + */ +const LineChart = ( + options, + ...values +) => { + const _options = { + ...defaultOptions, + ...(options ?? {}), + }; + const variables = { + 'axis-color': _options.axisColor, + 'axis-width': _options.axisWidth, + 'line-width': _options.lineWidth, + }; + const style = Object.entries(variables).map(([key, value]) => `--${key}: ${value}`).join(';'); + const origin = {x: _options.padding, y: _options.padding}; + const end = {x: _options.width - _options.padding, y: _options.height - _options.padding}; + const xAxis = {x1: origin.x, y1: end.y, x2: end.x, y2: end.y}; + const yAxis = {x1: end.x, y1: origin.y, x2: end.x, y2: end.y}; + + let /** @type {Array} */ xValues = _options.ticks?.x; + let /** @type {Array} */ yValues = _options.ticks?.y; + + if (!xValues) { + xValues = Array.from(values.reduce((set, v) => set.add(_options.getters.x(v)), new Set())) + .sort((a, b) => a - b); + } + + if (!yValues) { + yValues = Array.from(values.reduce((set, v) => set.add(_options.getters.y(v)), new Set())) + .sort((a, b) => a - b); + } + + const xTicks = xValues.filter((value, idx, ticks) => { + return idx === 0 || ((value - ticks[idx - 1]) >= _options.xMinSpanBetweenTicks); + }).map((value) => ({ value, label: _options.formatters.x(value) })); + const yTicks = yValues.filter((value, idx, ticks) => { + return idx === 0 || ((value - ticks[idx - 1]) >= _options.yMinSpanBetweenTicks); + }).map((value) => ({ value, label: _options.formatters.y(value) })); + + const asSVGX = (/** @type {number} */ value) => { + return scale(value, { + old: {min: Math.min(...xValues), max: Math.max(...xValues)}, + new: {min: origin.x + _options.xAxisLeftPadding, max: end.x - _options.xAxisRightPadding}, + }, origin.x + _options.xAxisLeftPadding); + }; + const asSVGY = (/** @type {number} */ value) => { + return _options.height - scale(value, { + old: {min: Math.min(...yValues), max: Math.max(...yValues)}, + new: {min: origin.y + _options.yAxisBottomPadding, max: end.y - _options.yAxisTopPadding}, + }, end.y - _options.yAxisTopPadding); + }; + + const lines = values + .map(v => ({...v, x: asSVGX(_options.getters.x(v)), y: asSVGY(_options.getters.y(v))})) + .reduce((lines, value) => { + const lineId = _options.lineDiscriminator(value); + if (!Object.keys(lines).includes(String(lineId))) { + lines[lineId] = []; + } + lines[lineId].push(value); + return lines; + }, {}); + const linesStates = Object.keys(lines).reduce((result, lineId) => ({ + ...result, + [lineId]: { + dimmed: van.state(false), + hidden: van.state(false), + }, + }), {}); + const linesOpacity = Object.entries(linesStates).reduce((result, [lineId, {dimmed, hidden}]) => ({ + ...result, + [lineId]: van.derive(() => (getValue(dimmed) || getValue(hidden)) ? 
0.2 : 1.0), + }), {}); + + function dimAllExcept(lineId) { + if (linesStates[lineId].hidden.val) { + return; + } + + Object.values(linesStates).forEach(states => states.dimmed.val = true); + linesStates[lineId].dimmed.val = false; + } + + function resetDimmedLines() { + Object.values(linesStates).forEach(states => states.dimmed.val = false); + } + + function toggleLineVisibility(lineId) { + linesStates[lineId].hidden.val = !linesStates[lineId].hidden.val; + } + + const tooltipText = van.state(''); + const showTooltip = van.state(false); + const tooltipExtraStyle = van.state(''); + const tooltip = Tooltip({ + text: tooltipText, + show: showTooltip, + position: '--', + style: tooltipExtraStyle, + }); + + return svg( + { + width: '100%', + height: '100%', + viewBox: `0 0 ${_options.width} ${_options.height}`, + style: `${style}; overflow: visible;`, + }, + + Legend( + { + origin, + end, + refreshTooltip: 'Recalculate Trend', + onLineMouseEnter: dimAllExcept, + onLineMouseLeave: resetDimmedLines, + onLineClicked: toggleLineVisibility, + onRefreshClicked: _options.onRefreshClicked, + }, + Object.entries(lines).map(([lineId, _], idx) => ({ id: lineId, color: _options.lineColor(lineId, idx), opacity: linesOpacity[lineId] })), + ), + + line({...xAxis, style: 'stroke: var(--axis-color); stroke-width: var(--axis-width)'}), + xTicks.map(({ value }) => circle({ cx: asSVGX(value), cy: end.y, r: 2, 'pointer-events': 'none', fill: 'var(--axis-color)' })), + xTicks.map(({ value, label }) => { + const dx = Math.max(5, label.length * 5.5 / 2); + return text({x: asSVGX(value), y: end.y, dx: -dx, dy: 20, style: 'stroke: var(--axis-color); stroke-width: .1; fill: var(--axis-color);' }, label); + }), + + line({...yAxis, style: 'stroke: var(--axis-color); stroke-width: var(--axis-width)'}), + yTicks.map(({ value, label }) => text({ + x: end.x, + y: asSVGY(value), + dx: 5, + dy: 5, + style: 'stroke: var(--axis-color); stroke-width: .1; fill: var(--axis-color);' }, + label, + )), + + Object.entries(lines).map(([lineId, line], idx) => + SparkLine( + { + color: _options.lineColor(lineId, idx), + stroke: _options.lineWidth, + opacity: linesOpacity[lineId], + hidden: linesStates[lineId].hidden, + interactive: _options.onShowPointTooltip != undefined, + onPointMouseEnter: (point, line) => { + tooltipText.val = _options.onShowPointTooltip?.(point, line); + tooltipExtraStyle.val = `transform: translate(${point.x + _options.tooltipOffsetX}px, ${point.y + _options.tooltipOffsetY}px);`; + showTooltip.val = true; + }, + onPointMouseLeave: () => { + tooltipText.val = ''; + tooltipExtraStyle.val = ''; + showTooltip.val = false; + }, + }, + line, + ) + ), + + _options.onShowPointTooltip + ? foreignObject({fill: 'none', width: '100%', height: '100%', 'pointer-events': 'none', style: 'overflow: visible;'}, tooltip) + : '', + ); +}; + +/** + * Renders a representation of each line displayed in the chart and allows reacting to events on each. + * + * @param {TrendLegendOptions} options + * @param {Array<{lineId: string, color: string, opacity: number}>} lines + */ +const Legend = (options, lines) => { + const title = 'Score Trend'; + const lineLength = 15; + const lineHeight = 4; + + return foreignObject( + { + x: 0, + y: 0, + width: '100%', + height: '40', + overflow: 'visible', + }, + div( + {class: 'flex-row pt-2 pl-6 pr-6'}, + span({class: 'mr-1 text-secondary', style: 'font-size: 16px; font-weight: 500;'}, title), + options?.onRefreshClicked ? 
+ Button({ + type: 'icon', + icon: 'refresh', + style: 'width: 32px; height: 32px;', + tooltip: options?.refreshTooltip || null, + onclick: options?.onRefreshClicked, + }) + : null, + div( + {class: 'flex-row ml-7', style: 'margin-right: auto;'}, + ...lines.map((line) => + div( + { + class: 'flex-row clickable mr-3', + style: () => `opacity: ${getValue(line.opacity)}`, + onclick: () => options?.onLineClicked(line.id), + onmouseenter: () => options?.onLineMouseEnter(line.id), + onmouseleave: () => options?.onLineMouseLeave(line.id), + }, + i({style: `width: ${lineLength}px; height: ${lineHeight}px; background: ${line.color}; display: block; margin-right: 2px; border-radius: 10px;`}), + span({class: 'text-caption'}, line.id), + ) + ), + ), + ) + ); +}; + +const defaultOptions = { + width: 600, + height: 200, + padding: 32, + xMinSpanBetweenTicks: 10, + yMinSpanBetweenTicks: 10, + xAxisLeftPadding: 16, + xAxisRightPadding: 16, + yAxisTopPadding: 16, + yAxisBottomPadding: 16, + axisColor: colorMap.grey, + axisWidth: 2, + lineWidth: 3, + tooltipOffsetX: 10, + tooltipOffsetY: 10, + formatters: { + x: String, + y: String, + }, + getters: { + x: (/** @type {Point} */ item) => item.x, + y: (/** @type {Point} */ item) => item.y, + }, + lineDiscriminator: (/** @type {Point} */ item) => '0', + lineColor: (lineId, idx) => ['blue', 'green', 'yellow', 'brown'][idx] ?? 'grey', +}; + +export { LineChart }; diff --git a/testgen/ui/components/frontend/js/components/link.js b/testgen/ui/components/frontend/js/components/link.js index bc03b4da..ae460d5d 100644 --- a/testgen/ui/components/frontend/js/components/link.js +++ b/testgen/ui/components/frontend/js/components/link.js @@ -14,6 +14,9 @@ * @property {number?} width * @property {string?} style * @property {string?} class + * @property {string?} tooltip + * @property {string?} tooltipPosition + * @property {boolean?} disabled */ import { emitEvent, enforceElementWidth, getValue, loadStylesheet } from '../utils.js'; import van from '../van.min.js'; @@ -30,15 +33,23 @@ const Link = (/** @type Properties */ props) => { if (width) { enforceElementWidth(window.frameElement, width); } + if (props.tooltip) { + window.frameElement.parentElement.setAttribute('data-tooltip', props.tooltip.val); + window.frameElement.parentElement.setAttribute('data-tooltip-position', props.tooltipPosition.val); + } } const href = getValue(props.href); const params = getValue(props.params) ?? {}; const open_new = !!getValue(props.open_new); + const showTooltip = van.state(false); return a( { - class: `tg-link ${getValue(props.underline) ? 'tg-link--underline' : ''} ${getValue(props.class) ?? ''}`, + class: `tg-link + ${getValue(props.underline) ? 'tg-link--underline' : ''} + ${getValue(props.disabled) ? 'disabled' : ''} + ${getValue(props.class) ?? ''}`, style: props.style, href: `/${href}${getQueryFromParams(params)}`, target: open_new ? '_blank' : '', @@ -47,7 +58,14 @@ const Link = (/** @type Properties */ props) => { event.stopPropagation(); emitEvent('LinkClicked', { href, params }); }, + onmouseenter: props.tooltip ? (() => showTooltip.val = true) : undefined, + onmouseleave: props.tooltip ? (() => showTooltip.val = false) : undefined, }, + () => getValue(props.tooltip) ? Tooltip({ + text: props.tooltip, + show: showTooltip, + position: props.tooltipPosition, + }) : '', div( {class: 'tg-link--wrapper'}, props.left_icon ? 
LinkIcon(props.left_icon, props.left_icon_size, 'left') : undefined, @@ -89,6 +107,11 @@ stylesheet.replace(` cursor: pointer; } + .tg-link.disabled { + pointer-events: none; + cursor: not-allowed; + } + .tg-link .tg-link--wrapper { display: flex; align-items: center; diff --git a/testgen/ui/components/frontend/js/components/paginator.js b/testgen/ui/components/frontend/js/components/paginator.js index 9443c1dd..602302b2 100644 --- a/testgen/ui/components/frontend/js/components/paginator.js +++ b/testgen/ui/components/frontend/js/components/paginator.js @@ -4,7 +4,7 @@ * @property {number} count * @property {number} pageSize * @property {number?} pageIndex - * @property {function?} onChange + * @property {function(number)?} onChange */ import van from '../van.min.js'; diff --git a/testgen/ui/components/frontend/js/components/portal.js b/testgen/ui/components/frontend/js/components/portal.js index e2a63ad2..072395d8 100644 --- a/testgen/ui/components/frontend/js/components/portal.js +++ b/testgen/ui/components/frontend/js/components/portal.js @@ -1,12 +1,13 @@ /** * Container for any floating elements anchored to another element. - * + * * NOTE: Ensure options is an object and turn individual properties into van.state * if dynamic updates are needed. - * + * * @typedef Options * @type {object} * @property {string} target + * @property {boolean?} targetRelative * @property {boolean} opened * @property {(string|undefined)} style * @property {(string|undefined)} class @@ -17,7 +18,7 @@ import { getValue } from '../utils.js'; const { div } = van.tags; const Portal = (/** @type Options */ options, ...args) => { - const { target } = getValue(options); + const { target, targetRelative } = getValue(options); const id = `${target}-portal`; window.testgen.portals[id] = { domId: id, targetId: target, opened: options.opened }; @@ -29,8 +30,8 @@ const Portal = (/** @type Options */ options, ...args) => { const anchor = document.getElementById(target); const anchorRect = anchor.getBoundingClientRect(); - const top = anchorRect.top + anchorRect.height; - const left = anchorRect.left; + const top = (targetRelative ? 0 : anchorRect.top) + anchorRect.height; + const left = targetRelative ? 0 : anchorRect.left; const minWidth = anchorRect.width; return div( @@ -44,4 +45,4 @@ const Portal = (/** @type Options */ options, ...args) => { }; }; -export { Portal }; \ No newline at end of file +export { Portal }; diff --git a/testgen/ui/components/frontend/js/components/radio_group.js b/testgen/ui/components/frontend/js/components/radio_group.js index 0c7f5e4b..aad826e3 100644 --- a/testgen/ui/components/frontend/js/components/radio_group.js +++ b/testgen/ui/components/frontend/js/components/radio_group.js @@ -3,13 +3,13 @@ * @type {object} * @property {string} label * @property {string | number | boolean | null} value - * + * * @typedef Properties * @type {object} * @property {string} label * @property {Option[]} options * @property {string | number | boolean | null} selected - * @property {function?} onChange + * @property {function(string | number | boolean | null)?} onChange * @property {number?} width */ import van from '../van.min.js'; @@ -24,13 +24,13 @@ const RadioGroup = (/** @type Properties */ props) => { return div( { style: () => `width: ${props.width ? 
getValue(props.width) + 'px' : 'auto'}` },
         div(
-            { class: 'text-caption text-capitalize mb-1' },
+            { class: 'text-caption mb-1' },
             props.label,
         ),
         () => div(
             { class: 'flex-row fx-gap-4 tg-radio-group' },
             getValue(props.options).map(option => label(
-                { class: 'flex-row fx-gap-2 text-capitalize clickable' },
+                { class: 'flex-row fx-gap-2 clickable' },
                 input({
                     type: 'radio',
                     name: groupName,
@@ -64,7 +64,7 @@ stylesheet.replace(`
     border-radius: 9px;
     position: relative;
     transition-property: border-color, background-color;
-    transition-duration: 0.3s; 
+    transition-duration: 0.3s;
 }
 
 .tg-radio-group--input:focus,
diff --git a/testgen/ui/components/frontend/js/components/score_card.js b/testgen/ui/components/frontend/js/components/score_card.js
index f500e38c..7085a42d 100644
--- a/testgen/ui/components/frontend/js/components/score_card.js
+++ b/testgen/ui/components/frontend/js/components/score_card.js
@@ -8,6 +8,17 @@
  * @property {number} testing_score
  * @property {number} cde_score
  * @property {Array} categories
+ * @property {Array} history
+ *
+ * @typedef HistoryEntry
+ * @type {object}
+ * @property {number} score
+ * @property {string} category
+ * @property {string} time
+ *
+ * @typedef ScoreCardOptions
+ * @type {object}
+ * @property {boolean} showHistory
  */
 import van from '../van.min.js';
 import { Card } from './card.js';
@@ -15,14 +26,25 @@ import { dot } from './dot.js';
 import { Attribute } from './attribute.js';
 import { getScoreColor } from '../score_utils.js';
 import { getValue, loadStylesheet } from '../utils.js';
+import { scale } from '../axis_utils.js';
+import { SparkLine } from './spark_line.js';
+import { colorMap } from '../display_utils.js';
 
 const { div, i, span } = van.tags;
-const { circle, svg, text } = van.tags("http://www.w3.org/2000/svg");
 
-const ScoreCard = (
-    /** @type {Score} */ score,
-    /** @type {(Function|Array|any|undefined)}*/ actions,
-) => {
+const { circle, g, rect, svg, text } = van.tags("http://www.w3.org/2000/svg");
+
+/**
+ * Render a scorecard's charts for the total and CDE scores and the
+ * individual category scores.
+ *
+ * All three "sections" are optional and can be missing.
+ *
+ * @param {Score} score
+ * @param {(Function|Array|any|undefined)} actions
+ * @param {ScoreCardOptions?} options
+ * @returns {HTMLElement}
+ */
+const ScoreCard = (score, actions, options) => {
     loadStylesheet('score-card', stylesheet);
 
     const title = van.derive(() => getValue(score)?.name ?? '');
@@ -36,18 +58,35 @@ const ScoreCard = (
         const categories = score_.dimensions ?? score_.categories ?? [];
         const categoriesLabel = score_.categories_label ?? 'Quality Dimension';
 
+        const overallScoreHistory = score.history?.filter(e => e.category === 'score') ?? [];
+        const cdeScoreHistory = score.history?.filter(e => e.category === 'cde_score') ?? [];
+
         return div(
             { class: 'flex-row fx-justify-center fx-align-flex-start' },
             score_.score ? div(
                 { class: 'mr-4' },
-                ScoreChart("Total Score", score_.score),
+                ScoreChart(
+                    "Total Score",
+                    score_.score,
+                    overallScoreHistory,
+                    (options?.showHistory ?? false) && overallScoreHistory.length > 1,
+                    colorMap.teal,
+                ),
                 div(
                     { class: 'flex-row fx-justify-center fx-gap-2 mt-1' },
                     Attribute({ label: 'Profiling', value: score_.profiling_score }),
                     Attribute({ label: 'Testing', value: score_.testing_score }),
                 ),
             ) : '',
-            score_.cde_score ? ScoreChart("CDE Score", score_.cde_score) : '',
+            score_.cde_score
+                ? ScoreChart(
+                    "CDE Score",
+                    score_.cde_score,
+                    cdeScoreHistory,
+                    (options?.showHistory ?? false) && cdeScoreHistory.length > 1,
+                    colorMap.purpleLight,
+                )
+                : '',
             (score_.cde_score && categories.length > 0) ? i({ class: 'mr-4 ml-4' }) : '',
             categories.length > 0 ? div(
                 { class: 'flex-column' },
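A minimal sketch of the data driving the two ScoreChart calls above (field values hypothetical): `score.history` mixes both categories and ScoreCard splits it per chart.

    const history = [
        { score: 87, category: 'score', time: '2024-05-01 10:00:00' },
        { score: 84, category: 'cde_score', time: '2024-05-01 10:00:00' },
    ];
    const overallScoreHistory = history.filter(e => e.category === 'score');    // Total Score sparkline
    const cdeScoreHistory = history.filter(e => e.category === 'cde_score');    // CDE Score sparkline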
@@ -75,8 +114,12 @@ const ScoreCard = (
  *
  * @param {string} label
  * @param {number} score
+ * @param {Array} history
+ * @param {boolean} showHistory
+ * @param {string?} trendColor
+ * @returns {SVGElement}
  */
-const ScoreChart = (label, score) => {
+const ScoreChart = (label, score, history, showHistory, trendColor) => {
     const variables = {
         size: '100px',
         'stroke-width': '4px',
@@ -87,19 +130,32 @@ const ScoreChart = (label, score) => {
         dash: `calc((${score ?? 100} * var(--circumference)) / 100)`,
     };
     const style = Object.entries(variables).map(([key, value]) => `--${key}: ${value}`).join(';');
+    const historyLine = history.map(e => ({ x: Date.parse(e.time), y: e.score }));
+    const yLength = 30;
+    const xValues = historyLine.map(line => line.x);
+    const yValues = historyLine.map(line => line.y);
+    const xRanges = {old: {min: Math.min(...xValues), max: Math.max(...xValues)}, new: {min: 0, max: 80}};
+    const yRanges = {old: {min: Math.min(...yValues), max: Math.max(...yValues)}, new: {min: 0, max: yLength}};
 
     return svg(
-        { class: 'tg-score-chart', width: "100", height: "100", viewBox: "0 0 100 100", style },
+        { class: 'tg-score-chart', width: 100, height: 100, viewBox: "0 0 100 100", overflow: 'visible', style },
         circle({ class: 'tg-score-chart--bg' }),
         circle({ class: 'tg-score-chart--fg' }),
         text({ x: '50%', y: '40%', 'dominant-baseline': 'middle', 'text-anchor': 'middle', fill: 'var(--primary-text-color)', 'font-size': '18px', 'font-weight': 500 }, score ?? '-'),
         text({ x: '50%', y: '40%', 'dominant-baseline': 'middle', 'text-anchor': 'middle', fill: 'var(--secondary-text-color)', 'font-size': '14px', class: 'tg-score-chart--label' }, label),
+
+        showHistory ? g(
+            {fill: 'none', style: 'transform: translate(10px, 70px);'},
+            rect({ width: 80, height: 30, x: 0, y: 0, rx: 2, ry: 2, fill: 'var(--dk-card-background)', stroke: 'var(--empty)' }),
+            SparkLine({color: trendColor}, historyLine.map(line => ({ x: scale(line.x, xRanges), y: yLength - scale(line.y, yRanges, yLength)}))),
+        ) : null,
     );
 };
 
 const stylesheet = new CSSStyleSheet();
 stylesheet.replace(`
 .tg-score-card {
+    height: 216px;
     width: fit-content;
     box-sizing: border-box;
     border: 1px solid var(--border-color);
diff --git a/testgen/ui/components/frontend/js/components/score_history.js b/testgen/ui/components/frontend/js/components/score_history.js
new file mode 100644
index 00000000..8d00ebde
--- /dev/null
+++ b/testgen/ui/components/frontend/js/components/score_history.js
@@ -0,0 +1,83 @@
+/**
+ * @typedef ScoreHistoryEntry
+ * @type {object}
+ * @property {number} score
+ * @property {('score'|'cde_score')} category
+ * @property {string} time
+ */
+import van from '../van.min.js';
+import { emitEvent, getValue, loadStylesheet } from '../utils.js';
+import { colorMap } from '../display_utils.js';
+import { LineChart } from './line_chart.js';
+
+const { div, span, strong } = van.tags;
+
+const TRANSLATIONS = {
+    score: 'Total Score',
+    cde_score: 'CDE Score',
+};
+
+/**
+ * Render the scorecard history as line charts for the enabled scores.
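+ *
+ * A hypothetical invocation (values illustrative only; scoreState is assumed
+ * to hold an object with an `id`, used by the refresh event) could look like:
+ *
+ *     ScoreHistory(
+ *         { score: scoreState, showRefresh: true },
+ *         { score: 87, category: 'score', time: '2024-05-01 10:00:00' },
+ *         { score: 82, category: 'cde_score', time: '2024-05-01 10:00:00' },
+ *     )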
+ *
+ * @param {Object} props
+ * @param {...ScoreHistoryEntry} entries
+ * @returns {HTMLElement}
+ */
+const ScoreHistory = (props, ...entries) => {
+    loadStylesheet('score-trend', stylesheet);
+
+    const lineColors = {
+        [TRANSLATIONS.score]: colorMap.teal,
+        [TRANSLATIONS.cde_score]: colorMap.purpleLight,
+        default: colorMap.grey,
+    };
+
+    return div(
+        { ...props, class: `tg-score-trend flex-row ${props?.class ?? ''}` },
+        LineChart(
+            {
+                width: 600,
+                height: 200,
+                tooltipOffsetX: -100,
+                tooltipOffsetY: 10,
+                xMinSpanBetweenTicks: 3 * 24 * 60 * 60 * 1000,
+                yMinSpanBetweenTicks: 5,
+                getters: {
+                    x: (/** @type {ScoreHistoryEntry} */ entry) => Date.parse(entry.time),
+                    y: (/** @type {ScoreHistoryEntry} */ entry) => Number(entry.score),
+                },
+                formatters: {
+                    x: (value) => new Intl.DateTimeFormat("en-US", {month: 'short', day: 'numeric'}).format(value),
+                    y: (value) => String(Math.trunc(value)),
+                },
+                lineDiscriminator: (/** @type {ScoreHistoryEntry} */ entry) => TRANSLATIONS[entry.category],
+                lineColor: (lineId) => lineColors[lineId] ?? lineColors.default,
+                onShowPointTooltip: (point, _) => {
+                    return div(
+                        { class: 'flex-column fx-align-flex-start fx-justify-flex-start'},
+                        strong(TRANSLATIONS[point.category]),
+                        span(point.score),
+                        span(Intl.DateTimeFormat("en-US", {dateStyle: 'long', timeStyle: 'long'}).format(Date.parse(point.time))),
+                    );
+                },
+                onRefreshClicked: getValue(props.showRefresh) ? () => emitEvent('RecalculateHistory', { payload: getValue(props.score).id }) : undefined,
+            },
+            ...entries,
+        ),
+    );
+};
+
+const stylesheet = new CSSStyleSheet();
+stylesheet.replace(`
+.tg-score-trend {
+    width: fit-content;
+    box-sizing: border-box;
+    border: 1px solid var(--border-color);
+    border-radius: 8px;
+    margin-bottom: unset !important;
+    background-color: var(--dk-card-background);
+}
+`);
+
+export { ScoreHistory };
diff --git a/testgen/ui/components/frontend/js/components/select.js b/testgen/ui/components/frontend/js/components/select.js
index 7420a008..088bb7f5 100644
--- a/testgen/ui/components/frontend/js/components/select.js
+++ b/testgen/ui/components/frontend/js/components/select.js
@@ -19,7 +19,7 @@
  * @property {string?} style
  */
 import van from '../van.min.js';
-import { getRandomId, getValue, getParents, loadStylesheet, isState, isEqual } from '../utils.js';
+import { getRandomId, getValue, loadStylesheet, isState, isEqual } from '../utils.js';
 import { Portal } from './portal.js';
 
 const { div, i, label, span } = van.tags;
@@ -73,11 +73,7 @@ const Select = (/** @type {Properties} */ props) => {
             id: domId,
             class: () => `flex-column fx-gap-1 text-caption tg-select--label ${getValue(props.disabled) ? 'disabled' : ''}`,
             style: () => `width: ${props.width ? getValue(props.width) + 'px' : 'auto'}; ${getValue(props.style)}`,
-            onclick: () => {
-                if (!getValue(props.disabled)) {
-                    opened.val = true;
-                }
-            },
+            onclick: van.derive(() => !getValue(props.disabled) ?
() => opened.val = !opened.val : null), }, props.label, div( @@ -98,7 +94,7 @@ const Select = (/** @type {Properties} */ props) => { ), ), Portal( - {target: domId.val, opened}, + {target: domId.val, targetRelative: true, opened}, () => div( { class: 'tg-select--options-wrapper mt-1' }, getValue(options).map(option => @@ -121,6 +117,9 @@ const Select = (/** @type {Properties} */ props) => { const stylesheet = new CSSStyleSheet(); stylesheet.replace(` +.tg-select--label { + position: relative; +} .tg-select--label.disabled { cursor: not-allowed; color: var(--disabled-text-color); @@ -171,8 +170,8 @@ stylesheet.replace(` .tg-select--options-wrapper { border-radius: 8px; - background: var(--select-portal-background); - box-shadow: rgba(0, 0, 0, 0.16) 0px 4px 16px; + background: var(--portal-background); + box-shadow: var(--portal-box-shadow); min-height: 40px; max-height: 400px; overflow: auto; diff --git a/testgen/ui/components/frontend/js/components/sidebar.js b/testgen/ui/components/frontend/js/components/sidebar.js index 5e96ec5f..239c3a2c 100644 --- a/testgen/ui/components/frontend/js/components/sidebar.js +++ b/testgen/ui/components/frontend/js/components/sidebar.js @@ -23,13 +23,19 @@ * @property {string} code * @property {string} name * + * @typedef Permissions + * @type {object} + * @property {boolean} can_edit + * * @typedef Properties * @type {object} * @property {Menu} menu - * @property {string} project + * @property {Project[]} projects * @property {string} username * @property {string} current_page + * @property {string} current_project * @property {string} logout_path + * @property {Permissions} permissions */ const van = window.top.van; const { a, button, div, i, img, label, option, select, span } = van.tags; @@ -44,30 +50,93 @@ const Sidebar = (/** @type {Properties} */ props) => { window.testgen.loadedStylesheets.sidebar = true; } + const currentProject = van.derive(() => props.projects.val.find(({ code }) => code === props.current_project.val)); + return div( {class: 'menu'}, div( - { class: 'menu--project' }, - div({ class: 'caption' }, 'Project'), - div(props.project), + div( + { class: 'menu--project' }, + div({ class: 'caption' }, 'Project'), + () => props.projects.val.length > 1 + ? ProjectSelect(props.projects, currentProject) + : div(currentProject.val.name), + ), + () => { + const menuItems = props.menu?.val.items || []; + return div( + {class: 'content'}, + menuItems.map(item => + item.items?.length > 0 + ? MenuSection(item, props.current_page) + : MenuItem(item, props.current_page)) + ); + }, + ), + div( + span({class: 'menu--username'}, props.username), + div( + { class: 'menu--buttons' }, + button( + { + class: 'tg-button logout', + onclick: (event) => navigate(event, props.logout_path?.val), + }, + i({class: 'material-symbols-rounded'}, 'logout'), + span('Logout'), + ), + props.permissions.val?.can_edit ? button( + { + class: 'tg-button', + onclick: () => emitEvent({ view_logs: true }), + }, + 'App Logs', + ) : null, + ), + () => Version(props.menu?.val.version), ), - () => { - const menuItems = props.menu?.val.items || []; - return div( - {class: 'content'}, - menuItems.map(item => - item.items?.length > 0 - ? 
MenuSection(item, props.current_page) - : MenuItem(item, props.current_page)) - ); + ); +}; + +const ProjectSelect = (/** @type Project[] */ projects, /** @type string */ currentProject) => { + const opened = van.state(false); + van.derive(() => { + const clickHandler = () => opened.val = false; + if (opened.val) { + document.addEventListener('click', clickHandler); + } else { + document.removeEventListener('click', clickHandler); + } + }); + + return div( + { + class: 'project-select', + onclick: (/** @type Event */ event) => event.stopPropagation(), }, - button( - { class: `tg-button logout`, onclick: (event) => navigate(event, props.logout_path?.val) }, - i({class: 'material-symbols-rounded'}, 'logout'), - span('Logout'), + div( + { + class: 'project-select--label', + onclick: () => opened.val = !opened.val, + }, + div(currentProject.val.name), + i({ class: 'material-symbols-rounded' }, 'arrow_drop_down'), ), - span({class: 'menu--username'}, props.username), - () => Version(props.menu?.val.version), + () => opened.val + ? div( + { class: 'project-select--options-wrapper' }, + projects.val.map(({ name, code }) => div( + { + class: `project-select--option ${code === currentProject.val.code ? 'selected' : ''}`, + onclick: () => { + opened.val = false; + emitEvent({ project: code }); + }, + }, + name, + )), + ) + : '', ); }; @@ -106,7 +175,7 @@ const Version = (/** @type {Version} */ version) => { return div( {class: classes, onclick: () => { expanded.val = !expanded.val; }}, VersionRow( - 'version', + 'Version', version.current, i({class: 'material-symbols-rounded version--dropdown-icon'}, icon), ), @@ -127,6 +196,12 @@ const VersionRow = (/** @type string */ label, /** @type string */ version, icon ); }; +function emitEvent(/** @type Object */ data) { + if (Sidebar.StreamlitInstance) { + Sidebar.StreamlitInstance.sendData(data); + } +} + function navigate(/** @type object */ event, /** @type string */ path, /** @type string */ currentPage = null) { // Needed to prevent page refresh // Returning false does not work because VanJS does not use inline handlers -> https://github.com/vanjs-org/van/discussions/246 @@ -135,7 +210,7 @@ function navigate(/** @type object */ event, /** @type string */ path, /** @type event.stopPropagation(); if (Sidebar.StreamlitInstance && path !== currentPage) { - Sidebar.StreamlitInstance.sendData(path); + Sidebar.StreamlitInstance.sendData({ path }); } } @@ -158,20 +233,54 @@ stylesheet.replace(` position: relative; display: flex; flex-direction: column; + justify-content: space-between; height: calc(100% - 76px); } -.menu > .menu--project { +.menu .menu--project { padding: 0 20px; margin-bottom: 16px; } -.menu > .menu--username { +.project-select { + position: relative; +} + +.project-select--label { + display: flex; +} + +.project-select--options-wrapper { position: absolute; + border-radius: 8px; + background: var(--portal-background); + box-shadow: var(--portal-box-shadow); + min-width: 200px; + min-height: 40px; + max-height: 400px; + overflow: auto; + z-index: 99; +} - left: 0; - bottom: 0; +.project-select--option { + display: flex; + align-items: center; + height: 40px; + padding: 0px 16px; + cursor: pointer; + font-size: 14px; + color: var(--primary-text-color); +} +.project-select--option:hover { + background: var(--select-hover-background); +} +.project-select--option.selected { + background: var(--select-hover-background); + color: var(--primary-color); +} + +.menu .menu--username { padding-left: 16px; padding-bottom: 8px; @@ -183,11 +292,11 @@ 
stylesheet.replace(`
     color: var(--secondary-text-color);
 }
 
-.menu > .menu--username:before {
+.menu .menu--username:before {
     content: 'User: ';
 }
 
-.menu > .content > .menu--section > .menu--section--label {
+.menu .content > .menu--section > .menu--section--label {
     padding: 8px 16px;
     font-size: 15px;
     color: var(--disabled-text-color);
@@ -224,6 +333,11 @@ stylesheet.replace(`
     background: var(--sidebar-item-hover-color);
 }
 
+.menu .menu--buttons {
+    display: flex;
+    justify-content: space-between;
+}
+
 .menu .version {
     color: var(--secondary-text-color);
     display: flex;
diff --git a/testgen/ui/components/frontend/js/components/spark_line.js b/testgen/ui/components/frontend/js/components/spark_line.js
new file mode 100644
index 00000000..79d2f9fb
--- /dev/null
+++ b/testgen/ui/components/frontend/js/components/spark_line.js
@@ -0,0 +1,66 @@
+/**
+ * @typedef SparklineOptions
+ * @type {object}
+ * @property {string} color
+ * @property {number} stroke
+ * @property {number?} opacity
+ * @property {boolean?} hidden
+ * @property {boolean?} interactive
+ * @property {Function?} onPointMouseEnter
+ * @property {Function?} onPointMouseLeave
+ *
+ * @typedef Point
+ * @type {object}
+ * @property {number} x
+ * @property {number} y
+*/
+import { getValue } from '../utils.js';
+import van from '../van.min.js';
+
+const { circle, g, polyline } = van.tags("http://www.w3.org/2000/svg");
+const defaultCircleRadius = 3;
+const onHoverCircleRadius = 5;
+
+/**
+ * Creates a line to be rendered inside an SVG.
+ *
+ * @param {SparklineOptions} options
+ * @param {Array} line
+ * @returns {SVGGElement}
+ */
+const SparkLine = (
+    /** @type {SparklineOptions} */ options,
+    /** @type {Array} */ line,
+) => {
+    const display = van.derive(() => getValue(options.hidden) === true ? 'none' : '');
+    return g(
+        { fill: 'none', opacity: options.opacity ?? 1, style: 'overflow: visible;', display },
+        polyline({
+            points: line.map(point => `${point.x} ${point.y}`).join(', '),
+            style: `stroke: ${options.color}; stroke-width: ${options.stroke ?? 1};`,
+        }),
+        options?.interactive
+            ? line.map(point => {
+                const circleRadius = van.state(defaultCircleRadius);
+
+                return circle({
+                    cx: point.x,
+                    cy: point.y,
+                    r: circleRadius,
+                    'pointer-events': 'all',
+                    fill: options.color,
+                    onmouseenter: () => {
+                        circleRadius.val = onHoverCircleRadius;
+                        options?.onPointMouseEnter?.(point, line);
+                    },
+                    onmouseleave: () => {
+                        circleRadius.val = defaultCircleRadius;
+                        options?.onPointMouseLeave?.(point, line);
+                    },
+                });
+            })
+            : '',
+    );
+};
+
+export { SparkLine };
diff --git a/testgen/ui/components/frontend/js/components/toggle.js b/testgen/ui/components/frontend/js/components/toggle.js
new file mode 100644
index 00000000..b8b5ca14
--- /dev/null
+++ b/testgen/ui/components/frontend/js/components/toggle.js
@@ -0,0 +1,86 @@
+/**
+ * @typedef Properties
+ * @type {object}
+ * @property {string} label
+ * @property {boolean?} checked
+ * @property {function(boolean)?} onChange
+ */
+import van from '../van.min.js';
+import { loadStylesheet } from '../utils.js';
+
+const { input, label } = van.tags;
+
+const Toggle = (/** @type Properties */ props) => {
+    loadStylesheet('toggle', stylesheet);
+
+    return label(
+        { class: 'flex-row fx-gap-2 clickable' },
+        input({
+            type: 'checkbox',
+            role: 'switch',
+            class: 'tg-toggle--input clickable',
+            checked: props.checked,
+            onchange: van.derive(() => {
+                const onChange = props.onChange?.val ?? props.onChange;
+                return onChange ?
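+                // props.onChange may be a van state holding the handler or a plain
+                // function; unwrapping the state inside van.derive lets re-renders
+                // pick up a replaced handler automatically.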
(/** @type Event */ event) => onChange(event.target.checked) : null; + }), + }), + props.label, + ); +}; + +const stylesheet = new CSSStyleSheet(); +stylesheet.replace(` +.tg-toggle--input { + appearance: none; + margin: 0; + width: 28px; + height: 16px; + flex-shrink: 0; + border-radius: 8px; + background-color: var(--disabled-text-color); + position: relative; + transition-property: background-color; + transition-duration: 0.3s; +} + +.tg-toggle--input::after { + content: ''; + position: absolute; + top: 2px; + left: 2px; + width: 12px; + height: 12px; + border-radius: 6px; + background-color: #fff; + transition-property: left; + transition-duration: 0.3s; +} + +.tg-toggle--input:focus, +.tg-toggle--input:focus-visible { + outline: none; +} + +.tg-toggle--input:focus-visible::before { + content: ''; + box-sizing: border-box; + position: absolute; + top: -3px; + left: -3px; + width: 34px; + height: 22px; + border: 3px solid var(--border-color); + border-radius: 11px; +} + +.tg-toggle--input:checked { + background-color: var(--primary-color); +} + +.tg-toggle--input:checked::after { + left: 14px; +} +`); + +export { Toggle }; diff --git a/testgen/ui/components/frontend/js/components/tooltip.js b/testgen/ui/components/frontend/js/components/tooltip.js index 32cf1dc9..38a814e2 100644 --- a/testgen/ui/components/frontend/js/components/tooltip.js +++ b/testgen/ui/components/frontend/js/components/tooltip.js @@ -9,6 +9,7 @@ * @property {boolean} show * @property {('top-left' | 'top' | 'top-right' | 'right' | 'bottom-right' | 'bottom' | 'bottom-left' | 'left')?} position * @property {number} width + * @property {string?} style */ import van from '../van.min.js'; import { getValue, loadStylesheet } from '../utils.js'; @@ -22,7 +23,7 @@ const Tooltip = (/** @type Properties */ props) => { return span( { class: () => `tg-tooltip ${getValue(props.position) || defaultPosition} ${getValue(props.show) ? '' : 'hidden'}`, - style: () => `opacity: ${getValue(props.show) ? 1 : 0}; max-width: ${getValue(props.width) || '400'}px;`, + style: () => `opacity: ${getValue(props.show) ? 1 : 0}; max-width: ${getValue(props.width) || '400'}px; ${getValue(props.style) ?? 
''}`, }, props.text, div({ class: 'tg-tooltip--triangle' }), diff --git a/testgen/ui/components/frontend/js/components/tree.js b/testgen/ui/components/frontend/js/components/tree.js index a0e0d47f..6d603fe3 100644 --- a/testgen/ui/components/frontend/js/components/tree.js +++ b/testgen/ui/components/frontend/js/components/tree.js @@ -10,23 +10,41 @@ * @property {number?} level * @property {boolean?} expanded * @property {boolean?} hidden + * @property {boolean?} selected + * + * @typedef SelectedNode + * @type {object} + * @property {string} id + * @property {boolean} all + * @property {SelectedNode[]?} children * * @typedef Properties * @type {object} * @property {string} id + * @property {string} classes * @property {TreeNode[]} nodes * @property {string} selected - * @property {string} classes + * @property {function(string)?} onSelect + * @property {boolean?} multiSelect + * @property {boolean?} multiSelectToggle + * @property {function(SelectedNode[] | null)?} onMultiSelect + * @property {(function(TreeNode): boolean) | null} isNodeHidden + * @property {(function(): boolean) | null} hasActiveFilters + * @property {function()?} onResetFilters */ import van from '../van.min.js'; -import { emitEvent, getValue, loadStylesheet } from '../utils.js'; +import { getValue, loadStylesheet, getRandomId, isState } from '../utils.js'; import { Input } from './input.js'; import { Button } from './button.js'; +import { Portal } from './portal.js'; +import { Icon } from './icon.js'; +import { Checkbox } from './checkbox.js'; +import { Toggle } from './toggle.js'; -const { div, i } = van.tags; +const { div, h3, span } = van.tags; const levelOffset = 14; -const Tree = (/** @type Properties */ props) => { +const Tree = (/** @type Properties */ props, /** @type any? */ filtersContent) => { loadStylesheet('tree', stylesheet); // Use only initial prop value as default and maintain internal state @@ -35,90 +53,216 @@ const Tree = (/** @type Properties */ props) => { const treeNodes = van.derive(() => { const nodes = getValue(props.nodes) || []; - const treeSelected = initTreeState(nodes, initialSelection); + const treeSelected = initTreeState(nodes, selected.rawVal); if (!treeSelected) { selected.val = null; } return nodes; }); + const multiSelect = isState(props.multiSelect) ? props.multiSelect : van.state(!!props.multiSelect); + const noMatches = van.derive(() => treeNodes.val.every(node => node.hidden.val)); + + van.derive(() => { + const onSelect = props.onSelect?.val ?? props.onSelect; + if (!multiSelect.val && onSelect) { + onSelect(selected.val); + } + }); + + van.derive(() => { + if (!multiSelect.val) { + selectTree(treeNodes.val, false); + } + props.onMultiSelect(multiSelect.val ? [] : null); + }); + return div( { id: props.id, class: () => `flex-column ${getValue(props.classes)}`, }, - div( - { class: 'flex-row fx-gap-1 tg-tree--actions' }, - Input({ - icon: 'search', - clearable: true, - onChange: (value) => searchTree(treeNodes.val, value), - }), - Button({ - type: 'icon', - icon: 'expand_all', - style: 'width: 24px; height: 24px; padding: 4px;', - tooltip: 'Expand All', - tooltipPosition: 'bottom', - onclick: () => expandOrCollapseTree(treeNodes.val, true), - }), - Button({ - type: 'icon', - icon: 'collapse_all', - style: 'width: 24px; height: 24px; padding: 4px;', - tooltip: 'Collapse All', - tooltipPosition: 'bottom', - onclick: () => expandOrCollapseTree(treeNodes.val, false), - }), - ), + Toolbar(treeNodes, props, filtersContent), + props.multiSelectToggle + ? 
div(
+                { class: 'mt-1 mb-2 ml-1 text-secondary' },
+                Toggle({
+                    label: 'Select multiple',
+                    checked: multiSelect,
+                    onChange: (/** @type boolean */ checked) => multiSelect.val = checked,
+                }),
+            )
+            : null,
         div(
             { class: 'tg-tree' },
             () => div(
-                { class: 'tg-tree--nodes' },
-                treeNodes.val.map(node => TreeNode(node, selected)),
+                {
+                    class: 'tg-tree--nodes',
+                    onclick: van.derive(() => multiSelect.val ? () => props.onMultiSelect(getMultiSelection(treeNodes.val)) : null),
+                },
+                treeNodes.val.map(node => TreeNode(node, selected, multiSelect.val)),
             ),
         ),
+        () => noMatches.val
+            ? span({ class: 'tg-tree--empty mt-7 mb-7 text-secondary' }, 'No matching items found')
+            : '',
     );
 };
 
+const Toolbar = (
+    /** @type { val: TreeNode[] } */ nodes,
+    /** @type Properties */ props,
+    /** @type any? */ filtersContent,
+) => {
+    const search = van.state('');
+    const filterDomId = `tree-filters-${getRandomId()}`;
+    const filtersOpened = van.state(false);
+    const filtersActive = van.state(false);
+    const isNodeHidden = (/** @type TreeNode */ node) => !node.label.includes(search.val) || props.isNodeHidden?.(node);
+
+    return div(
+        { class: 'flex-row fx-gap-1 tg-tree--actions' },
+        Input({
+            icon: 'search',
+            clearable: true,
+            onChange: (/** @type string */ value) => {
+                search.val = value;
+                filterTree(nodes.val, isNodeHidden);
+                if (value) {
+                    expandOrCollapseTree(nodes.val, true);
+                }
+            },
+        }),
+        filtersContent ? [
+            div(
+                { class: () => `tg-tree--filter-button ${filtersActive.val ? 'active' : ''}` },
+                Button({
+                    id: filterDomId,
+                    type: 'icon',
+                    icon: 'filter_list',
+                    style: 'width: 24px; height: 24px; padding: 4px;',
+                    tooltip: () => filtersActive.val ? 'Filters active' : 'Filters',
+                    tooltipPosition: 'bottom',
+                    onclick: () => filtersOpened.val = !filtersOpened.val,
+                }),
+            ),
+            Portal(
+                { target: filterDomId, opened: filtersOpened },
+                () => div(
+                    { class: 'tg-tree--filters' },
+                    h3(
+                        { class: 'flex-row fx-justify-space-between'},
+                        'Filters',
+                        Button({
+                            type: 'icon',
+                            icon: 'close',
+                            iconSize: 22,
+                            onclick: () => filtersOpened.val = false,
+                        }),
+                    ),
+                    filtersContent,
+                    div(
+                        { class: 'flex-row fx-justify-space-between mt-4' },
+                        Button({
+                            label: 'Reset filters',
+                            width: '110px',
+                            disabled: () => !props.hasActiveFilters(),
+                            onclick: props.onResetFilters,
+                        }),
+                        Button({
+                            type: 'stroked',
+                            color: 'primary',
+                            label: 'Apply',
+                            width: '80px',
+                            onclick: () => {
+                                filterTree(nodes.val, isNodeHidden);
+                                filtersActive.val = props.hasActiveFilters();
+                                filtersOpened.val = false;
+                            },
+                        }),
+                    ),
+                ),
+            )
+        ] : null,
+        Button({
+            type: 'icon',
+            icon: 'expand_all',
+            style: 'width: 24px; height: 24px; padding: 4px;',
+            tooltip: 'Expand All',
+            tooltipPosition: 'bottom',
+            onclick: () => expandOrCollapseTree(nodes.val, true),
+        }),
+        Button({
+            type: 'icon',
+            icon: 'collapse_all',
+            style: 'width: 24px; height: 24px; padding: 4px;',
+            tooltip: 'Collapse All',
+            tooltipPosition: 'bottom',
+            onclick: () => expandOrCollapseTree(nodes.val, false),
+        }),
+    );
+};
+
 const TreeNode = (
     /** @type TreeNode */ node,
     /** @type string */ selected,
+    /** @type boolean */ multiSelect,
 ) => {
     const hasChildren = !!node.children?.length;
 
     return div(
+        {
+            onclick: multiSelect
+                ? (/** @type Event */ event) => {
+                    if (hasChildren) {
+                        if (!event.fromChild) {
+                            // Prevent the default behavior of toggling the "checked" property - we want to control it
+                            event.preventDefault();
+                            selectTree(
+                                node.children,
+                                node.selected.val ?
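+                                // If the parent is already selected, clear all of its
+                                // children; otherwise select every visible child (when
+                                // all visible children are already selected, the ternary
+                                // below yields false and clears them instead).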
false : node.children.some(child => !child.hidden.val && !child.selected.val), + ); + } + node.selected.val = node.children.every(child => child.selected.val); + } else { + node.selected.val = !node.selected.val; + event.fromChild = true; + } + } + : null, + }, div( { class: () => `tg-tree--row flex-row clickable ${node.classes || ''} ${selected.val === node.id ? 'selected' : ''} ${node.hidden.val ? 'hidden' : ''}`, style: `padding-left: ${levelOffset * node.level}px;`, - onclick: () => { - selected.val = node.id; - emitEvent('TreeNodeSelected', { payload: node.id }); - }, + onclick: () => selected.val = node.id, }, - i( + Icon( { - class: `material-symbols-rounded text-secondary ${hasChildren ? '' : 'invisible'}`, - onclick: () => { + classes: hasChildren ? '' : 'invisible', + onclick: (/** @type Event */ event) => { + event.stopPropagation(); node.expanded.val = hasChildren ? !node.expanded.val : false; }, }, () => node.expanded.val ? 'arrow_drop_down' : 'arrow_right', ), - node.icon ? i( - { - class: 'material-symbols-rounded tg-tree--row-icon', - style: `font-size: ${node.iconSize || 24}px;`, - }, - node.icon, - ) : null, + multiSelect + ? [ + Checkbox({ + checked: () => node.selected.val, + indeterminate: hasChildren ? () => !node.selected.val && node.children.some(({ selected }) => selected.val) : false, + }), + span({ class: 'mr-1' }), + ] + : null, + node.icon ? Icon({ size: 24, classes: 'tg-tree--row-icon' }, node.icon) : null, node.label, ), hasChildren ? div( { class: () => node.expanded.val ? '' : 'hidden' }, - node.children.map(node => TreeNode(node, selected)), + node.children.map(node => TreeNode(node, selected, multiSelect)), ) : null, ); }; @@ -139,19 +283,20 @@ const initTreeState = ( } node.expanded = van.state(expanded); node.hidden = van.state(false); + node.selected = van.state(false); treeExpanded = treeExpanded || expanded; }); return treeExpanded; }; -const searchTree = ( +const filterTree = ( /** @type TreeNode[] */ nodes, - /** @type string */ search, + /** @type function(TreeNode): boolean */ isNodeHidden, ) => { nodes.forEach(node => { - let hidden = !node.label.includes(search); + let hidden = isNodeHidden(node); if (node.children) { - searchTree(node.children, search); + filterTree(node.children, isNodeHidden); hidden = hidden && node.children.every(child => child.hidden.rawVal); } node.hidden.val = hidden; @@ -168,7 +313,44 @@ const expandOrCollapseTree = ( node.expanded.val = expanded; } }); -} +}; + +const selectTree = ( + /** @type TreeNode[] */ nodes, + /** @type boolean */ selected, +) => { + nodes.forEach(node => { + if (!selected || !node.hidden.val) { + node.selected.val = selected; + if (node.children) { + selectTree(node.children, selected); + } + } + }); +}; + +/** + * @param {TreeNode[]} nodes + * @returns {SelectedNode[]} + */ +const getMultiSelection = (nodes) => { + const selected = []; + nodes.forEach(node => { + if (node.children) { + const selectedChildren = getMultiSelection(node.children); + if (selectedChildren.length) { + selected.push({ + id: node.id, + all: selectedChildren.length === node.children.length, + children: selectedChildren, + }); + } + } else if (node.selected.val) { + selected.push({ id: node.id }); + } + }); + return selected; +}; const stylesheet = new CSSStyleSheet(); stylesheet.replace(` @@ -176,6 +358,10 @@ stylesheet.replace(` overflow: auto; } +.tg-tree--empty { + text-align: center; +} + .tg-tree--actions { margin: 4px; } @@ -184,6 +370,32 @@ stylesheet.replace(` flex: auto; } +.tg-tree--filter-button { + 
position: relative; + border-radius: 4px; + border: 1px solid transparent; + transition: 0.3s; +} + +.tg-tree--filter-button.active { + border-color: var(--primary-color); +} + +.tg-tree--filters { + border-radius: 8px; + background: var(--dk-card-background); + box-shadow: var(--portal-box-shadow); + padding: 16px; + overflow: visible; + z-index: 99; +} + +.tg-tree--filters > h3 { + margin: 0 0 12px; + font-size: 18px; + font-weight: 500; +} + .tg-tree--nodes { width: fit-content; min-width: 100%; diff --git a/testgen/ui/components/frontend/js/data_profiling/column_distribution.js b/testgen/ui/components/frontend/js/data_profiling/column_distribution.js index 569cf55e..e40ca9f7 100644 --- a/testgen/ui/components/frontend/js/data_profiling/column_distribution.js +++ b/testgen/ui/components/frontend/js/data_profiling/column_distribution.js @@ -4,15 +4,17 @@ * @typedef Properties * @type {object} * @property {boolean?} border + * @property {boolean?} dataPreview */ import van from '../van.min.js'; import { Card } from '../components/card.js'; import { Attribute } from '../components/attribute.js'; +import { Button } from '../components/button.js'; import { SummaryBar } from '../components/summary_bar.js'; import { PercentBar } from '../components/percent_bar.js'; import { FrequencyBars } from '../components/frequency_bars.js'; import { BoxPlot } from '../components/box_plot.js'; -import { loadStylesheet } from '../utils.js'; +import { loadStylesheet, emitEvent, getValue } from '../utils.js'; import { formatTimestamp, roundDigits } from '../display_utils.js'; const { div, span } = van.tags; @@ -36,10 +38,20 @@ const ColumnDistributionCard = (/** @type Properties */ props, /** @type Column border: props.border, title: `Value Distribution ${item.is_latest_profile ? '*' : ''}`, content: item.profile_run_id && columnFunction ? columnFunction(item) : null, - actionContent: item.profile_run_id ? null : span( - { class: 'text-secondary' }, - 'No profiling data available', - ), + actionContent: item.profile_run_id + ? (getValue(props.dataPreview) + ? 
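+                // The nesting here yields three outcomes: profiled with dataPreview
+                // enabled -> a "Data Preview" button; profiled without it -> no action
+                // content; never profiled -> the "No profiling data" message below.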
Button({ + type: 'stroked', + label: 'Data Preview', + icon: 'pageview', + width: 'auto', + onclick: () => emitEvent('DataPreviewClicked', { payload: item }), + }) + : null) + : span( + { class: 'text-secondary' }, + 'No profiling data available', + ), }) }; diff --git a/testgen/ui/components/frontend/js/data_profiling/column_profiling_results.js b/testgen/ui/components/frontend/js/data_profiling/column_profiling_results.js index fb9ffb78..8f40ec28 100644 --- a/testgen/ui/components/frontend/js/data_profiling/column_profiling_results.js +++ b/testgen/ui/components/frontend/js/data_profiling/column_profiling_results.js @@ -2,19 +2,20 @@ * @typedef Properties * @type {object} * @property {Column} column + * @property {boolean?} data_preview */ import van from '../van.min.js'; import { Streamlit } from '../streamlit.js'; import { getValue, resizeFrameHeightToElement, resizeFrameHeightOnDOMChange, loadStylesheet } from '../utils.js'; import { ColumnDistributionCard } from './column_distribution.js'; import { DataCharacteristicsCard } from './data_characteristics.js'; -import { LatestProfilingLink } from './data_profiling_utils.js'; +import { LatestProfilingTime } from './data_profiling_utils.js'; import { HygieneIssuesCard, PotentialPIICard } from './data_issues.js'; const { div, h2, span } = van.tags; const ColumnProfilingResults = (/** @type Properties */ props) => { - loadStylesheet('data-catalog', stylesheet); + loadStylesheet('column-profiling-results', stylesheet); Streamlit.setFrameHeight(1); // Non-zero value is needed to render window.testgen.isPage = true; @@ -44,10 +45,10 @@ const ColumnProfilingResults = (/** @type Properties */ props) => { ), column.val.column_name, ), - column.val.is_latest_profile ? LatestProfilingLink(column.val) : null, + column.val.is_latest_profile ? LatestProfilingTime({}, column.val) : null, ), DataCharacteristicsCard({ border: true }, column.val), - ColumnDistributionCard({ border: true }, column.val), + ColumnDistributionCard({ border: true, dataPreview: !!props.data_preview?.val }, column.val), column.val.hygiene_issues ? [ PotentialPIICard({ border: true }, column.val), HygieneIssuesCard({ border: true }, column.val), diff --git a/testgen/ui/components/frontend/js/data_profiling/data_characteristics.js b/testgen/ui/components/frontend/js/data_profiling/data_characteristics.js index df31fac2..6aeb3aa2 100644 --- a/testgen/ui/components/frontend/js/data_profiling/data_characteristics.js +++ b/testgen/ui/components/frontend/js/data_profiling/data_characteristics.js @@ -29,9 +29,6 @@ const DataCharacteristicsCard = (/** @type Properties */ props, /** @type Column } else { attributes.push( { key: 'functional_table_type', label: `Semantic Table Type ${item.is_latest_profile ? 
'*' : ''}` }, - { key: 'record_ct', label: 'Row Count' }, - { key: 'column_ct', label: 'Column Count' }, - { key: 'data_point_ct', label: 'Data Point Count' }, ); } if (item.add_date) { @@ -48,7 +45,7 @@ const DataCharacteristicsCard = (/** @type Properties */ props, /** @type Column border: props.border, title: `${item.type} Characteristics`, content: div( - { class: 'flex-row fx-gap-4' }, + { class: 'flex-row fx-gap-4 fx-justify-space-between' }, div( { class: 'flex-row fx-flex-wrap fx-gap-4' }, attributes.map(({ key, label }) => { diff --git a/testgen/ui/components/frontend/js/data_profiling/data_issues.js b/testgen/ui/components/frontend/js/data_profiling/data_issues.js index d8a032ef..f62720b7 100644 --- a/testgen/ui/components/frontend/js/data_profiling/data_issues.js +++ b/testgen/ui/components/frontend/js/data_profiling/data_issues.js @@ -1,6 +1,6 @@ /** * @import { Column, Table, HygieneIssue, TestIssue } from './data_profiling_utils.js'; - * + * * @typedef Attribute * @type {object} * @property {string} key @@ -8,10 +8,11 @@ * @property {string} label * @property {string} classes * @property {function?} value_function - * + * * @typedef Properties * @type {object} * @property {boolean?} border + * @property {boolean?} noLinks */ import van from '../van.min.js'; import { Card } from '../components/card.js'; @@ -60,7 +61,7 @@ const PotentialPIICard = (/** @type Properties */ props, /** @type Table | Colum } const potentialPII = item.hygiene_issues.filter(({ issue_likelihood }) => issue_likelihood === 'Potential PII'); - const linkProps = { + const linkProps = props.noLinks ? null : { href: 'profiling-runs:hygiene', params: { run_id: item.profile_run_id, issue_class: 'Potential PII' }, }; @@ -89,7 +90,7 @@ const HygieneIssuesCard = (/** @type Properties */ props, /** @type Table | Colu } const hygieneIssues = item.hygiene_issues.filter(({ issue_likelihood }) => issue_likelihood !== 'Potential PII'); - const linkProps = { + const linkProps = props.noLinks ? null : { href: 'profiling-runs:hygiene', params: { run_id: item.profile_run_id, @@ -121,18 +122,23 @@ const TestIssuesCard = (/** @type Properties */ props, /** @type Table | Column { class: 'text-secondary' }, issue.test_suite, ), - Link({ - href: 'test-runs:results', - params: { - run_id: issue.test_run_id, - table_name: item.table_name, - column_name: item.column_name, - selected: issue.id, - }, - open_new: true, - label: formatTimestamp(issue.test_run_date), - style: 'font-size: 12px; margin-top: 2px;', - }), + props.noLinks + ? span( + { style: 'font-size: 12px; margin-top: 2px;' }, + formatTimestamp(issue.test_run_date) + ) + : Link({ + href: 'test-runs:results', + params: { + run_id: issue.test_run_id, + table_name: item.table_name, + column_name: item.column_name, + selected: issue.id, + }, + open_new: true, + label: formatTimestamp(issue.test_run_date), + style: 'font-size: 12px; margin-top: 2px;', + }), ), }, ]; @@ -150,7 +156,7 @@ const TestIssuesCard = (/** @type Properties */ props, /** @type Table | Column noneContent = span( { class: 'text-secondary flex-row fx-gap-1 fx-justify-content-flex-end' }, `No test results yet for ${item.type}.`, - Link({ + props.noLinks ? 
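+            // With noLinks set, the "go to test suites" link is omitted entirely
+            // (the run timestamps above degrade to plain text instead); used by
+            // read-only views, e.g. the data catalog when navigation is not permitted.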
null : Link({ href: 'test-suites', params: { table_group_id: item.table_group_id }, open_new: true, diff --git a/testgen/ui/components/frontend/js/data_profiling/data_profiling_utils.js b/testgen/ui/components/frontend/js/data_profiling/data_profiling_utils.js index ed236745..232f1192 100644 --- a/testgen/ui/components/frontend/js/data_profiling/data_profiling_utils.js +++ b/testgen/ui/components/frontend/js/data_profiling/data_profiling_utils.js @@ -24,6 +24,7 @@ * @property {'column'} type * @property {string} column_name * @property {string} table_name + * @property {string} schema_name * @property {string} table_group_id * @property {string} connection_id * * Characteristics @@ -55,6 +56,14 @@ * @property {string?} table_transform_level * @property {string?} table_aggregation_level * @property {string?} table_data_product + * * Table Group Tags + * @property {string} table_group_data_source + * @property {string} table_group_source_system + * @property {string} table_group_source_process + * @property {string} table_group_business_domain + * @property {string} table_group_stakeholder_group + * @property {string} table_group_transform_level + * @property {string} table_group_data_product * * Profile & Test Runs * @property {string?} profile_run_id * @property {number?} profile_run_date @@ -123,6 +132,7 @@ * @property {string} id * @property {'table'} type * @property {string} table_name + * @property {string} schema_name * @property {string} table_group_id * @property {string} connection_id * * Characteristics @@ -132,7 +142,7 @@ * @property {number} data_point_ct * @property {number} add_date * @property {number} drop_date - * * Tags + * * Table Tags * @property {string} description * @property {boolean} critical_data_element * @property {string} data_source @@ -143,6 +153,14 @@ * @property {string} transform_level * @property {string} aggregation_level * @property {string} data_product + * * Table Group Tags + * @property {string} table_group_data_source + * @property {string} table_group_source_system + * @property {string} table_group_source_process + * @property {string} table_group_business_domain + * @property {string} table_group_stakeholder_group + * @property {string} table_group_transform_level + * @property {string} table_group_data_product * * Profile & Test Runs * @property {string} profile_run_id * @property {number} profile_run_date @@ -160,7 +178,7 @@ import van from '../van.min.js'; import { Link } from '../components/link.js'; import { formatTimestamp } from '../display_utils.js'; -const { span } = van.tags; +const { span, b } = van.tags; const TABLE_ICON = { icon: 'table', iconSize: 20 }; const COLUMN_ICONS = { @@ -178,8 +196,16 @@ const getColumnIcon = (/** @type Column */ column) => { return COLUMN_ICONS[type]; }; -const LatestProfilingLink = (/** @type Table | Column */ item) => { - let text = 'as of latest profiling run on '; +/** + * @typedef Properties + * @type {object} + * @property {boolean?} noLinks + */ +const LatestProfilingTime = (/** @type Properties */ props, /** @type Table | Column */ item) => { + let text = [ + 'as of latest profiling run on ', + props.noLinks ? b(formatTimestamp(item.profile_run_date)) : null, + ]; let link = Link({ href: 'profiling-runs:results', params: { @@ -207,9 +233,10 @@ const LatestProfilingLink = (/** @type Table | Column */ item) => { } return span( { class: 'flex-row fx-gap-1 fx-justify-content-flex-end text-secondary' }, - `* ${text}`, - link, + '* ', + text, + props.noLinks ? 
null : link, ); } -export { TABLE_ICON, getColumnIcon, LatestProfilingLink }; +export { TABLE_ICON, getColumnIcon, LatestProfilingTime }; diff --git a/testgen/ui/components/frontend/js/data_profiling/table_size.js b/testgen/ui/components/frontend/js/data_profiling/table_size.js new file mode 100644 index 00000000..3f7af986 --- /dev/null +++ b/testgen/ui/components/frontend/js/data_profiling/table_size.js @@ -0,0 +1,38 @@ +/** + * @import { Table } from './data_profiling_utils.js'; + * + * @typedef Properties + * @type {object} + */ +import van from '../van.min.js'; +import { Card } from '../components/card.js'; +import { Attribute } from '../components/attribute.js'; +import { Button } from '../components/button.js'; +import { emitEvent } from '../utils.js'; + +const { div } = van.tags; + +const TableSizeCard = (/** @type Properties */ _props, /** @type Table */ item) => { + const attributes = [ + { key: 'column_ct', label: 'Column Count' }, + { key: 'record_ct', label: 'Row Count' }, + { key: 'data_point_ct', label: 'Data Point Count' }, + ] + + return Card({ + title: 'Table Size', + content: div( + { class: 'flex-row fx-flex-wrap fx-gap-4' }, + attributes.map(({ key, label }) => Attribute({ label, value: item[key], width: 250 })), + ), + actionContent: Button({ + type: 'stroked', + label: 'Data Preview', + icon: 'pageview', + width: 'auto', + onclick: () => emitEvent('DataPreviewClicked', { payload: item }), + }), + }); +}; + +export { TableSizeCard }; diff --git a/testgen/ui/components/frontend/js/display_utils.js b/testgen/ui/components/frontend/js/display_utils.js index 1e4de4b8..e5fb11a2 100644 --- a/testgen/ui/components/frontend/js/display_utils.js +++ b/testgen/ui/components/frontend/js/display_utils.js @@ -18,7 +18,7 @@ function formatDuration(/** @type string */ duration) { if (!duration) { return '--'; } - + const [ hour, minute, second ] = duration.split(':'); let formatted = [ { value: Number(hour), unit: 'h' }, @@ -37,6 +37,13 @@ function roundDigits(/** @type number | string */ number, /** @type number */ pr return parseFloat(Number(number).toPrecision(precision)); } +function capitalize(/** @type string */ text) { + return text.toLowerCase() + .split(' ') + .map((s) => s.charAt(0).toUpperCase() + s.substring(1)) + .join(' '); +} + // https://m2.material.io/design/color/the-color-system.html#tools-for-picking-colors const colorMap = { red: '#EF5350', // Red 400 @@ -58,4 +65,6 @@ const colorMap = { emptyLight: 'var(--empty-light)', // Light: Gray 50, Dark: Gray 900 } -export { formatTimestamp, formatDuration, roundDigits, colorMap }; +const DISABLED_ACTION_TEXT = 'You do not have permissions to perform this action. 
Contact your administrator.'; + +export { formatTimestamp, formatDuration, roundDigits, capitalize, colorMap, DISABLED_ACTION_TEXT }; diff --git a/testgen/ui/components/frontend/js/pages/data_catalog.js b/testgen/ui/components/frontend/js/pages/data_catalog.js index 685d09d6..62f4f8de 100644 --- a/testgen/ui/components/frontend/js/pages/data_catalog.js +++ b/testgen/ui/components/frontend/js/pages/data_catalog.js @@ -1,5 +1,6 @@ /** * @import { Column, Table } from '../data_profiling/data_profiling_utils.js'; + * @import { TreeNode } from '../components/tree.js'; * * @typedef ColumnPath * @type {object} @@ -9,13 +10,23 @@ * @property {string} table_name * @property {'A' | 'B' | 'D' | 'N' | 'T' | 'X'} general_type * @property {string} functional_data_type - * @property {number} column_drop_date + * @property {number} drop_date * @property {number} table_drop_date + * @property {boolean} critical_data_element + * @property {boolean} table_critical_data_element + * + * @typedef Permissions + * @type {object} + * @property {boolean} can_edit + * @property {boolean} can_navigate * * @typedef Properties * @type {object} * @property {ColumnPath[]} columns * @property {Table | Column} selected + * @property {Object.} tag_values + * @property {string} last_saved_timestamp + * @property {Permissions} permissions */ import van from '../van.min.js'; import { Tree } from '../components/tree.js'; @@ -29,8 +40,14 @@ import { emitEvent, getValue, loadStylesheet } from '../utils.js'; import { ColumnDistributionCard } from '../data_profiling/column_distribution.js'; import { DataCharacteristicsCard } from '../data_profiling/data_characteristics.js'; import { PotentialPIICard, HygieneIssuesCard, TestIssuesCard } from '../data_profiling/data_issues.js'; -import { getColumnIcon, TABLE_ICON, LatestProfilingLink } from '../data_profiling/data_profiling_utils.js'; +import { getColumnIcon, TABLE_ICON, LatestProfilingTime } from '../data_profiling/data_profiling_utils.js'; import { RadioGroup } from '../components/radio_group.js'; +import { Checkbox } from '../components/checkbox.js'; +import { Select } from '../components/select.js'; +import { capitalize } from '../display_utils.js'; +import { TableSizeCard } from '../data_profiling/table_size.js'; +import { Card } from '../components/card.js'; +import { Button } from '../components/button.js'; const { div, h2, span, i } = van.tags; @@ -38,12 +55,35 @@ const { div, h2, span, i } = van.tags; const EMPTY_IMAGE = new Image(1, 1); EMPTY_IMAGE.src = 'data:image/gif;base64,R0lGODlhAQABAIAAAP///wAAACH5BAEAAAAALAAAAAABAAEAAAICRAEAOw=='; +const TAG_KEYS = [ + 'data_source', + 'source_system', + 'source_process', + 'business_domain', + 'stakeholder_group', + 'transform_level', + 'aggregation_level', + 'data_product', +]; +const TAG_HELP = { + data_source: 'Original source of the dataset', + source_system: 'Enterprise system source for the dataset', + source_process: 'Process, program, or data flow that produced the dataset', + business_domain: 'Business division responsible for the dataset, e.g., Finance, Sales, Manufacturing', + stakeholder_group: 'Data owners or stakeholders responsible for the dataset', + transform_level: 'Data warehouse processing stage, e.g., Raw, Conformed, Processed, Reporting, or Medallion level (bronze, silver, gold)', + aggregation_level: 'Data granularity of the dataset, e.g. 
+    data_product: 'Data domain that comprises the dataset',
+};
+
+
 const DataCatalog = (/** @type Properties */ props) => {
     loadStylesheet('data-catalog', stylesheet);
     Streamlit.setFrameHeight(1); // Non-zero value is needed to render
     window.frameElement.style.setProperty('height', 'calc(100vh - 175px)');
     window.testgen.isPage = true;
 
+    /** @type TreeNode[] */
     const treeNodes = van.derive(() => {
         let columns = [];
         try {
@@ -52,22 +92,27 @@ const DataCatalog = (/** @type Properties */ props) => {
 
         const tables = {};
         columns.forEach((item) => {
-            const { column_id, table_id, column_name, table_name, column_drop_date, table_drop_date } = item;
+            const { column_id, table_id, column_name, table_name, drop_date, table_drop_date } = item;
             if (!tables[table_id]) {
                 tables[table_id] = {
                     id: table_id,
                     label: table_name,
                     classes: table_drop_date ? 'text-disabled' : '',
                     ...TABLE_ICON,
+                    criticalDataElement: !!item.table_critical_data_element,
                     children: [],
                 };
+                TAG_KEYS.forEach(key => tables[table_id][key] = item[`table_${key}`]);
             }
-            tables[table_id].children.push({
+            const columnNode = {
                 id: column_id,
                 label: column_name,
-                classes: column_drop_date ? 'text-disabled' : '',
+                classes: drop_date ? 'text-disabled' : '',
                 ...getColumnIcon(item),
-            });
+                criticalDataElement: !!(item.critical_data_element ?? item.table_critical_data_element),
+            };
+            TAG_KEYS.forEach(key => columnNode[key] = item[key] ?? item[`table_${key}`]);
+            tables[table_id].children.push(columnNode);
         });
         return Object.values(tables);
     });
@@ -81,10 +126,14 @@
         }
     });
 
+    // Reset to false after saving
+    const multiEditMode = van.derive(() => getValue(props.last_saved_timestamp) && false);
+    const multiSelectedItems = van.state(null);
+
     const treeDomId = 'data-catalog-tree';
     const dragState = van.state(null);
     const dragConstraints = { min: 250, max: 600 };
-    const dragResize = (event) => {
+    const dragResize = (/** @type Event */ event) => {
        // https://stackoverflow.com/questions/36308460/why-is-clientx-reset-to-0-on-last-drag-event-and-how-to-solve-it
        if (event.screenX && dragState.val) {
            const dragWidth = dragState.val.startWidth + event.screenX - dragState.val.startX;
@@ -93,18 +142,65 @@
         }
     };
 
+    const filters = { criticalDataElement: van.state(false) };
+    TAG_KEYS.forEach(key => filters[key] = van.state(null));
+
+    const userCanEdit = getValue(props.permissions)?.can_edit ?? false;
+
     return div(
         {
             class: 'flex-row tg-dh',
             ondragover: (event) => event.preventDefault(),
         },
-        Tree({
-            id: treeDomId,
-            nodes: treeNodes,
-            // Use .rawVal, so only initial value from query params is passed to tree
-            selected: selectedItem.rawVal ? `${selectedItem.rawVal.type}_${selectedItem.rawVal.id}` : null,
-            classes: 'tg-dh--tree',
-        }),
+        Tree(
+            {
+                id: treeDomId,
+                classes: 'tg-dh--tree',
+                nodes: treeNodes,
+                // Use .rawVal, so only initial value from query params is passed to tree
+                selected: selectedItem.rawVal ? `${selectedItem.rawVal.type}_${selectedItem.rawVal.id}` : null,
+                onSelect: (/** @type string */ selected) => emitEvent('ItemSelected', { payload: selected }),
+                multiSelect: multiEditMode,
+                multiSelectToggle: userCanEdit,
+                onMultiSelect: (/** @type string[] | null */ selected) => multiSelectedItems.val = selected,
+                isNodeHidden: (/** @type TreeNode */ node) => {
+                    let hidden = ![ node.criticalDataElement, false ].includes(filters.criticalDataElement.val);
+                    hidden = hidden || TAG_KEYS.some(key => ![ node[key], null ].includes(filters[key].val));
+                    return hidden;
+                },
+                hasActiveFilters: () => filters.criticalDataElement.val || TAG_KEYS.some(key => !!filters[key].val),
+                onResetFilters: () => {
+                    filters.criticalDataElement.val = false;
+                    TAG_KEYS.forEach(key => filters[key].val = null);
+                },
+            },
+            // Pass as a function that will be called when the filter portal is opened
+            // Otherwise state bindings get garbage collected and Select dropdowns won't open
+            // https://vanjs.org/advanced#gc
+            () => div(
+                Checkbox({
+                    label: 'Only critical data elements (CDEs)',
+                    checked: filters.criticalDataElement,
+                    onChange: (checked) => filters.criticalDataElement.val = checked,
+                }),
+                div(
+                    {
+                        class: 'flex-row fx-flex-wrap fx-gap-4 fx-justify-space-between mt-4',
+                        style: 'max-width: 420px;',
+                    },
+                    TAG_KEYS.map(key => Select({
+                        id: `data-catalog-${key}`,
+                        label: capitalize(key.replaceAll('_', ' ')),
+                        height: 32,
+                        value: filters[key],
+                        options: getValue(props.tag_values)?.[key]?.map(key => ({ label: key, value: key })),
+                        allowNull: true,
+                        disabled: !getValue(props.tag_values)?.[key]?.length,
+                        onChange: v => filters[key].val = v,
+                    })),
+                ),
+            ),
+        ),
         div(
             {
                 class: 'tg-dh--dragger',
@@ -121,100 +217,105 @@
                 ondrag: van.derive(() => dragState.val ? dragResize : null),
             },
         ),
-        () => {
-            const item = selectedItem.val;
-            if (item) {
-                return div(
-                    { class: 'tg-dh--details' },
-                    div(
-                        { class: 'mb-2' },
-                        h2(
-                            { class: 'tg-dh--title' },
-                            item.type === 'column' ? [
-                                span(
-                                    { class: 'text-secondary' },
-                                    `${item.table_name} > `,
-                                ),
-                                item.column_name,
-                            ] : item.table_name,
-                        ),
-                        LatestProfilingLink(item),
-                    ),
-                    DataCharacteristicsCard({ scores: true }, item),
-                    item.type === 'column' ? ColumnDistributionCard({}, item) : null,
-                    TagsCard({}, item),
-                    PotentialPIICard({}, item),
-                    HygieneIssuesCard({}, item),
-                    TestIssuesCard({}, item),
-                );
-            }
+        () => multiEditMode.val
+            ? MultiEdit(props, multiSelectedItems, multiEditMode)
+            : SelectedDetails(props, selectedItem.val),
+    );
+};
 
-            return div(
-                { class: 'flex-column fx-align-flex-center fx-justify-center tg-dh--no-selection' },
-                i(
-                    { class: 'material-symbols-rounded text-disabled mb-5' },
-                    'quick_reference_all',
-                ),
-                span(
-                    { class: 'text-secondary' },
-                    'Select a table or column on the left to view its details.',
+const SelectedDetails = (/** @type Properties */ props, /** @type Table | Column */ item) => {
+    const userCanEdit = getValue(props.permissions)?.can_edit ?? false;
+    const userCanNavigate = getValue(props.permissions)?.can_navigate ?? false;
+
+    return item
+        ? div(
+            { class: 'tg-dh--details' },
+            div(
+                { class: 'mb-2' },
+                h2(
+                    { class: 'tg-dh--title' },
+                    item.type === 'column' ? [
+                        span(
+                            { class: 'text-secondary' },
+                            `${item.table_name} > `,
+                        ),
+                        item.column_name,
+                    ] : item.table_name,
                 ),
-            );
-        },
-    );
+                LatestProfilingTime({ noLinks: !userCanNavigate }, item),
+            ),
+            DataCharacteristicsCard({ scores: true }, item),
+            item.type === 'column'
+                ? ColumnDistributionCard({ dataPreview: true }, item)
+                : TableSizeCard({}, item),
+            TagsCard({ tagOptions: getValue(props.tag_values), editable: userCanEdit }, item),
+            PotentialPIICard({ noLinks: !userCanNavigate }, item),
+            HygieneIssuesCard({ noLinks: !userCanNavigate }, item),
+            TestIssuesCard({ noLinks: !userCanNavigate }, item),
+        )
+        : EmptyState(
+            'Select a table or column on the left to view its details.',
+            'quick_reference_all',
+        );
 };
 
-const TagsCard = (/** @type object */ _props, /** @type Table | Column */ item) => {
+/**
+* @typedef TagProperties
+* @type {object}
+* @property {Object.<string, string[]>} tagOptions
+* @property {boolean} editable
+*/
+const TagsCard = (/** @type TagProperties */ props, /** @type Table | Column */ item) => {
+    const title = `${item.type} Tags `;
     const attributes = [
-        { key: 'description' },
-        { key: 'critical_data_element' },
-        { key: 'data_source', help: 'Original source of the dataset' },
-        { key: 'source_system', help: 'Enterprise system source for the dataset' },
-        { key: 'source_process', help: 'Process, program, or data flow that produced the dataset' },
-        { key: 'business_domain', help: 'Business division responsible for the dataset, e.g., Finance, Sales, Manufacturing' },
-        { key: 'stakeholder_group', help: 'Data owners or stakeholders responsible for the dataset' },
-        { key: 'transform_level', help: 'Data warehouse processing stage, e.g., Raw, Conformed, Processed, Reporting, or Medallion level (bronze, silver, gold)' },
-        { key: 'aggregation_level', help: 'Data granularity of the dataset, e.g. atomic, historical, snapshot, aggregated, time-rollup, rolling, summary' },
-        { key: 'data_product', help: 'Data domain that comprises the dataset' },
-    ].map(attribute => ({
-        ...attribute,
-        label: attribute.key.replaceAll('_', ' '),
-        state: van.state(item[attribute.key]),
-        inherited: item[`table_${attribute.key}`], // Table values inherited by column
+        'description',
+        'critical_data_element',
+        ...TAG_KEYS,
+    ].map(key => ({
+        key,
+        help: TAG_HELP[key],
+        label: capitalize(key.replaceAll('_', ' ')),
+        state: van.state(item[key]),
+        inheritTableGroup: item[`table_group_${key}`] ?? null, // Table group values inherited by table or column
+        inheritTable: item[`table_${key}`] ?? null, // Table values inherited by column
     }));
 
-    const InheritedIcon = () => withTooltip(
+    const InheritedIcon = (/** @type string */ inheritedFrom) => withTooltip(
        Icon({ size: 18, classes: 'text-disabled' }, 'layers'),
-        { text: 'Inherited from table tags', position: 'top-right'},
+        { text: `Inherited from ${inheritedFrom} tags`, position: 'top-right'},
    );
 
    const width = 300;
    const descriptionWidth = 932;
    const content = div(
        { class: 'flex-row fx-flex-wrap fx-gap-4' },
-        attributes.map(({ key, label, help, state, inherited }) => {
-            let value = state.rawVal ?? inherited;
-            const isInherited = item.type === 'column' && state.rawVal === null;
+        attributes.map(({ key, label, help, state, inheritTable, inheritTableGroup }) => {
+            let value = state.rawVal ?? inheritTable ?? inheritTableGroup;
 
             if (key === 'critical_data_element') {
                 return span(
                     { class: 'flex-row fx-gap-1', style: `width: ${width}px` },
-                    i(
-                        { class: `material-symbols-rounded ${value ? 'text-green' : 'text-disabled'}` },
+                    Icon(
+                        { classes: value ? 'text-green' : 'text-disabled' },
                         value ? 'check_circle' : 'cancel',
                     ),
                     span(
                         { class: value ? 'text-capitalize' : 'text-secondary' },
                         value ? label : `Not a ${label}`,
                     ),
-                    isInherited ? InheritedIcon() : null,
+                    (item.type === 'column' && state.rawVal === null) ? InheritedIcon('table') : null,
                 );
             }
 
-            if (isInherited && value) {
+            const inheritedFrom = state.rawVal !== null ? null
+                : inheritTable !== null ? 'table'
+                : inheritTableGroup !== null ? 'table group'
+                : null;
+
+            if (inheritedFrom && value) {
                 value = span(
                     { class: 'flex-row fx-gap-1' },
-                    InheritedIcon(),
+                    InheritedIcon(inheritedFrom),
                     value,
                 );
             }
@@ -222,21 +323,23 @@ const TagsCard = (/** @type object */ _props, /** @type Table | Column */ item)
         }),
     );
 
+    if (!props.editable) {
+        return Card({ title, content });
+    }
+
     // Define as function so the block is re-rendered with reset values when re-editing after a cancel
     const editingContent = () => div(
         { class: 'flex-row fx-flex-wrap fx-gap-4' },
-        attributes.map(({ key, label, help, state, inherited }) => {
+        attributes.map(({ key, label, help, state, inheritTable, inheritTableGroup }) => {
             if (key === 'critical_data_element') {
                 const options = [
                     { label: 'Yes', value: true },
                     { label: 'No', value: false },
+                    { label: 'Inherit', value: null },
                 ];
-                if (item.type === 'column') {
-                    options.push({ label: 'Inherit', value: null });
-                }
                 return RadioGroup({
                     label, width, options,
-                    value: item.type === 'column' ? state.rawVal : !!state.rawVal, // Coerce null to false for tables
+                    value: state.rawVal,
                     onChange: (value) => state.val = value,
                 });
             };
@@ -245,8 +348,8 @@
                 label, help,
                 width: key === 'description' ? descriptionWidth : width,
                 value: state.rawVal,
-                placeholder: inherited ? `Inherited: ${inherited}` : null,
-                style: 'text-transform: capitalize;',
+                placeholder: (inheritTable || inheritTableGroup) ? `Inherited: ${inheritTable ?? inheritTableGroup}` : null,
+                autocompleteOptions: props.tagOptions?.[key],
                 onChange: (value) => state.val = value || null,
             });
         }),
     );
 
@@ -256,11 +359,12 @@
         title: `${item.type} Tags `, content, editingContent,
         onSave: () => {
-            const payload = attributes.reduce((object, { key, state }) => {
+            const items = [{ type: item.type, id: item.id }];
+            const tags = attributes.reduce((object, { key, state }) => {
                 object[key] = state.rawVal;
                 return object;
-            }, { id: item.id, type: item.type });
-            emitEvent('TagsChanged', { payload })
+            }, {});
+            emitEvent('TagsChanged', { payload: { items, tags } });
         },
         // Reset states to original values on cancel
         onCancel: () => attributes.forEach(({ key, state }) => state.val = item[key]),
@@ -268,6 +372,130 @@
     });
 };
 
+const MultiEdit = (/** @type Properties */ props, /** @type Object */ selectedItems, /** @type Object */ multiEditMode) => {
+    const hasSelection = van.derive(() => selectedItems.val?.length);
+    const columnCount = van.derive(() => selectedItems.val?.reduce((count, { children }) => count + children.length, 0));
+
+    const attributes = [
+        'description',
+        'critical_data_element',
+        ...TAG_KEYS,
+    ].map(key => ({
+        key,
+        help: TAG_HELP[key],
+        label: capitalize(key.replaceAll('_', ' ')),
+        checkedState: van.state(null),
+        valueState: van.state(null),
+    }));
+
+    const cdeOptions = [
+        { label: 'Yes', value: true },
+        { label: 'No', value: false },
+        { label: 'Inherit', value: null },
+    ];
+    const tagOptions = getValue(props.tag_values) ?? {};
+    const width = 400;
+    const descriptionWidth = 800;
+
+    return div(
+        { class: 'tg-dh--details flex-column' },
+        () => hasSelection.val
+            ? Card({
+                title: 'Edit Tags for Selection',
+                actionContent: span(
+                    { class: 'text-secondary mr-4' },
+                    span({ style: 'font-weight: 500' }, columnCount),
+                    () => ` column${columnCount.val > 1 ? 's' : ''} selected`
+                ),
+                content: div(
+                    { class: 'flex-column' },
+                    attributes.map(({ key, label, help, checkedState, valueState }) => div(
+                        { class: 'flex-row fx-gap-3' },
+                        Checkbox({
+                            checked: checkedState,
+                            onChange: (checked) => checkedState.val = checked,
+                        }),
+                        div(
+                            {
+                                class: 'pb-4 flex-row',
+                                style: `min-width: ${width}px`,
+                                onclick: () => checkedState.val = true,
+                            },
+                            key === 'critical_data_element'
+                                ? RadioGroup({
+                                    label, width,
+                                    options: cdeOptions,
+                                    onChange: (value) => valueState.val = value,
+                                })
+                                : Input({
+                                    label, help,
+                                    width: key === 'description' ? descriptionWidth : width,
+                                    placeholder: () => checkedState.val ? null : '(keep current values)',
+                                    autocompleteOptions: tagOptions[key],
+                                    onChange: (value) => valueState.val = value || null,
+                                }),
+                        ),
+                    )),
+                    div(
+                        { class: 'flex-row fx-justify-content-flex-end fx-gap-3 mt-4' },
+                        Button({
+                            type: 'stroked',
+                            label: 'Cancel',
+                            width: 'auto',
+                            onclick: () => multiEditMode.val = false,
+                        }),
+                        Button({
+                            type: 'stroked',
+                            color: 'primary',
+                            label: 'Save',
+                            width: 'auto',
+                            disabled: () => attributes.every(({ checkedState }) => !checkedState.val),
+                            onclick: () => {
+                                const items = selectedItems.val.reduce((array, table) => {
+                                    if (table.all) {
+                                        const [ type, id ] = table.id.split('_');
+                                        array.push({ type, id });
+                                    } else {
+                                        const columns = table.children.map(column => {
+                                            const [ type, id ] = column.id.split('_');
+                                            return { type, id };
+                                        });
+                                        array.push(...columns);
+                                    }
+                                    return array;
+                                }, []);
+
+                                const tags = attributes.reduce((object, { key, checkedState, valueState }) => {
+                                    if (checkedState.val) {
+                                        object[key] = valueState.rawVal;
+                                    }
+                                    return object;
+                                }, {});
+
+                                emitEvent('TagsChanged', { payload: { items, tags } });
+                                // Don't set multiEditMode to false here
+                                // Otherwise this event gets superseded by the ItemSelected event
+                                // Let the Streamlit rerun handle the state reset with 'last_saved_timestamp'
+                            },
+                        }),
+                    ),
+                ),
+            })
+            : EmptyState(
+                'Select tables or columns on the left to edit their tags.',
+                'edit_document',
+            ),
+    );
+};
+
+const EmptyState = (/** @type string */ message, /** @type string */ icon) => {
+    return div(
+        { class: 'flex-column fx-align-flex-center fx-justify-center tg-dh--no-selection' },
+        Icon({ size: 80, classes: 'text-disabled mb-5' }, icon),
+        span({ class: 'text-secondary' }, message),
+    );
+};
+
 const stylesheet = new CSSStyleSheet();
 stylesheet.replace(`
 .tg-dh {
@@ -310,10 +538,6 @@ stylesheet.replace(`
     padding: 16px;
 }
 
-.tg-dh--no-selection > i {
-    font-size: 80px;
-}
-
 .tg-dh--no-selection > span {
     font-size: 18px;
     text-align: center;
diff --git a/testgen/ui/components/frontend/js/pages/profiling_runs.js b/testgen/ui/components/frontend/js/pages/profiling_runs.js
index d7e268a8..993530cf 100644
--- a/testgen/ui/components/frontend/js/pages/profiling_runs.js
+++ b/testgen/ui/components/frontend/js/pages/profiling_runs.js
@@ -17,10 +17,15 @@
  * @property {number} anomalies_possible_ct
  * @property {number} anomalies_dismissed_ct
  * @property {string} dq_score_profiling
- * 
+ *
+ * @typedef Permissions
+ * @type {object}
+ * @property {boolean} can_run
+ *
  * @typedef Properties
  * @type {object}
  * @property {ProfilingRun[]} items
+ * @property {Permissions} permissions
  */
 import van from '../van.min.js';
 import { Tooltip } from '../components/tooltip.js';
@@ -28,7 +33,7 @@ import { SummaryBar } from '../components/summary_bar.js';
 import { Link } from '../components/link.js';
 import { Button } from '../components/button.js';
 import { Streamlit } from '../streamlit.js';
-import { emitEvent, resizeFrameHeightToElement } from '../utils.js';
+import { emitEvent, getValue, resizeFrameHeightToElement } from '../utils.js';
 import { formatTimestamp, formatDuration } from '../display_utils.js';
 
 const { div, span, i } = van.tags;
@@ -46,6 +51,8 @@ const ProfilingRuns = (/** @type Properties */ props) => {
     });
 
     const columns = ['20%', '20%', '20%', '30%', '10%'];
 
+    const userCanRun = getValue(props.permissions)?.can_run ?? false;
+
     const tableId = 'profiling-runs-table';
     resizeFrameHeightToElement(tableId);
@@ -75,12 +82,16 @@ const ProfilingRuns = (/** @type Properties */ props) => {
             ),
         ),
         () => div(
-            profilingRunItems.val.map(item => ProfilingRunItem(item, columns)),
+            profilingRunItems.val.map(item => ProfilingRunItem(item, columns, userCanRun)),
         ),
     );
 }
 
-const ProfilingRunItem = (/** @type ProfilingRun */ item, /** @type string[] */ columns) => {
+const ProfilingRunItem = (
+    /** @type ProfilingRun */ item,
+    /** @type string[] */ columns,
+    /** @type boolean */ userCanRun,
+) => {
     return div(
         { class: 'table-row flex-row' },
         div(
@@ -100,7 +111,7 @@ const ProfilingRunItem = (/** @type ProfilingRun */ item, /** @type string[] */
                 formatDuration(item.duration),
             ),
         ),
-        item.status === 'Running' && item.process_id ? Button({
+        item.status === 'Running' && item.process_id && userCanRun ? Button({
             type: 'stroked',
             label: 'Cancel Run',
             style: 'width: auto; height: 32px; color: var(--purple); margin-left: 16px;',
diff --git a/testgen/ui/components/frontend/js/pages/quality_dashboard.js b/testgen/ui/components/frontend/js/pages/quality_dashboard.js
index d6604b02..135e1808 100644
--- a/testgen/ui/components/frontend/js/pages/quality_dashboard.js
+++ b/testgen/ui/components/frontend/js/pages/quality_dashboard.js
@@ -74,7 +74,8 @@ const QualityDashboard = (/** @type {Properties} */ props) => {
                     href: 'quality-dashboard:score-details',
                     class: 'ml-4',
                     params: { definition_id: score.id },
-                })
+                }),
+                {showHistory: true},
             ))
         ),
     ) : ConditionalEmptyState(getValue(props.project_summary)),
diff --git a/testgen/ui/components/frontend/js/pages/score_details.js b/testgen/ui/components/frontend/js/pages/score_details.js
index 1122f54a..1b5e9a1c 100644
--- a/testgen/ui/components/frontend/js/pages/score_details.js
+++ b/testgen/ui/components/frontend/js/pages/score_details.js
@@ -29,12 +29,13 @@ import van from '../van.min.js';
 import { Streamlit } from '../streamlit.js';
 import { emitEvent, getValue, loadStylesheet, resizeFrameHeightOnDOMChange, resizeFrameHeightToElement } from '../utils.js';
 import { ScoreCard } from '../components/score_card.js';
+import { ScoreHistory } from '../components/score_history.js';
 import { ScoreLegend } from '../components/score_legend.js';
 import { ScoreBreakdown } from '../components/score_breakdown.js';
 import { IssuesTable } from '../components/score_issues.js';
 import { Button } from '../components/button.js';
 
-const { div } = van.tags;
+const { div, i } = van.tags;
 
 const ScoreDetails = (/** @type {Properties} */ props) => {
     window.testgen.isPage = true;
@@ -53,7 +54,7 @@ const ScoreDetails = (/** @type {Properties} */ props) => {
         { id: domId, class: 'tg-score-details flex-column' },
         ScoreLegend(),
         div(
-            { class: 'flex-row mb-4'},
+            { class: 'flex-row fx-flex-wrap fx-gap-4 mb-4 mt-4'},
             ScoreCard(
                 props.score,
                 () => {
@@ -65,6 +66,13 @@ const ScoreDetails = (/** @type {Properties} */ props) => {
                 ) : '';
                 },
             ),
+            () => {
+                const score = getValue(props.score);
+                const history = getValue(props.score).history;
+                return history?.length > 0
+                    ? ScoreHistory({style: 'min-height: 216px; flex: 610px 0 1;', showRefresh: userCanEdit, score}, ...history)
+                    : null;
+            },
         ),
         () => {
             const issuesValue = getValue(props.issues);
diff --git a/testgen/ui/components/frontend/js/pages/score_explorer.js b/testgen/ui/components/frontend/js/pages/score_explorer.js
index 9bcd6fc4..6c3fdf2e 100644
--- a/testgen/ui/components/frontend/js/pages/score_explorer.js
+++ b/testgen/ui/components/frontend/js/pages/score_explorer.js
@@ -32,6 +32,10 @@
  * @property {Array} columns
  * @property {Array} items
  *
+ * @typedef Permissions
+ * @type {object}
+ * @property {boolean} can_edit
+ *
  * @typedef Properties
  * @type {object}
  * @property {object} filter_values
@@ -41,6 +45,7 @@
  * @property {string} breakdown_category
  * @property {string} breakdown_score_type
  * @property {boolean} is_new
+ * @property {Permissions} permissions
  */
 import van from '../van.min.js';
 import { Streamlit } from '../streamlit.js';
@@ -77,12 +82,14 @@ const ScoreExplorer = (/** @type {Properties} */ props) => {
     Streamlit.setFrameHeight(1);
 
     const domId = 'score-explorer-page';
+    const userCanEdit = getValue(props.permissions)?.can_edit ?? false;
+
     resizeFrameHeightToElement(domId);
     resizeFrameHeightOnDOMChange(domId);
 
     return div(
         { id: domId, class: 'score-explorer' },
-        Toolbar(props.filter_values, getValue(props.definition), props.is_new),
+        Toolbar(props.filter_values, getValue(props.definition), props.is_new, userCanEdit),
         span({ class: 'mb-4', style: 'display: block;' }),
         ScoreCard(props.score_card),
         span({ class: 'mb-4', style: 'display: block;' }),
@@ -117,6 +124,7 @@ const Toolbar = (
     /** @type object */ filterValues,
     /** @type ScoreDefinition */ definition,
     /** @type boolean */ isNew,
+    /** @type boolean */ userCanEdit,
 ) => {
     const addFilterButtonId = 'score-explorer--add-filter-btn';
     const categories = [
@@ -274,7 +282,7 @@ const Toolbar = (
                 ),
             ),
         ),
-        div(
+        userCanEdit ? div(
             { class: 'flex-row fx-align-flex-end' },
             Input({
                 label: 'Scorecard Name',
@@ -312,7 +320,7 @@ const Toolbar = (
                     onclick: () => emitEvent('LinkClicked', { href, params }),
                 });
             },
-        ),
+        ) : '',
     );
 };
@@ -393,8 +401,8 @@ stylesheet.replace(`
 .score-explorer--selector {
     min-height: 41px;
     overflow-y: auto;
-    background: var(--select-portal-background);
-    box-shadow: rgba(0, 0, 0, 0.16) 0px 4px 16px;
+    background: var(--portal-background);
+    box-shadow: var(--portal-box-shadow);
     border-radius: 8px;
     z-index: 99;
diff --git a/testgen/ui/components/frontend/js/pages/test_runs.js b/testgen/ui/components/frontend/js/pages/test_runs.js
index a653a12c..9c5713d1 100644
--- a/testgen/ui/components/frontend/js/pages/test_runs.js
+++ b/testgen/ui/components/frontend/js/pages/test_runs.js
@@ -16,10 +16,15 @@
  * @property {number} error_ct
  * @property {number} dismissed_ct
  * @property {string} dq_score_testing
- * 
+ *
+ * @typedef Permissions
+ * @type {object}
+ * @property {boolean} can_run
+ *
  * @typedef Properties
  * @type {object}
  * @property {TestRun[]} items
+ * @property {Permissions} permissions
  */
 import van from '../van.min.js';
 import { Tooltip } from '../components/tooltip.js';
@@ -27,7 +32,7 @@ import { SummaryBar } from '../components/summary_bar.js';
 import { Link } from '../components/link.js';
 import { Button } from '../components/button.js';
 import { Streamlit } from '../streamlit.js';
-import { emitEvent, resizeFrameHeightToElement } from '../utils.js';
+import { emitEvent, getValue, resizeFrameHeightToElement } from '../utils.js';
 import { formatTimestamp, formatDuration } from '../display_utils.js';
 
 const { div, span, i } = van.tags;
@@ -45,6 +50,8 @@ const TestRuns = (/** @type Properties */ props) => {
     });
 
     const columns = ['30%', '20%', '40%', '10%'];
 
+    const userCanRun = getValue(props.permissions)?.can_run ?? false;
+
     const tableId = 'test-runs-table';
     resizeFrameHeightToElement(tableId);
@@ -70,12 +77,16 @@ const TestRuns = (/** @type Properties */ props) => {
             ),
         ),
         () => div(
-            testRunItems.val.map(item => TestRunItem(item, columns)),
+            testRunItems.val.map(item => TestRunItem(item, columns, userCanRun)),
         ),
     );
 }
 
-const TestRunItem = (/** @type TestRun */ item, /** @type string[] */ columns) => {
+const TestRunItem = (
+    /** @type TestRun */ item,
+    /** @type string[] */ columns,
+    /** @type boolean */ userCanRun,
+) => {
     return div(
         { class: 'table-row flex-row' },
         div(
@@ -100,7 +111,7 @@ const TestRunItem = (/** @type TestRun */ item, /** @type string[] */ columns) =
                 formatDuration(item.duration),
             ),
         ),
-        item.status === 'Running' && item.process_id ? Button({
+        item.status === 'Running' && item.process_id && userCanRun ? Button({
             type: 'stroked',
             label: 'Cancel Run',
             style: 'width: auto; height: 32px; color: var(--purple); margin-left: 16px;',
diff --git a/testgen/ui/components/frontend/js/pages/test_suites.js b/testgen/ui/components/frontend/js/pages/test_suites.js
index 067bb0b7..9e62ef67 100644
--- a/testgen/ui/components/frontend/js/pages/test_suites.js
+++ b/testgen/ui/components/frontend/js/pages/test_suites.js
@@ -5,13 +5,13 @@
  * @property {number} connections_ct
  * @property {number} table_groups_ct
  * @property {string} default_connection_id
- * 
+ *
  * @typedef TableGroupOption
  * @type {object}
  * @property {string} id
 * @property {string} name
  * @property {boolean} selected
- * 
+ *
  * @typedef TestSuite
  * @type {object}
  * @property {string} id
@@ -29,11 +29,11 @@
  * @property {number} last_run_error_ct
  * @property {number} last_run_dismissed_ct
  * @property {string} last_complete_profile_run_id
- * 
+ *
  * @typedef Permissions
  * @type {object}
  * @property {boolean} can_edit
- * 
+ *
  * @typedef Properties
  * @type {object}
  * @property {ProjectSummary} project_summary
@@ -44,7 +44,7 @@
 import van from '../van.min.js';
 import { Streamlit } from '../streamlit.js';
 import { emitEvent, getValue, loadStylesheet, resizeFrameHeightToElement, resizeFrameHeightOnDOMChange } from '../utils.js';
-import { formatTimestamp } from '../display_utils.js';
+import { formatTimestamp, DISABLED_ACTION_TEXT } from '../display_utils.js';
 import { Select } from '../components/select.js';
 import { Button } from '../components/button.js';
 import { Card } from '../components/card.js';
@@ -69,7 +69,7 @@ const TestSuites = (/** @type Properties */ props) => {
     return div(
         { id: wrapperId, style: 'overflow-y: auto;' },
-        () => 
+        () =>
             getValue(props.project_summary).test_suites_ct > 0
                 ? div(
                     { class: 'tg-test-suites'},
@@ -181,11 +181,14 @@ const TestSuites = (/** @type Properties */ props) => {
                     })),
                 ),
             )
-            : ConditionalEmptyState(getValue(props.project_summary)),
+            : ConditionalEmptyState(getValue(props.project_summary), userCanEdit),
     );
 };
 
-const ConditionalEmptyState = (/** @type ProjectSummary */ projectSummary) => {
+const ConditionalEmptyState = (
+    /** @type ProjectSummary */ projectSummary,
+    /** @type boolean */ userCanEdit,
+) => {
     let args = {
         message: EMPTY_STATE_MESSAGE.testSuite,
         button: Button({
@@ -195,6 +198,9 @@ const ConditionalEmptyState = (/** @type ProjectSummary */ projectSummary) => {
             label: 'Add Test Suite',
             width: 'fit-content',
             style: 'margin: auto; background: white;',
+            disabled: !userCanEdit,
+            tooltip: userCanEdit ? null : DISABLED_ACTION_TEXT,
+            tooltipPosition: 'bottom',
             onclick: () => emitEvent('AddTestSuiteClicked', {}),
         }),
     };
diff --git a/testgen/ui/components/widgets/empty_state.py b/testgen/ui/components/widgets/empty_state.py
index 505d5601..13c06708 100644
--- a/testgen/ui/components/widgets/empty_state.py
+++ b/testgen/ui/components/widgets/empty_state.py
@@ -6,6 +6,7 @@
 from testgen.ui.components.widgets.button import button
 from testgen.ui.components.widgets.link import link
 from testgen.ui.components.widgets.page import css_class, whitespace
+from testgen.ui.services.user_session_service import DISABLED_ACTION_TEXT
 
 
 class EmptyStateMessage(Enum):
@@ -36,6 +37,7 @@ def empty_state(
     icon: str,
     message: EmptyStateMessage,
     action_label: str,
+    action_disabled: bool = False,
     link_href: str | None = None,
     link_params: dict | None = None,
     button_onclick: typing.Callable[..., None] | None = None,
@@ -61,15 +63,25 @@ def empty_state(
                 right_icon="chevron_right",
                 underline=False,
                 height=40,
-                style="margin: auto; border-radius: 4px; border: var(--button-stroked-border); padding: 8px 8px 8px 16px; color: var(--primary-color)",
+                style=f"""
+                    margin: auto;
+                    border-radius: 4px;
+                    border: var(--button-stroked-border);
+                    padding: 8px 8px 8px 16px;
+                    color: {"var(--disabled-text-color)" if action_disabled else "var(--primary-color)"};
+                """,
+                disabled=action_disabled,
+                tooltip=DISABLED_ACTION_TEXT if action_disabled else None,
             )
         elif button_onclick:
             button(
-                type_="flat",
-                color="primary",
+                type_="stroked" if action_disabled else "flat",
+                color="basic" if action_disabled else "primary",
                 label=action_label,
                 icon=button_icon,
                 on_click=button_onclick,
                 style="margin: auto; width: auto;",
+                disabled=action_disabled,
+                tooltip=DISABLED_ACTION_TEXT if action_disabled else None,
             )
         whitespace(5)
diff --git a/testgen/ui/components/widgets/link.py b/testgen/ui/components/widgets/link.py
index 4e2bf282..ce3e26c5 100644
--- a/testgen/ui/components/widgets/link.py
+++ b/testgen/ui/components/widgets/link.py
@@ -1,6 +1,10 @@
+import typing
+
 from testgen.ui.components.utils.component import component
 from testgen.ui.navigation.router import Router
 
+TooltipPosition = typing.Literal["left", "right"]
+
 
 def link(
     href: str,
@@ -16,6 +20,9 @@ def link(
     height: float | None = 21.0,
     width: float | None = None,
     style: str | None = None,
+    disabled: bool = False,
+    tooltip: str | None = None,
+    tooltip_position: TooltipPosition = "left",
     key: str = "testgen:link",
 ) -> None:
     props = {
@@ -25,6 +32,7 @@ def link(
         "height": height,
         "open_new": open_new,
         "underline": underline,
+        "disabled": disabled,
     }
     if left_icon:
         props.update({"left_icon": left_icon, "left_icon_size": left_icon_size})
@@ -38,6 +46,9 @@ def link(
     if width:
         props.update({"width": width})
 
+    if tooltip:
+        props.update({"tooltip": tooltip, "tooltipPosition": tooltip_position})
+
     clicked = component(id_="link", key=key, props=props)
     if clicked:
         Router().navigate(to=href, with_args=params)
diff --git a/testgen/ui/components/widgets/sidebar.py b/testgen/ui/components/widgets/sidebar.py
index 51e967f5..312f19a1 100644
--- a/testgen/ui/components/widgets/sidebar.py
+++ b/testgen/ui/components/widgets/sidebar.py
@@ -1,10 +1,12 @@
 import logging
+from typing import Literal
 
 from testgen.ui.components.utils.component import component
 from testgen.ui.navigation.menu import Menu
 from testgen.ui.navigation.router import Router
-from testgen.ui.services import javascript_service, user_session_service
+from testgen.ui.services import javascript_service, project_service, user_session_service
 from testgen.ui.session import session
+from testgen.ui.views.dialogs.application_logs_dialog import application_logs_dialog
 
 LOG = logging.getLogger("testgen")
@@ -14,7 +16,8 @@
 def sidebar(
     key: str = SIDEBAR_KEY,
-    project: str | None = None,
+    projects: list[dict[Literal["name", "code"], str]] | None = None,
+    current_project: str | None = None,
     username: str | None = None,
     menu: Menu = None,
     current_page: str | None = None,
@@ -32,11 +35,15 @@
     component(
         id_="sidebar",
         props={
-            "project": project,
+            "projects": projects,
+            "current_project": current_project,
             "username": username,
             "menu": menu.filter_for_current_user().sort_items().unflatten().asdict(),
             "current_page": current_page,
             "logout_path": LOGOUT_PATH,
+            "permissions": {
+                "can_edit": user_session_service.user_can_edit(),
+            },
         },
         key=key,
         on_change=on_change,
@@ -48,10 +55,22 @@
 def on_change():
     # and we get a "Calling st.rerun() within a callback is a noop" error
     # So we store the path and navigate on the next run
-    path = getattr(session, SIDEBAR_KEY)
-    if path == LOGOUT_PATH:
-        javascript_service.clear_component_states()
-        user_session_service.end_user_session()
-        Router().queue_navigation(to="", with_args={ "project_code": session.project })
-    else:
-        Router().queue_navigation(to=path, with_args={ "project_code": session.project })
+    event_data = getattr(session, SIDEBAR_KEY)
+    project = event_data.get("project")
+    path = event_data.get("path")
+    view_logs = event_data.get("view_logs")
+
+    if project:
+        project_service.set_current_project(project)
+        Router().queue_navigation(to="")
+
+    if path:
+        if path == LOGOUT_PATH:
+            javascript_service.clear_component_states()
+            user_session_service.end_user_session()
+            Router().queue_navigation(to="", with_args={ "project_code": session.project })
+        else:
+            Router().queue_navigation(to=path, with_args={ "project_code": session.project })
+
+    if view_logs:
+        application_logs_dialog()
diff --git a/testgen/ui/components/widgets/testgen_component.py b/testgen/ui/components/widgets/testgen_component.py
index 690396c5..74e939fc 100644
--- a/testgen/ui/components/widgets/testgen_component.py
+++ b/testgen/ui/components/widgets/testgen_component.py
@@ -2,6 +2,7 @@
 
 import streamlit as st
 
+from testgen.common.models import with_database_session
 from testgen.ui.components.utils.component import component
 from testgen.ui.navigation.router import Router
 from testgen.ui.session import session
@@ -36,9 +37,10 @@ def testgen_component(
     For both on_change_handlers and event_handlers,
     the "payload" data from the event is passed as the only argument to the callback function
     """
-    
+
     key = f"testgen:{component_id}"
-    
+
+    @with_database_session
     def on_change():
         event_data = st.session_state[key]
         if event_data and (event := event_data.get("event")):
diff --git a/testgen/ui/navigation/menu.py b/testgen/ui/navigation/menu.py
index 2a53669f..7c519baf 100644
--- a/testgen/ui/navigation/menu.py
+++ b/testgen/ui/navigation/menu.py
@@ -1,7 +1,7 @@
 import dataclasses
 import typing
 
-from testgen.ui.services import authentication_service
+from testgen.ui.services import user_session_service
 
 MenuSections = typing.Literal["Data Profiling", "Data Quality Testing", "Data Configuration", "Settings"]
 
@@ -11,7 +11,7 @@ class MenuItem:
     label: str
     icon: str | None = dataclasses.field(default=None)
     page: str | None = dataclasses.field(default=None)
-    roles: list[authentication_service.RoleType] | None = dataclasses.field(default_factory=list)
+    roles: list[user_session_service.RoleType] | None = dataclasses.field(default_factory=list)
     order: int = dataclasses.field(default=0)
     section: MenuSections | None = dataclasses.field(default=None)
     items: list["MenuItem"] | None = dataclasses.field(default=None)
@@ -33,13 +33,13 @@ def filter_for_current_user(self) -> "Menu":
         filtered_items = []
         for menu_item in self.items:
             item_roles = menu_item.roles or []
-            if len(item_roles) <= 0 or any(map(authentication_service.current_user_has_role, item_roles)):
+            if len(item_roles) <= 0 or any(map(user_session_service.user_has_role, item_roles)):
                 filtered_items.append(menu_item)
 
         return dataclasses.replace(self, items=filtered_items)
 
     def sort_items(self) -> "Menu":
         return dataclasses.replace(self, items=sorted(self.items, key=lambda item: item.order))
-    
+
     def unflatten(self) -> "Menu":
         unflattened_items = []
         section_items = { section: [] for section in typing.get_args(MenuSections) }
diff --git a/testgen/ui/navigation/page.py b/testgen/ui/navigation/page.py
index 69ba1bc0..ca4ed5f4 100644
--- a/testgen/ui/navigation/page.py
+++ b/testgen/ui/navigation/page.py
@@ -31,7 +31,6 @@ def __init__(self, router: testgen.ui.navigation.router.Router) -> None:
 
     def _navigate(self) -> None:
         self.router.navigate_to_pending()
-
         for guard in self.can_activate or []:
             can_activate = guard()
             if type(can_activate) == str:
@@ -39,7 +38,7 @@ def _navigate(self) -> None:
 
             if not can_activate:
                 session.page_pending_login = self.path
-                return self.router.navigate(to="")
+                return self.router.navigate(to=session.user_default_page or "")
 
         session.current_page_args = session.current_page_args or {}
         self._validate_project_query_param()
diff --git a/testgen/ui/queries/connection_queries.py b/testgen/ui/queries/connection_queries.py
index 087c9f0e..dead1744 100644
--- a/testgen/ui/queries/connection_queries.py
+++ b/testgen/ui/queries/connection_queries.py
@@ -12,7 +12,8 @@ def get_by_id(connection_id):
     SELECT id::VARCHAR(50), project_code, connection_id, connection_name, sql_flavor,
         project_host, project_port, project_user, project_db, project_pw_encrypted, NULL as password,
-        max_threads, max_query_chars, url, connect_by_url, connect_by_key, private_key, private_key_passphrase
+        max_threads, max_query_chars, url, connect_by_url, connect_by_key, private_key,
+        private_key_passphrase, http_path
     FROM {str_schema}.connections
     WHERE connection_id = '{connection_id}'
     """
@@ -26,7 +27,7 @@ def get_connections(project_code):
         sql_flavor, project_host, project_port, project_user, project_db, project_pw_encrypted,
         NULL as password,
         max_threads, max_query_chars, connect_by_url, url, connect_by_key, private_key,
-        private_key_passphrase
+        private_key_passphrase, http_path
     FROM {str_schema}.connections
     WHERE project_code = '{project_code}'
     ORDER BY connection_id
@@ -53,7 +54,8 @@ def edit_connection(schema, connection, encrypted_password, encrypted_private_ke
             max_query_chars = '{connection["max_query_chars"]}',
             url = '{connection["url"]}',
             connect_by_key = '{connection["connect_by_key"]}',
-            connect_by_url = '{connection["connect_by_url"]}'"""
+            connect_by_url = '{connection["connect_by_url"]}',
+            http_path = '{connection["http_path"]}'"""
 
     if encrypted_password:
         sql += f""",
             project_pw_encrypted = '{encrypted_password}'
         """
@@ -77,9 +79,9 @@ def add_connection(
     encrypted_private_key_passphrase: str | None,
 ) -> int:
     sql_header = f"""INSERT INTO {schema}.connections
-        (project_code, sql_flavor, url, connect_by_url, connect_by_key,
+        (project_code, sql_flavor, url, connect_by_url, connect_by_key, 
         project_host, project_port, project_user, project_db,
-        connection_name,"""
+        connection_name, http_path, """
 
     sql_footer = f"""
     SELECT
         '{connection["project_code"]}' as project_code,
@@ -91,7 +93,8 @@
         '{connection["project_port"]}' as project_port,
         '{connection["project_user"]}' as project_user,
         '{connection["project_db"]}' as project_db,
-        '{connection["connection_name"]}' as connection_name, """
+        '{connection["connection_name"]}' as connection_name,
+        '{connection["http_path"]}' as http_path, """
 
     if encrypted_password:
         sql_header += "project_pw_encrypted, "
diff --git a/testgen/ui/queries/profiling_queries.py b/testgen/ui/queries/profiling_queries.py
index 399b26e7..f58edf0c 100644
--- a/testgen/ui/queries/profiling_queries.py
+++ b/testgen/ui/queries/profiling_queries.py
@@ -1,11 +1,24 @@
 import json
+from datetime import datetime
+from typing import NamedTuple
 
 import pandas as pd
 import streamlit as st
 
 import testgen.ui.services.database_service as db
+from testgen.common.models import get_current_session
 from testgen.utils import is_uuid4
 
+TAG_FIELDS = [
+    "data_source",
+    "source_system",
+    "source_process",
+    "business_domain",
+    "stakeholder_group",
+    "transform_level",
+    "aggregation_level",
+    "data_product",
+]
 COLUMN_PROFILING_FIELDS = """
     -- Value Counts
     profile_results.record_ct,
@@ -139,6 +152,7 @@ def get_table_by_id(table_id: str, table_group_id: str) -> dict | None:
         table_chars.table_id::VARCHAR AS id,
         'table' AS type,
         table_chars.table_name,
+        table_chars.schema_name,
         table_chars.table_groups_id::VARCHAR AS table_group_id,
         -- Characteristics
         functional_table_type,
@@ -147,19 +161,14 @@
         data_point_ct,
         add_date,
         drop_date,
-        -- Tags
-        description,
-        critical_data_element,
-        data_source,
-        source_system,
-        source_process,
-        business_domain,
-        stakeholder_group,
-        transform_level,
-        aggregation_level,
-        data_product,
+        -- Table Tags
+        table_chars.description,
+        table_chars.critical_data_element,
+        {", ".join([ f"table_chars.{tag}" for tag in TAG_FIELDS ])},
+        -- Table Groups Tags
+        {", ".join([ f"table_groups.{tag} AS table_group_{tag}" for tag in TAG_FIELDS if tag != "aggregation_level" ])},
         -- Profile & Test Runs
-        last_complete_profile_run_id::VARCHAR AS profile_run_id,
+        table_chars.last_complete_profile_run_id::VARCHAR AS profile_run_id,
         profiling_starttime AS profile_run_date,
         TRUE AS is_latest_profile,
         EXISTS(
@@ -175,6 +184,9 @@ def get_table_by_id(table_id: str, table_group_id: str) -> dict | None:
     LEFT JOIN {schema}.profiling_runs ON (
         table_chars.last_complete_profile_run_id = profiling_runs.id
     )
+    LEFT JOIN {schema}.table_groups ON (
+        table_chars.table_groups_id = table_groups.id
+    )
     WHERE table_id = '{table_id}'
         AND table_chars.table_groups_id = '{table_group_id}';
     """
@@ -236,6 +248,7 @@ def get_column_by_condition(
         'column' AS type,
         column_chars.column_name,
         column_chars.table_name,
+        column_chars.schema_name,
         column_chars.table_groups_id::VARCHAR AS table_group_id,
         -- Characteristics
         column_chars.general_type,
@@ -245,28 +258,16 @@
         column_chars.add_date,
         column_chars.last_mod_date,
         column_chars.drop_date,
-        {"""
+        {f"""
         -- Column Tags
         column_chars.description,
         column_chars.critical_data_element,
-        column_chars.data_source,
-        column_chars.source_system,
-        column_chars.source_process,
-        column_chars.business_domain,
-        column_chars.stakeholder_group,
-        column_chars.transform_level,
-        column_chars.aggregation_level,
-        column_chars.data_product,
+        {", ".join([ f"column_chars.{tag}" for tag in TAG_FIELDS ])},
         -- Table Tags
         table_chars.critical_data_element AS table_critical_data_element,
-        table_chars.data_source AS table_data_source,
-        table_chars.source_system AS table_source_system,
-        table_chars.source_process AS table_source_process,
-        table_chars.business_domain AS table_business_domain,
-        table_chars.stakeholder_group AS table_stakeholder_group,
-        table_chars.transform_level AS table_transform_level,
-        table_chars.aggregation_level AS table_aggregation_level,
-        table_chars.data_product AS table_data_product,
+        {", ".join([ f"table_chars.{tag} AS table_{tag}" for tag in TAG_FIELDS ])},
+        -- Table Groups Tags
+        {", ".join([ f"table_groups.{tag} AS table_group_{tag}" for tag in TAG_FIELDS if tag != "aggregation_level" ])},
         """ if include_tags else ""}
         -- Profile & Test Runs
         column_chars.last_complete_profile_run_id::VARCHAR AS profile_run_id,
@@ -292,6 +293,9 @@ def get_column_by_condition(
     LEFT JOIN {schema}.data_table_chars table_chars ON (
         column_chars.table_id = table_chars.table_id
     )
+    LEFT JOIN {schema}.table_groups ON (
+        column_chars.table_groups_id = table_groups.id
+    )
     """ if include_tags else ""}
     LEFT JOIN {schema}.profile_results ON (
         column_chars.last_complete_profile_run_id = profile_results.profile_run_id
@@ -316,7 +320,7 @@ def get_hygiene_issues(profile_run_id: str, table_name: str, column_name: str |
     column_condition = ""
     if column_name:
         column_condition = f"AND column_name = '{column_name}'"
-    
+
     query = f"""
     WITH pii_results AS (
         SELECT id,
@@ -360,3 +364,26 @@ def get_hygiene_issues(profile_run_id: str, table_name: str, column_name: str |
 
     results = db.retrieve_data(query)
     return [row.to_dict() for _, row in results.iterrows()]
+
+
+class LatestProfilingRun(NamedTuple):
+    id: str
+    run_time: datetime
+
+
+def get_latest_run_date(project_code: str) -> LatestProfilingRun | None:
+    session = get_current_session()
+    result = session.execute(
+        """
+        SELECT id, profiling_starttime
+        FROM profiling_runs
+        WHERE project_code = :project_code
+            AND status = 'Complete'
+        ORDER BY profiling_starttime DESC
+        LIMIT 1
+        """,
+        params={"project_code": project_code},
+    )
+    if result and (latest_run := result.first()):
+        return LatestProfilingRun(str(latest_run.id), latest_run.profiling_starttime)
+    return None
diff --git a/testgen/ui/queries/scoring_queries.py b/testgen/ui/queries/scoring_queries.py
index 9dd6e965..ab7cec73 100644
--- a/testgen/ui/queries/scoring_queries.py
+++ b/testgen/ui/queries/scoring_queries.py
@@ -9,25 +9,10 @@
 
 @st.cache_data(show_spinner="Loading data ...")
 def get_all_score_cards(project_code: str) -> list["ScoreCard"]:
-    definitions = ScoreDefinition.all(project_code=project_code)
-    score_cards: list[ScoreCard] = []
-    root_keys: list[str] = ["score", "profiling_score", "testing_score", "cde_score"]
-
-    for definition in definitions:
-        score_card: ScoreCard = {
-            "id": definition.id,
-            "project_code": project_code,
-            "name": definition.name,
-            "categories": [],
-            "definition": definition,
-        }
-        for result in sorted(definition.results, key=lambda r: r.category):
-            if result.category in root_keys:
-                score_card[result.category] = result.score
-                continue
-            score_card["categories"].append({"label": result.category, "score": result.score})
-        score_cards.append(score_card)
-    return score_cards
+    return [
+        definition.as_cached_score_card()
+        for definition in ScoreDefinition.all(project_code=project_code)
+    ]
 
 
 def get_score_card_issue_reports(selected_issues: list["SelectedIssue"]):
@@ -142,22 +127,20 @@ def get_score_category_values(project_code: str) -> dict[ScoreCategory, list[str
     ]
     quote = lambda v: f"'{v}'"
-    query = " UNION ".join([
-        f"""
+    query = f"""
         SELECT DISTINCT
             UNNEST(array[{', '.join([quote(c) for c in categories])}]) as category,
             UNNEST(array[{', '.join(categories)}]) AS value
         FROM v_dq_test_scoring_latest_by_column
         WHERE project_code = '{project_code}'
-        """,
-        f"""
+        UNION
         SELECT DISTINCT
             UNNEST(array[{', '.join([quote(c) for c in categories])}]) as category,
             UNNEST(array[{', '.join(categories)}]) AS value
         FROM v_dq_profile_scoring_latest_by_column
         WHERE project_code = '{project_code}'
-        """,
-    ])
+        ORDER BY value
+    """
     results = pd.read_sql_query(query, engine)
     for _, row in results.iterrows():
         if row["category"] and row["value"]:
diff --git a/testgen/ui/queries/table_group_queries.py b/testgen/ui/queries/table_group_queries.py
index 02de776c..74c81c31 100644
--- a/testgen/ui/queries/table_group_queries.py
+++ b/testgen/ui/queries/table_group_queries.py
@@ -108,14 +108,14 @@ def edit(schema, table_group):
             profiling_delay_days='{table_group["profiling_delay_days"]}',
             profile_flag_cdes={table_group["profile_flag_cdes"]},
             description='{table_group["description"]}',
-            data_source='{table_group["data_source"]}',
-            source_system='{table_group["source_system"]}',
-            source_process='{table_group["source_process"]}',
-            data_location='{table_group["data_location"]}',
-            business_domain='{table_group["business_domain"]}',
-            stakeholder_group='{table_group["stakeholder_group"]}',
-            transform_level='{table_group["transform_level"]}',
-            data_product='{table_group["data_product"]}'
+            data_source=NULLIF('{table_group["data_source"]}', ''),
+            source_system=NULLIF('{table_group["source_system"]}', ''),
+            source_process=NULLIF('{table_group["source_process"]}', ''),
+            data_location=NULLIF('{table_group["data_location"]}', ''),
+            business_domain=NULLIF('{table_group["business_domain"]}', ''),
+            stakeholder_group=NULLIF('{table_group["stakeholder_group"]}', ''),
+            transform_level=NULLIF('{table_group["transform_level"]}', ''),
+            data_product=NULLIF('{table_group["data_product"]}', '')
         WHERE id = '{table_group["id"]}'
     ;
@@ -168,14 +168,14 @@ def add(schema, table_group) -> str:
             '{table_group["profiling_delay_days"]}'::character varying,
             {table_group["profile_flag_cdes"]},
             '{table_group["description"]}',
-            '{table_group["data_source"]}',
-            '{table_group["source_system"]}',
-            '{table_group["source_process"]}',
-            '{table_group["data_location"]}',
-            '{table_group["business_domain"]}',
-            '{table_group["stakeholder_group"]}',
-            '{table_group["transform_level"]}',
-            '{table_group["data_product"]}'
+            NULLIF('{table_group["data_source"]}', ''),
+            NULLIF('{table_group["source_system"]}', ''),
+            NULLIF('{table_group["source_process"]}', ''),
+            NULLIF('{table_group["data_location"]}', ''),
+            NULLIF('{table_group["business_domain"]}', ''),
+            NULLIF('{table_group["stakeholder_group"]}', ''),
+            NULLIF('{table_group["transform_level"]}', ''),
+            NULLIF('{table_group["data_product"]}', '')
     ;"""
     db.execute_sql(sql)
     st.cache_data.clear()
diff --git a/testgen/ui/queries/test_run_queries.py b/testgen/ui/queries/test_run_queries.py
index a1484116..1ad15044 100644
--- a/testgen/ui/queries/test_run_queries.py
+++ b/testgen/ui/queries/test_run_queries.py
@@ -1,7 +1,11 @@
+from datetime import datetime
+from typing import NamedTuple
+
 import streamlit as st
 
 import testgen.common.date_service as date_service
 import testgen.ui.services.database_service as db
+from testgen.common.models import get_current_session
 
 
 def cascade_delete(test_suite_ids: list[str]) -> None:
@@ -48,3 +52,27 @@
         SET status = 'Cancelled'
         WHERE status = 'Running';
     """)
+
+
+class LatestTestRun(NamedTuple):
+    id: str
+    run_time: datetime
+
+
+def get_latest_run_date(project_code: str) -> LatestTestRun | None:
+    session = get_current_session()
+    result = session.execute(
+        """
+        SELECT runs.id, test_starttime
+        FROM test_runs AS runs
+        INNER JOIN test_suites AS suite ON (suite.id = runs.test_suite_id)
+        WHERE project_code = :project_code
+            AND status = 'Complete'
+        ORDER BY test_starttime DESC
+        LIMIT 1
+        """,
+        params={"project_code": project_code},
+    )
+    if result and (latest_run := result.first()):
+        return LatestTestRun(str(latest_run.id), latest_run.test_starttime)
+    return None
diff --git a/testgen/ui/queries/authentication_queries.py b/testgen/ui/queries/user_queries.py
similarity index 73%
rename from testgen/ui/queries/authentication_queries.py
rename to testgen/ui/queries/user_queries.py
index ab12c56b..c9953b4c 100644
--- a/testgen/ui/queries/authentication_queries.py
+++ b/testgen/ui/queries/user_queries.py
@@ -1,10 +1,12 @@
 import streamlit as st
 
 import testgen.ui.services.database_service as db
+from testgen.common.encrypt import encrypt_ui_password
 
 
 @st.cache_data(show_spinner=False)
-def get_users(schema):
+def get_users():
+    schema: str = st.session_state["dbschema"]
     sql = f"""SELECT id::VARCHAR(50),
             username, email, "name", "password", preauthorized, role
@@ -12,17 +14,20 @@ def get_users(schema):
     return db.retrieve_data(sql)
 
 
-def delete_users(schema, user_ids):
+def delete_users(user_ids):
     if user_ids is None or len(user_ids) == 0:
         raise ValueError("No user is specified.")
 
+    schema: str = st.session_state["dbschema"]
     items = [f"'{item}'" for item in user_ids]
     sql = f"""DELETE FROM {schema}.auth_users WHERE id in ({",".join(items)})"""
     db.execute_sql(sql)
     st.cache_data.clear()
 
 
-def add_user(schema, user, encrypted_password):
+def add_user(user):
+    schema: str = st.session_state["dbschema"]
+    encrypted_password = encrypt_ui_password(user["password"])
     sql = f"""INSERT INTO {schema}.auth_users
         (username, email, name, password, role)
     SELECT
@@ -35,7 +40,9 @@ def add_user(schema, user, encrypted_password):
     st.cache_data.clear()
 
 
-def edit_user(schema, user, encrypted_password):
+def edit_user(user):
+    schema: str = st.session_state["dbschema"]
+    encrypted_password = encrypt_ui_password(user["password"])
     sql = f"""UPDATE {schema}.auth_users
         SET username = '{user["username"]}',
             email = '{user["email"]}',
diff --git a/testgen/ui/services/authentication_service.py b/testgen/ui/services/authentication_service.py
deleted file mode 100644
index 2dfc5a2b..00000000
--- a/testgen/ui/services/authentication_service.py
+++ /dev/null
@@ -1,55 +0,0 @@
-# ruff: noqa: S105
-
-import logging
-import typing
-
-import streamlit as st
-
-from testgen.common.encrypt import encrypt_ui_password
-from testgen.ui.queries import authentication_queries
-from testgen.ui.session import session
-
-RoleType = typing.Literal["admin", "edit", "read"]
-
-LOG = logging.getLogger("testgen")
-
-
-def add_user(user):
-    encrypted_password = encrypt_ui_password(user["password"])
-    schema = st.session_state["dbschema"]
-    authentication_queries.add_user(schema, user, encrypted_password)
-
-
-def delete_users(user_ids):
-    schema = st.session_state["dbschema"]
-    return authentication_queries.delete_users(schema, user_ids)
-
-
-def edit_user(user):
-    encrypted_password = encrypt_ui_password(user["password"])
-    schema = st.session_state["dbschema"]
-    authentication_queries.edit_user(schema, user, encrypted_password)
-
-
-def get_users():
-    return authentication_queries.get_users(session.dbschema)
-
-
-def get_role_for_user(auth_data, username):
-    return auth_data["credentials"]["usernames"][username]["role"]
-
-
-def current_user_has_admin_role():
-    return session.auth_role == "admin"
-
-
-def current_user_has_edit_role():
-    return session.auth_role in ("edit", "admin")
-
-
-def current_user_has_read_role():
-    return not session.auth_role or session.auth_role == "read"
-
-
-def current_user_has_role(role: RoleType) -> bool:
-    return session.auth_role == role
diff --git a/testgen/ui/services/connection_service.py b/testgen/ui/services/connection_service.py
index 293a6232..8bf53a51 100644
--- a/testgen/ui/services/connection_service.py
+++ b/testgen/ui/services/connection_service.py
@@ -138,6 +138,7 @@ def init_profiling_sql(project_code, connection, table_group_schema=None):
     project_db = connection["project_db"]
     project_user = connection["project_user"]
     password = connection["password"]
+    http_path = connection["http_path"]
 
     # prepare the profiling query
     clsProfiling = InitializeProfilingSQL(project_code, sql_flavor)
@@ -156,6 +157,7 @@ def init_profiling_sql(project_code, connection, table_group_schema=None):
         connect_by_key,
         private_key,
         private_key_passphrase,
+        http_path,
         connectname="PROJECT",
         password=password,
     )
diff --git a/testgen/ui/services/database_service.py b/testgen/ui/services/database_service.py
index e5030cde..98c09ed6 100644
--- a/testgen/ui/services/database_service.py
+++ b/testgen/ui/services/database_service.py
@@ -245,7 +245,7 @@ def apply_df_edits(df_original, df_edited, str_table, lst_id_columns, no_update_
     return booStatus
 
 
-def _start_target_db_engine(flavor, host, port, db_name, user, password, url, connect_by_url, connect_by_key, private_key, private_key_passphrase):
+def _start_target_db_engine(flavor, host, port, db_name, user, password, url, connect_by_url, connect_by_key, private_key, private_key_passphrase, http_path):
     connection_params = {
         "flavor": flavor if flavor != "redshift" else "postgresql",
         "user": user,
@@ -257,6 +257,7 @@ def _start_target_db_engine(flavor, host, port, db_name, user, password, url, co
         "connect_by_key": connect_by_key,
         "private_key": private_key,
         "private_key_passphrase": private_key_passphrase,
+        "http_path": http_path,
         "dbschema": None,
     }
     flavor_service = get_flavor_service(flavor)
@@ -267,17 +268,17 @@ def _start_target_db_engine(flavor, host, port, db_name, user, password, url, co
     return create_engine(connection_string, connect_args=connect_args)
 
 
-def retrieve_target_db_data(flavor, host, port, db_name, user, password, url, connect_by_url, connect_by_key, private_key, private_key_passphrase, sql_query, decrypt=False):
+def retrieve_target_db_data(flavor, host, port, db_name, user, password, url, connect_by_url, connect_by_key, private_key, private_key_passphrase, http_path, sql_query, decrypt=False):
     if decrypt:
         password = DecryptText(password)
-    db_engine = _start_target_db_engine(flavor, host, port, db_name, user, password, url, connect_by_url, connect_by_key, private_key, private_key_passphrase)
+    db_engine = _start_target_db_engine(flavor, host, port, db_name, user, password, url, connect_by_url, connect_by_key, private_key, private_key_passphrase, http_path)
     with db_engine.connect() as connection:
         query_result = connection.execute(text(sql_query))
         return query_result.fetchall()
 
 
-def retrieve_target_db_df(flavor, host, port, db_name, user, password, sql_query, url, connect_by_url, connect_by_key, private_key, private_key_passphrase):
+def retrieve_target_db_df(flavor, host, port, db_name, user, password, sql_query, url, connect_by_url, connect_by_key, private_key, private_key_passphrase, http_path):
     if password:
         password = DecryptText(password)
-    db_engine = _start_target_db_engine(flavor, host, port, db_name, user, password, url, connect_by_url, connect_by_key, private_key, private_key_passphrase)
+    db_engine = _start_target_db_engine(flavor, host, port, db_name, user, password, url, connect_by_url, connect_by_key, private_key, private_key_passphrase, http_path)
     return pd.read_sql_query(text(sql_query), db_engine)
diff --git a/testgen/ui/services/form_service.py b/testgen/ui/services/form_service.py
index 06ed0f9e..20fe0a14 100644
--- a/testgen/ui/services/form_service.py
+++ b/testgen/ui/services/form_service.py
@@ -2,7 +2,6 @@
 import base64
 import typing
 from builtins import float
-from datetime import date, datetime, time
 from enum import Enum
 from io import BytesIO
 from os.path import splitext
@@ -17,7 +16,6 @@
 from streamlit_extras.no_default_selectbox import selectbox
 
 import testgen.common.date_service as date_service
-import testgen.ui.services.authentication_service as authentication_service
 import testgen.ui.services.database_service as db
 from testgen.ui.navigation.router import Router
 
@@ -334,7 +332,7 @@ def render_form_by_field_specs(
     submit = (
         False
         if boo_display_only
-        else st.form_submit_button("Save Changes", disabled=authentication_service.current_user_has_read_role())
+        else st.form_submit_button("Save Changes")
     )
 
     if submit and not boo_display_only:
@@ -574,194 +572,6 @@ def render_markdown_table(df, lst_columns):
     st.markdown(md_str)
 
 
-def render_column_list(row_selected, lst_columns, str_prompt):
-    with st.container():
-        show_prompt(str_prompt)
-
-        for column in lst_columns:
-            column_type = type(row_selected[column])
-            if column_type is str:
-                st.text_input(label=ut_prettify_header(column), value=row_selected[column], disabled=True)
-            elif column_type is (int | float):
-                st.number_input(label=ut_prettify_header(column), value=row_selected[column], disabled=True)
-            elif column_type is (date | datetime):
-                st.date_input(label=ut_prettify_header(column), value=row_selected[column], disabled=True)
-            elif column_type is time:
-                st.time_input(label=ut_prettify_header(column), value=row_selected[column], disabled=True)
-            else:
-                st.text_input(label=ut_prettify_header(column), value=row_selected[column], disabled=True)
-
-
-def render_grid_form(
-    str_form_name,
-    df_data,
-    str_table_name,
-    lst_key_columns,
-    lst_show_columns,
-    lst_disabled_columns,
-    lst_no_update_columns,
-    dct_hard_default_columns,
-    dct_column_config,
-    str_prompt=None,
-):
-    show_header(str_form_name)
-    with st.form(str_form_name, clear_on_submit=True):
-        show_prompt(str_prompt)
-        df_edits = st.data_editor(
-            df_data,
-            column_order=lst_show_columns,
-            column_config=dct_column_config,
-            disabled=lst_disabled_columns,
-            num_rows="dynamic",
-            hide_index=True,
-        )
-        submit = st.form_submit_button("Save Changes", disabled=authentication_service.current_user_has_read_role())
-        if submit:
-            booStatus = db.apply_df_edits(
-                df_data, df_edits, str_table_name, lst_key_columns, lst_no_update_columns, dct_hard_default_columns
-            )
-            if booStatus:
-                reset_post_updates("Changes have been saved.")
-
-
-def render_edit_form(
-    str_form_name,
-    row_selected,
-    str_table_name,
-    lst_show_columns,
-    lst_key_columns,
-    lst_disabled=None,
-    str_text_display=None,
-    submit_disabled=False,
-    form_unique_key: str | None = None,
-):
-    show_header(str_form_name)
-
-    layout_column_1 = st.empty()
-    if str_text_display:
-        layout_column_1, layout_column_2 = st.columns([0.7, 0.3])
-
-    dct_mods = {}
-    if not lst_disabled:
-        lst_disabled = lst_key_columns
-    # Retrieve data types
-    row_selected.map(type)
-
-    if str_text_display:
-        with layout_column_2:
-            st.markdown(str_text_display)
-
-    with layout_column_1:
-        with st.form(form_unique_key or str_form_name, clear_on_submit=True):
-            for column, value in row_selected.items():
-                if column in lst_show_columns:
-                    column_type = type(value)
-                    if column_type is str:
-                        dct_mods[column] = st.text_input(
-                            label=ut_prettify_header(column),
-                            value=row_selected[column],
-                            disabled=(column in lst_disabled),
-                        )
-                    elif column_type in (int, float):
-                        dct_mods[column] = st.number_input(
-                            label=ut_prettify_header(column),
-                            value=row_selected[column],
-                            disabled=(column in lst_disabled),
-                        )
-                    elif column_type in (date, datetime, datetime.date):
-                        dct_mods[column] = st.date_input(
-                            label=ut_prettify_header(column),
-                            value=row_selected[column],
-                            disabled=(column in lst_disabled),
-                        )
-                    elif column_type is time:
-                        dct_mods[column] = st.time_input(
-                            label=ut_prettify_header(column),
-                            value=row_selected[column],
-                            disabled=(column in lst_disabled),
-                        )
-                    else:
-                        dct_mods[column] = st.text_input(
-                            label=ut_prettify_header(column),
-                            value=row_selected[column],
-                            disabled=(column in lst_disabled),
-                        )
-                else:
-                    # If Hidden, add directly to dct_mods for updates
-                    dct_mods[column] = row_selected[column]
-            edit_allowed = not submit_disabled and authentication_service.current_user_has_edit_role()
-            submit = st.form_submit_button("Save Changes", disabled=not edit_allowed)
-
-            if submit and edit_allowed:
-                # Construct SQL UPDATE statement based on the changed columns
-                changes = []
-                keys = []
-                for col, val in dct_mods.items():
-                    if col in lst_key_columns:
-                        keys.append(f"{col} = {db.make_value_db_friendly(val)}")
-                    if val != row_selected[col]:
-                        changes.append(f"{col} = {db.make_value_db_friendly(val)}")
-
-                # If there are any changes, construct and run the SQL statement
-                if changes:
-                    str_schema = st.session_state["dbschema"]
-                    str_sql = (
-                        f"UPDATE {str_schema}.{str_table_name} SET {', '.join(changes)} WHERE {' AND '.join(keys)};"
-                    )
-                    db.execute_sql(str_sql)
-                    reset_post_updates("Changes have been saved.")
-            elif submit:
-                reset_post_updates("The current user does not have permission to save changes.", style="warning")
-
-
-
-def render_insert_form(
-    str_form_name,
-    lst_columns,
-    str_table_name,
-    dct_default_values=None,
-    lst_hidden=None,
-    lst_disabled=None,
-    form_unique_key: str | None = None,
-    on_cancel=None,
-):
-    show_header(str_form_name)
-    dct_mods = {}
-
-    with st.form(form_unique_key or str_form_name, clear_on_submit=True):
-        for column in lst_columns:
-            if column not in (lst_hidden or []):
-                val = "" if column not in (dct_default_values or []) else dct_default_values[column]
-                input_type_by_default_value = {
-                    date: st.date_input,
-                }
-                is_disabled = column in (lst_disabled or [])
-                input_type = input_type_by_default_value.get(type(val), st.text_input)
-
-                dct_mods[column] = input_type(label=ut_prettify_header(column), value=val, disabled=is_disabled)
-            else:
-                dct_mods[column] = dct_default_values[column]
-
-        _, col1, col2 = st.columns([0.7, 0.1, 0.2])
-        with col2:
-            submit = st.form_submit_button("Insert Record", use_container_width=True)
-        if on_cancel:
-            with col1:
-                st.form_submit_button("Cancel", on_click=on_cancel, use_container_width=True)
-
-        if submit:
-            str_schema = st.session_state["dbschema"]
-            # Construct SQL INSERT statement based on all columns
-            insert_cols = []
-            insert_vals = []
-            for col, val in dct_mods.items():
-                insert_cols.append(col)
-                insert_vals.append(f"'{val}'")
-            str_sql = f"INSERT INTO {str_schema}.{str_table_name} ({', '.join(insert_cols)}) VALUES ({', '.join(insert_vals)})"
-            db.execute_sql(str_sql)
-            reset_post_updates("New record created.")
-
-
 def render_grid_select(
     df: pd.DataFrame,
     show_columns,
diff --git a/testgen/ui/services/hygiene_issues_service.py b/testgen/ui/services/hygiene_issues_service.py
index dd50476d..71a24fe7 100644
--- a/testgen/ui/services/hygiene_issues_service.py
+++ b/testgen/ui/services/hygiene_issues_service.py
@@ -10,7 +10,7 @@ def get_source_data(hi_data):
     str_sql = f"""
            SELECT t.lookup_query, tg.table_group_schema,
                   c.sql_flavor, c.project_host, c.project_port, c.project_db, c.project_user, c.project_pw_encrypted,
-                  c.url, c.connect_by_url, c.connect_by_key, c.private_key, c.private_key_passphrase
+                  c.url, c.connect_by_url, c.connect_by_key, c.private_key, c.private_key_passphrase, c.http_path
              FROM {str_schema}.target_data_lookups t
            INNER JOIN {str_schema}.table_groups tg
               ON ('{hi_data["table_groups_id"]}'::UUID = tg.id)
@@ -22,7 +22,7 @@ def get_source_data(hi_data):
               AND t.lookup_query > '';
     """
 
-    def get_lookup_query(test_id, detail_exp, column_names):
+    def get_lookup_query(test_id, detail_exp, column_names, sql_flavor):
         if test_id in {"1019", "1020"}:
             start_index = detail_exp.find("Columns: ")
             if start_index == -1:
@@ -31,8 +31,9 @@ def get_source_data(hi_data):
             start_index += len("Columns: ")
             column_names_str = detail_exp[start_index:]
             columns = [col.strip() for col in column_names_str.split(",")]
+            quote = "`" if sql_flavor == "databricks" else '"'
             queries = [
-                f"SELECT '{column}' AS column_name, MAX({column}) AS max_date_available FROM {{TARGET_SCHEMA}}.{{TABLE_NAME}}"
+                f"SELECT '{column}' AS column_name, MAX({quote}{column}{quote}) AS max_date_available FROM {{TARGET_SCHEMA}}.{{TABLE_NAME}}"
                 for column in columns
             ]
             sql_query = " UNION ALL ".join(queries) + " ORDER BY max_date_available DESC;"
@@ -42,7 +43,7 @@ def get_source_data(hi_data):
 
     def replace_parms(str_query):
         str_query = (
-            get_lookup_query(hi_data["anomaly_id"], hi_data["detail"], hi_data["column_name"])
+            get_lookup_query(hi_data["anomaly_id"], hi_data["detail"], hi_data["column_name"], lst_query[0]["sql_flavor"])
             if lst_query[0]["lookup_query"] == "created_in_ui"
             else lst_query[0]["lookup_query"]
         )
@@ -54,7 +55,7 @@ def get_source_data(hi_data):
         str_query = replace_templated_functions(str_query, lst_query[0]["sql_flavor"])
 
         if str_query is None or str_query == "":
-            raise ValueError("Lookup query is not defined for this Anomoly Type.")
+            raise ValueError("Lookup query is not defined for this Anomaly Type.")
 
         return str_query
 
     try:
@@ -77,6 +78,7 @@ def replace_parms(str_query):
             lst_query[0]["connect_by_key"],
             lst_query[0]["private_key"],
             lst_query[0]["private_key_passphrase"],
+            lst_query[0]["http_path"],
         )
         if df.empty:
             return "ND", "Data that violates Hygiene Issue criteria is not present in the current dataset.", str_sql, None
diff --git a/testgen/ui/services/project_service.py b/testgen/ui/services/project_service.py
index fa049b7b..18063ba7 100644
--- a/testgen/ui/services/project_service.py
+++ b/testgen/ui/services/project_service.py
@@ -1,7 +1,8 @@
 import streamlit as st
 
+from 
testgen.ui.navigation.router import Router from testgen.ui.queries import project_queries -from testgen.ui.services import query_service +from testgen.ui.services import database_service, query_service from testgen.ui.session import session @@ -17,6 +18,7 @@ def get_projects(): def set_current_project(project_code: str) -> None: session.project = project_code + Router().set_query_params({ "project_code": project_code }) @st.cache_data(show_spinner=False) @@ -25,3 +27,16 @@ def get_project_by_code(code: str): return None return query_service.get_project_by_code(session.dbschema, code) + +def edit_project(project: dict): + schema = st.session_state["dbschema"] + query = f""" + UPDATE {schema}.projects + SET + project_name = '{project["project_name"]}', + observability_api_url = '{project["observability_api_url"]}', + observability_api_key = '{project["observability_api_key"]}' + WHERE id = '{project["id"]}'; + """ + database_service.execute_sql(query) + st.cache_data.clear() diff --git a/testgen/ui/services/test_results_service.py b/testgen/ui/services/test_results_service.py index c2cc9431..0860fe87 100644 --- a/testgen/ui/services/test_results_service.py +++ b/testgen/ui/services/test_results_service.py @@ -7,6 +7,106 @@ from testgen.ui.services.test_definition_service import get_test_definition +def get_test_results( + schema: str, + run_id: str, + test_status: str | None = None, + test_type_id: str | None = None, + table_name: str | None = None, + column_name: str | None = None, + sorting_columns: list[str] | None = None, +) -> pd.DataFrame: + # First visible row first, so multi-select checkbox will render + order_by = "ORDER BY " + (", ".join(" ".join(col) for col in sorting_columns)) if sorting_columns else "" + filters = "" + if test_status: + filters += f" AND r.result_status IN ({test_status})" + if test_type_id: + filters += f" AND r.test_type = '{test_type_id}'" + if table_name: + filters += f" AND r.table_name = '{table_name}'" + if column_name: + filters += f" AND r.column_names = '{column_name}'" + + sql = f""" + WITH run_results + AS (SELECT * + FROM {schema}.test_results r + WHERE + r.test_run_id = '{run_id}' + {filters} + ) + SELECT r.table_name, + p.project_name, ts.test_suite, tg.table_groups_name, cn.connection_name, cn.project_host, cn.sql_flavor, + tt.dq_dimension, tt.test_scope, + r.schema_name, r.column_names, r.test_time::DATE as test_date, r.test_type, tt.id as test_type_id, + tt.test_name_short, tt.test_name_long, r.test_description, tt.measure_uom, tt.measure_uom_description, + c.test_operator, r.threshold_value::NUMERIC(16, 5), r.result_measure::NUMERIC(16, 5), r.result_status, + CASE + WHEN r.result_code <> 1 THEN r.disposition + ELSE 'Passed' + END as disposition, + NULL::VARCHAR(1) as action, + r.input_parameters, r.result_message, CASE WHEN result_code <> 1 THEN r.severity END as severity, + r.result_code as passed_ct, + (1 - r.result_code)::INTEGER as exception_ct, + CASE + WHEN result_status = 'Warning' + AND result_message NOT ILIKE 'Inactivated%%' THEN 1 + END::INTEGER as warning_ct, + CASE + WHEN result_status = 'Failed' + AND result_message NOT ILIKE 'Inactivated%%' THEN 1 + END::INTEGER as failed_ct, + CASE + WHEN result_message ILIKE 'Inactivated%%' THEN 1 + END as execution_error_ct, + p.project_code, r.table_groups_id::VARCHAR, + r.id::VARCHAR as test_result_id, r.test_run_id::VARCHAR, + c.id::VARCHAR as connection_id, r.test_suite_id::VARCHAR, + r.test_definition_id::VARCHAR as test_definition_id_runtime, + CASE + WHEN r.auto_gen = TRUE THEN d.id + 
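+               -- Auto-generated tests resolve to the id of the current auto-gen definition
+               -- (LEFT JOIN d below); manually defined tests keep the definition id captured at run time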
ELSE r.test_definition_id + END::VARCHAR as test_definition_id_current, + r.auto_gen, + + -- These are used in the PDF report + tt.threshold_description, tt.usage_notes, r.test_time + + FROM run_results r + INNER JOIN {schema}.test_types tt + ON (r.test_type = tt.test_type) + LEFT JOIN {schema}.test_definitions rd + ON (r.test_definition_id = rd.id) + LEFT JOIN {schema}.test_definitions d + ON (r.test_suite_id = d.test_suite_id + AND r.table_name = d.table_name + AND r.column_names = COALESCE(d.column_name, 'N/A') + AND r.test_type = d.test_type + AND r.auto_gen = TRUE + AND d.last_auto_gen_date IS NOT NULL) + INNER JOIN {schema}.test_suites ts + ON r.test_suite_id = ts.id + INNER JOIN {schema}.projects p + ON (ts.project_code = p.project_code) + INNER JOIN {schema}.table_groups tg + ON (ts.table_groups_id = tg.id) + INNER JOIN {schema}.connections cn + ON (tg.connection_id = cn.connection_id) + LEFT JOIN {schema}.cat_test_conditions c + ON (cn.sql_flavor = c.sql_flavor + AND r.test_type = c.test_type) + {order_by} ; + """ + df = db.retrieve_data(sql) + + # Clean Up + df["test_date"] = pd.to_datetime(df["test_date"]) + + return df + + def get_test_result_history(db_schema, tr_data): if tr_data["auto_gen"]: str_where = f""" @@ -41,7 +141,7 @@ def do_source_data_lookup_custom(db_schema, tr_data): str_sql = f""" SELECT d.custom_query as lookup_query, tg.table_group_schema, c.sql_flavor, c.project_host, c.project_port, c.project_db, c.project_user, c.project_pw_encrypted, - c.url, c.connect_by_url, c.connect_by_key, c.private_key, c.private_key_passphrase + c.url, c.connect_by_url, c.connect_by_key, c.private_key, c.private_key_passphrase, c.http_path FROM {db_schema}.test_definitions d INNER JOIN {db_schema}.table_groups tg ON ('{tr_data["table_groups_id"]}'::UUID = tg.id) @@ -71,6 +171,7 @@ def do_source_data_lookup_custom(db_schema, tr_data): lst_query[0]["connect_by_key"], lst_query[0]["private_key"], lst_query[0]["private_key_passphrase"], + lst_query[0]["http_path"], ) if df.empty: return "ND", "Data that violates Test criteria is not present in the current dataset.", str_sql, None @@ -89,7 +190,8 @@ def do_source_data_lookup(db_schema, tr_data, sql_only=False): SELECT t.lookup_query, tg.table_group_schema, c.sql_flavor, c.project_host, c.project_port, c.project_db, c.project_user, c.project_pw_encrypted, c.url, c.connect_by_url, - c.connect_by_key, c.private_key, c.private_key_passphrase + c.connect_by_key, c.private_key, c.private_key_passphrase, + c.http_path FROM {db_schema}.target_data_lookups t INNER JOIN {db_schema}.table_groups tg ON ('{tr_data["table_groups_id"]}'::UUID = tg.id) @@ -174,6 +276,7 @@ def replace_parms(df_test, str_query): lst_query[0]["connect_by_key"], lst_query[0]["private_key"], lst_query[0]["private_key_passphrase"], + lst_query[0]["http_path"], ) if df.empty: return "ND", "Data that violates Test criteria is not present in the current dataset.", str_sql, None diff --git a/testgen/ui/services/user_session_service.py b/testgen/ui/services/user_session_service.py index 019a2d3d..a5b8c186 100644 --- a/testgen/ui/services/user_session_service.py +++ b/testgen/ui/services/user_session_service.py @@ -1,16 +1,20 @@ import datetime import logging +import typing import extra_streamlit_components as stx import jwt +import streamlit as st -from testgen.ui.queries import authentication_queries -from testgen.ui.services.authentication_service import get_role_for_user +from testgen.ui.queries import user_queries from testgen.ui.session import session +RoleType = 
typing.Literal["admin", "data_quality", "analyst", "business", "catalog"] + JWT_HASHING_KEY = "dk_signature_key" AUTH_TOKEN_COOKIE_NAME = "dk_cookie_name" # noqa: S105 -AUTH_TOKEN_EXPIRATION_DAYS = 5 +AUTH_TOKEN_EXPIRATION_DAYS = 1 +DISABLED_ACTION_TEXT = "You do not have permissions to perform this action. Contact your administrator." LOG = logging.getLogger("testgen") @@ -32,22 +36,28 @@ def load_user_session() -> None: def start_user_session(name: str, username: str) -> None: session.name = name session.username = username - session.auth_role = get_role_for_user(get_auth_data(), username) + session.auth_role = get_auth_data()["credentials"]["usernames"][username]["role"] session.authentication_status = True session.logging_out = False + if user_has_catalog_role(): + session.user_default_page = "data-catalog" + st.rerun() + else: + session.user_default_page = "project-dashboard" def end_user_session() -> None: session.auth_role = None session.authentication_status = None session.logging_out = True + session.user_default_page = "" del session.name del session.username def get_auth_data(): - auth_data = authentication_queries.get_users(session.dbschema) + auth_data = user_queries.get_users() usernames = {} preauthorized_list = [] @@ -67,3 +77,23 @@ def get_auth_data(): "cookie": {"expiry_days": AUTH_TOKEN_EXPIRATION_DAYS, "key": JWT_HASHING_KEY, "name": AUTH_TOKEN_COOKIE_NAME}, "preauthorized": {"emails": preauthorized_list}, } + + +def user_is_admin(): + return session.auth_role == "admin" + + +def user_can_edit(): + return session.auth_role in ("admin", "data_quality") + + +def user_can_disposition(): + return session.auth_role in ("admin", "data_quality", "analyst") + + +def user_has_catalog_role(): + return session.auth_role == "catalog" + + +def user_has_role(role: RoleType) -> bool: + return session.auth_role == role diff --git a/testgen/ui/session.py b/testgen/ui/session.py index 0e5ef49b..826fcc6e 100644 --- a/testgen/ui/session.py +++ b/testgen/ui/session.py @@ -19,7 +19,7 @@ class TestgenSession(Singleton): page_pending_login: str page_pending_sidebar: str page_args_pending_router: dict - + current_page: str current_page_args: dict @@ -28,7 +28,8 @@ class TestgenSession(Singleton): name: str username: str authentication_status: bool - auth_role: Literal["admin", "edit", "read"] + auth_role: Literal["admin", "data_quality", "analyst", "business", "catalog"] + user_default_page: str project: str add_project: bool diff --git a/testgen/ui/views/connections/forms.py b/testgen/ui/views/connections/forms.py index cc42d83c..23cf64b8 100644 --- a/testgen/ui/views/connections/forms.py +++ b/testgen/ui/views/connections/forms.py @@ -10,8 +10,8 @@ from testgen.ui.forms import BaseForm, Field, ManualRender, computed_field from testgen.ui.services import connection_service -SQL_FLAVORS = ["redshift", "snowflake", "mssql", "postgresql"] -SQLFlavor = typing.Literal["redshift", "snowflake", "mssql", "postgresql"] +SQL_FLAVORS = ["redshift", "snowflake", "mssql", "postgresql", "databricks"] +SQLFlavor = typing.Literal["redshift", "snowflake", "mssql", "postgresql", "databricks"] class BaseConnectionForm(BaseForm, ManualRender): @@ -170,6 +170,8 @@ def set_default_port(sql_flavor: SQLFlavor, form: type["BaseConnectionForm"]) -> form.project_port = 5432 elif sql_flavor == "snowflake": form.project_port = 443 + elif sql_flavor == "databricks": + form.project_port = 443 @staticmethod def for_flavor(flavor: SQLFlavor) -> type["BaseConnectionForm"]: @@ -178,6 +180,7 @@ def for_flavor(flavor: SQLFlavor) 
-> type["BaseConnectionForm"]: "snowflake": KeyPairConnectionForm, "mssql": PasswordConnectionForm, "postgresql": PasswordConnectionForm, + "databricks": HttpPathConnectionForm, }[flavor] @@ -201,6 +204,25 @@ def render_extra( self.render_field("password", left_fields_container) +class HttpPathConnectionForm(PasswordConnectionForm): + http_path: str = Field( + default="", + max_length=200, + st_kwargs_label="HTTP Path", + st_kwargs_max_chars=50, + ) + + def render_extra( + self, + _container: DeltaGenerator, + left_fields_container: DeltaGenerator, + _right_fields_container: DeltaGenerator, + _data: dict, + ) -> None: + super().render_extra(_container, left_fields_container, _right_fields_container, _data) + self.render_field("http_path", left_fields_container) + + class KeyPairConnectionForm(PasswordConnectionForm): connect_by_key: bool = Field(default=None) private_key_passphrase: str = Field( diff --git a/testgen/ui/views/connections/page.py b/testgen/ui/views/connections/page.py index 15104d56..610ed0c2 100644 --- a/testgen/ui/views/connections/page.py +++ b/testgen/ui/views/connections/page.py @@ -11,10 +11,11 @@ import testgen.ui.services.database_service as db from testgen.commands.run_profiling_bridge import run_profiling_in_background from testgen.common.database.database_service import empty_cache +from testgen.common.models import with_database_session from testgen.ui.components import widgets as testgen from testgen.ui.navigation.menu import MenuItem from testgen.ui.navigation.page import Page -from testgen.ui.services import connection_service, table_group_service +from testgen.ui.services import connection_service, table_group_service, user_session_service from testgen.ui.session import session, temp_value from testgen.ui.views.connections.forms import BaseConnectionForm from testgen.ui.views.connections.models import ConnectionStatus @@ -28,8 +29,15 @@ class ConnectionsPage(Page): path = "connections" can_activate: typing.ClassVar = [ lambda: session.authentication_status, + lambda: not user_session_service.user_has_catalog_role(), ] - menu_item = MenuItem(icon="database", label=PAGE_TITLE, section="Data Configuration", order=0) + menu_item = MenuItem( + icon="database", + label=PAGE_TITLE, + section="Data Configuration", + order=0, + roles=[ role for role in typing.get_args(user_session_service.RoleType) if role != "catalog" ], + ) def render(self, project_code: str, **_kwargs) -> None: dataframe = connection_service.get_connections(project_code) @@ -64,14 +72,17 @@ def render(self, project_code: str, **_kwargs) -> None: " border: var(--button-stroked-border); padding: 8px 8px 8px 16px; color: var(--primary-color)", ) else: + user_can_edit = user_session_service.user_can_edit() with actions_column: testgen.button( type_="stroked", color="primary", icon="table_view", label="Setup Table Groups", - style="background: white;", + style="var(--dk-card-background)", width=200, + disabled=not user_can_edit, + tooltip=None if user_can_edit else user_session_service.DISABLED_ACTION_TEXT, on_click=lambda: self.setup_data_configuration(project_code, connection.to_dict()), ) @@ -86,7 +97,7 @@ def show_connection_form(self, selected_connection: dict, _mode: str, project_co FlavorForm = BaseConnectionForm.for_flavor(sql_flavor) if connection: connection["password"] = connection["password"] or "" - + form_kwargs = connection or {"sql_flavor": sql_flavor, "connection_id": connection_id, "connection_name": connection_name} form = FlavorForm(**form_kwargs) @@ -130,22 +141,23 @@ def 
show_connection_form(self, selected_connection: dict, _mode: str, project_co f"connection_form-{connection_id}:test_conn" ) - with save_button_column: - testgen.button( - type_="flat", - label="Save", - key=f"connection_form:{connection_id}:submit", - on_click=lambda: set_submitted(True), - ) + if user_session_service.user_is_admin(): + with save_button_column: + testgen.button( + type_="flat", + label="Save", + key=f"connection_form:{connection_id}:submit", + on_click=lambda: set_submitted(True), + ) - with test_button_column: - testgen.button( - type_="stroked", - color="basic", - label="Test Connection", - key=f"connection_form:{connection_id}:test", - on_click=lambda: set_connecting(True), - ) + with test_button_column: + testgen.button( + type_="stroked", + color="basic", + label="Test Connection", + key=f"connection_form:{connection_id}:test", + on_click=lambda: set_connecting(True), + ) if is_connecting(): single_element_container = st.empty() @@ -207,6 +219,7 @@ def test_connection(self, connection: dict) -> "ConnectionStatus": connection["connect_by_key"], connection["private_key"], connection["private_key_passphrase"], + connection["http_path"], sql_query, ) connection_successful = len(results) == 1 and results[0][0] == 1 @@ -215,9 +228,10 @@ def test_connection(self, connection: dict) -> "ConnectionStatus": return ConnectionStatus(message="Error completing a query to the database server.", successful=False) return ConnectionStatus(message="The connection was successful.", successful=True) except Exception as error: - return ConnectionStatus(message="Error attempting the Connection.", details=error.args[0], successful=False) + return ConnectionStatus(message="Error attempting the connection.", details=error.args[0], successful=False) @st.dialog(title="Data Configuration Setup") + @with_database_session def setup_data_configuration(self, project_code: str, connection: dict) -> None: will_run_profiling = st.session_state.get("connection_form-new:run-profiling-toggle", True) testgen.wizard( diff --git a/testgen/ui/views/data_catalog.py b/testgen/ui/views/data_catalog.py index d7d18433..c131b81c 100644 --- a/testgen/ui/views/data_catalog.py +++ b/testgen/ui/views/data_catalog.py @@ -1,9 +1,12 @@ import json import typing +from collections import defaultdict +from datetime import datetime from functools import partial import pandas as pd import streamlit as st +from streamlit.delta_generator import DeltaGenerator import testgen.ui.services.database_service as db import testgen.ui.services.query_service as dq @@ -12,8 +15,10 @@ from testgen.ui.navigation.menu import MenuItem from testgen.ui.navigation.page import Page from testgen.ui.queries import project_queries -from testgen.ui.queries.profiling_queries import get_column_by_id, get_hygiene_issues, get_table_by_id +from testgen.ui.queries.profiling_queries import TAG_FIELDS, get_column_by_id, get_hygiene_issues, get_table_by_id +from testgen.ui.services import user_session_service from testgen.ui.session import session +from testgen.ui.views.dialogs.data_preview_dialog import data_preview_dialog from testgen.ui.views.dialogs.run_profiling_dialog import run_profiling_dialog from testgen.utils import friendly_score, score @@ -34,8 +39,9 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N ) project_code = project_code or session.project + user_can_navigate = not user_session_service.user_has_catalog_role() - if render_empty_state(project_code): + if render_empty_state(project_code, user_can_navigate): 
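+            # No profiling runs exist yet for this project; render_empty_state has already
+            # drawn the empty-state card above, so there is nothing more to render here.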
return group_filter_column, _, loading_column = st.columns([.3, .5, .2], vertical_alignment="center") @@ -68,63 +74,84 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N icon=PAGE_ICON, message=testgen.EmptyStateMessage.Profiling, action_label="Run Profiling", + action_disabled=not user_session_service.user_can_edit(), button_onclick=partial(run_profiling_dialog, project_code, table_group), button_icon="play_arrow", ) else: - def on_tree_node_select(node_id): - self.router.set_query_params({ "selected": node_id }) + def on_item_selected(item_id): + self.router.set_query_params({ "selected": item_id }) testgen_component( "data_catalog", - props={ "columns": columns_df.to_json(orient="records"), "selected": json.dumps(selected_item) }, - on_change_handlers={ "TreeNodeSelected": on_tree_node_select }, - event_handlers={ "TagsChanged": on_tags_changed }, + props={ + "columns": columns_df.to_json(orient="records"), + "selected": json.dumps(selected_item), + "tag_values": get_tag_values(), + "last_saved_timestamp": st.session_state.get("data_catalog:last_saved_timestamp"), + "permissions": { + "can_edit": user_session_service.user_can_disposition(), + "can_navigate": user_can_navigate, + }, + }, + on_change_handlers={ + "ItemSelected": on_item_selected, + "DataPreviewClicked": lambda item: data_preview_dialog( + item["table_group_id"], + item["schema_name"], + item["table_name"], + item.get("column_name"), + ), + }, + event_handlers={ "TagsChanged": partial(on_tags_changed, loading_column) }, ) -def on_tags_changed(tags: dict) -> None: - schema = st.session_state["dbschema"] - - if tags["type"] == "table": - update_table = "data_table_chars" - id_column = "table_id" - cached_function = get_table_by_id - else: - update_table = "data_column_chars" - id_column = "column_id" - cached_function = get_column_by_id - - attributes = [ - "description", - "data_source", - "source_system", - "source_process", - "business_domain", - "stakeholder_group", - "transform_level", - "aggregation_level", - "data_product" - ] +def on_tags_changed(spinner_container: DeltaGenerator, payload: dict) -> None: + attributes = ["description"] + attributes.extend(TAG_FIELDS) cde_value_map = { True: "TRUE", False: "FALSE", None: "NULL", } - set_attributes = [ f"{key} = NULLIF('{tags.get(key) or ''}', '')" for key in attributes ] - set_attributes.append(f"critical_data_element = {cde_value_map[tags.get('critical_data_element')]}") - sql = f""" - UPDATE {schema}.{update_table} - SET {', '.join(set_attributes)} - WHERE {id_column} = '{tags["id"]}'; - """ - db.execute_sql(sql) - cached_function.clear() + tags = payload["tags"] + set_attributes = [ f"{key} = NULLIF('{tags.get(key) or ''}', '')" for key in attributes if key in tags ] + if "critical_data_element" in tags: + set_attributes.append(f"critical_data_element = {cde_value_map[tags.get('critical_data_element')]}") + + tables = [] + columns = [] + for item in payload["items"]: + id_list = tables if item["type"] == "table" else columns + id_list.append(item["id"]) + + schema = st.session_state["dbschema"] + + with spinner_container: + with st.spinner("Saving tags"): + if tables: + db.execute_sql(f""" + UPDATE {schema}.data_table_chars + SET {', '.join(set_attributes)} + WHERE table_id IN ({", ".join([ f"'{item}'" for item in tables ])}); + """) + + if columns: + db.execute_sql(f""" + UPDATE {schema}.data_column_chars + SET {', '.join(set_attributes)} + WHERE column_id IN ({", ".join([ f"'{item}'" for item in columns ])}); + """) + + for func in 
[ get_table_group_columns, get_table_by_id, get_column_by_id, get_tag_values ]: + func.clear() + st.session_state["data_catalog:last_saved_timestamp"] = datetime.now().timestamp() st.rerun() -def render_empty_state(project_code: str) -> bool: +def render_empty_state(project_code: str, user_can_navigate: bool) -> bool: project_summary_df = project_queries.get_summary_by_code(project_code) if project_summary_df["profiling_runs_ct"]: # Without profiling, we don't have any table and column information in db return False @@ -137,6 +164,7 @@ def render_empty_state(project_code: str) -> bool: icon=PAGE_ICON, message=testgen.EmptyStateMessage.Connection, action_label="Go to Connections", + action_disabled=not user_can_navigate, link_href="connections", ) else: @@ -145,6 +173,7 @@ def render_empty_state(project_code: str) -> bool: icon=PAGE_ICON, message=testgen.EmptyStateMessage.Profiling if project_summary_df["table_groups_ct"] else testgen.EmptyStateMessage.TableGroup, action_label="Go to Table Groups", + action_disabled=not user_can_navigate, link_href="connections:table-groups", link_params={ "connection_id": str(project_summary_df["default_connection_id"]) } ) @@ -167,8 +196,12 @@ def get_table_group_columns(table_group_id: str) -> pd.DataFrame: table_chars.table_name, column_chars.general_type, column_chars.functional_data_type, - column_chars.drop_date AS column_drop_date, - table_chars.drop_date AS table_drop_date + column_chars.drop_date, + table_chars.drop_date AS table_drop_date, + column_chars.critical_data_element, + table_chars.critical_data_element AS table_critical_data_element, + {", ".join([ f"column_chars.{tag}" for tag in TAG_FIELDS ])}, + {", ".join([ f"table_chars.{tag} AS table_{tag}" for tag in TAG_FIELDS ])} FROM {schema}.data_column_chars column_chars LEFT JOIN {schema}.data_table_chars table_chars ON ( column_chars.table_id = table_chars.table_id @@ -244,3 +277,34 @@ def get_latest_test_issues(table_group_id: str, table_name: str, column_name: st df = db.retrieve_data(sql) return [row.to_dict() for _, row in df.iterrows()] + + +@st.cache_data(show_spinner=False) +def get_tag_values() -> dict[str, list[str]]: + schema = st.session_state["dbschema"] + + quote = lambda v: f"'{v}'" + sql = f""" + SELECT DISTINCT + UNNEST(array[{', '.join([quote(t) for t in TAG_FIELDS])}]) as tag, + UNNEST(array[{', '.join(TAG_FIELDS)}]) AS value + FROM {schema}.data_column_chars + UNION + SELECT DISTINCT + UNNEST(array[{', '.join([quote(t) for t in TAG_FIELDS])}]) as tag, + UNNEST(array[{', '.join(TAG_FIELDS)}]) AS value + FROM {schema}.data_table_chars + UNION + SELECT DISTINCT + UNNEST(array[{', '.join([quote(t) for t in TAG_FIELDS if t != 'aggregation_level'])}]) as tag, + UNNEST(array[{', '.join([ t for t in TAG_FIELDS if t != 'aggregation_level'])}]) AS value + FROM {schema}.table_groups + ORDER BY value + """ + df = db.retrieve_data(sql) + + values = defaultdict(list) + for _, row in df.iterrows(): + if row["tag"] and row["value"]: + values[row["tag"]].append(row["value"]) + return values diff --git a/testgen/ui/views/dialogs/application_logs_dialog.py b/testgen/ui/views/dialogs/application_logs_dialog.py index 30430201..de8abc80 100644 --- a/testgen/ui/views/dialogs/application_logs_dialog.py +++ b/testgen/ui/views/dialogs/application_logs_dialog.py @@ -42,14 +42,6 @@ def _search_text(log_data, search_query): return [line for line in log_data if search_query in line] -def view_log_file(button_container): - with button_container: - if st.button( - "Troubleshooting →", help="Open and 
review TestGen Log files", use_container_width=True
-        ):
-            application_logs_dialog()
-
-
 @st.dialog(title="Application Logs")
 def application_logs_dialog():
     _, file_out_path = display_service.get_in_out_paths()
diff --git a/testgen/ui/views/dialogs/data_preview_dialog.py b/testgen/ui/views/dialogs/data_preview_dialog.py
new file mode 100644
index 00000000..a1d199ee
--- /dev/null
+++ b/testgen/ui/views/dialogs/data_preview_dialog.py
@@ -0,0 +1,93 @@
+import pandas as pd
+import streamlit as st
+
+import testgen.ui.services.database_service as db
+from testgen.ui.components import widgets as testgen
+
+
+@st.dialog(title="Data Preview")
+def data_preview_dialog(
+    table_group_id: str,
+    schema_name: str,
+    table_name: str,
+    column_name: str | None = None,
+) -> None:
+    testgen.css_class("s-dialog" if column_name else "xl-dialog")
+
+    testgen.caption(
+        f"Table > Column: {table_name} > {column_name}"
+        if column_name else
+        f"Table: {table_name}"
+    )
+
+    data = get_preview_data(table_group_id, schema_name, table_name, column_name)
+
+    if data.empty:
+        st.warning("The preview data could not be loaded.")
+    else:
+        st.dataframe(
+            data,
+            width=520 if column_name else None,
+            height=700,
+        )
+
+
+@st.cache_data(show_spinner="Loading data ...")
+def get_preview_data(
+    table_group_id: str,
+    schema_name: str,
+    table_name: str,
+    column_name: str | None = None,
+) -> pd.DataFrame:
+    tg_schema = st.session_state["dbschema"]
+    connection_query = f"""
+    SELECT
+        c.sql_flavor,
+        c.project_host,
+        c.project_port,
+        c.project_db,
+        c.project_user,
+        c.project_pw_encrypted,
+        c.url,
+        c.connect_by_url,
+        c.connect_by_key,
+        c.private_key,
+        c.private_key_passphrase,
+        c.http_path
+    FROM {tg_schema}.table_groups tg
+    INNER JOIN {tg_schema}.connections c ON (
+        tg.connection_id = c.connection_id
+    )
+    WHERE tg.id = '{table_group_id}';
+    """
+    # Check for rows before indexing: .iloc[0] on an empty result would raise IndexError,
+    # and a row Series is never "empty" even when no connection was found
+    connection_rows = db.retrieve_data(connection_query)
+
+    if not connection_rows.empty:
+        connection_df = connection_rows.iloc[0]
+        query = f"""
+        SELECT
+            {column_name or "*"}
+        FROM {schema_name}.{table_name}
+        LIMIT 100
+        """
+
+        df = db.retrieve_target_db_df(
+            connection_df["sql_flavor"],
+            connection_df["project_host"],
+            connection_df["project_port"],
+            connection_df["project_db"],
+            connection_df["project_user"],
+            connection_df["project_pw_encrypted"],
+            query,
+            connection_df["url"],
+            connection_df["connect_by_url"],
+            connection_df["connect_by_key"],
+            connection_df["private_key"],
+            connection_df["private_key_passphrase"],
+            connection_df["http_path"],
+        )
+        df.index = df.index + 1
+        return df
+    else:
+        return pd.DataFrame()
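One note on get_preview_data: it interpolates schema_name, table_name, and column_name into the preview SQL unquoted. If identifier quoting is ever needed, a flavor-aware helper mirroring the backtick rule that get_lookup_query applies for Databricks could look like this (hypothetical helper, not part of this change):

    def quote_ident(name: str, sql_flavor: str) -> str:
        # Databricks quotes identifiers with backticks; the other flavors accept ANSI double quotes.
        quote = "`" if sql_flavor == "databricks" else '"'
        return f"{quote}{name}{quote}"

diff --git a/testgen/ui/views/hygiene_issues.py b/testgen/ui/views/hygiene_issues.py
index dd50476d..71a24fe7 100644
--- a/testgen/ui/views/hygiene_issues.py
+++ b/testgen/ui/views/hygiene_issues.py
@@ -16,7 +16,7 @@ from testgen.ui.components.widgets.download_dialog import FILE_DATA_TYPE, download_dialog, zip_multi_file_data
 from testgen.ui.navigation.page import Page
 from testgen.ui.pdf.hygiene_issue_report import create_report
-from testgen.ui.services import project_service
+from testgen.ui.services import project_service, user_session_service
 from testgen.ui.services.hygiene_issues_service import get_source_data as get_source_data_uncached
 from testgen.ui.session import session
 from testgen.ui.views.dialogs.profiling_results_dialog import view_profiling_button
@@ -27,6 +27,7 @@ class HygieneIssuesPage(Page):
     path = "profiling-runs:hygiene"
     can_activate: typing.ClassVar = [
         lambda: session.authentication_status,
+        lambda: not 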
user_session_service.user_has_catalog_role(),
         lambda: "run_id" in session.current_page_args or "profiling-runs",
     ]

@@ -261,19 +262,20 @@ def render(
                 { "icon": "↩︎", "help": "Clear action", "status": "No Decision" },
             ]

-            # Need to render toolbar buttons after grid, so selection status is maintained
-            for action in disposition_actions:
-                action["button"] = actions_column.button(action["icon"], help=action["help"], disabled=not selected)
-
-            # This has to be done as a second loop - otherwise, the rest of the buttons after the clicked one are not displayed briefly while refreshing
-            for action in disposition_actions:
-                if action["button"]:
-                    fm.reset_post_updates(
-                        do_disposition_update(selected, action["status"]),
-                        as_toast=True,
-                        clear_cache=True,
-                        lst_cached_functions=cached_functions,
-                    )
+            if user_session_service.user_can_disposition():
+                # Need to render toolbar buttons after grid, so selection status is maintained
+                for action in disposition_actions:
+                    action["button"] = actions_column.button(action["icon"], help=action["help"], disabled=not selected)
+
+                # This has to be done as a second loop - otherwise, the rest of the buttons after the clicked one are not displayed briefly while refreshing
+                for action in disposition_actions:
+                    if action["button"]:
+                        fm.reset_post_updates(
+                            do_disposition_update(selected, action["status"]),
+                            as_toast=True,
+                            clear_cache=True,
+                            lst_cached_functions=cached_functions,
+                        )
     else:
         st.markdown(":green[**No Hygiene Issues Found**]")
diff --git a/testgen/ui/views/login.py b/testgen/ui/views/login.py
index f2385c1c..9fbabab5 100644
--- a/testgen/ui/views/login.py
+++ b/testgen/ui/views/login.py
@@ -4,6 +4,7 @@
 import streamlit as st
 import streamlit_authenticator as stauth

+from testgen.common.mixpanel_service import MixpanelService
 from testgen.ui.components import widgets as testgen
 from testgen.ui.navigation.page import Page
 from testgen.ui.services import javascript_service, user_session_service
@@ -15,7 +16,7 @@ class LoginPage(Page):
     path = ""
     can_activate: typing.ClassVar = [
-        lambda: not session.authentication_status or session.logging_in or "project-dashboard",
+        lambda: not session.authentication_status or session.logging_in,
     ]

     def render(self, **_kwargs) -> None:
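+        # The streamlit_authenticator form rendered below returns the
+        # (name, authentication_status, username) triple handled further down; after a
+        # successful login the router now navigates to session.user_default_page, which
+        # start_user_session points at the Data Catalog for catalog-role users.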

Welcome to DataKitchen DataOps TestGen

""") name, authentication_status, username = authenticator.login("Login") - + if authentication_status is False: st.error("Username or password is incorrect.") @@ -55,9 +56,9 @@ def render(self, **_kwargs) -> None: # This hack is needed because the auth cookie is not set if navigation happens immediately if session.logging_in: session.logging_in = False - next_route = session.page_pending_login or "project-dashboard" + next_route = session.page_pending_login or session.user_default_page session.page_pending_login = None self.router.navigate(next_route) else: session.logging_in = True - \ No newline at end of file + MixpanelService().send_event("login") diff --git a/testgen/ui/views/profiling_results.py b/testgen/ui/views/profiling_results.py index beb2fb75..24cd6da7 100644 --- a/testgen/ui/views/profiling_results.py +++ b/testgen/ui/views/profiling_results.py @@ -10,8 +10,9 @@ from testgen.ui.components import widgets as testgen from testgen.ui.components.widgets.testgen_component import testgen_component from testgen.ui.navigation.page import Page -from testgen.ui.services import project_service +from testgen.ui.services import project_service, user_session_service from testgen.ui.session import session +from testgen.ui.views.dialogs.data_preview_dialog import data_preview_dialog FORM_DATA_WIDTH = 400 @@ -20,6 +21,7 @@ class ProfilingResultsPage(Page): path = "profiling-runs:results" can_activate: typing.ClassVar = [ lambda: session.authentication_status, + lambda: not user_session_service.user_has_catalog_role(), lambda: "run_id" in session.current_page_args or "profiling-runs", ] @@ -124,7 +126,15 @@ def render(self, run_id: str, table_name: str | None = None, column_name: str | item["hygiene_issues"] = profiling_queries.get_hygiene_issues(run_id, item["table_name"], item.get("column_name")) testgen_component( "column_profiling_results", - props={ "column": json.dumps(item) }, + props={ "column": json.dumps(item), "data_preview": True }, + on_change_handlers={ + "DataPreviewClicked": lambda item: data_preview_dialog( + item["table_group_id"], + item["schema_name"], + item["table_name"], + item.get("column_name"), + ), + }, ) @@ -192,7 +202,7 @@ def render_export_button(df): "fractional_sum", "date_days_present", "date_weeks_present", - "date_months_present", + "date_months_present", ] wrap_columns = ["top_freq_values", "top_patterns"] caption = "{TIMESTAMP}" diff --git a/testgen/ui/views/profiling_runs.py b/testgen/ui/views/profiling_runs.py index b2a0a87a..14504afe 100644 --- a/testgen/ui/views/profiling_runs.py +++ b/testgen/ui/views/profiling_runs.py @@ -13,7 +13,7 @@ from testgen.ui.navigation.menu import MenuItem from testgen.ui.navigation.page import Page from testgen.ui.queries import profiling_run_queries, project_queries -from testgen.ui.services import authentication_service +from testgen.ui.services import user_session_service from testgen.ui.session import session from testgen.ui.views.dialogs.run_profiling_dialog import run_profiling_dialog from testgen.utils import friendly_score, to_int @@ -28,8 +28,15 @@ class DataProfilingPage(Page): path = "profiling-runs" can_activate: typing.ClassVar = [ lambda: session.authentication_status, + lambda: not user_session_service.user_has_catalog_role(), ] - menu_item = MenuItem(icon=PAGE_ICON, label=PAGE_TITLE, section="Data Profiling", order=1) + menu_item = MenuItem( + icon=PAGE_ICON, + label=PAGE_TITLE, + section="Data Profiling", + order=1, + roles=[ role for role in typing.get_args(user_session_service.RoleType) if role != 
"catalog" ], + ) def render(self, project_code: str | None = None, table_group_id: str | None = None, **_kwargs) -> None: testgen.page_header( @@ -38,7 +45,8 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N ) project_code = project_code or session.project - if render_empty_state(project_code): + user_can_run = user_session_service.user_can_edit() + if render_empty_state(project_code, user_can_run): return group_filter_column, actions_column = st.columns([.3, .7], vertical_alignment="bottom") @@ -57,7 +65,7 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N with actions_column: testgen.flex_row_end() - if authentication_service.current_user_has_edit_role(): + if user_can_run: st.button( ":material/play_arrow: Run Profiling", help="Run profiling for a table group", @@ -78,12 +86,17 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N with list_container: testgen_component( "profiling_runs", - props={ "items": paginated_df.to_json(orient="records") }, + props={ + "items": paginated_df.to_json(orient="records"), + "permissions": { + "can_run": user_can_run, + }, + }, event_handlers={ "RunCanceled": on_cancel_run } ) -def render_empty_state(project_code: str) -> bool: +def render_empty_state(project_code: str, user_can_run: bool) -> bool: project_summary_df = project_queries.get_summary_by_code(project_code) if project_summary_df["profiling_runs_ct"]: return False @@ -113,6 +126,7 @@ def render_empty_state(project_code: str) -> bool: icon=PAGE_ICON, message=testgen.EmptyStateMessage.Profiling, action_label="Run Profiling", + action_disabled=not user_can_run, button_onclick=partial(run_profiling_dialog, project_code), button_icon="play_arrow", ) diff --git a/testgen/ui/views/project_dashboard.py b/testgen/ui/views/project_dashboard.py index c55e02f3..be3a2e82 100644 --- a/testgen/ui/views/project_dashboard.py +++ b/testgen/ui/views/project_dashboard.py @@ -8,7 +8,7 @@ from testgen.ui.navigation.menu import MenuItem from testgen.ui.navigation.page import Page from testgen.ui.queries import project_queries -from testgen.ui.services import test_suite_service +from testgen.ui.services import test_suite_service, user_session_service from testgen.ui.session import session from testgen.utils import format_field, friendly_score, score @@ -21,8 +21,14 @@ class ProjectDashboardPage(Page): path = "project-dashboard" can_activate: typing.ClassVar = [ lambda: session.authentication_status, + lambda: not user_session_service.user_has_catalog_role(), ] - menu_item = MenuItem(icon=PAGE_ICON, label=PAGE_TITLE, order=0) + menu_item = MenuItem( + icon=PAGE_ICON, + label=PAGE_TITLE, + order=0, + roles=[ role for role in typing.get_args(user_session_service.RoleType) if role != "catalog" ], + ) def render(self, project_code: str | None = None, **_kwargs): testgen.page_header( diff --git a/testgen/ui/views/project_settings.py b/testgen/ui/views/project_settings.py index b28d7492..21e0059e 100644 --- a/testgen/ui/views/project_settings.py +++ b/testgen/ui/views/project_settings.py @@ -1,14 +1,16 @@ +import time import typing +from functools import partial import streamlit as st +from streamlit.delta_generator import DeltaGenerator from testgen.commands.run_observability_exporter import test_observability_exporter from testgen.ui.components import widgets as testgen from testgen.ui.navigation.menu import MenuItem from testgen.ui.navigation.page import Page -from testgen.ui.services import form_service, project_service 
+from testgen.ui.services import project_service, user_session_service from testgen.ui.session import session -from testgen.ui.views.dialogs.application_logs_dialog import view_log_file PAGE_TITLE = "Project Settings" @@ -17,12 +19,22 @@ class ProjectSettingsPage(Page): path = "settings" can_activate: typing.ClassVar = [ lambda: session.authentication_status, - lambda: session.project is not None or "project-dashboard", + lambda: user_session_service.user_is_admin(), + lambda: session.project is not None, ] - menu_item = MenuItem(icon="settings", label=PAGE_TITLE, section="Settings", order=0) + menu_item = MenuItem( + icon="settings", + label=PAGE_TITLE, + section="Settings", + order=0, + roles=[ "admin" ], + ) + + project: dict | None = None + existing_names: list[str] | None = None def render(self, project_code: str | None = None, **_kwargs) -> None: - project = project_service.get_project_by_code(project_code or session.project) + self.project = project_service.get_project_by_code(project_code or session.project) testgen.page_header( PAGE_TITLE, @@ -30,38 +42,97 @@ def render(self, project_code: str | None = None, **_kwargs) -> None: ) testgen.whitespace(1) - form_service.render_edit_form( - "", - project, - "projects", - project.keys(), - ["id"], - form_unique_key="project-settings", - ) + self.show_edit_form() - _, col2, col3 = st.columns([50, 25, 25]) - if col2.button("Test Observability Connection", use_container_width=False): - status = st.empty() - status.info("Testing your connection to DataKitchen Observability...") - try: - project_code = project["project_code"] - api_url = project["observability_api_url"] - api_key = project["observability_api_key"] - test_observability_exporter(project_code, api_url, api_key) - status.empty() - status.success("The Observability connection test was successful.") - except Exception as e: - status.empty() - status.error("An error occurred during the Observability connection test.") - error_message = e.args[0] - st.text_area("Error Details", value=error_message) + def show_edit_form(self) -> None: + form_container = st.container() + status_container = st.container() + + with form_container: + with testgen.card(): + name_input = st.text_input( + label="Project Name", + value=self.project["project_name"], + max_chars=30, + key="project_settings:keys:project_name", + ) + st.text_input( + label="Observability API URL", + value=self.project["observability_api_url"], + key="project_settings:keys:observability_api_url", + ) + st.text_input( + label="Observability API Key", + value=self.project["observability_api_key"], + key="project_settings:keys:observability_api_key", + ) + + testgen.whitespace(1) + test_button_column, warning_column, save_button_column = st.columns([.4, .3, .3]) + testgen.flex_row_start(test_button_column) + testgen.flex_row_end(save_button_column) - view_log_file(col3) + with test_button_column: + testgen.button( + type_="stroked", + color="basic", + label="Test Observability Connection", + width=250, + on_click=partial(self._display_connection_status, status_container), + key="project-settings:keys:test-connection", + ) + with warning_column: + if not name_input: + testgen.text("Project name is required", "color: var(--red)") + elif self.existing_names and name_input in self.existing_names: + testgen.text("Project name in use", "color: var(--red)") -def set_add_new_project(): - session.add_project = True + with save_button_column: + testgen.button( + type_="flat", + label="Save", + width=100, + on_click=self.edit_project, + 
key="project-settings:keys:edit", + ) + def edit_project(self) -> None: + project = self._get_edited_project() + if project["project_name"] and (not self.existing_names or project["project_name"] not in self.existing_names): + project_service.edit_project(project) + st.toast("Changes have been saved.") + + def _get_edited_project(self) -> None: + edited_project = { + "id": self.project["id"], + "project_code": self.project["project_code"], + } + # We have to get the input widget values from the session state + # The return values for st.text_input do not reflect the latest user input if the button is clicked without unfocusing the input + # https://discuss.streamlit.io/t/issue-with-modifying-text-using-st-text-input-and-st-button/56619/5 + for key in [ "project_name", "observability_api_url", "observability_api_key" ]: + edited_project[key] = st.session_state[f"project_settings:keys:{key}"].strip() + return edited_project + + def _display_connection_status(self, status_container: DeltaGenerator) -> None: + single_element_container = status_container.empty() + single_element_container.info("Connecting ...") + + try: + project = self._get_edited_project() + test_observability_exporter( + project["project_code"], + project["observability_api_url"], + project["observability_api_key"], + ) + status_container.success("The connection was successful.") + except Exception as e: + with single_element_container.container(): + st.error("Error attempting the connection.") + error_message = e.args[0] + st.caption("Connection Error Details") + with st.container(border=True): + st.markdown(error_message) -def set_edit_current_project(): - session.add_project = False + time.sleep(0.1) diff --git a/testgen/ui/views/quality_dashboard.py b/testgen/ui/views/quality_dashboard.py index d58875d4..3a09fe7d 100644 --- a/testgen/ui/views/quality_dashboard.py +++ b/testgen/ui/views/quality_dashboard.py @@ -1,10 +1,11 @@ -from typing import ClassVar +from typing import ClassVar, get_args from testgen.ui.components import widgets as testgen from testgen.ui.navigation.menu import MenuItem from testgen.ui.navigation.page import Page from testgen.ui.queries import project_queries from testgen.ui.queries.scoring_queries import get_all_score_cards +from testgen.ui.services import user_session_service from testgen.ui.session import session from testgen.utils import format_score_card @@ -15,8 +16,14 @@ class QualityDashboardPage(Page): path = "quality-dashboard" can_activate: ClassVar = [ lambda: session.authentication_status, + lambda: not user_session_service.user_has_catalog_role(), ] - menu_item = MenuItem(icon="readiness_score", label=PAGE_TITLE, order=1) + menu_item = MenuItem( + icon="readiness_score", + label=PAGE_TITLE, + order=1, + roles=[ role for role in get_args(user_session_service.RoleType) if role != "catalog" ], + ) def render(self, *, project_code: str, **_kwargs) -> None: project_summary = project_queries.get_summary_by_code(project_code) diff --git a/testgen/ui/views/score_details.py b/testgen/ui/views/score_details.py index d91bc37c..749ce1aa 100644 --- a/testgen/ui/views/score_details.py +++ b/testgen/ui/views/score_details.py @@ -1,9 +1,12 @@ +import logging from io import BytesIO from typing import ClassVar import pandas as pd import streamlit as st +from testgen.commands.run_refresh_score_cards_results import run_recalculate_score_card +from testgen.common.models import with_database_session from testgen.common.models.scores import ScoreDefinition, ScoreDefinitionBreakdownItem, SelectedIssue from 
testgen.ui.components import widgets as testgen
 from testgen.ui.components.widgets.download_dialog import FILE_DATA_TYPE, download_dialog, zip_multi_file_data
@@ -11,16 +14,19 @@
 from testgen.ui.navigation.router import Router
 from testgen.ui.pdf import hygiene_issue_report, test_result_report
 from testgen.ui.queries.scoring_queries import get_all_score_cards, get_score_card_issue_reports
-from testgen.ui.services import authentication_service
+from testgen.ui.services import user_session_service
 from testgen.ui.session import session, temp_value
 from testgen.ui.views.dialogs.profiling_results_dialog import profiling_results_dialog
 from testgen.utils import format_score_card, format_score_card_breakdown, format_score_card_issues

+LOG = logging.getLogger("testgen")
+

 class ScoreDetailsPage(Page):
     path = "quality-dashboard:score-details"
     can_activate: ClassVar = [
         lambda: session.authentication_status,
+        lambda: not user_session_service.user_has_catalog_role(),
         lambda: "definition_id" in session.current_page_args or "quality-dashboard",
     ]

@@ -55,8 +61,8 @@ def render(
         score_breakdown = None
         issues = None
         with st.spinner(text="Loading data ..."):
-            user_can_edit = authentication_service.current_user_has_edit_role()
-            score_card = format_score_card(score_definition.as_score_card())
+            user_can_edit = user_session_service.user_can_edit()
+            score_card = format_score_card(score_definition.as_cached_score_card())
             if not score_type:
                 score_type = "cde_score" if score_card["cde_score"] and not score_card["score"] else "score"
             if not drilldown:
@@ -96,7 +102,8 @@ def render(
                     payload["column_name"],
                     payload["table_name"],
                     payload["table_group_id"],
-                )
+                ),
+                "RecalculateHistory": recalculate_score_history,
             },
         )

@@ -146,6 +153,7 @@ def get_report_file_data(update_progress, issue) -> FILE_DATA_TYPE:


 @st.dialog(title="Delete Scorecard")
+@with_database_session
 def delete_score_card(definition_id: str) -> None:
     score_definition = ScoreDefinition.get(definition_id)

@@ -171,3 +179,13 @@ def delete_score_card(definition_id: str) -> None:
     score_definition.delete()
     get_all_score_cards.clear()
     Router().navigate("quality-dashboard")
+
+
+def recalculate_score_history(definition_id: str) -> None:
+    try:
+        score_definition = ScoreDefinition.get(definition_id)
+        run_recalculate_score_card(project_code=score_definition.project_code, definition_id=score_definition.id)
+        st.toast("Scorecard trend recalculated", icon=":material/task_alt:")
+    except Exception:
+        LOG.exception(f"Failure recalculating history for scorecard id={definition_id}")
+        st.toast("Recalculating the trend failed. 
Try again", icon=":material/error:") diff --git a/testgen/ui/views/score_explorer.py b/testgen/ui/views/score_explorer.py index b3afea39..01e92cdd 100644 --- a/testgen/ui/views/score_explorer.py +++ b/testgen/ui/views/score_explorer.py @@ -4,18 +4,23 @@ import pandas as pd import streamlit as st -from testgen.commands.run_refresh_score_cards_results import run_refresh_score_cards_results +from testgen.commands.run_refresh_score_cards_results import ( + run_recalculate_score_card, + run_refresh_score_cards_results, +) from testgen.common.models.scores import ScoreCategory, ScoreDefinition, ScoreDefinitionFilter, SelectedIssue from testgen.ui.components import widgets as testgen from testgen.ui.components.widgets.download_dialog import FILE_DATA_TYPE, download_dialog, zip_multi_file_data from testgen.ui.navigation.page import Page from testgen.ui.navigation.router import Router from testgen.ui.pdf import hygiene_issue_report, test_result_report +from testgen.ui.queries import profiling_queries, test_run_queries from testgen.ui.queries.scoring_queries import ( get_all_score_cards, get_score_card_issue_reports, get_score_category_values, ) +from testgen.ui.services import user_session_service from testgen.ui.session import session from testgen.utils import format_score_card, format_score_card_breakdown, format_score_card_issues @@ -24,6 +29,7 @@ class ScoreExplorerPage(Page): path = "quality-dashboard:explorer" can_activate: ClassVar = [ lambda: session.authentication_status, + lambda: not user_session_service.user_has_catalog_role(), ] def render( @@ -55,6 +61,7 @@ def render( issues = None filter_values = {} with st.spinner(text="Loading data ..."): + user_can_edit = user_session_service.user_can_edit() filter_values = get_score_category_values(project_code) score_definition: ScoreDefinition = ScoreDefinition( @@ -114,6 +121,9 @@ def render( "drilldown": drilldown, "issues": issues, "is_new": not definition_id, + "permissions": { + "can_edit": user_can_edit, + }, }, on_change_handlers={ "ScoreUpdated": set_score_definition, @@ -205,10 +215,25 @@ def save_score_definition(_) -> None: if not filters: raise ValueError("At least one filter is required to save the scorecard") + is_new = True score_definition = ScoreDefinition() + refresh_kwargs = {} if definition_id: + is_new = False score_definition = ScoreDefinition.get(definition_id) + if is_new: + latest_run = max( + profiling_queries.get_latest_run_date(session.project), + test_run_queries.get_latest_run_date(session.project), + key=lambda run: getattr(run, "run_time", 0), + ) + + refresh_kwargs = { + "add_history_entry": True, + "refresh_date": latest_run.run_time if latest_run else None, + } + score_definition.project_code = session.project score_definition.name = name score_definition.total_score = total_score and total_score.lower() == "true" @@ -219,9 +244,12 @@ def save_score_definition(_) -> None: for f in filters if (field_value := f.split("=")) ] score_definition.save() - run_refresh_score_cards_results(definition_id=score_definition.id) + run_refresh_score_cards_results(definition_id=score_definition.id, **refresh_kwargs) get_all_score_cards.clear() + if not is_new: + run_recalculate_score_card(project_code=score_definition.project_code, definition_id=score_definition.id) + Router().set_query_params({ "name": None, "total_score": None, diff --git a/testgen/ui/views/table_groups/page.py b/testgen/ui/views/table_groups/page.py index 1823a0c2..f97a6c45 100644 --- a/testgen/ui/views/table_groups/page.py +++ 
b/testgen/ui/views/table_groups/page.py @@ -6,13 +6,13 @@ import streamlit as st from sqlalchemy.exc import IntegrityError -import testgen.ui.services.authentication_service as authentication_service import testgen.ui.services.connection_service as connection_service import testgen.ui.services.form_service as fm import testgen.ui.services.table_group_service as table_group_service +from testgen.common.models import with_database_session from testgen.ui.components import widgets as testgen from testgen.ui.navigation.page import Page -from testgen.ui.services import project_service +from testgen.ui.services import project_service, user_session_service from testgen.ui.services.string_service import empty_if_null from testgen.ui.session import session from testgen.ui.views.dialogs.run_profiling_dialog import run_profiling_dialog @@ -22,7 +22,7 @@ class TableGroupsPage(Page): path = "connections:table-groups" can_activate: typing.ClassVar = [ lambda: session.authentication_status, - lambda: authentication_service.current_user_has_admin_role(), + lambda: not user_session_service.user_has_catalog_role(), lambda: "connection_id" in session.current_page_args or "connections", ] @@ -36,6 +36,7 @@ def render(self, connection_id: str, **_kwargs) -> None: project_code = connection["project_code"] project_service.set_current_project(project_code) + user_can_edit = user_session_service.user_can_edit() testgen.page_header( "Table Groups", @@ -55,6 +56,7 @@ def render(self, connection_id: str, **_kwargs) -> None: icon="table_view", message=testgen.EmptyStateMessage.TableGroup, action_label="Add Table Group", + action_disabled=not user_can_edit, button_onclick=partial(self.add_table_group_dialog, project_code, connection), ) return @@ -63,25 +65,32 @@ def render(self, connection_id: str, **_kwargs) -> None: _, actions_column = st.columns([.1, .9], vertical_alignment="bottom") testgen.flex_row_end(actions_column) + if user_can_edit: + actions_column.button( + ":material/add: Add Table Group", + on_click=partial(self.add_table_group_dialog, project_code, connection) + ) + for _, table_group in df.iterrows(): with testgen.card(title=table_group["table_groups_name"]) as table_group_card: - with table_group_card.actions: - testgen.button( - type_="icon", - icon="edit", - tooltip="Edit table group", - tooltip_position="right", - on_click=partial(self.edit_table_group_dialog, project_code, connection, table_group), - key=f"tablegroups:keys:edit:{table_group['id']}", - ) - testgen.button( - type_="icon", - icon="delete", - tooltip="Delete table group", - tooltip_position="right", - on_click=partial(self.delete_table_group_dialog, table_group), - key=f"tablegroups:keys:delete:{table_group['id']}", - ) + if user_can_edit: + with table_group_card.actions: + testgen.button( + type_="icon", + icon="edit", + tooltip="Edit table group", + tooltip_position="right", + on_click=partial(self.edit_table_group_dialog, project_code, connection, table_group), + key=f"tablegroups:keys:edit:{table_group['id']}", + ) + testgen.button( + type_="icon", + icon="delete", + tooltip="Delete table group", + tooltip_position="right", + on_click=partial(self.delete_table_group_dialog, table_group), + key=f"tablegroups:keys:delete:{table_group['id']}", + ) main_section, actions_section = st.columns([.8, .2]) @@ -122,21 +131,17 @@ def render(self, connection_id: str, **_kwargs) -> None: testgen.caption("Min Profiling Age (Days)") st.markdown(table_group["profiling_delay_days"] or "0") - with actions_section: - testgen.button( - 
type_="stroked", - label="Run Profiling", - on_click=partial(run_profiling_dialog, project_code, table_group), - key=f"tablegroups:keys:runprofiling:{table_group['id']}", - ) - - actions_column.button( - ":material/add: Add Table Group", - help="Add a new Table Group", - on_click=partial(self.add_table_group_dialog, project_code, connection) - ) + if user_can_edit: + with actions_section: + testgen.button( + type_="stroked", + label="Run Profiling", + on_click=partial(run_profiling_dialog, project_code, table_group), + key=f"tablegroups:keys:runprofiling:{table_group['id']}", + ) @st.dialog(title="Add Table Group") + @with_database_session def add_table_group_dialog(self, project_code, connection): show_table_group_form("add", project_code, connection) @@ -168,14 +173,11 @@ def delete_table_group_dialog(self, table_group: pd.Series): accept_cascade_delete = st.toggle("I accept deletion of this Table Group and all related TestGen data.") with st.form("Delete Table Group", clear_on_submit=True, border=False): - disable_delete_button = authentication_service.current_user_has_read_role() or ( - not can_be_deleted and not accept_cascade_delete - ) _, button_column = st.columns([.85, .15]) with button_column: delete = st.form_submit_button( "Delete", - disabled=disable_delete_button, + disabled=not can_be_deleted and not accept_cascade_delete, type="primary", use_container_width=True, ) @@ -392,7 +394,6 @@ def show_table_group_form(mode, project_code: str, connection: dict, table_group submit = st.form_submit_button( "Save" if mode == "edit" else "Add", use_container_width=True, - disabled=authentication_service.current_user_has_read_role(), ) if submit: diff --git a/testgen/ui/views/test_definitions.py b/testgen/ui/views/test_definitions.py index be127739..90e4f686 100644 --- a/testgen/ui/views/test_definitions.py +++ b/testgen/ui/views/test_definitions.py @@ -15,7 +15,7 @@ from testgen.common import date_service from testgen.ui.components import widgets as testgen from testgen.ui.navigation.page import Page -from testgen.ui.services import authentication_service, project_service +from testgen.ui.services import project_service, user_session_service from testgen.ui.services.string_service import empty_if_null, snake_case_to_title_case from testgen.ui.session import session from testgen.ui.views.dialogs.profiling_results_dialog import view_profiling_button @@ -27,6 +27,7 @@ class TestDefinitionsPage(Page): path = "test-suites:definitions" can_activate: typing.ClassVar = [ lambda: session.authentication_status, + lambda: not user_session_service.user_has_catalog_role(), lambda: "test_suite_id" in session.current_page_args or "test-suites", ] @@ -41,7 +42,8 @@ def render(self, test_suite_id: str, table_name: str | None = None, column_name: table_group = table_group_service.get_by_id(test_suite["table_groups_id"]) project_code = table_group["project_code"] project_service.set_current_project(project_code) - user_can_edit = authentication_service.current_user_has_edit_role() + user_can_edit = user_session_service.user_can_edit() + user_can_disposition = user_session_service.user_can_disposition() testgen.page_header( "Test Definitions", @@ -81,7 +83,7 @@ def render(self, test_suite_id: str, table_name: str | None = None, column_name: with disposition_column: str_help = "Toggle on to perform actions on multiple test definitions" - do_multi_select = st.toggle("Multi-Select", help=str_help) + do_multi_select = user_can_disposition and st.toggle("Multi-Select", help=str_help) if user_can_edit and 
actions_column.button( ":material/add: Add", help="Add a new Test Definition" @@ -94,49 +96,55 @@ def render(self, test_suite_id: str, table_name: str | None = None, column_name: ) fm.render_refresh_button(table_actions_column) - disposition_actions = [ - { "icon": "✓", "help": "Activate for future runs", "attribute": "test_active", "value": True, "message": "Activated" }, - { "icon": "🔇", "help": "Deactivate Test for future runs", "attribute": "test_active", "value": False, "message": "Deactivated" }, - { "icon": "🔒", "help": "Protect from future test generation", "attribute": "lock_refresh", "value": True, "message": "Locked" }, - { "icon": "🔐", "help": "Unlock for future test generation", "attribute": "lock_refresh", "value": False, "message": "Unlocked" }, - ] - - for action in disposition_actions: - action["button"] = disposition_column.button(action["icon"], help=action["help"], disabled=not selected) - - # This has to be done as a second loop - otherwise, the rest of the buttons after the clicked one are not displayed briefly while refreshing - for action in disposition_actions: - if action["button"]: - fm.reset_post_updates( - update_test_definition(selected, action["attribute"], action["value"], action["message"]), - as_toast=True, - clear_cache=True, - lst_cached_functions=[], - ) + if user_can_disposition: + disposition_actions = [ + { "icon": "✓", "help": "Activate for future runs", "attribute": "test_active", "value": True, "message": "Activated" }, + { "icon": "🔇", "help": "Deactivate Test for future runs", "attribute": "test_active", "value": False, "message": "Deactivated" }, + ] + + if user_can_edit: + disposition_actions.extend([ + { "icon": "🔒", "help": "Protect from future test generation", "attribute": "lock_refresh", "value": True, "message": "Locked" }, + { "icon": "🔐", "help": "Unlock for future test generation", "attribute": "lock_refresh", "value": False, "message": "Unlocked" }, + ]) + + for action in disposition_actions: + action["button"] = disposition_column.button(action["icon"], help=action["help"], disabled=not selected) + + # This has to be done as a second loop - otherwise, the rest of the buttons after the clicked one are not displayed briefly while refreshing + for action in disposition_actions: + if action["button"]: + fm.reset_post_updates( + update_test_definition(selected, action["attribute"], action["value"], action["message"]), + as_toast=True, + clear_cache=True, + lst_cached_functions=[], + ) if selected: selected_test_def = selected[0] - if user_can_edit and actions_column.button( - ":material/edit: Edit", - help="Edit the Test Definition", - disabled=not selected, - ): - edit_test_dialog(project_code, table_group, test_suite, table_name, column_name, selected_test_def) - - if user_can_edit and actions_column.button( - ":material/file_copy: Copy/Move", - help="Copy or Move the Test Definition", - disabled=not selected, - ): - copy_move_test_dialog(project_code, table_group, test_suite, selected) - - if user_can_edit and actions_column.button( - ":material/delete: Delete", - help="Delete the selected Test Definition", - disabled=not selected, - ): - delete_test_dialog(selected_test_def) + if user_can_edit: + if actions_column.button( + ":material/edit: Edit", + help="Edit the Test Definition", + disabled=not selected, + ): + edit_test_dialog(project_code, table_group, test_suite, table_name, column_name, selected_test_def) + + if actions_column.button( + ":material/file_copy: Copy/Move", + help="Copy or Move the Test Definition", + disabled=not 
selected, + ): + copy_move_test_dialog(project_code, table_group, test_suite, selected) + + if actions_column.button( + ":material/delete: Delete", + help="Delete the selected Test Definition", + disabled=not selected, + ): + delete_test_dialog(selected_test_def) @st.dialog("Delete Test") @@ -166,12 +174,11 @@ def delete_test_dialog(selected_test_definition): ) with st.form("Delete Test Definition", clear_on_submit=True, border=False): - disable_delete_button = authentication_service.current_user_has_read_role() or not can_be_deleted _, button_column = st.columns([.85, .15]) with button_column: delete = st.form_submit_button( "Delete", - disabled=disable_delete_button, + disabled=not can_be_deleted, type="primary", use_container_width=True, ) @@ -583,7 +590,7 @@ def show_test_form( # Add Validate button if test_type in ("Condition_Flag", "CUSTOM"): validate = bottom_left_column.button( - "Validate", disabled=authentication_service.current_user_has_read_role() + "Validate", ) if validate: try: @@ -598,7 +605,7 @@ def show_test_form( # Some or all (it seems random) of the input fields disappear when this happens time.sleep(0.1) - submit = bottom_left_column.button("Save", disabled=authentication_service.current_user_has_read_role()) + submit = bottom_left_column.button("Save") if submit: if validate_form(test_scope, test_type, test_definition, column_name_label): @@ -624,8 +631,6 @@ def edit_test_dialog(project_code, table_group, test_suite, str_table_name, str_ def copy_move_test_dialog(project_code, origin_table_group, origin_test_suite, selected_test_definitions): st.text(f"Selected tests: {len(selected_test_definitions)}") - user_can_edit = authentication_service.current_user_has_edit_role() - group_filter_column, suite_filter_column = st.columns([.5, .5], vertical_alignment="bottom") with group_filter_column: @@ -675,12 +680,12 @@ def copy_move_test_dialog(project_code, origin_table_group, origin_test_suite, s copy = copy_column.button( "Copy", use_container_width=True, - disabled=not (user_can_edit and len(movable_test_definitions)>0), + disabled=not len(movable_test_definitions)>0, ) move = move_column.button( "Move", - disabled=not (user_can_edit and len(movable_test_definitions)>0), + disabled=not len(movable_test_definitions)>0, use_container_width=True, ) diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py index 334fc9ba..42d6001f 100644 --- a/testgen/ui/views/test_results.py +++ b/testgen/ui/views/test_results.py @@ -18,20 +18,8 @@ from testgen.ui.components.widgets.download_dialog import FILE_DATA_TYPE, download_dialog, zip_multi_file_data from testgen.ui.navigation.page import Page from testgen.ui.pdf.test_result_report import create_report -from testgen.ui.services import authentication_service, project_service +from testgen.ui.services import project_service, test_definition_service, test_results_service, user_session_service from testgen.ui.services.string_service import empty_if_null -from testgen.ui.services.test_definition_service import ( - get_test_definition as get_test_definition_uncached, -) -from testgen.ui.services.test_results_service import ( - do_source_data_lookup as do_source_data_lookup_uncached, -) -from testgen.ui.services.test_results_service import ( - do_source_data_lookup_custom as do_source_data_lookup_custom_uncached, -) -from testgen.ui.services.test_results_service import ( - get_test_result_history as get_test_result_history_uncached, -) from testgen.ui.session import session from 
testgen.ui.views.dialogs.profiling_results_dialog import view_profiling_button from testgen.ui.views.test_definitions import show_test_form_by_id @@ -44,6 +32,7 @@ class TestResultsPage(Page): path = "test-runs:results" can_activate: typing.ClassVar = [ lambda: session.authentication_status, + lambda: not user_session_service.user_has_catalog_role(), lambda: "run_id" in session.current_page_args or "test-runs", ] @@ -184,18 +173,19 @@ def render( { "icon": "â†Šī¸Ž", "help": "Clear action", "status": "No Decision" }, ] - for action in disposition_actions: - action["button"] = actions_column.button(action["icon"], help=action["help"], disabled=disable_dispo) - - # This has to be done as a second loop - otherwise, the rest of the buttons after the clicked one are not displayed briefly while refreshing - for action in disposition_actions: - if action["button"]: - fm.reset_post_updates( - do_disposition_update(selected, action["status"]), - as_toast=True, - clear_cache=True, - lst_cached_functions=affected_cached_functions, - ) + if user_session_service.user_can_disposition(): + for action in disposition_actions: + action["button"] = actions_column.button(action["icon"], help=action["help"], disabled=disable_dispo) + + # This has to be done as a second loop - otherwise, the rest of the buttons after the clicked one are not displayed briefly while refreshing + for action in disposition_actions: + if action["button"]: + fm.reset_post_updates( + do_disposition_update(selected, action["status"]), + as_toast=True, + clear_cache=True, + lst_cached_functions=affected_cached_functions, + ) # Help Links st.markdown( @@ -286,107 +276,7 @@ def get_test_results( sorting_columns: list[str] | None = None, ) -> pd.DataFrame: schema: str = st.session_state["dbschema"] - return get_test_results_uncached(schema, run_id, test_status, test_type_id, table_name, column_name, sorting_columns) - - -def get_test_results_uncached( - schema: str, - run_id: str, - test_status: str | None = None, - test_type_id: str | None = None, - table_name: str | None = None, - column_name: str | None = None, - sorting_columns: list[str] | None = None, -) -> pd.DataFrame: - # First visible row first, so multi-select checkbox will render - order_by = "ORDER BY " + (", ".join(" ".join(col) for col in sorting_columns)) if sorting_columns else "" - filters = "" - if test_status: - filters += f" AND r.result_status IN ({test_status})" - if test_type_id: - filters += f" AND r.test_type = '{test_type_id}'" - if table_name: - filters += f" AND r.table_name = '{table_name}'" - if column_name: - filters += f" AND r.column_names = '{column_name}'" - - sql = f""" - WITH run_results - AS (SELECT * - FROM {schema}.test_results r - WHERE - r.test_run_id = '{run_id}' - {filters} - ) - SELECT r.table_name, - p.project_name, ts.test_suite, tg.table_groups_name, cn.connection_name, cn.project_host, cn.sql_flavor, - tt.dq_dimension, tt.test_scope, - r.schema_name, r.column_names, r.test_time::DATE as test_date, r.test_type, tt.id as test_type_id, - tt.test_name_short, tt.test_name_long, r.test_description, tt.measure_uom, tt.measure_uom_description, - c.test_operator, r.threshold_value::NUMERIC(16, 5), r.result_measure::NUMERIC(16, 5), r.result_status, - CASE - WHEN r.result_code <> 1 THEN r.disposition - ELSE 'Passed' - END as disposition, - NULL::VARCHAR(1) as action, - r.input_parameters, r.result_message, CASE WHEN result_code <> 1 THEN r.severity END as severity, - r.result_code as passed_ct, - (1 - r.result_code)::INTEGER as exception_ct, - CASE - 
WHEN result_status = 'Warning' - AND result_message NOT ILIKE 'Inactivated%%' THEN 1 - END::INTEGER as warning_ct, - CASE - WHEN result_status = 'Failed' - AND result_message NOT ILIKE 'Inactivated%%' THEN 1 - END::INTEGER as failed_ct, - CASE - WHEN result_message ILIKE 'Inactivated%%' THEN 1 - END as execution_error_ct, - p.project_code, r.table_groups_id::VARCHAR, - r.id::VARCHAR as test_result_id, r.test_run_id::VARCHAR, - c.id::VARCHAR as connection_id, r.test_suite_id::VARCHAR, - r.test_definition_id::VARCHAR as test_definition_id_runtime, - CASE - WHEN r.auto_gen = TRUE THEN d.id - ELSE r.test_definition_id - END::VARCHAR as test_definition_id_current, - r.auto_gen, - - -- These are used in the PDF report - tt.threshold_description, tt.usage_notes, r.test_time - - FROM run_results r - INNER JOIN {schema}.test_types tt - ON (r.test_type = tt.test_type) - LEFT JOIN {schema}.test_definitions rd - ON (r.test_definition_id = rd.id) - LEFT JOIN {schema}.test_definitions d - ON (r.test_suite_id = d.test_suite_id - AND r.table_name = d.table_name - AND r.column_names = COALESCE(d.column_name, 'N/A') - AND r.test_type = d.test_type - AND r.auto_gen = TRUE - AND d.last_auto_gen_date IS NOT NULL) - INNER JOIN {schema}.test_suites ts - ON r.test_suite_id = ts.id - INNER JOIN {schema}.projects p - ON (ts.project_code = p.project_code) - INNER JOIN {schema}.table_groups tg - ON (ts.table_groups_id = tg.id) - INNER JOIN {schema}.connections cn - ON (tg.connection_id = cn.connection_id) - LEFT JOIN {schema}.cat_test_conditions c - ON (cn.sql_flavor = c.sql_flavor - AND r.test_type = c.test_type) - {order_by} ; - """ - df = db.retrieve_data(sql) - - # Clean Up - df["test_date"] = pd.to_datetime(df["test_date"]) - - return df + return test_results_service.get_test_results(schema, run_id, test_status, test_type_id, table_name, column_name, sorting_columns) @st.cache_data(show_spinner="Retrieving Status") @@ -464,25 +354,25 @@ def get_test_result_summary(run_id): @st.cache_data(show_spinner=ALWAYS_SPIN) def get_test_definition(str_test_def_id): str_schema = st.session_state["dbschema"] - return get_test_definition_uncached(str_schema, str_test_def_id) + return test_definition_service.get_test_definition(str_schema, str_test_def_id) @st.cache_data(show_spinner=False) def do_source_data_lookup(selected_row): schema = st.session_state["dbschema"] - return do_source_data_lookup_uncached(schema, selected_row) + return test_results_service.do_source_data_lookup(schema, selected_row) @st.cache_data(show_spinner=False) def do_source_data_lookup_custom(selected_row): schema = st.session_state["dbschema"] - return do_source_data_lookup_custom_uncached(schema, selected_row) + return test_results_service.do_source_data_lookup_custom(schema, selected_row) @st.cache_data(show_spinner=False) def get_test_result_history(selected_row): schema = st.session_state["dbschema"] - return get_test_result_history_uncached(schema, selected_row) + return test_results_service.get_test_result_history(schema, selected_row) def show_test_def_detail(str_test_def_id): @@ -657,7 +547,7 @@ def show_result_detail( with pg_col2: v_col1, v_col2, v_col3, v_col4 = st.columns([.25, .25, .25, .25]) - if authentication_service.current_user_has_edit_role(): + if user_session_service.user_can_edit(): view_edit_test(v_col1, selected_row["test_definition_id_current"]) if selected_row["test_scope"] == "column": diff --git a/testgen/ui/views/test_runs.py b/testgen/ui/views/test_runs.py index 26a8ce21..765e46b6 100644 --- 
a/testgen/ui/views/test_runs.py +++ b/testgen/ui/views/test_runs.py @@ -13,7 +13,7 @@ from testgen.ui.navigation.menu import MenuItem from testgen.ui.navigation.page import Page from testgen.ui.queries import project_queries, test_run_queries -from testgen.ui.services import authentication_service +from testgen.ui.services import user_session_service from testgen.ui.session import session from testgen.ui.views.dialogs.run_tests_dialog import run_tests_dialog from testgen.utils import friendly_score, to_int @@ -27,9 +27,15 @@ class TestRunsPage(Page): path = "test-runs" can_activate: typing.ClassVar = [ lambda: session.authentication_status, - lambda: session.project != None or "project-dashboard", + lambda: not user_session_service.user_has_catalog_role(), ] - menu_item = MenuItem(icon=PAGE_ICON, label=PAGE_TITLE, section="Data Quality Testing", order=0) + menu_item = MenuItem( + icon=PAGE_ICON, + label=PAGE_TITLE, + section="Data Quality Testing", + order=0, + roles=[ role for role in typing.get_args(user_session_service.RoleType) if role != "catalog" ], + ) def render(self, project_code: str | None = None, table_group_id: str | None = None, test_suite_id: str | None = None, **_kwargs) -> None: testgen.page_header( @@ -38,7 +44,8 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N ) project_code = project_code or session.project - if render_empty_state(project_code): + user_can_run = user_session_service.user_can_edit() + if render_empty_state(project_code, user_can_run): return group_filter_column, suite_filter_column, actions_column = st.columns([.3, .3, .4], vertical_alignment="bottom") @@ -68,7 +75,7 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N with actions_column: testgen.flex_row_end(actions_column) - if authentication_service.current_user_has_edit_role(): + if user_can_run: st.button( ":material/play_arrow: Run Tests", help="Run tests for a test suite", @@ -88,12 +95,17 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N with list_container: testgen_component( "test_runs", - props={ "items": paginated_df.to_json(orient="records") }, + props={ + "items": paginated_df.to_json(orient="records"), + "permissions": { + "can_run": user_can_run, + }, + }, event_handlers={ "RunCanceled": on_cancel_run } ) -def render_empty_state(project_code: str) -> bool: +def render_empty_state(project_code: str, user_can_run: bool) -> bool: project_summary_df = project_queries.get_summary_by_code(project_code) if project_summary_df["test_runs_ct"]: return False @@ -131,6 +143,7 @@ def render_empty_state(project_code: str) -> bool: icon=PAGE_ICON, message=testgen.EmptyStateMessage.TestExecution, action_label="Run Tests", + action_disabled=not user_can_run, button_onclick=partial(run_tests_dialog, project_code), button_icon="play_arrow", ) diff --git a/testgen/ui/views/test_suites.py b/testgen/ui/views/test_suites.py index bf8bd990..9246b1a2 100644 --- a/testgen/ui/views/test_suites.py +++ b/testgen/ui/views/test_suites.py @@ -4,7 +4,6 @@ import streamlit as st -import testgen.ui.services.authentication_service as authentication_service import testgen.ui.services.form_service as fm import testgen.ui.services.query_service as dq import testgen.ui.services.test_suite_service as test_suite_service @@ -14,6 +13,7 @@ from testgen.ui.navigation.page import Page from testgen.ui.navigation.router import Router from testgen.ui.queries import project_queries +from testgen.ui.services import user_session_service 
 from testgen.ui.services.string_service import empty_if_null
 from testgen.ui.session import session
 from testgen.ui.views.dialogs.generate_tests_dialog import generate_tests_dialog
@@ -28,8 +28,15 @@ class TestSuitesPage(Page):
     path = "test-suites"
     can_activate: typing.ClassVar = [
         lambda: session.authentication_status,
+        lambda: not user_session_service.user_has_catalog_role(),
     ]
-    menu_item = MenuItem(icon=PAGE_ICON, label=PAGE_TITLE, section="Data Quality Testing", order=1)
+    menu_item = MenuItem(
+        icon=PAGE_ICON,
+        label=PAGE_TITLE,
+        section="Data Quality Testing",
+        order=1,
+        roles=[ role for role in typing.get_args(user_session_service.RoleType) if role != "catalog" ],
+    )
@@ -39,7 +46,7 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N
         project_code = project_code or session.project
         table_groups = get_db_table_group_choices(project_code)
-        user_can_edit = authentication_service.current_user_has_edit_role()
+        user_can_edit = user_session_service.user_can_edit()
 
         test_suites = test_suite_service.get_by_project(project_code, table_group_id)
         project_summary = project_queries.get_summary_by_code(project_code)
@@ -101,41 +108,6 @@ def on_test_suites_filtered(table_group_id: str | None = None) -> None:
     Router().set_query_params({ "table_group_id": table_group_id })
 
 
-def render_empty_state(project_code: str, add_button_onclick: partial) -> bool:
-    project_summary_df = project_queries.get_summary_by_code(project_code)
-    if project_summary_df["test_suites_ct"]:
-        return False
-
-    label="No test suites yet"
-    testgen.whitespace(5)
-    if not project_summary_df["connections_ct"]:
-        testgen.empty_state(
-            label=label,
-            icon=PAGE_ICON,
-            message=testgen.EmptyStateMessage.Connection,
-            action_label="Go to Connections",
-            link_href="connections",
-        )
-    elif not project_summary_df["table_groups_ct"]:
-        testgen.empty_state(
-            label=label,
-            icon=PAGE_ICON,
-            message=testgen.EmptyStateMessage.TableGroup,
-            action_label="Go to Table Groups",
-            link_href="connections:table-groups",
-            link_params={ "connection_id": str(project_summary_df["default_connection_id"]) }
-        )
-    else:
-        testgen.empty_state(
-            label=label,
-            icon=PAGE_ICON,
-            message=testgen.EmptyStateMessage.TestSuite,
-            action_label="Add Test Suite",
-            button_onclick=add_button_onclick,
-        )
-    return True
-
-
 @st.cache_data(show_spinner=False)
 def get_db_table_group_choices(project_code):
     schema = st.session_state["dbschema"]
@@ -239,7 +211,6 @@ def show_test_suite(mode, project_code, table_groups_df, selected=None):
         submit = st.form_submit_button(
             "Save" if mode == "edit" else "Add",
             use_container_width=True,
-            disabled=authentication_service.current_user_has_read_role(),
         )
 
     if submit:
@@ -293,17 +264,13 @@ def delete_test_suite_dialog(test_suite_id: str) -> None:
         accept_cascade_delete = st.toggle("I accept deletion of this Test Suite and all related TestGen data.")
 
     with st.form("Delete Test Suite", clear_on_submit=True, border=False):
-        disable_delete_button = authentication_service.current_user_has_read_role() or (
-            not can_be_deleted and not accept_cascade_delete
-        )
-
         delete = False
         _, button_column = st.columns([.85, .15])
         with button_column:
             delete = st.form_submit_button(
                 "Delete",
                 type="primary",
-                disabled=disable_delete_button,
+                disabled=not can_be_deleted and not accept_cascade_delete,
                 use_container_width=True,
             )
diff --git a/testgen/utils/__init__.py b/testgen/utils/__init__.py
index 502bbd74..ff7d878d 100644
--- a/testgen/utils/__init__.py
+++ b/testgen/utils/__init__.py
@@ -116,8 +116,9 @@ def format_score_card(score_card: ScoreCard | None) -> ScoreCard:
             "cde_score": "--" if not definition or definition.cde_score else None,
             "profiling_score": "--" if not definition or definition.total_score else None,
             "testing_score": "--" if not definition or definition.total_score else None,
-            "categories": [],
             "categories_label": None,
+            "categories": [],
+            "history": [],
         }
 
     return {
@@ -137,6 +138,11 @@ def format_score_card(score_card: ScoreCard | None) -> ScoreCard:
             {**category, "score": friendly_score(category["score"])}
             for category in score_card.get("categories", [])
         ],
+        "history": [
+            {**entry, "score": round(100 * float(entry["score"]), 1), "time": entry["time"].isoformat()}
+            for entry in score_card.get("history", [])
+            if entry["score"] and not pd.isnull(entry["score"])
+        ],
     }