From 7208e8a037395e17f49da617c0d604d07b8e2fb6 Mon Sep 17 00:00:00 2001
From: Luis
Date: Tue, 12 Aug 2025 12:55:32 -0400
Subject: [PATCH 01/28] misc(connection): trim leading and trailing whitespace

---
 testgen/ui/views/connections.py | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/testgen/ui/views/connections.py b/testgen/ui/views/connections.py
index d8af0332..546b2bb7 100644
--- a/testgen/ui/views/connections.py
+++ b/testgen/ui/views/connections.py
@@ -46,6 +46,14 @@ class ConnectionsPage(Page):
         order=1,
         roles=[ role for role in typing.get_args(user_session_service.RoleType) if role != "catalog" ],
     )
+    trim_fields: typing.ClassVar[list[str]] = [
+        "project_host",
+        "project_port",
+        "project_user",
+        "project_db",
+        "url",
+        "http_path",
+    ]
 
     def render(self, project_code: str, **_kwargs) -> None:
         testgen.page_header(
@@ -105,7 +113,7 @@ def on_save_connection_clicked(updated_connection):
             updated_connection["sql_flavor"] = self._get_sql_flavor_from_value(updated_connection["sql_flavor_code"]).flavor
 
             set_save(True)
-            set_updated_connection(updated_connection)
+            set_updated_connection(self._sanitize_connection_input(updated_connection))
 
         def on_test_connection_clicked(updated_connection: dict) -> None:
             password = updated_connection.get("project_pw_encrypted")
@@ -129,7 +137,7 @@ def on_test_connection_clicked(updated_connection: dict) -> None:
             updated_connection["sql_flavor"] = self._get_sql_flavor_from_value(updated_connection["sql_flavor_code"]).flavor
 
             set_check_status(True)
-            set_updated_connection(updated_connection)
+            set_updated_connection(self._sanitize_connection_input(updated_connection))
 
         results = None
         for key, value in get_updated_connection().items():
@@ -175,6 +183,18 @@ def _get_sql_flavor_from_value(self, value: str) -> "ConnectionFlavor | None":
                 return match[0]
         return None
 
+    def _sanitize_connection_input(self, connection: dict) -> dict:
+        if not connection:
+            return connection
+
+        sanitized_connection_input = {}
+        for key, value in connection.items():
+            sanitized_value = value
+            if isinstance(value, str) and key in self.trim_fields:
+                sanitized_value = value.strip()
+            sanitized_connection_input[key] = sanitized_value
+        return sanitized_connection_input
+
     def _format_connection(self, connection: Connection, should_test: bool = False) -> dict:
         formatted_connection = format_connection(connection)
         if should_test:
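A quick sketch of the sanitizer's behavior (illustration only, not part of the patch): only whitelisted string fields are trimmed, and everything else passes through unchanged.

```python
# Hypothetical input dict, mirroring ConnectionsPage._sanitize_connection_input.
trim_fields = ["project_host", "project_port", "project_user", "project_db", "url", "http_path"]

raw = {
    "project_host": "  db.example.com ",   # trimmed
    "project_pw_encrypted": "  secret ",   # not in trim_fields, kept as-is
    "max_threads": 4,                      # non-string, kept as-is
}
sanitized = {
    key: value.strip() if isinstance(value, str) and key in trim_fields else value
    for key, value in raw.items()
}
assert sanitized["project_host"] == "db.example.com"
assert sanitized["project_pw_encrypted"] == "  secret "
```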
From bad261e9ef4879b1590eb0f74bfe2fd699c25038 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Wed, 13 Aug 2025 15:56:50 -0400
Subject: [PATCH 02/28] fix(test definitions): bugs in validate test query

---
 testgen/ui/views/test_definitions.py | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/testgen/ui/views/test_definitions.py b/testgen/ui/views/test_definitions.py
index 8123049f..5fc082df 100644
--- a/testgen/ui/views/test_definitions.py
+++ b/testgen/ui/views/test_definitions.py
@@ -12,6 +12,7 @@
 import testgen.ui.services.form_service as fm
 from testgen.common import date_service
+from testgen.common.database.database_service import get_flavor_service, replace_params
 from testgen.common.models import with_database_session
 from testgen.common.models.connection import Connection
 from testgen.common.models.table_group import TableGroup, TableGroupMinimal
@@ -1179,17 +1180,33 @@ def get_column_names(table_groups_id: str, table_name: str) -> list[str]:
 def validate_test(test_definition, table_group: TableGroupMinimal):
     schema = test_definition["schema_name"]
     table_name = test_definition["table_name"]
+    connection = Connection.get_by_table_group(table_group.id)
 
     if test_definition["test_type"] == "Condition_Flag":
         condition = test_definition["custom_query"]
+        concat_operator = get_flavor_service(connection.sql_flavor).get_concat_operator()
         query = f"""
             SELECT
-                COALESCE(CAST(SUM(CASE WHEN {condition} THEN 1 ELSE 0 END) AS VARCHAR(1000) ) || '|' ,'|')
+                COALESCE(
+                    CAST(
+                        SUM(
+                            CASE WHEN {condition} THEN 1 ELSE 0 END
+                        ) AS VARCHAR(1000)
+                    )
+                    {concat_operator} '|',
+                    '|'
+                )
             FROM {schema}.{table_name};
         """
     else:
-        query = test_definition["custom_query"]
-        query = query.replace("{DATA_SCHEMA}", schema)
+        query = replace_params(
+            f"""
+            SELECT COUNT(*)
+            FROM (
+                {test_definition["custom_query"]}
+            ) TEST
+            """,
+            {"DATA_SCHEMA": schema},
+        )
 
-    connection = Connection.get_by_table_group(table_group.id)
     fetch_from_target_db(connection, query)
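To make the fix concrete, here is roughly what the rewritten branch sends to the target database for a hypothetical custom query. The `.replace()` call below is a stand-in for what `replace_params` is expected to do with the `{DATA_SCHEMA}` placeholder:

```python
# Hypothetical custom query, for illustration only.
custom_query = "SELECT customer_id FROM {DATA_SCHEMA}.orders WHERE total < 0"
schema = "sales"

wrapped = f"""
SELECT COUNT(*)
FROM (
    {custom_query}
) TEST
"""
# Approximates replace_params(wrapped, {"DATA_SCHEMA": schema}):
print(wrapped.replace("{DATA_SCHEMA}", schema))
# -> SELECT COUNT(*) FROM (SELECT customer_id FROM sales.orders WHERE total < 0) TEST
```

Wrapping the custom query in a `SELECT COUNT(*)` subquery means validation only has to confirm the query parses and runs, without streaming its full result set back.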
From 3620af0961cc9a1c5abc283462d2b52e7cd4586c Mon Sep 17 00:00:00 2001
From: Luis
Date: Tue, 12 Aug 2025 09:08:00 -0400
Subject: [PATCH 03/28] fix: display a show/hide icon for password fields

---
 testgen/ui/assets/style.css                   |  5 ++
 testgen/ui/components/frontend/css/shared.css |  5 ++
 .../frontend/js/components/connection_form.js | 25 ++----
 .../frontend/js/components/input.js           | 82 +++++++++++++++----
 4 files changed, 82 insertions(+), 35 deletions(-)

diff --git a/testgen/ui/assets/style.css b/testgen/ui/assets/style.css
index 420f9605..aaf90add 100644
--- a/testgen/ui/assets/style.css
+++ b/testgen/ui/assets/style.css
@@ -478,6 +478,11 @@ div[data-testid="stPopoverBody"] [data-testid="stVerticalBlock"]:has(i.tg--expor
 }
 /* */
 
+input::-ms-reveal,
+input::-ms-clear {
+    display: none;
+}
+
 /* Dark mode */
 @media (prefers-color-scheme: dark) {
     body {

diff --git a/testgen/ui/components/frontend/css/shared.css b/testgen/ui/components/frontend/css/shared.css
index bcc96b4b..8cfd5d9d 100644
--- a/testgen/ui/components/frontend/css/shared.css
+++ b/testgen/ui/components/frontend/css/shared.css
@@ -631,3 +631,8 @@ code > .tg-icon:hover {
 .border-radius-1 {
     border-radius: 4px;
 }
+
+input::-ms-reveal,
+input::-ms-clear {
+    display: none;
+}
\ No newline at end of file

diff --git a/testgen/ui/components/frontend/js/components/connection_form.js b/testgen/ui/components/frontend/js/components/connection_form.js
index 2c700b32..9bca683c 100644
--- a/testgen/ui/components/frontend/js/components/connection_form.js
+++ b/testgen/ui/components/frontend/js/components/connection_form.js
@@ -879,6 +879,8 @@ const SnowflakeForm = (
     }),
     () => {
         if (connectByKey.val) {
+            const hasPrivateKeyPhrase = originalConnection?.private_key_passphrase || connectionPrivateKeyPassphrase.val;
+
             return div(
                 { class: 'flex-column fx-gap-3' },
                 div(
@@ -899,24 +901,13 @@ const SnowflakeForm = (
                         validityPerField['private_key_passphrase'] = state.valid;
                         isValid.val = Object.values(validityPerField).every(v => v);
                     },
+                    clearable: hasPrivateKeyPhrase,
+                    clearableCondition: 'always',
+                    onClear: () => {
+                        clearPrivateKeyPhrase.val = true;
+                        connectionPrivateKeyPassphrase.val = '';
+                    },
                 }),
-                () => {
-                    const hasPrivateKeyPhrase = originalConnection?.private_key_passphrase || connectionPrivateKeyPassphrase.val;
-                    if (!hasPrivateKeyPhrase) {
-                        return '';
-                    }
-
-                    return i(
-                        {
-                            class: 'material-symbols-rounded clickable text-secondary',
-                            onclick: () => {
-                                clearPrivateKeyPhrase.val = true;
-                                connectionPrivateKeyPassphrase.val = '';
-                            },
-                        },
-                        'clear',
-                    );
-                },
             ),
             FileInput({
                 name: 'private_key',

diff --git a/testgen/ui/components/frontend/js/components/input.js b/testgen/ui/components/frontend/js/components/input.js
index 9c0b569f..8ecae34f 100644
--- a/testgen/ui/components/frontend/js/components/input.js
+++ b/testgen/ui/components/frontend/js/components/input.js
@@ -19,8 +19,10 @@
  * @property {string[]?} autocompleteOptions
  * @property {string?} icon
  * @property {boolean?} clearable
- * @property {boolean?} disabled
+ * @property {('value' | 'always')?} clearableCondition
  * @property {function(string, InputState)?} onChange
+ * @property {boolean?} disabled
+ * @property {function(string, InputState)?} onClear
  * @property {number?} width
  * @property {number?} height
  * @property {string?} style
@@ -39,7 +41,11 @@ import { Portal } from './portal.js';
 const { div,input, label, i, small } = van.tags;
 const defaultHeight = 32;
 const iconSize = 22;
-const clearIconSize = 20;
+const addonIconSize = 20;
+const passwordFieldTypeSwitch = {
+    password: 'text',
+    text: 'password',
+};
 
 const Input = (/** @type Properties */ props) => {
     loadStylesheet('input', stylesheet);
@@ -54,6 +60,9 @@ const Input = (/** @type Properties */ props) => {
         return errors.val[0] ?? '';
     });
 
+    const originalInputType = van.derive(() => getValue(props.type) ?? 'text');
+    const inputType = van.state(originalInputType.rawVal);
+
     const onChange = props.onChange?.val ?? props.onChange;
     if (onChange) {
         onChange(value.val, { errors: errors.val, valid: errors.val.length <= 0 });
@@ -65,6 +74,8 @@ const Input = (/** @type Properties */ props) => {
         }
     });
 
+    const onClear = props.onClear?.val ?? props.onClear ?? (() => value.val = '');
+
     const autocompleteOpened = van.state(false);
     const autocompleteOptions = van.derive(() => {
         const filtered = getValue(props.autocompleteOptions)?.filter(option => option.toLowerCase().includes(value.val.toLowerCase()));
@@ -97,23 +108,49 @@ const Input = (/** @type Properties */ props) => {
         ),
         () => getValue(props.icon) ? i(
             {
-                class: 'material-symbols-rounded tg-input--icon',
+                class: 'material-symbols-rounded tg-input--icon text-secondary',
                 style: `bottom: ${((getValue(props.height) || defaultHeight) - iconSize) / 2}px`,
             },
             props.icon,
         ) : '',
-        () => getValue(props.clearable) ? i(
-            {
-                class: () => `material-symbols-rounded tg-input--clear clickable ${value.val ? '' : 'hidden'}`,
-                style: `bottom: ${((getValue(props.height) || defaultHeight) - clearIconSize) / 2}px`,
-                onclick: () => value.val = '',
-            },
-            'clear',
-        ) : '',
+        () => {
+            const clearableCondition = getValue(props.clearableCondition) ?? 'value';
+            const showClearable = getValue(props.clearable) && (
+                clearableCondition === 'always'
+                || (clearableCondition === 'value' && value.val)
+            );
+
+            return div(
+                { class: 'flex-row' },
+                originalInputType.val === 'password' && value.val
+                    ? i(
+                        {
+                            class: 'material-symbols-rounded tg-input--visibility clickable text-secondary',
+                            style: `bottom: ${((getValue(props.height) || defaultHeight) - addonIconSize) / 2}px`,
+                            onclick: () => inputType.val = passwordFieldTypeSwitch[inputType.val],
+                        },
+                        inputType.val === 'password' ? 'visibility' : 'visibility_off',
+                    )
+                    : '',
+                showClearable
+                    ? i(
+                        {
+                            class: () => `material-symbols-rounded tg-input--clear text-secondary clickable`,
+                            style: `bottom: ${((getValue(props.height) || defaultHeight) - addonIconSize) / 2}px`,
+                            onclick: onClear,
+                        },
+                        'clear',
+                    )
+                    : '',
+            );
+        },
         div(
             {
-                class: () => `flex-row tg-input--field ${getValue(props.disabled) ? 'tg-input--disabled' : ''}`,
+                class: () => {
+                    const sufixIconCount = Number(value.val && originalInputType.val === 'password') + Number(value.val && getValue(props.clearable));
+                    return `flex-row tg-input--field ${getValue(props.disabled) ? 'tg-input--disabled' : ''} sufix-padding-${sufixIconCount}`;
+                },
                 style: () => `height: ${getValue(props.height) || defaultHeight}px;`,
             },
             props.prefix
@@ -125,7 +162,7 @@ const Input = (/** @type Properties */ props) => {
         input({
             value,
             name: props.name ?? '',
-            type: props.type ?? 'text',
+            type: inputType,
            disabled: props.disabled,
            placeholder: () => getValue(props.placeholder) ?? '',
            oninput: debounce((/** @type Event */ event) => value.val = event.target.value, 300),
@@ -178,14 +215,23 @@ stylesheet.replace(`
     padding-left: 28px;
 }
 
-.tg-input--clear {
+.tg-input--clear,
+.tg-input--visibility {
     position: absolute;
-    right: 4px;
-    font-size: ${clearIconSize}px;
+    font-size: ${addonIconSize}px;
+    right: 8px;
+}
+
+.tg-input--visibility + .tg-input--clear {
+    right: ${addonIconSize + 16}px;
+}
+
+.tg-input--field.sufix-padding-1 {
+    padding-right: ${addonIconSize + 8}px;
 }
 
-.tg-input--clear ~ .tg-input--field {
-    padding-right: 24px;
+.tg-input--field.sufix-padding-2 {
+    padding-right: ${addonIconSize * 2 + 8 * 2}px;
 }
 
 .tg-input--field {
From 603828c57cc8fd9473a146d503dc7ec082282a41 Mon Sep 17 00:00:00 2001
From: Luis
Date: Tue, 12 Aug 2025 10:33:31 -0400
Subject: [PATCH 04/28] feat: add partial name filter to table group list

---
 .../frontend/js/components/connection_form.js |  2 +-
 .../frontend/js/pages/table_group_list.js     | 57 ++++++++++++++-----
 testgen/ui/views/table_groups.py              | 27 ++++++---
 3 files changed, 61 insertions(+), 25 deletions(-)

diff --git a/testgen/ui/components/frontend/js/components/connection_form.js b/testgen/ui/components/frontend/js/components/connection_form.js
index 9bca683c..d16aa959 100644
--- a/testgen/ui/components/frontend/js/components/connection_form.js
+++ b/testgen/ui/components/frontend/js/components/connection_form.js
@@ -928,7 +928,7 @@ const SnowflakeForm = (
             console.error(err);
             isFieldValid = false;
         }
-        validityPerField['private_key'] = state.valid;
+        validityPerField['private_key'] = isFieldValid;
         isValid.val = Object.values(validityPerField).every(v => v);
     },
     validators: [

diff --git a/testgen/ui/components/frontend/js/pages/table_group_list.js b/testgen/ui/components/frontend/js/pages/table_group_list.js
index 059a5746..934e6f56 100644
--- a/testgen/ui/components/frontend/js/pages/table_group_list.js
+++ b/testgen/ui/components/frontend/js/pages/table_group_list.js
@@ -11,6 +11,7 @@
  * @type {object}
  * @property {ProjectSummary} project_summary
  * @property {string?} connection_id
+ * @property {string?} table_group_name
  * @property {Connection[]} connections
  * @property {TableGroup[]} table_groups
  * @property {Permissions} permissions
@@ -26,6 +27,7 @@ import { EMPTY_STATE_MESSAGE, EmptyState } from '../components/empty_state.js';
 import { Select } from '../components/select.js';
 import { Icon } from '../components/icon.js';
 import { withTooltip } from '../components/tooltip.js';
+import { Input } from '../components/input.js';
 
 const { div, h4, i, span } = van.tags;
 
@@ -49,6 +51,7 @@ const TableGroupList = (props) => {
     const permissions = getValue(props.permissions) ?? {can_edit: false};
     const connections = getValue(props.connections) ?? [];
     const connectionId = getValue(props.connection_id);
+    const tableGroupNameFilter = getValue(props.table_group_name);
     const tableGroups = getValue(props.table_groups) ?? [];
     const projectSummary = getValue(props.project_summary);
 
@@ -68,7 +71,7 @@ const TableGroupList = (props) => {
     return projectSummary.table_group_count > 0
         ? div(
-            Toolbar(permissions, connections, connectionId),
+            Toolbar(permissions, connections, connectionId, tableGroupNameFilter),
             tableGroups.length
                 ? tableGroups.map((tableGroup) => Card({
                     testId: 'table-group-card',
@@ -211,25 +214,49 @@ const TableGroupList = (props) => {
 * @param {Permissions} permissions
 * @param {Connection[]} connections
 * @param {string?} selectedConnection
+ * @param {string?} tableGroupNameFilter
 * @returns
 */
-const Toolbar = (permissions, connections, selectedConnection) => {
+const Toolbar = (permissions, connections, selectedConnection, tableGroupNameFilter) => {
+    const connection = van.state(selectedConnection || null);
+    const tableGroupFilter = van.state(tableGroupNameFilter || null);
+
+    van.derive(() => {
+        if (connection.val !== selectedConnection || tableGroupFilter.val !== tableGroupNameFilter) {
+            emitEvent('TableGroupsFiltered', { payload: { connection_id: connection.val || null, table_group_name: tableGroupFilter.val || null } });
+        }
+    });
+
     return div(
         { class: 'flex-row fx-align-flex-end fx-justify-space-between mb-4' },
-        (getValue(connections) ?? [])?.length > 1
-            ? Select({
-                testId: 'connection-select',
-                label: 'Connection',
-                allowNull: true,
+        div(
+            {class: 'flex-row fx-gap-4'},
+            (getValue(connections) ?? [])?.length > 1
+                ? Select({
+                    testId: 'connection-select',
+                    label: 'Connection',
+                    allowNull: true,
+                    height: 38,
+                    value: connection,
+                    options: getValue(connections)?.map((connection) => ({
+                        label: connection.connection_name,
+                        value: String(connection.connection_id),
+                    })) ?? [],
+                    onChange: (value) => connection.val = value,
+                })
+                : span(''),
+            Input({
+                testId: 'table-groups-name-filter',
+                icon: 'search',
+                label: 'Table Group Name',
+                placeholder: 'Search by table group names',
                 height: 38,
-                value: selectedConnection,
-                options: getValue(connections)?.map((connection) => ({
-                    label: connection.connection_name,
-                    value: String(connection.connection_id),
-                })) ?? [],
-                onChange: (value) => emitEvent('ConnectionSelected', { payload: value }),
-            })
-            : span(''),
+                width: 360,
+                clearable: true,
+                value: tableGroupFilter,
+                onChange: (value) => tableGroupFilter.val = value || null,
+            }),
+        ),
         div(
             { class: 'flex-row fx-gap-4' },
             Button({

diff --git a/testgen/ui/views/table_groups.py b/testgen/ui/views/table_groups.py
index e0d4158d..1c1dd1bc 100644
--- a/testgen/ui/views/table_groups.py
+++ b/testgen/ui/views/table_groups.py
@@ -40,7 +40,13 @@ class TableGroupsPage(Page):
         roles=[ role for role in typing.get_args(user_session_service.RoleType) if role != "catalog" ],
     )
 
-    def render(self, project_code: str, connection_id: str | None = None, **_kwargs) -> None:
+    def render(
+        self,
+        project_code: str,
+        connection_id: str | None = None,
+        table_group_name: str | None = None,
+        **_kwargs,
+    ) -> None:
         testgen.page_header(PAGE_TITLE, "create-a-table-group")
 
         user_can_edit = user_session_service.user_can_edit()
@@ -48,14 +54,16 @@ def render(self, project_code: str, connection_id: str | None = None, **_kwargs)
         if connection_id and not connection_id.isdigit():
             connection_id = None
 
+        table_group_filters = [
+            TableGroup.project_code == project_code,
+        ]
         if connection_id:
-            table_groups = TableGroup.select_minimal_where(
-                TableGroup.project_code == project_code,
-                TableGroup.connection_id == connection_id,
-            )
-        else:
-            table_groups = TableGroup.select_minimal_where(TableGroup.project_code == project_code)
+            table_group_filters.append(TableGroup.connection_id == connection_id)
+
+        if table_group_name:
+            table_group_filters.append(TableGroup.table_groups_name.ilike(f"%{table_group_name}%"))
 
+        table_groups = TableGroup.select_minimal_where(*table_group_filters)
         connections = self._get_connections(project_code)
 
         return testgen.testgen_component(
@@ -63,6 +71,7 @@ def render(self, project_code: str, connection_id: str | None = None, **_kwargs)
             props={
                 "project_summary": project_summary.to_dict(json_safe=True),
                 "connection_id": connection_id,
+                "table_group_name": table_group_name,
                 "permissions": {
                     "can_edit": user_can_edit,
                 },
@@ -75,9 +84,9 @@ def render(self, project_code: str, connection_id: str | None = None, **_kwargs)
                 "EditTableGroupClicked": partial(self.edit_table_group_dialog, project_code),
                 "DeleteTableGroupClicked": partial(self.delete_table_group_dialog, project_code),
                 "RunProfilingClicked": partial(self.run_profiling_dialog, project_code),
-                "ConnectionSelected": lambda inner_connection_id: self.router.queue_navigation(
+                "TableGroupsFiltered": lambda params: self.router.queue_navigation(
                     to="table-groups",
-                    with_args={"project_code": project_code, "connection_id": inner_connection_id},
+                    with_args={"project_code": project_code, **params},
                 ),
             },
         )
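The new name filter relies on `ILIKE`, i.e. a case-insensitive substring match. A rough illustration of which names a given filter value matches (a plain lowercase substring test stands in for the database's ILIKE here):

```python
# Hypothetical table group names, for illustration only.
names = ["Sales_Raw", "retail_sales", "SALES", "inventory"]
table_group_name = "sales"  # filter value typed by the user

pattern = f"%{table_group_name}%"  # what the page passes to .ilike()
matches = [n for n in names if table_group_name.lower() in n.lower()]
assert matches == ["Sales_Raw", "retail_sales", "SALES"]
```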
From 0923d65215a3c11156a62bccfc5d1529a697090b Mon Sep 17 00:00:00 2001
From: Luis
Date: Thu, 14 Aug 2025 13:11:13 -0400
Subject: [PATCH 05/28] refactor(ui): extend input component to support hiding password suggestions

---
 .../ui/components/frontend/js/components/connection_form.js | 4 ++++
 testgen/ui/components/frontend/js/components/input.js       | 2 ++
 2 files changed, 6 insertions(+)

diff --git a/testgen/ui/components/frontend/js/components/connection_form.js b/testgen/ui/components/frontend/js/components/connection_form.js
index d16aa959..16223386 100644
--- a/testgen/ui/components/frontend/js/components/connection_form.js
+++ b/testgen/ui/components/frontend/js/components/connection_form.js
@@ -486,6 +486,7 @@ const RedshiftForm = (
     value: connectionPassword,
     height: 38,
     type: 'password',
+    passwordSuggestions: false,
     placeholder: (originalConnection?.connection_id && originalConnection?.project_pw_encrypted) ? secretsPlaceholder : '',
     onChange: (value, state) => {
         connectionPassword.val = value;
@@ -683,6 +684,7 @@ const DatabricksForm = (
     value: connectionPassword,
     height: 38,
     type: 'password',
+    passwordSuggestions: false,
     placeholder: (originalConnection?.connection_id && originalConnection?.project_pw_encrypted) ? secretsPlaceholder : '',
     onChange: (value, state) => {
         connectionPassword.val = value;
@@ -891,6 +893,7 @@ const SnowflakeForm = (
     value: connectionPrivateKeyPassphrase,
     height: 38,
     type: 'password',
+    passwordSuggestions: false,
     help: 'Passphrase used when creating the private key. Leave empty if the private key is not encrypted.',
     placeholder: () => (originalConnection?.connection_id && originalConnection?.private_key_passphrase && !clearPrivateKeyPhrase.val) ? secretsPlaceholder : '',
     onChange: (value, state) => {
@@ -944,6 +947,7 @@ const SnowflakeForm = (
     value: connectionPassword,
     height: 38,
     type: 'password',
+    passwordSuggestions: false,
     placeholder: (originalConnection?.connection_id && originalConnection?.project_pw_encrypted) ? secretsPlaceholder : '',
     onChange: (value, state) => {
         connectionPassword.val = value;

diff --git a/testgen/ui/components/frontend/js/components/input.js b/testgen/ui/components/frontend/js/components/input.js
index 8ecae34f..a77ad67c 100644
--- a/testgen/ui/components/frontend/js/components/input.js
+++ b/testgen/ui/components/frontend/js/components/input.js
@@ -20,6 +20,7 @@
  * @property {string?} icon
  * @property {boolean?} clearable
  * @property {('value' | 'always')?} clearableCondition
+ * @property {boolean?} passwordSuggestions
  * @property {function(string, InputState)?} onChange
  * @property {boolean?} disabled
  * @property {function(string, InputState)?} onClear
@@ -164,6 +165,7 @@ const Input = (/** @type Properties */ props) => {
     name: props.name ?? '',
     type: inputType,
     disabled: props.disabled,
+    ...(props.passwordSuggestions ?? true ? {} : {autocomplete: 'off', 'data-op-ignore': true}),
     placeholder: () => getValue(props.placeholder) ?? '',
     oninput: debounce((/** @type Event */ event) => value.val = event.target.value, 300),
     onclick: van.derive(() => autocompleteOptions.val?.length
From d3bb59a7acd934d80226b4cda31a2f54d8b9ef84 Mon Sep 17 00:00:00 2001
From: Luis
Date: Thu, 14 Aug 2025 13:11:57 -0400
Subject: [PATCH 06/28] fix(table groups): remove unintended gap from filters section

---
 .../ui/components/frontend/js/pages/table_group_list.js | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/testgen/ui/components/frontend/js/pages/table_group_list.js b/testgen/ui/components/frontend/js/pages/table_group_list.js
index 934e6f56..4376f856 100644
--- a/testgen/ui/components/frontend/js/pages/table_group_list.js
+++ b/testgen/ui/components/frontend/js/pages/table_group_list.js
@@ -244,14 +244,14 @@ const Toolbar = (permissions, connections, selectedConnection, tableGroupNameFil
                    })) ?? [],
                     onChange: (value) => connection.val = value,
                 })
-                : span(''),
+                : '',
             Input({
                 testId: 'table-groups-name-filter',
                 icon: 'search',
-                label: 'Table Group Name',
-                placeholder: 'Search by table group names',
+                label: '',
+                placeholder: 'Search table group names',
                 height: 38,
-                width: 360,
+                width: 300,
                 clearable: true,
                 value: tableGroupFilter,
                 onChange: (value) => tableGroupFilter.val = value || null,
From 6a54f2e82b890008cd90393dd8d114f333e79a23 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Thu, 14 Aug 2025 14:24:43 -0400
Subject: [PATCH 07/28] fix(runs): duration display incorrect when > 24 hours

---
 testgen/common/models/profiling_run.py        |  4 ++--
 testgen/common/models/test_run.py             |  4 ++--
 .../dbsetup/060_create_standard_views.sql     |  2 +-
 .../components/frontend/js/display_utils.js   | 23 ++++++++++++-------
 .../frontend/js/pages/profiling_runs.js       |  4 ++--
 .../components/frontend/js/pages/test_runs.js |  4 ++--
 6 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/testgen/common/models/profiling_run.py b/testgen/common/models/profiling_run.py
index f7c68bb0..ca05f5ac 100644
--- a/testgen/common/models/profiling_run.py
+++ b/testgen/common/models/profiling_run.py
@@ -33,10 +33,10 @@ class ProfilingRunMinimal(EntityMinimal):
 class ProfilingRunSummary(EntityMinimal):
     profiling_run_id: UUID
     start_time: datetime
+    end_time: datetime
     table_groups_name: str
     status: ProfilingRunStatus
     process_id: int
-    duration: str
     log_message: str
     schema_name: str
     table_ct: int
@@ -177,10 +177,10 @@ def select_summary(
         )
         SELECT v_profiling_runs.profiling_run_id,
             v_profiling_runs.start_time,
+            v_profiling_runs.end_time,
             v_profiling_runs.table_groups_name,
             v_profiling_runs.status,
             v_profiling_runs.process_id,
-            v_profiling_runs.duration,
             v_profiling_runs.log_message,
             v_profiling_runs.schema_name,
             v_profiling_runs.table_ct,

diff --git a/testgen/common/models/test_run.py b/testgen/common/models/test_run.py
index 47aa1584..63872a4a 100644
--- a/testgen/common/models/test_run.py
+++ b/testgen/common/models/test_run.py
@@ -33,10 +33,10 @@ class TestRunMinimal(EntityMinimal):
 class TestRunSummary(EntityMinimal):
     test_run_id: UUID
     test_starttime: datetime
+    test_endtime: datetime
     table_groups_name: str
     test_suite: str
     status: TestRunStatus
-    duration: str
     process_id: int
     log_message: str
     test_ct: int
@@ -174,10 +174,10 @@ def select_summary(
         )
         SELECT test_runs.id AS test_run_id,
             test_runs.test_starttime,
+            test_runs.test_endtime,
             table_groups.table_groups_name,
             test_suites.test_suite,
             test_runs.status,
-            test_runs.duration,
             test_runs.process_id,
             test_runs.log_message,
             test_runs.test_ct,

diff --git a/testgen/template/dbsetup/060_create_standard_views.sql b/testgen/template/dbsetup/060_create_standard_views.sql
index e93f4f77..563e5224 100644
--- a/testgen/template/dbsetup/060_create_standard_views.sql
+++ b/testgen/template/dbsetup/060_create_standard_views.sql
@@ -64,7 +64,7 @@ SELECT r.id as profiling_run_id,
        tg.table_groups_name,
        tg.table_group_schema as schema_name,
        r.profiling_starttime as start_time,
-       TO_CHAR(r.profiling_endtime - r.profiling_starttime, 'HH24:MI:SS') as duration,
+       r.profiling_endtime as end_time,
        r.status,
       r.log_message,
       r.table_ct,

diff --git a/testgen/ui/components/frontend/js/display_utils.js b/testgen/ui/components/frontend/js/display_utils.js
index e4dcc612..9eb5e8c1 100644
--- a/testgen/ui/components/frontend/js/display_utils.js
+++ b/testgen/ui/components/frontend/js/display_utils.js
@@ -1,6 +1,6 @@
 function formatTimestamp(
     /** @type number | string */ timestamp,
-    /** @type boolean */ show_year,
+    /** @type boolean */ showYear,
 ) {
     if (timestamp) {
         const date = new Date(typeof timestamp === 'number' ? timestamp * 1000 : timestamp);
@@ -8,22 +8,29 @@ function formatTimestamp(
         const months = [ 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' ];
         const hours = date.getHours();
         const minutes = date.getMinutes();
-        return `${months[date.getMonth()]} ${date.getDate()}, ${show_year ? date.getFullYear() + ' at ': ''}${(hours % 12) || 12}:${String(minutes).padStart(2, '0')} ${hours / 12 >= 1 ? 'PM' : 'AM'}`;
+        return `${months[date.getMonth()]} ${date.getDate()}, ${showYear ? date.getFullYear() + ' at ': ''}${(hours % 12) || 12}:${String(minutes).padStart(2, '0')} ${hours / 12 >= 1 ? 'PM' : 'AM'}`;
     }
     return '--';
 }
 
-function formatDuration(/** @type string */ duration) {
-    if (!duration) {
+function formatDuration(
+    /** @type Date | number | string */ startTime,
+    /** @type Date | number | string */ endTime,
+) {
+    if (!startTime || !endTime) {
         return '--';
     }
 
-    const [ hour, minute, second ] = duration.split(':');
+    const startDate = new Date(typeof startTime === 'number' ? startTime * 1000 : startTime);
+    const endDate = new Date(typeof endTime === 'number' ? endTime * 1000 : endTime);
+    const totalSeconds = Math.floor((endDate.getTime() - startDate.getTime()) / 1000);
+
     let formatted = [
-        { value: Number(hour), unit: 'h' },
-        { value: Number(minute), unit: 'm' },
-        { value: Number(second), unit: 's' },
+        { value: Math.floor(totalSeconds / (3600 * 24)), unit: 'd' },
+        { value: Math.floor((totalSeconds % (3600 * 24)) / 3600), unit: 'h' },
+        { value: Math.floor((totalSeconds % 3600) / 60), unit: 'm' },
+        { value: totalSeconds % 60, unit: 's' },
     ].map(({ value, unit }) => value ? `${value}${unit}` : '')
         .join(' ');

diff --git a/testgen/ui/components/frontend/js/pages/profiling_runs.js b/testgen/ui/components/frontend/js/pages/profiling_runs.js
index 93981556..94ae2c5b 100644
--- a/testgen/ui/components/frontend/js/pages/profiling_runs.js
+++ b/testgen/ui/components/frontend/js/pages/profiling_runs.js
@@ -3,10 +3,10 @@
  * @type {object}
  * @property {string} profiling_run_id
  * @property {number} start_time
+ * @property {number} end_time
  * @property {string} table_groups_name
  * @property {'Running'|'Complete'|'Error'|'Cancelled'} status
  * @property {string} log_message
- * @property {string} duration
  * @property {string} process_id
  * @property {string} schema_name
  * @property {number} column_ct
@@ -191,7 +191,7 @@ const ProfilingRunItem = (
         ProfilingRunStatus(item),
         div(
             { class: 'text-caption mt-1', 'data-testid': 'profiling-run-item-duration' },
-            formatDuration(item.duration),
+            formatDuration(item.start_time, item.end_time),
         ),
     ),
     item.status === 'Running' && item.process_id && userCanRun ? Button({

diff --git a/testgen/ui/components/frontend/js/pages/test_runs.js b/testgen/ui/components/frontend/js/pages/test_runs.js
index f5376560..03b2aa47 100644
--- a/testgen/ui/components/frontend/js/pages/test_runs.js
+++ b/testgen/ui/components/frontend/js/pages/test_runs.js
@@ -3,11 +3,11 @@
  * @type {object}
  * @property {string} test_run_id
  * @property {number} test_starttime
+ * @property {number} test_endtime
  * @property {string} table_groups_name
  * @property {string} test_suite
  * @property {'Running'|'Complete'|'Error'|'Cancelled'} status
  * @property {string} log_message
- * @property {string} duration
 * @property {string} process_id
 * @property {number} test_ct
 * @property {number} passed_ct
@@ -192,7 +192,7 @@ const TestRunItem = (
         TestRunStatus(item),
         div(
             { class: 'text-caption mt-1' },
-            formatDuration(item.duration),
+            formatDuration(item.test_starttime, item.test_endtime),
         ),
     ),
     item.status === 'Running' && item.process_id && userCanRun ? Button({
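A worked check of the new duration arithmetic (Python used here only to verify the decomposition that the JavaScript `formatDuration` performs): a run of 26h 30m 5s previously rendered as `02:30:05`, because `TO_CHAR(..., 'HH24:MI:SS')` wraps the hours field at 24. The new day/hour/minute/second split reports it correctly.

```python
total_seconds = 26 * 3600 + 30 * 60 + 5  # a run lasting 26h 30m 5s

parts = [
    (total_seconds // (3600 * 24), "d"),
    ((total_seconds % (3600 * 24)) // 3600, "h"),
    ((total_seconds % 3600) // 60, "m"),
    (total_seconds % 60, "s"),
]
# Zero-valued units are dropped, matching the JS .map(...).join(' ') chain.
formatted = " ".join(f"{value}{unit}" for value, unit in parts if value)
assert formatted == "1d 2h 30m 5s"  # the old view column would have shown 02:30:05
```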
From f2dfe40d923bebe2c4e1bf3e6b0f1c5129203c26 Mon Sep 17 00:00:00 2001
From: Diogo Basto
Date: Fri, 8 Aug 2025 19:29:31 +0100
Subject: [PATCH 08/28] misc: Consolidate demo into a single quick-start call.

---
 README.md                                   |  6 +-
 docs/local_development.md                   |  6 +-
 testgen/__main__.py                         | 71 +++++++++------------
 testgen/commands/queries/profiling_query.py | 10 +--
 testgen/commands/run_profiling_bridge.py    |  4 +-
 testgen/common/date_service.py              |  3 +-
 6 files changed, 41 insertions(+), 59 deletions(-)

diff --git a/README.md b/README.md
index 8fa9637a..218b0748 100644
--- a/README.md
+++ b/README.md
@@ -168,11 +168,7 @@ Verify that you can login to the UI with the `TESTGEN_USERNAME` and `TESTGEN_PAS
 The [Data Observability quickstart](https://docs.datakitchen.io/articles/open-source-data-observability/data-observability-overview) walks you through DataOps Data Quality TestGen capabilities to demonstrate how it covers critical use cases for data and analytic teams.
 
 ```shell
-testgen quick-start --delete-target-db
-testgen run-profile --table-group-id 0ea85e17-acbe-47fe-8394-9970725ad37d
-testgen run-test-generation --table-group-id 0ea85e17-acbe-47fe-8394-9970725ad37d
-testgen run-tests --project-key DEFAULT --test-suite-key default-suite-1
-testgen quick-start --simulate-fast-forward
+testgen quick-start
 ```
 
 In the TestGen UI, you will see that new data profiling and test results have been generated.

diff --git a/docs/local_development.md b/docs/local_development.md
index 338070ce..bbe49c78 100644
--- a/docs/local_development.md
+++ b/docs/local_development.md
@@ -93,11 +93,7 @@ testgen setup-system-db --yes
 Seed the demo data.
 
 ```shell
-testgen quick-start --delete-target-db
-testgen run-profile --table-group-id 0ea85e17-acbe-47fe-8394-9970725ad37d
-testgen run-test-generation --table-group-id 0ea85e17-acbe-47fe-8394-9970725ad37d
-testgen run-tests --project-key DEFAULT --test-suite-key default-suite-1
-testgen quick-start --simulate-fast-forward
+testgen quick-start
 ```
 
 ### Run the Application

diff --git a/testgen/__main__.py b/testgen/__main__.py
index 6e0d8a9c..b3973897 100644
--- a/testgen/__main__.py
+++ b/testgen/__main__.py
@@ -117,10 +117,9 @@ def cli(ctx: Context, verbose: bool):
 @click.option(
     "-tg",
     "--table-group-id",
-    required=False,
+    required=True,
     type=click.STRING,
     help="The identifier for the table group used during a profile run. Use a table_group_id shown in list-table-groups.",
-    default=None,
 )
 def run_profile(configuration: Configuration, table_group_id: str):
     click.echo(f"run-profile with table_group_id: {table_group_id}")
@@ -136,16 +135,15 @@ def run_profile(configuration: Configuration, table_group_id: str):
     "-tg",
     "--table-group-id",
     help="The identifier for the table group used during a profile run. Use a table_group_id shown in list-table-groups.",
-    required=False,
+    required=True,
     type=click.STRING,
-    default=None,
 )
 @click.option(
     "-ts",
     "--test-suite-key",
     help="The identifier for a test suite. Use a test_suite_key shown in list-test-suites.",
-    required=False,
-    default=settings.DEFAULT_TEST_SUITE_KEY,
+    required=True,
+    type=click.STRING,
 )
 @click.option(
     "-gs",
@@ -339,27 +337,6 @@ def list_test_runs(configuration: Configuration, project_key: str, test_suite_ke
 
 @cli.command("quick-start", help="Use to generate sample target database, for demo purposes.")
-@click.option(
-    "--delete-target-db",
-    help="Will delete the current target database, if it exists",
-    is_flag=True,
-    default=False,
-)
-@click.option(
-    "--iteration",
-    "-i",
-    default=0,
-    required=False,
-    help="The monthly data increment snapshot. Can be 0, 1, 2 or 3. 0 is the initial data.",
-)
-@click.option(
-    "--simulate-fast-forward",
-    "-s",
-    default=False,
-    is_flag=True,
-    required=False,
-    help="For demo purposes, simulates that some time pass by and the target data is changing. This will call the iterations in order.",
-)
 @click.option(
     "--observability-api-url",
     help="Observability API url to be able to export TestGen data to Observability using the command 'export-observability'",
     required=False,
     default="",
 )
 @pass_configuration
+@click.pass_context
 def quick_start(
+    ctx: Context,
     configuration: Configuration,
-    delete_target_db: bool,
-    iteration: int,
-    simulate_fast_forward: bool,
     observability_api_url: str,
     observability_api_key: str,
 ):
     if observability_api_key:
         settings.OBSERVABILITY_API_KEY = observability_api_key
 
-    # Check if this is an increment or the initial state
-    if iteration == 0 and not simulate_fast_forward:
-        click.echo("quick-start command")
-        run_quick_start(delete_target_db)
+    click.echo("quick-start command")
+    run_quick_start(delete_target_db=True)
+
+    click.echo("loading initial data")
+    run_quick_start_increment(0)
 
-    if not simulate_fast_forward:
+    minutes_offset = -30*24*60*3
+    table_group_id="0ea85e17-acbe-47fe-8394-9970725ad37d"
+
+    click.echo(f"run-profile with table_group_id: {table_group_id}")
+    spinner = None
+    if not configuration.verbose:
+        spinner = MoonSpinner("Processing ... ")
+    message = run_profiling_queries(table_group_id, spinner=spinner, minutes_offset=minutes_offset)
+    click.echo("\n" + message)
+
+    LOG.info(f"run-test-generation with table_group_id: {table_group_id} test_suite: {settings.DEFAULT_TEST_SUITE_KEY}")
+    message = run_test_gen_queries(table_group_id, settings.DEFAULT_TEST_SUITE_KEY)
+    click.echo("\n" + message)
+
+    run_execution_steps(settings.PROJECT_KEY, settings.DEFAULT_TEST_SUITE_KEY, minutes_offset=minutes_offset)
+
+    for iteration in range(1, 4):
+        click.echo(f"Running iteration: {iteration} / 3")
+        minutes_offset = -30*24*60 * (3-iteration)
         run_quick_start_increment(iteration)
-    else:
-        for iteration in range(1, 4):
-            click.echo(f"Running iteration: {iteration} / 3")
-            minutes_offset = 2 * iteration
-            run_quick_start_increment(iteration)
-            run_execution_steps(settings.PROJECT_KEY, settings.DEFAULT_TEST_SUITE_KEY, minutes_offset=minutes_offset)
+        run_execution_steps(settings.PROJECT_KEY, settings.DEFAULT_TEST_SUITE_KEY, minutes_offset=minutes_offset)
 
     click.echo("Quick start has successfully finished.")

diff --git a/testgen/commands/queries/profiling_query.py b/testgen/commands/queries/profiling_query.py
index d71334dc..4eadab10 100644
--- a/testgen/commands/queries/profiling_query.py
+++ b/testgen/commands/queries/profiling_query.py
@@ -51,16 +51,18 @@ class CProfilingSQL:
     contingency_columns = ""
     exception_message = ""
+    minutes_offset = 0
 
     _data_chars_sql: CRefreshDataCharsSQL = None
     _rollup_scores_sql: CRollupScoresSQL = None
 
-    def __init__(self, strProjectCode, flavor):
+    def __init__(self, strProjectCode, flavor, minutes_offset=0):
         self.flavor = flavor
         self.project_code = strProjectCode
 
         # Defaults
-        self.run_date = date_service.get_now_as_string()
-        self.today = date_service.get_now_as_string()
+        self.run_date = date_service.get_now_as_string_with_offset(minutes_offset)
+        self.today = date_service.get_now_as_string_with_offset(minutes_offset)
+        self.minutes_offset = minutes_offset
 
     def _get_data_chars_sql(self) -> CRefreshDataCharsSQL:
         if not self._data_chars_sql:
@@ -102,7 +104,7 @@ def _get_params(self) -> dict:
             "PROFILE_ID_COLUMN_MASK": self.profile_id_column_mask,
             "PROFILE_SK_COLUMN_MASK": self.profile_sk_column_mask,
             "START_TIME": self.today,
-            "NOW_TIMESTAMP": date_service.get_now_as_string(),
+            "NOW_TIMESTAMP": date_service.get_now_as_string_with_offset(minutes_offset=self.minutes_offset),
             "EXCEPTION_MESSAGE": self.exception_message,
             "SAMPLING_TABLE": self.sampling_table,
             "SAMPLE_SIZE": int(self.parm_sample_size),

diff --git a/testgen/commands/run_profiling_bridge.py b/testgen/commands/run_profiling_bridge.py
index cdffbfaa..6bcccc6b 100644
--- a/testgen/commands/run_profiling_bridge.py
+++ b/testgen/commands/run_profiling_bridge.py
@@ -221,7 +221,7 @@ def run_profiling_in_background(table_group_id):
 
 @with_database_session
-def run_profiling_queries(table_group_id: str, username: str | None = None, spinner: Spinner | None = None):
+def run_profiling_queries(table_group_id: str, username: str | None = None, spinner: Spinner | None = None, minutes_offset: int = 0):
     if table_group_id is None:
         raise ValueError("Table Group ID was not specified")
 
@@ -240,7 +240,7 @@ def run_profiling_queries(table_group_id: str, username: str | None = None, spin
     params = get_profiling_params(table_group_id)
 
     LOG.info("CurrentStep: Initializing Query Generator")
-    clsProfiling = CProfilingSQL(params["project_code"], connection.sql_flavor)
+    clsProfiling = CProfilingSQL(params["project_code"], connection.sql_flavor, minutes_offset=minutes_offset)
 
     # Set General Parms
     clsProfiling.table_groups_id = table_group_id

diff --git a/testgen/common/date_service.py b/testgen/common/date_service.py
index 620e4143..41e34125 100644
--- a/testgen/common/date_service.py
+++ b/testgen/common/date_service.py
@@ -17,8 +17,7 @@ def parse_now(value: str) -> datetime:
 
 def get_now_as_string_with_offset(minutes_offset):
     ret = datetime.utcnow()
-    if minutes_offset > 0:
-        ret = ret + timedelta(minutes=minutes_offset)
+    ret = ret + timedelta(minutes=minutes_offset)
     return ret.strftime("%Y-%m-%d %H:%M:%S")
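The consolidated command fakes a three-month history by back-dating each run with a negative `minutes_offset` (minutes relative to now). The arithmetic, spelled out:

```python
MINUTES_PER_MONTH = 30 * 24 * 60  # the 30*24*60 factor used in quick_start

initial_offset = -MINUTES_PER_MONTH * 3                          # initial load: ~3 months ago
iteration_offsets = [-MINUTES_PER_MONTH * (3 - i) for i in range(1, 4)]

assert initial_offset == -129600                                 # 90 days back
assert iteration_offsets == [-86400, -43200, 0]                  # ~2 months, ~1 month, "now"
```

This is also why the `date_service` change was needed: the old `if minutes_offset > 0` guard silently ignored negative (past) offsets.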
From d4d198fbf77b24ef18712ef043776cb47fd7a5d0 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Mon, 18 Aug 2025 23:02:23 -0400
Subject: [PATCH 09/28] feat(forms): improve field validation

---
 .../frontend/js/components/connection_form.js |  32 +----
 .../frontend/js/components/input.js           | 109 ++++++++++--------
 .../frontend/js/components/score_breakdown.js |   2 +
 .../frontend/js/components/select.js          |  13 ++-
 .../js/components/table_group_form.js         |  26 +----
 .../components/frontend/js/components/tree.js |   1 +
 .../components/frontend/js/form_validators.js |  12 +-
 .../frontend/js/pages/data_catalog.js         |   3 +-
 .../frontend/js/pages/project_dashboard.js    |   2 -
 .../frontend/js/pages/quality_dashboard.js    |   2 -
 .../frontend/js/pages/table_group_list.js     |   6 +-
 .../frontend/js/pages/test_suites.js          |   1 -
 12 files changed, 95 insertions(+), 114 deletions(-)

diff --git a/testgen/ui/components/frontend/js/components/connection_form.js b/testgen/ui/components/frontend/js/components/connection_form.js
index 16223386..fa4df34a 100644
--- a/testgen/ui/components/frontend/js/components/connection_form.js
+++ b/testgen/ui/components/frontend/js/components/connection_form.js
@@ -59,7 +59,7 @@ import { getValue, emitEvent, loadStylesheet, isEqual } from '../utils.js';
 import { Input } from './input.js';
 import { Slider } from './slider.js';
 import { Select } from './select.js';
-import { maxLength, minLength, sizeLimit } from '../form_validators.js';
+import { maxLength, minLength, required, sizeLimit } from '../form_validators.js';
 import { RadioGroup } from './radio_group.js';
 import { FileInput } from './file_input.js';
 import { ExpansionPanel } from './expansion_panel.js';
@@ -252,7 +252,6 @@ const ConnectionForm = (props, saveButton) => {
         value: connectionFlavor,
         options: props.flavors,
         disabled: props.disableFlavor,
-        height: 38,
         help: 'Type of database server to connect to. This determines the database driver and SQL dialect that will be used by TestGen.',
         testId: 'sql_flavor',
     }),
@@ -260,13 +259,12 @@ const ConnectionForm = (props, saveButton) => {
     Input({
         name: 'connection_name',
         label: 'Connection Name',
         value: connectionName,
-        height: 38,
         help: 'Unique name to describe the connection',
         onChange: (value, state) => {
             connectionName.val = value;
             setFieldValidity('connection_name', state.valid);
         },
-        validators: [ minLength(3), maxLength(40) ],
+        validators: [ required, minLength(3), maxLength(40) ],
     }),
 
     authenticationForm,
@@ -411,7 +409,6 @@ const RedshiftForm = (
     name: 'db_host',
     label: 'Host',
     value: connectionHost,
-    height: 38,
     class: 'fx-flex',
     disabled: connectByUrl,
     onChange: (value, state) => {
@@ -425,7 +422,6 @@ const RedshiftForm = (
     name: 'db_port',
     label: 'Port',
     value: connectionPort,
-    height: 38,
     type: 'number',
     disabled: connectByUrl,
     onChange: (value, state) => {
@@ -440,7 +436,6 @@ const RedshiftForm = (
     name: 'db_name',
     label: 'Database',
     value: connectionDatabase,
-    height: 38,
     disabled: connectByUrl,
     onChange: (value, state) => {
         connectionDatabase.val = value;
@@ -455,9 +450,8 @@ const RedshiftForm = (
     label: 'URL',
     value: connectionStringSuffix,
     class: 'fx-flex',
-    height: 38,
     name: 'url_suffix',
-    prefix: span({ style: 'height: 38px; white-space: nowrap; color: var(--disabled-text-color)' }, connectionStringPrefix),
+    prefix: span({ style: 'white-space: nowrap; color: var(--disabled-text-color)' }, connectionStringPrefix),
     disabled: !connectByUrl.val,
     onChange: (value, state) => connectionStringSuffix.val = value,
 }),
@@ -472,7 +466,6 @@ const RedshiftForm = (
     name: 'db_user',
     label: 'Username',
     value: connectionUsername,
-    height: 38,
     onChange: (value, state) => {
         connectionUsername.val = value;
         validityPerField['db_user'] = state.valid;
@@ -484,7 +477,6 @@ const RedshiftForm = (
     name: 'password',
     label: 'Password',
     value: connectionPassword,
-    height: 38,
     type: 'password',
     passwordSuggestions: false,
     placeholder: (originalConnection?.connection_id && originalConnection?.project_pw_encrypted) ? secretsPlaceholder : '',
@@ -595,7 +587,6 @@ const DatabricksForm = (
     name: 'db_host',
     label: 'Host',
     value: connectionHost,
-    height: 38,
     class: 'fx-flex',
     disabled: connectByUrl,
     onChange: (value, state) => {
@@ -609,7 +600,6 @@ const DatabricksForm = (
     name: 'db_port',
     label: 'Port',
     value: connectionPort,
-    height: 38,
     type: 'number',
     disabled: connectByUrl,
     onChange: (value, state) => {
@@ -624,7 +614,6 @@ const DatabricksForm = (
     label: 'HTTP Path',
     value: connectionHttpPath,
     class: 'fx-flex',
-    height: 38,
     name: 'http_path',
     disabled: connectByUrl,
     onChange: (value, state) => {
@@ -638,7 +627,6 @@ const DatabricksForm = (
     name: 'db_name',
     label: 'Database',
     value: connectionDatabase,
-    height: 38,
     disabled: connectByUrl,
     onChange: (value, state) => {
         connectionDatabase.val = value;
@@ -653,9 +641,8 @@ const DatabricksForm = (
     label: 'URL',
     value: connectionStringSuffix,
     class: 'fx-flex',
-    height: 38,
     name: 'url_suffix',
-    prefix: span({ style: 'height: 38px; white-space: nowrap; color: var(--disabled-text-color)' }, connectionStringPrefix),
+    prefix: span({ style: 'white-space: nowrap; color: var(--disabled-text-color)' }, connectionStringPrefix),
     disabled: !connectByUrl.val,
     onChange: (value, state) => connectionStringSuffix.val = value,
 }),
@@ -670,7 +657,6 @@ const DatabricksForm = (
     name: 'db_user',
     label: 'Username',
     value: connectionUsername,
-    height: 38,
     onChange: (value, state) => {
         connectionUsername.val = value;
         validityPerField['db_user'] = state.valid;
@@ -682,7 +668,6 @@ const DatabricksForm = (
     name: 'password',
     label: 'Password',
     value: connectionPassword,
-    height: 38,
     type: 'password',
     passwordSuggestions: false,
     placeholder: (originalConnection?.connection_id && originalConnection?.project_pw_encrypted) ? secretsPlaceholder : '',
@@ -795,7 +780,6 @@ const SnowflakeForm = (
     name: 'db_host',
     label: 'Host',
     value: connectionHost,
-    height: 38,
     class: 'fx-flex',
     disabled: connectByUrl,
     onChange: (value, state) => {
@@ -809,7 +793,6 @@ const SnowflakeForm = (
     name: 'db_port',
     label: 'Port',
     value: connectionPort,
-    height: 38,
     type: 'number',
     disabled: connectByUrl,
     onChange: (value, state) => {
@@ -824,7 +807,6 @@ const SnowflakeForm = (
     name: 'db_name',
     label: 'Database',
     value: connectionDatabase,
-    height: 38,
     disabled: connectByUrl,
     onChange: (value, state) => {
         connectionDatabase.val = value;
@@ -839,9 +821,8 @@ const SnowflakeForm = (
     label: 'URL',
     value: connectionStringSuffix,
     class: 'fx-flex',
-    height: 38,
     name: 'url_suffix',
-    prefix: span({ style: 'height: 38px; white-space: nowrap; color: var(--disabled-text-color)' }, connectionStringPrefix),
+    prefix: span({ style: 'white-space: nowrap; color: var(--disabled-text-color)' }, connectionStringPrefix),
     disabled: !connectByUrl.val,
     onChange: (value, state) => {
         connectionStringSuffix.val = value;
@@ -871,7 +852,6 @@ const SnowflakeForm = (
     name: 'db_user',
     label: 'Username',
     value: connectionUsername,
-    height: 38,
     onChange: (value, state) => {
         connectionUsername.val = value;
         validityPerField['db_user'] = state.valid;
@@ -891,7 +871,6 @@ const SnowflakeForm = (
     name: 'private_key_passphrase',
     label: 'Private Key Passphrase',
     value: connectionPrivateKeyPassphrase,
-    height: 38,
     type: 'password',
     passwordSuggestions: false,
     help: 'Passphrase used when creating the private key. Leave empty if the private key is not encrypted.',
@@ -945,7 +924,6 @@ const SnowflakeForm = (
     name: 'password',
     label: 'Password',
     value: connectionPassword,
-    height: 38,
     type: 'password',
     passwordSuggestions: false,
     placeholder: (originalConnection?.connection_id && originalConnection?.project_pw_encrypted) ? secretsPlaceholder : '',

diff --git a/testgen/ui/components/frontend/js/components/input.js b/testgen/ui/components/frontend/js/components/input.js
index a77ad67c..d0a48413 100644
--- a/testgen/ui/components/frontend/js/components/input.js
+++ b/testgen/ui/components/frontend/js/components/input.js
@@ -41,8 +41,8 @@ import { Icon } from './icon.js';
 import { withTooltip } from './tooltip.js';
 import { Portal } from './portal.js';
 
-const { div,input, label, i, small } = van.tags;
-const defaultHeight = 32;
+const { div, input, label, i, small, span } = van.tags;
+const defaultHeight = 38;
 const iconSize = 22;
 const addonIconSize = 20;
 const passwordFieldTypeSwitch = {
     password: 'text',
     text: 'password',
 };
@@ -55,6 +55,10 @@ const Input = (/** @type Properties */ props) => {
     const domId = van.derive(() => getValue(props.id) ?? getRandomId());
     const value = van.derive(() => getValue(props.value) ?? '');
+    const isRequired = van.derive(() => {
+        const validators = getValue(props.validators) ?? [];
+        return validators.some(v => v.name === 'required');
+    });
     const errors = van.derive(() => {
         const validators = getValue(props.validators) ?? [];
         return validators.map(v => v(value.val)).filter(error => error);
     });
@@ -66,6 +70,7 @@ const Input = (/** @type Properties */ props) => {
     const originalInputType = van.derive(() => getValue(props.type) ?? 'text');
     const inputType = van.state(originalInputType.rawVal);
+    const isDirty = van.state(false);
 
     const onChange = props.onChange?.val ?? props.onChange;
     if (onChange) {
         onChange(value.val, { errors: errors.val, valid: errors.val.length <= 0 });
     }
@@ -107,52 +112,61 @@ const Input = (/** @type Properties */ props) => {
         div(
             { class: 'flex-row fx-gap-1 text-caption' },
             props.label,
+            () => isRequired.val
+                ? span({ class: 'text-error' }, '*')
+                : '',
             () => getValue(props.help) ? withTooltip(
                 Icon({ size: 16, classes: 'text-disabled' }, 'help'),
             ) : null,
         ),
-        () => getValue(props.icon) ? i(
-            {
-                class: 'material-symbols-rounded tg-input--icon text-secondary',
-                style: `bottom: ${((getValue(props.height) || defaultHeight) - iconSize) / 2}px`,
-            },
-            props.icon,
-        ) : '',
-        () => {
-            const clearableCondition = getValue(props.clearableCondition) ?? 'value';
-            const showClearable = getValue(props.clearable) && (
-                clearableCondition === 'always'
-                || (clearableCondition === 'value' && value.val)
-            );
-
-            return div(
-                { class: 'flex-row' },
-                originalInputType.val === 'password' && value.val
-                    ? i(
-                        {
-                            class: 'material-symbols-rounded tg-input--visibility clickable text-secondary',
-                            style: `bottom: ${((getValue(props.height) || defaultHeight) - addonIconSize) / 2}px`,
-                            onclick: () => inputType.val = passwordFieldTypeSwitch[inputType.val],
-                        },
-                        inputType.val === 'password' ? 'visibility' : 'visibility_off',
-                    )
-                    : '',
-                showClearable
-                    ? i(
-                        {
-                            class: () => `material-symbols-rounded tg-input--clear text-secondary clickable`,
-                            style: `bottom: ${((getValue(props.height) || defaultHeight) - addonIconSize) / 2}px`,
-                            onclick: onClear,
-                        },
-                        'clear',
-                    )
-                    : '',
-            );
-        },
         div(
             {
                 class: () => {
                     const sufixIconCount = Number(value.val && originalInputType.val === 'password') + Number(value.val && getValue(props.clearable));
                     return `flex-row tg-input--field ${getValue(props.disabled) ? 'tg-input--disabled' : ''} sufix-padding-${sufixIconCount}`;
                 },
                 style: () => `height: ${getValue(props.height) || defaultHeight}px;`,
             },
             props.prefix
                 ...
             input({
                 value,
                 name: props.name ?? '',
                 type: inputType,
                 disabled: props.disabled,
                 ...(props.passwordSuggestions ?? true ? {} : {autocomplete: 'off', 'data-op-ignore': true}),
                 placeholder: () => getValue(props.placeholder) ?? '',
-                oninput: debounce((/** @type Event */ event) => value.val = event.target.value, 300),
+                oninput: debounce((/** @type Event */ event) => {
+                    isDirty.val = true;
+                    value.val = event.target.value;
+                }, 300),
                 onclick: van.derive(() => autocompleteOptions.val?.length
                     ? () => autocompleteOpened.val = true
                     : null
                 ),
             }),
+            () => getValue(props.icon) ? i(
+                {
+                    class: 'material-symbols-rounded tg-input--icon text-secondary',
+                    style: `top: ${((getValue(props.height) || defaultHeight) - iconSize) / 2}px`,
+                },
+                props.icon,
+            ) : '',
+            () => {
+                const clearableCondition = getValue(props.clearableCondition) ?? 'value';
+                const showClearable = getValue(props.clearable) && (
+                    clearableCondition === 'always'
+                    || (clearableCondition === 'value' && value.val)
+                );
+
+                return div(
+                    { class: 'flex-row tg-input--icon-actions' },
+                    originalInputType.val === 'password' && value.val
+                        ? i(
+                            {
+                                class: 'material-symbols-rounded tg-input--visibility clickable text-secondary',
+                                style: `top: ${((getValue(props.height) || defaultHeight) - addonIconSize) / 2}px`,
+                                onclick: () => inputType.val = passwordFieldTypeSwitch[inputType.val],
+                            },
+                            inputType.val === 'password' ? 'visibility' : 'visibility_off',
+                        )
+                        : '',
+                    showClearable
+                        ? i(
+                            {
+                                class: () => `material-symbols-rounded tg-input--clear text-secondary clickable`,
+                                style: `top: ${((getValue(props.height) || defaultHeight) - addonIconSize) / 2}px`,
+                                onclick: onClear,
+                            },
+                            'clear',
+                        )
+                        : '',
+                );
+            },
         ),
         () =>
-            getValue(props.validators)?.length > 0
+            isDirty.val && firstError.val
                 ? small({ class: 'tg-input--error' }, firstError)
                 : '',
         Portal(
@@ -216,21 +230,24 @@ stylesheet.replace(`
-.tg-input--label {
+.tg-input--field {
     position: relative;
 }
 
-.tg-input--icon ~ .tg-input--field {
+.tg-input--field:has(.tg-input--icon) {
     padding-left: 28px;
 }
 
-.tg-input--clear,
-.tg-input--visibility {
+.tg-input--icon-actions {
     position: absolute;
-    font-size: ${addonIconSize}px;
     right: 8px;
 }
 
-.tg-input--visibility + .tg-input--clear {
-    right: ${addonIconSize + 16}px;
+.tg-input--clear,
+.tg-input--visibility {
+    font-size: ${addonIconSize}px;
 }
 
 .tg-input--field.sufix-padding-1 {
     padding-right: ${addonIconSize + 8}px;
 }

diff --git a/testgen/ui/components/frontend/js/components/score_breakdown.js b/testgen/ui/components/frontend/js/components/score_breakdown.js
index be530235..3c8e5e9b 100644
--- a/testgen/ui/components/frontend/js/components/score_breakdown.js
+++ b/testgen/ui/components/frontend/js/components/score_breakdown.js
@@ -25,6 +25,7 @@ const ScoreBreakdown = (score, breakdown, category, scoreType, onViewDetails) =>
         options: Object.entries(CATEGORIES)
             .sort((A, B) => A[1].localeCompare(B[1]))
             .map(([value, label]) => ({ value, label })),
+        height: 32,
         onChange: (value) => emitEvent('CategoryChanged', { payload: value }),
         testId: 'groupby-selector',
     });
@@ -41,6 +42,7 @@ const ScoreBreakdown = (score, breakdown, category, scoreType, onViewDetails) =>
         label: '',
         value: selectedScoreType,
         options: scoreTypeOptions.map((s) => ({ label: SCORE_TYPE_LABEL[s], value: s, selected: s === scoreType })),
+        height: 32,
         onChange: (value) => emitEvent('ScoreTypeChanged', { payload: value }),
         testId: 'score-type-selector',
     });

diff --git a/testgen/ui/components/frontend/js/components/select.js b/testgen/ui/components/frontend/js/components/select.js
index eb885fa2..28879e04 100644
--- a/testgen/ui/components/frontend/js/components/select.js
+++ b/testgen/ui/components/frontend/js/components/select.js
@@ -15,6 +15,7 @@
 * @property {boolean} allowNull
 * @property {Function|null} onChange
 * @property {boolean?} disabled
+ * @property {boolean?} required
 * @property {number?} width
 * @property {number?} height
 * @property {string?} style
@@ -70,7 +71,7 @@ const Select = (/** @type {Properties} */ props) => {
         valueLabel.val = selectedOption?.label ?? '';
         valueIcon.val = selectedOption?.icon ?? undefined;
 
-        props.onChange?.(currentValue);
+        props.onChange?.(currentValue, { valid: !!currentValue || !getValue(props.required) });
     }
 });
@@ -82,7 +83,13 @@ const Select = (/** @type {Properties} */ props) => {
     onclick: van.derive(() => !getValue(props.disabled) ? () => opened.val = !opened.val : null),
     'data-testid': getValue(props.testId) ?? '',
 },
-span({'data-testid': 'select-label'}, props.label),
+span(
+    { class: 'flex-row fx-gap-1', 'data-testid': 'select-label' },
+    props.label,
+    () => getValue(props.required)
+        ? span({ class: 'text-error' }, '*')
+        : '',
+),
 div(
     {
         class: () => `flex-row tg-select--field ${opened.val ? 'opened' : ''}`,
@@ -146,7 +153,7 @@ stylesheet.replace(`
 .tg-select--field {
     box-sizing: border-box;
     width: 100%;
-    height: 32px;
+    height: 38px;
     min-width: 200px;
     border: 1px solid transparent;
     transition: border-color 0.3s;

diff --git a/testgen/ui/components/frontend/js/components/table_group_form.js b/testgen/ui/components/frontend/js/components/table_group_form.js
index 909160cf..6b072255 100644
--- a/testgen/ui/components/frontend/js/components/table_group_form.js
+++ b/testgen/ui/components/frontend/js/components/table_group_form.js
@@ -161,11 +161,11 @@ const TableGroupForm = (props) => {
         label: 'Connection',
         value: tableGroupConnectionId.rawVal,
         options: connectionOptions,
-        height: 38,
+        required: true,
         disabled: props.disableConnectionSelector,
-        onChange: (value) => {
+        onChange: (value, state) => {
             tableGroupConnectionId.val = value;
-            setFieldValidity('connection_id', !!value);
+            setFieldValidity('connection_id', state.valid);
         },
     })
     : undefined,
@@ -216,12 +216,11 @@ const MainForm = (
     tableGroupSchema,
 ) => {
     return div(
-        { class: 'flex-row fx-gap-3 fx-flex-wrap' },
+        { class: 'flex-row fx-align-flex-start fx-gap-3 fx-flex-wrap' },
         Input({
             name: 'table_groups_name',
             label: 'Name',
             value: tableGroupsName,
-            height: 38,
             class: 'tg-column-flex',
             help: 'Unique name to describe the table group',
             helpPlacement: 'bottom-right',
@@ -235,7 +234,6 @@ const MainForm = (
             name: 'table_group_schema',
             label: 'Schema',
             value: tableGroupSchema,
-            height: 38,
             class: 'tg-column-flex',
             help: 'Database schema containing the tables for the Table Group',
             helpPlacement: 'bottom-left',
@@ -268,7 +266,6 @@ const CriteriaForm = (
             name: 'profiling_include_mask',
             label: 'Tables to Include Mask',
             value: profilingIncludeMask,
-            height: 38,
             help: 'SQL filter supported by your database\'s LIKE operator for table names to include',
             onChange: (value, state) => {
                 profilingIncludeMask.val = value;
@@ -279,7 +276,6 @@ const CriteriaForm = (
             name: 'profiling_exclude_mask',
             label: 'Tables to Exclude Mask',
             value: profilingExcludeMask,
-            height: 38,
             help: 'SQL filter supported by your database\'s LIKE operator for table names to exclude',
             onChange: (value, state) => {
                 profilingExcludeMask.val = value;
@@ -303,7 +299,6 @@ const CriteriaForm = (
             name: 'profile_id_column_mask',
             label: 'Profiling ID Column Mask',
             value: profileIdColumnMask,
-            height: 38,
             class: 'tg-column-flex',
             help: 'SQL filter supported by your database\'s LIKE operator representing ID columns',
             onChange: (value, state) => {
@@ -315,7 +310,6 @@ const CriteriaForm = (
             name: 'profile_sk_column_mask',
             label: 'Profiling Surrogate Key Column Mask',
             value: profileSkColumnMask,
-            height: 38,
             class: 'tg-column-flex',
             help: 'SQL filter supported by your database\'s LIKE operator representing surrogate key columns',
             onChange: (value, state) => {
@@ -366,7 +360,6 @@ const SettingsForm = (
             type: 'number',
             label: 'Min Profiling Age (in days)',
             value: profilingDelayDays,
-            height: 38,
             class: 'tg-column-flex',
             help: 'Number of days to wait before new profiling will be available to generate tests',
             onChange: (value, state) => {
@@ -402,7 +395,6 @@ const SamplingForm = (
             type: 'number',
             label: 'Sample percent',
             value: profileSamplePercent,
-            height: 38,
             help: 'Percent of records to include in the sample, unless the calculated count falls below the specified minimum',
             onChange: (value, state) => {
                 profileSamplePercent.val = value;
@@ -415,7 +407,6 @@ const SamplingForm = (
             type: 'number',
             label: 'Min Sample Record Count',
             value: profileSampleMinCount,
-            height: 38,
             help: 'Minimum number of records to be included in any sample (if available)',
             onChange: (value, state) => {
                 profileSampleMinCount.val = value;
@@ -446,7 +437,6 @@ const TaggingForm = (
             class: 'fx-flex mb-3',
             label: 'Description',
             value: description,
-            height: 38,
             onChange: (value, state) => {
                 description.val = value;
                 options.setValidity?.('description', state.valid);
@@ -458,7 +448,6 @@ const TaggingForm = (
             name: 'data_source',
             label: 'Data Source',
             value: dataSource,
-            height: 38,
             help: 'Original source of the dataset',
             onChange: (value, state) => {
                 dataSource.val = value;
@@ -469,7 +458,6 @@ const TaggingForm = (
             name: 'source_process',
             label: 'Source Process',
             value: sourceProcess,
-            height: 38,
             help: 'Process, program, or data flow that produced the dataset',
             onChange: (value, state) => {
                 sourceProcess.val = value;
@@ -480,7 +468,6 @@ const TaggingForm = (
             name: 'business_domain',
             label: 'Business Domain',
             value: businessDomain,
-            height: 38,
             help: 'Business division responsible for the dataset, e.g., Finance, Sales, Manufacturing',
             onChange: (value, state) => {
                 businessDomain.val = value;
@@ -491,7 +478,6 @@ const TaggingForm = (
             name: 'transform_level',
             label: 'Transform Level',
             value: transformLevel,
-            height: 38,
             help: 'Data warehouse processing stage, e.g., Raw, Conformed, Processed, Reporting, or Medallion level (bronze, silver, gold)',
             onChange: (value, state) => {
                 transformLevel.val = value;
@@ -502,7 +488,6 @@ const TaggingForm = (
             name: 'source_system',
             label: 'Source System',
             value: sourceSystem,
-            height: 38,
             help: 'Enterprise system source for the dataset',
             onChange: (value, state) => {
                 sourceSystem.val = value;
@@ -513,7 +498,6 @@ const TaggingForm = (
             name: 'data_location',
             label: 'Data Location',
             value: dataLocation,
-            height: 38,
             help: 'Physical or virtual location of the dataset, e.g., Headquarters, Cloud',
             onChange: (value, state) => {
                 dataLocation.val = value;
@@ -524,7 +508,6 @@ const TaggingForm = (
             name: 'stakeholder_group',
             label: 'Stakeholder Group',
             value: stakeholderGroup,
-            height: 38,
             help: 'Data owners or stakeholders responsible for the dataset',
             onChange: (value, state) => {
                 stakeholderGroup.val = value;
@@ -535,7 +518,6 @@ const TaggingForm = (
             name: 'data_product',
             label: 'Data Product',
             value: dataProduct,
-            height: 38,
             help: 'Data domain that comprises the dataset',
             onChange: (value, state) => {
                 dataProduct.val = value;

diff --git a/testgen/ui/components/frontend/js/components/tree.js b/testgen/ui/components/frontend/js/components/tree.js
index 8f8e95b0..e32357b0 100644
--- a/testgen/ui/components/frontend/js/components/tree.js
+++ b/testgen/ui/components/frontend/js/components/tree.js
@@ -130,6 +130,7 @@ const Toolbar = (
     Input({
         icon: 'search',
         clearable: true,
+        height: 32,
         onChange: (/** @type string */ value) => {
             search.val = value;
             filterTree(nodes.val, isNodeHidden);

diff --git a/testgen/ui/components/frontend/js/form_validators.js b/testgen/ui/components/frontend/js/form_validators.js
index 905003d3..d57a2cc4 100644
--- a/testgen/ui/components/frontend/js/form_validators.js
+++ b/testgen/ui/components/frontend/js/form_validators.js
@@ -13,6 +13,13 @@ function required(value) {
     return null;
 }
 
+function noSpaces(value) {
+    if (value?.includes(' ')) {
+        return `Value cannot contain spaces.`;
+    }
+    return null;
+}
+
 /**
  *
  * @param {number} min
@@ -20,7 +27,7 @@
  */
 function minLength(min) {
     return (value) => {
-        if (typeof value !== 'string' || value.length < min) {
+        if (value && value.length < min) {
             return `Value must be at least ${min} characters long.`;
         }
         return null;
@@ -45,7 +52,7 @@ function maxLength(max) {
 * To use with FileInput, enforce a cap on file size
 * allowed to upload.
 *
- * @param {number} size
+ * @param {number} limit
 * @returns {Validator}
 */
 function sizeLimit(limit) {
@@ -67,6 +74,7 @@ function sizeLimit(limit) {
 export {
     maxLength,
     minLength,
+    noSpaces,
     required,
     sizeLimit,
 };

diff --git a/testgen/ui/components/frontend/js/pages/data_catalog.js b/testgen/ui/components/frontend/js/pages/data_catalog.js
index 2587a4ef..42fed8cc 100644
--- a/testgen/ui/components/frontend/js/pages/data_catalog.js
+++ b/testgen/ui/components/frontend/js/pages/data_catalog.js
@@ -185,7 +185,6 @@ const DataCatalog = (/** @type Properties */ props) => {
     label: 'Table Group',
     value: getValue(props.table_group_filter_options)?.find((op) => op.selected)?.value ?? null,
     options: getValue(props.table_group_filter_options) ?? [],
-    height: 38,
     style: 'font-size: 14px;',
     testId: 'table-group-filter',
     onChange: (value) => emitEvent('TableGroupSelected', {payload: value}),
@@ -516,6 +515,7 @@ const TagsCard = (/** @type TagProperties */ props, /** @type Table | Column */
     return Input({
         label, help,
         width: key === 'description' ? descriptionWidth : width,
+        height: 32,
         value: state.rawVal,
         placeholder: (inheritTable || inheritTableGroup) ? `Inherited: ${inheritTable ?? inheritTableGroup}` : null,
         autocompleteOptions: props.tagOptions?.[key],
@@ -637,6 +637,7 @@ const MultiEdit = (/** @type Properties */ props, /** @type Object */ selectedIt
     })
     : Input({
         label, help, width,
+        height: 32,
         placeholder: () => checkedState.val ? null : '(keep current values)',
         autocompleteOptions: tagOptions[key],
         onChange: (value) => valueState.val = value || null,

diff --git a/testgen/ui/components/frontend/js/pages/project_dashboard.js b/testgen/ui/components/frontend/js/pages/project_dashboard.js
index d2394b03..1a90c92a 100644
--- a/testgen/ui/components/frontend/js/pages/project_dashboard.js
+++ b/testgen/ui/components/frontend/js/pages/project_dashboard.js
@@ -91,7 +91,6 @@ const ProjectDashboard = (/** @type Properties */ props) => {
     { class: 'flex-row fx-align-flex-end fx-gap-4' },
     Input({
         width: 230,
-        height: 38,
         style: 'font-size: 14px;',
         icon: 'search',
         clearable: true,
 
     label: 'Sort by',
     value: tableGroupsSortOption,
     options: props.table_groups_sort_options?.val ??
[], - height: 38, style: 'font-size: 14px;', testId: 'table-groups-sort', }), diff --git a/testgen/ui/components/frontend/js/pages/quality_dashboard.js b/testgen/ui/components/frontend/js/pages/quality_dashboard.js index 55a5e22f..f10116f2 100644 --- a/testgen/ui/components/frontend/js/pages/quality_dashboard.js +++ b/testgen/ui/components/frontend/js/pages/quality_dashboard.js @@ -97,7 +97,6 @@ const Toolbar = ( { class: 'flex-row fx-align-flex-end mb-4' }, Input({ width: 230, - height: 38, style: 'font-size: 14px; margin-right: 16px;', icon: 'search', clearable: true, @@ -109,7 +108,6 @@ const Toolbar = ( Select({ id: 'score-dashboard-sort', label: 'Sort by', - height: 38, style: 'font-size: 14px;', value: sortedBy, options: sortOptions, diff --git a/testgen/ui/components/frontend/js/pages/table_group_list.js b/testgen/ui/components/frontend/js/pages/table_group_list.js index 4376f856..c4bde7e2 100644 --- a/testgen/ui/components/frontend/js/pages/table_group_list.js +++ b/testgen/ui/components/frontend/js/pages/table_group_list.js @@ -228,15 +228,14 @@ const Toolbar = (permissions, connections, selectedConnection, tableGroupNameFil }); return div( - { class: 'flex-row fx-align-flex-end fx-justify-space-between mb-4' }, + { class: 'flex-row fx-align-flex-end fx-justify-space-between fx-gap-4 fx-flex-wrap mb-4' }, div( - {class: 'flex-row fx-gap-4'}, + {class: 'flex-row fx-align-flex-end fx-gap-4'}, (getValue(connections) ?? [])?.length > 1 ? Select({ testId: 'connection-select', label: 'Connection', allowNull: true, - height: 38, value: connection, options: getValue(connections)?.map((connection) => ({ label: connection.connection_name, @@ -250,7 +249,6 @@ const Toolbar = (permissions, connections, selectedConnection, tableGroupNameFil icon: 'search', label: '', placeholder: 'Search table group names', - height: 38, width: 300, clearable: true, value: tableGroupFilter, diff --git a/testgen/ui/components/frontend/js/pages/test_suites.js b/testgen/ui/components/frontend/js/pages/test_suites.js index f38faed3..ce34cba5 100644 --- a/testgen/ui/components/frontend/js/pages/test_suites.js +++ b/testgen/ui/components/frontend/js/pages/test_suites.js @@ -59,7 +59,6 @@ const TestSuites = (/** @type Properties */ props) => { value: getValue(props.table_group_filter_options)?.find((op) => op.selected)?.value ?? null, options: getValue(props.table_group_filter_options) ?? 
[], allowNull: true, - height: 38, style: 'font-size: 14px;', testId: 'table-group-filter', onChange: (value) => emitEvent('FilterApplied', {payload: value}), From ab190bdd2fb60de4616d0d3354d985adf95168da Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Mon, 18 Aug 2025 23:15:30 -0400 Subject: [PATCH 10/28] feat(radio-group): support vertical layout --- .../frontend/js/components/connection_form.js | 8 +-- .../frontend/js/components/radio_group.js | 70 ++++++++++++++++--- 2 files changed, 65 insertions(+), 13 deletions(-) diff --git a/testgen/ui/components/frontend/js/components/connection_form.js b/testgen/ui/components/frontend/js/components/connection_form.js index fa4df34a..ed0f0848 100644 --- a/testgen/ui/components/frontend/js/components/connection_form.js +++ b/testgen/ui/components/frontend/js/components/connection_form.js @@ -401,7 +401,7 @@ const RedshiftForm = ( ], value: connectByUrl, onChange: (value) => connectByUrl.val = value, - inline: true, + layout: 'inline', }), div( { class: 'flex-row fx-gap-3 fx-flex' }, @@ -579,7 +579,7 @@ const DatabricksForm = ( ], value: connectByUrl, onChange: (value) => connectByUrl.val = value, - inline: true, + layout: 'inline', }), div( { class: 'flex-row fx-gap-3 fx-flex' }, @@ -772,7 +772,7 @@ const SnowflakeForm = ( ], value: connectByUrl, onChange: (value) => connectByUrl.val = value, - inline: true, + layout: 'inline', }), div( { class: 'flex-row fx-gap-3 fx-flex' }, @@ -845,7 +845,7 @@ const SnowflakeForm = ( ], value: connectByKey, onChange: (value) => connectByKey.val = value, - inline: true, + layout: 'inline', }), Input({ diff --git a/testgen/ui/components/frontend/js/components/radio_group.js b/testgen/ui/components/frontend/js/components/radio_group.js index 26807332..4f8b0008 100644 --- a/testgen/ui/components/frontend/js/components/radio_group.js +++ b/testgen/ui/components/frontend/js/components/radio_group.js @@ -2,37 +2,41 @@ * @typedef Option * @type {object} * @property {string} label + * @property {string} help * @property {string | number | boolean | null} value * * @typedef Properties * @type {object} * @property {string} label * @property {Option[]} options - * @property {string | number | boolean | null} selected + * @property {string | number | boolean | null} value * @property {function(string | number | boolean | null)?} onChange * @property {number?} width - * @property {boolean?} inline + * @property {('default' | 'inline' | 'vertical')?} layout */ import van from '../van.min.js'; import { getRandomId, getValue, loadStylesheet } from '../utils.js'; +import { withTooltip } from './tooltip.js'; +import { Icon } from './icon.js'; -const { div, input, label } = van.tags; +const { div, input, label, span } = van.tags; const RadioGroup = (/** @type Properties */ props) => { loadStylesheet('radioGroup', stylesheet); const groupName = getRandomId(); + const layout = getValue(props.layout) ?? 'default'; return div( - { class: () => `${getValue(props.inline) ? 'flex-row fx-gap-2' : ''}`, style: () => `width: ${props.width ? getValue(props.width) + 'px' : 'auto'}` }, + { class: () => `tg-radio-group--wrapper ${layout}`, style: () => `width: ${props.width ? getValue(props.width) + 'px' : 'auto'}` }, div( - { class: () => `text-caption ${getValue(props.inline) ? 
'' : 'mb-1'}` }, + { class: 'text-caption tg-radio-group--label' }, props.label, ), () => div( - { class: 'flex-row fx-gap-4 tg-radio-group' }, + { class: 'tg-radio-group' }, getValue(props.options).map(option => label( - { class: 'flex-row fx-gap-2 clickable' }, + { class: `flex-row fx-gap-2 clickable ${layout === 'vertical' ? 'fx-align-flex-start' : ''}` }, input({ type: 'radio', name: groupName, @@ -44,7 +48,22 @@ const RadioGroup = (/** @type Properties */ props) => { }), class: 'tg-radio-group--input', }), - option.label, + layout === 'vertical' + ? div( + { class: 'flex-column fx-gap-1' }, + option.label, + span( + { class: 'text-caption tg-radio-group--help' }, + option.help, + ), + ) + : option.label, + layout !== 'vertical' && option.help + ? withTooltip( + Icon({ size: 16, classes: 'text-disabled' }, 'help'), + { text: option.help, position: 'top', width: 200 } + ) + : null, )), ), ); @@ -52,11 +71,39 @@ const RadioGroup = (/** @type Properties */ props) => { const stylesheet = new CSSStyleSheet(); stylesheet.replace(` -.tg-radio-group { +.tg-radio-group--wrapper.inline { + display: flex; + flex-direction: row; + align-items: center; + gap: 8px; +} + +.tg-radio-group--wrapper.default .tg-radio-group--label, +.tg-radio-group--wrapper.vertical .tg-radio-group--label { + margin-bottom: 4px; +} + +.tg-radio-group--wrapper.vertical .tg-radio-group--label { + margin-bottom: 12px; +} + +.tg-radio-group--wrapper.default .tg-radio-group, +.tg-radio-group--wrapper.inline .tg-radio-group { + display: flex; + flex-direction: row; + align-items: center; + gap: 16px; height: 32px; } +.tg-radio-group--wrapper.vertical .tg-radio-group { + display: flex; + flex-direction: column; + gap: 12px; +} + .tg-radio-group--input { + flex: 0 0 auto; appearance: none; box-sizing: border-box; margin: 0; @@ -101,6 +148,11 @@ stylesheet.replace(` background-color: var(--primary-color); border-radius: 5px; } + +.tg-radio-group--help { + white-space: pre-wrap; + line-height: 16px; +} `); export { RadioGroup }; From a5e2a0667fec44432d6f6aae62439d8083cc86ec Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Mon, 18 Aug 2025 23:16:18 -0400 Subject: [PATCH 11/28] fix(styles): misc css improvements --- testgen/ui/assets/style.css | 5 ++++- testgen/ui/components/frontend/css/shared.css | 6 ++++++ .../ui/components/frontend/js/components/score_issues.js | 5 ----- testgen/ui/components/frontend/js/components/sidebar.js | 2 +- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/testgen/ui/assets/style.css b/testgen/ui/assets/style.css index aaf90add..b413f980 100644 --- a/testgen/ui/assets/style.css +++ b/testgen/ui/assets/style.css @@ -79,7 +79,9 @@ img.dk-logo-img { /* Sidebar */ [data-testid="stSidebarContent"] [data-testid="stSidebarHeader"] { - padding: 16px 20px; + padding: 16px 20px 20px; + margin-bottom: 0; + height: auto; } [data-testid="stSidebarHeader"] .stLogo { @@ -200,6 +202,7 @@ button[title="Show password text"] { display: none; } +.element-container:has(iframe[height="0"][title="extra_streamlit_components.CookieManager.cookie_manager"]), .element-container:has(iframe[height="0"][title="streamlit_javascript.streamlit_javascript"]), .element-container:has(iframe[height="0"][title="testgen.ui.components.utils.component.testgen"]) { display: none !important; diff --git a/testgen/ui/components/frontend/css/shared.css b/testgen/ui/components/frontend/css/shared.css index 8cfd5d9d..a3d18383 100644 --- a/testgen/ui/components/frontend/css/shared.css +++ b/testgen/ui/components/frontend/css/shared.css 
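A minimal usage sketch of the RadioGroup API updated in the patch above — the labels, help strings, and the `connectByKey` state are illustrative assumptions, not part of the diff:

    RadioGroup({
        label: 'Authentication method',
        layout: 'vertical',
        value: connectByKey,
        options: [
            // `help` renders as caption text below each label in the vertical layout
            { label: 'Password', value: false, help: 'Authenticate with username and password.' },
            { label: 'Key pair', value: true, help: 'Authenticate with a private key file and an optional passphrase.' },
        ],
        onChange: (value) => connectByKey.val = value,
    });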
@@ -179,6 +179,12 @@ body { color: var(--caption-text-color); text-transform: uppercase; } + +.table-header > *, +.table-row > * { + box-sizing: border-box; + padding: 0 4px; +} /* */ /* Text utilities */ diff --git a/testgen/ui/components/frontend/js/components/score_issues.js b/testgen/ui/components/frontend/js/components/score_issues.js index d773a1bb..06cd5170 100644 --- a/testgen/ui/components/frontend/js/components/score_issues.js +++ b/testgen/ui/components/frontend/js/components/score_issues.js @@ -375,11 +375,6 @@ stylesheet.replace(` .issues-columns { text-transform: capitalize; } - -.issues-columns > span, -.issues-row > div { - padding: 0 4px; -} `); export { IssuesTable }; diff --git a/testgen/ui/components/frontend/js/components/sidebar.js b/testgen/ui/components/frontend/js/components/sidebar.js index 70b93883..9c6e9329 100644 --- a/testgen/ui/components/frontend/js/components/sidebar.js +++ b/testgen/ui/components/frontend/js/components/sidebar.js @@ -223,7 +223,7 @@ stylesheet.replace(` display: flex; flex-direction: column; justify-content: space-between; - height: calc(100% - 64px); + height: calc(100% - 68px); } .menu .menu--project { From b5757d496011c9609fb7f473b680651f35299d51 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Mon, 18 Aug 2025 23:23:05 -0400 Subject: [PATCH 12/28] refactor(auth): implement as plugin class --- testgen/commands/run_execute_tests.py | 2 +- testgen/commands/run_profiling_bridge.py | 2 +- testgen/common/mixpanel_service.py | 2 +- testgen/common/models/entity.py | 3 +- testgen/common/models/user.py | 26 ++++- .../030_initialize_new_schema_structure.sql | 21 ++-- .../dbupgrade/0146_incremental_upgrade.sql | 8 ++ testgen/ui/app.py | 14 +-- testgen/ui/assets/scripts.js | 1 + testgen/ui/auth.py | 102 ++++++++++++++++++ testgen/ui/bootstrap.py | 11 +- testgen/ui/components/widgets/empty_state.py | 3 +- testgen/ui/components/widgets/page.py | 4 +- testgen/ui/components/widgets/sidebar.py | 19 ++-- testgen/ui/navigation/menu.py | 9 +- testgen/ui/navigation/page.py | 7 +- testgen/ui/navigation/router.py | 9 +- testgen/ui/session.py | 16 ++- testgen/ui/views/connections.py | 7 +- testgen/ui/views/data_catalog.py | 43 ++++---- testgen/ui/views/dialogs/manage_schedules.py | 7 +- testgen/ui/views/hygiene_issues.py | 6 +- testgen/ui/views/login.py | 54 +++++----- testgen/ui/views/profiling_results.py | 4 +- testgen/ui/views/profiling_runs.py | 7 +- testgen/ui/views/project_dashboard.py | 5 +- testgen/ui/views/project_settings.py | 6 +- testgen/ui/views/quality_dashboard.py | 7 +- testgen/ui/views/score_details.py | 6 +- testgen/ui/views/score_explorer.py | 6 +- testgen/ui/views/table_groups.py | 7 +- testgen/ui/views/test_definitions.py | 8 +- testgen/ui/views/test_results.py | 10 +- testgen/ui/views/test_runs.py | 7 +- testgen/ui/views/test_suites.py | 7 +- testgen/utils/plugins.py | 9 +- 36 files changed, 290 insertions(+), 175 deletions(-) create mode 100644 testgen/template/dbupgrade/0146_incremental_upgrade.sql create mode 100644 testgen/ui/auth.py diff --git a/testgen/commands/run_execute_tests.py b/testgen/commands/run_execute_tests.py index e5ff2beb..1c899d57 100644 --- a/testgen/commands/run_execute_tests.py +++ b/testgen/commands/run_execute_tests.py @@ -123,7 +123,7 @@ def run_execution_steps_in_background(project_code, test_suite): empty_cache() background_thread = threading.Thread( target=run_execution_steps, - args=(project_code, test_suite, session.username), + args=(project_code, test_suite, session.auth.user_display), ) 
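         # `session.auth.user_display` is defined by the new Authentication class
         # introduced later in this patch: it resolves to user.name or user.username,
         # replacing the old `session.username` attribute.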
background_thread.start() else: diff --git a/testgen/commands/run_profiling_bridge.py b/testgen/commands/run_profiling_bridge.py index 6bcccc6b..78aa8a56 100644 --- a/testgen/commands/run_profiling_bridge.py +++ b/testgen/commands/run_profiling_bridge.py @@ -211,7 +211,7 @@ def run_profiling_in_background(table_group_id): empty_cache() background_thread = threading.Thread( target=run_profiling_queries, - args=(table_group_id, session.username), + args=(table_group_id, session.auth.user_display), ) background_thread.start() else: diff --git a/testgen/common/mixpanel_service.py b/testgen/common/mixpanel_service.py index b534cf69..2863a0b4 100644 --- a/testgen/common/mixpanel_service.py +++ b/testgen/common/mixpanel_service.py @@ -57,7 +57,7 @@ def send_event(self, event_name, include_usage=False, **properties): properties.setdefault("instance_id", self.instance_id) properties.setdefault("edition", settings.DOCKER_HUB_REPOSITORY) properties.setdefault("version", settings.VERSION) - properties.setdefault("username", session.username) + properties.setdefault("username", session.auth.user_display) properties.setdefault("distinct_id", self.get_distinct_id(properties["username"])) if include_usage: properties.update(self.get_usage()) diff --git a/testgen/common/models/entity.py b/testgen/common/models/entity.py index a6175606..8545b3da 100644 --- a/testgen/common/models/entity.py +++ b/testgen/common/models/entity.py @@ -7,13 +7,14 @@ from sqlalchemy import delete, select from sqlalchemy.dialects import postgresql from sqlalchemy.orm import InstrumentedAttribute -from sqlalchemy.sql.elements import BinaryExpression +from sqlalchemy.sql.elements import BinaryExpression, BooleanClauseList from testgen.common.models import Base, get_current_session from testgen.utils import is_uuid4, make_json_safe ENTITY_HASH_FUNCS = { BinaryExpression: lambda x: str(x.compile(compile_kwargs={"literal_binds": True})), + BooleanClauseList: lambda x: str(x.compile(compile_kwargs={"literal_binds": True})), tuple: lambda x: [str(y) for y in x], } diff --git a/testgen/common/models/user.py b/testgen/common/models/user.py index bcba4599..7824f57b 100644 --- a/testgen/common/models/user.py +++ b/testgen/common/models/user.py @@ -1,9 +1,11 @@ +from datetime import UTC, datetime from typing import Literal from uuid import UUID, uuid4 -from sqlalchemy import Column, String, asc +from sqlalchemy import Column, String, asc, func, update from sqlalchemy.dialects import postgresql +from testgen.common.models import get_current_session from testgen.common.models.custom_types import NullIfEmptyString from testgen.common.models.entity import Entity @@ -19,5 +21,25 @@ class User(Entity): name: str = Column(NullIfEmptyString) password: str = Column(String) role: RoleType = Column(String) + latest_login: datetime = Column(postgresql.TIMESTAMP) + + _get_by = "username" + _default_order_by = (asc(func.lower(username)),) + + def save(self, update_latest_login: bool = False) -> None: + if self.id and not update_latest_login: + values = { + column.key: getattr(self, column.key, None) + for column in self.__table__.columns + if column != User.latest_login + } + query = update(User).where(User.id == self.id).values(**values) + db_session = get_current_session() + db_session.execute(query) + db_session.commit() + User.clear_cache() + else: + if update_latest_login: + self.latest_login = datetime.now(UTC) + super().save() - _default_order_by = (asc(username),) diff --git a/testgen/template/dbsetup/030_initialize_new_schema_structure.sql 
b/testgen/template/dbsetup/030_initialize_new_schema_structure.sql index d8079a07..d945a4e1 100644 --- a/testgen/template/dbsetup/030_initialize_new_schema_structure.sql +++ b/testgen/template/dbsetup/030_initialize_new_schema_structure.sql @@ -622,22 +622,15 @@ CREATE TABLE functional_test_results ); CREATE TABLE auth_users ( - id UUID DEFAULT gen_random_uuid() + id UUID DEFAULT gen_random_uuid() CONSTRAINT pk_au_id PRIMARY KEY, - username VARCHAR(20), - email VARCHAR(120), - name VARCHAR(120), - password VARCHAR(120), - role VARCHAR(20) -); - -ALTER TABLE auth_users -ADD CONSTRAINT username_check -CHECK ( - LENGTH(username) >= 4 AND -- Minimum length of 4 characters - LENGTH(username) <= 20 AND -- Maximum length of 20 characters - username ~ '^[a-zA-Z0-9_]+$' -- Only alphanumeric characters and underscores allowed + username VARCHAR(256), + email VARCHAR(256), + name VARCHAR(256), + password VARCHAR(120), + role VARCHAR(20), + latest_login TIMESTAMP ); ALTER TABLE auth_users diff --git a/testgen/template/dbupgrade/0146_incremental_upgrade.sql b/testgen/template/dbupgrade/0146_incremental_upgrade.sql new file mode 100644 index 00000000..ffffc0ba --- /dev/null +++ b/testgen/template/dbupgrade/0146_incremental_upgrade.sql @@ -0,0 +1,8 @@ +SET SEARCH_PATH TO {SCHEMA_NAME}; + +ALTER TABLE auth_users + ADD COLUMN latest_login TIMESTAMP, + ALTER COLUMN username TYPE VARCHAR(256), + ALTER COLUMN email TYPE VARCHAR(256), + ALTER COLUMN name TYPE VARCHAR(256), + DROP CONSTRAINT username_check; diff --git a/testgen/ui/app.py b/testgen/ui/app.py index cc2e6173..9358938c 100644 --- a/testgen/ui/app.py +++ b/testgen/ui/app.py @@ -10,7 +10,7 @@ from testgen.ui import bootstrap from testgen.ui.assets import get_asset_path from testgen.ui.components import widgets as testgen -from testgen.ui.services import javascript_service, user_session_service +from testgen.ui.services import javascript_service from testgen.ui.session import session @@ -23,12 +23,14 @@ def render(log_level: int = logging.INFO): # Collapse when logging out because the sidebar takes some time to be removed from the DOM # Collapse for Catalog role since they only have access to one page initial_sidebar_state="collapsed" - if session.logging_out or user_session_service.user_has_catalog_role() + if session.auth and (session.auth.logging_out or (session.auth.is_logged_in and not session.auth.user_has_permission("view"))) else "auto", ) application = get_application(log_level=log_level) application.logger.debug("Starting Streamlit re-run") + if not session.auth: + session.auth = application.auth_class() status_ok, message = check_basic_configuration() if not status_ok: @@ -41,20 +43,18 @@ def render(log_level: int = logging.INFO): session.page_args_pending_router and session.page_args_pending_router.get("project_code") ) or st.query_params.get("project_code", session.sidebar_project) - if session.authentication_status is None and not session.logging_out: - user_session_service.load_user_session() + if not session.auth.is_logged_in and not session.auth.logging_out: + session.auth.load_user_session() application.logo.render() - if session.authentication_status and not session.logging_in: + if session.auth.is_logged_in and not session.auth.logging_in: with st.sidebar: testgen.sidebar( projects=Project.select_where(), current_project=session.sidebar_project, menu=application.menu, current_page=session.current_page, - username=session.username, - role=session.auth_role, version=version_service.get_version(), support_email=settings.SUPPORT_EMAIL, 
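                 # username/role props dropped here: sidebar.py now reads both
                 # values from session.auth directly (see its diff further below)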
) diff --git a/testgen/ui/assets/scripts.js b/testgen/ui/assets/scripts.js index 46e0aafb..575dfdf5 100644 --- a/testgen/ui/assets/scripts.js +++ b/testgen/ui/assets/scripts.js @@ -55,4 +55,5 @@ window.testgen = { states: {}, components: {}, loadedStylesheets: {}, + changeLocation: url => window.location.href = url, }; diff --git a/testgen/ui/auth.py b/testgen/ui/auth.py new file mode 100644 index 00000000..05abb2ed --- /dev/null +++ b/testgen/ui/auth.py @@ -0,0 +1,102 @@ +import base64 +import logging +from datetime import UTC, datetime +from typing import Literal + +import extra_streamlit_components as stx +import jwt +import streamlit as st + +from testgen import settings +from testgen.common.mixpanel_service import MixpanelService +from testgen.common.models.user import User +from testgen.ui.services.javascript_service import execute_javascript + +LOG = logging.getLogger("testgen") + +Permission = Literal["catalog", "view", "disposition", "edit", "administer"] + + +class Authentication: + + jwt_cookie_name = "dk_cookie_name" + jwt_cookie_expiry_days = 1 + + user: User | None = None + + # Intermediate state holders because auth cookie changes are not immediate + cookies_ready: bool = False + logging_in: bool = False + logging_out: bool = False + + @property + def is_logged_in(self) -> bool: + return bool(self.user) + + @property + def user_display(self) -> str | None: + return (self.user.name or self.user.username) if self.user else None + + @property + def default_page(self) -> str | None: + return "project-dashboard" if self.user else "" + + def user_has_permission(self, _permission: Permission) -> bool: + return True + + def get_jwt_hashing_key(self) -> bytes: + try: + return base64.b64decode(settings.JWT_HASHING_KEY_B64.encode("ascii")) + except Exception as e: + st.error( + "Error reading the JWT signing key from settings.\n\n Make sure you have a valid " + "base64 string assigned to the TG_JWT_HASHING_KEY environment variable." 
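+                # (assumption for illustration: a suitable key can be generated
+                # with base64.b64encode(os.urandom(32)).decode("ascii"))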
+ ) + st.stop() + + def get_credentials(self): + users = User.select_where() + usernames = {} + for item in users: + usernames[item.username.lower()] = { + "name": item.name, + "password": item.password, + } + return {"usernames": usernames} + + def login_user(self, username: str) -> None: + self.user = User.get(username) + self.user.save(update_latest_login=True) + MixpanelService().send_event("login", include_usage=True, role=self.user.role) + + def load_user_session(self) -> None: + cookies = self._load_cookies() + token = cookies.get(self.jwt_cookie_name) + if token is not None: + try: + token = jwt.decode(token, self.get_jwt_hashing_key(), algorithms=["HS256"]) + if token["exp_date"] > datetime.now(UTC).timestamp(): + self.user = User.get(token["username"]) + except Exception: + LOG.debug("Invalid auth token found on cookies", exc_info=True, stack_info=True) + + def end_user_session(self) -> None: + self._clear_jwt_cookie() + self.user = None + + def _clear_jwt_cookie(self) -> None: + execute_javascript( + f"""await (async function () {{ + window.parent.postMessage({{ type: 'TestgenLogout', cookie: '{self.jwt_cookie_name}' }}, '*'); + return 0; + }})() + """ + ) + + def _load_cookies(self) -> dict: + # Replacing this with st.context.cookies does not work + # Because it does not update when cookies are deleted on logout + cookie_manager = stx.CookieManager(key="testgen.cookies.get") + if cookie_manager.cookies: + self.cookies_ready = True + return cookie_manager.cookies diff --git a/testgen/ui/bootstrap.py b/testgen/ui/bootstrap.py index 3b048414..bb39c83b 100644 --- a/testgen/ui/bootstrap.py +++ b/testgen/ui/bootstrap.py @@ -3,6 +3,7 @@ from testgen import settings from testgen.common import configure_logging +from testgen.ui.auth import Authentication from testgen.ui.navigation.menu import Menu from testgen.ui.navigation.page import Page from testgen.ui.navigation.router import Router @@ -47,7 +48,8 @@ class Application(singleton.Singleton): - def __init__(self, logo: plugins.Logo, router: Router, menu: Menu, logger: logging.Logger) -> None: + def __init__(self, auth_class: Authentication, logo: plugins.Logo, router: Router, menu: Menu, logger: logging.Logger) -> None: + self.auth_class = auth_class self.logo = logo self.router = router self.menu = menu @@ -69,6 +71,7 @@ def run(log_level: int = logging.INFO) -> Application: plugins.cleanup() configure_logging(level=log_level) + auth_class = Authentication logo_class = plugins.Logo for plugin in installed_plugins: @@ -77,6 +80,9 @@ def run(log_level: int = logging.INFO) -> Application: if spec.page: pages.append(spec.page) + if spec.auth: + auth_class = spec.auth + if spec.logo: logo_class = spec.logo @@ -84,12 +90,13 @@ def run(log_level: int = logging.INFO) -> Application: spec.component.provide() return Application( + auth_class=auth_class, logo=logo_class(), router=Router(routes=pages), menu=Menu( items=list( { - page.path: dataclasses.replace(page.menu_item, page=page.path) + page.path: dataclasses.replace(page.menu_item, page=page.path, permission=page.permission) for page in pages if page.menu_item }.values() ), diff --git a/testgen/ui/components/widgets/empty_state.py b/testgen/ui/components/widgets/empty_state.py index 13c06708..726b639a 100644 --- a/testgen/ui/components/widgets/empty_state.py +++ b/testgen/ui/components/widgets/empty_state.py @@ -6,7 +6,8 @@ from testgen.ui.components.widgets.button import button from testgen.ui.components.widgets.link import link from testgen.ui.components.widgets.page import css_class, 
whitespace -from testgen.ui.services.user_session_service import DISABLED_ACTION_TEXT + +DISABLED_ACTION_TEXT = "You do not have permissions to perform this action. Contact your administrator." class EmptyStateMessage(Enum): diff --git a/testgen/ui/components/widgets/page.py b/testgen/ui/components/widgets/page.py index 721c1063..e9c743a2 100644 --- a/testgen/ui/components/widgets/page.py +++ b/testgen/ui/components/widgets/page.py @@ -6,7 +6,7 @@ from testgen.ui.components.widgets.breadcrumbs import Breadcrumb from testgen.ui.components.widgets.breadcrumbs import breadcrumbs as tg_breadcrumbs from testgen.ui.components.widgets.testgen_component import testgen_component -from testgen.ui.services import user_session_service +from testgen.ui.session import session from testgen.ui.views.dialogs.application_logs_dialog import application_logs_dialog UPGRADE_URL = "https://docs.datakitchen.io/articles/#!dataops-testgen-help/upgrade-testgen" @@ -64,7 +64,7 @@ def open_app_logs(): "support_email": settings.SUPPORT_EMAIL, "version": version.__dict__, "permissions": { - "can_edit": user_session_service.user_can_edit(), + "can_edit": session.auth.user_has_permission("edit"), }, }, on_change_handlers={ diff --git a/testgen/ui/components/widgets/sidebar.py b/testgen/ui/components/widgets/sidebar.py index 4ee5cec8..ec7c0ab2 100644 --- a/testgen/ui/components/widgets/sidebar.py +++ b/testgen/ui/components/widgets/sidebar.py @@ -7,7 +7,6 @@ from testgen.ui.components.utils.component import component from testgen.ui.navigation.menu import Menu from testgen.ui.navigation.router import Router -from testgen.ui.services import javascript_service, user_session_service from testgen.ui.session import session LOG = logging.getLogger("testgen") @@ -22,8 +21,6 @@ def sidebar( current_project: str | None = None, menu: Menu = None, current_page: str | None = None, - username: str | None = None, - role: str | None = None, version: Version | None = None, support_email: str | None = None, ) -> None: @@ -44,8 +41,8 @@ def sidebar( "current_project": current_project, "menu": menu.filter_for_current_user().sort_items().unflatten().asdict(), "current_page": current_page, - "username": username, - "role": role, + "username": session.auth.user_display, + "role": session.auth.user.role if session.auth.user else None, "logout_path": LOGOUT_PATH, "version": version.__dict__, "support_email": support_email, @@ -70,14 +67,20 @@ def on_change(): session.sidebar_event_id = event_id if event_data.get("path") == LOGOUT_PATH: - javascript_service.clear_component_states() - user_session_service.end_user_session() + session.auth.end_user_session() + # This hack is needed because the auth cookie does not immediately get cleared + # We don't want to try to load the session again on the next run + session.auth.logging_out = True + # streamlit_authenticator sets authentication_status implicitly + # So we need to clear it + session.authentication_status = None + Router().queue_navigation(to="") # Without the time.sleep, cookies sometimes don't get cleared on deployed instances # (even though it works fine locally) time.sleep(0.3) else: Router().queue_navigation( - to=event_data.get("path") or session.user_default_page, + to=event_data.get("path") or session.auth.default_page, with_args=event_data.get("params", {}), ) diff --git a/testgen/ui/navigation/menu.py b/testgen/ui/navigation/menu.py index d44002a1..978ab53b 100644 --- a/testgen/ui/navigation/menu.py +++ b/testgen/ui/navigation/menu.py @@ -1,7 +1,8 @@ import dataclasses import typing 
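A rough sketch (not part of this patch) of how a plugin might use the new auth_class hook from bootstrap.py above to enforce roles by overriding the permissive base Authentication.user_has_permission — the class name and the role-to-permission mapping are assumptions:

    class RoleAuthentication(Authentication):
        ROLE_PERMISSIONS: typing.ClassVar = {
            "catalog": {"catalog"},
            "admin": {"catalog", "view", "disposition", "edit", "administer"},
        }

        def user_has_permission(self, permission: Permission) -> bool:
            # Unauthenticated sessions get no permissions; unknown roles default to none
            if not self.user:
                return False
            return permission in self.ROLE_PERMISSIONS.get(self.user.role, set())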
-from testgen.ui.services import user_session_service +from testgen.ui.auth import Permission +from testgen.ui.session import session MenuSections = typing.Literal["Data Profiling", "Data Quality Testing", "Data Configuration", "Settings"] @@ -11,7 +12,7 @@ class MenuItem: label: str icon: str | None = dataclasses.field(default=None) page: str | None = dataclasses.field(default=None) - roles: list[user_session_service.RoleType] | None = dataclasses.field(default_factory=list) + permission: Permission = dataclasses.field(default="view") order: int = dataclasses.field(default=0) section: MenuSections | None = dataclasses.field(default=None) items: list["MenuItem"] | None = dataclasses.field(default=None) @@ -24,8 +25,8 @@ class Menu: def filter_for_current_user(self) -> "Menu": filtered_items = [] for menu_item in self.items: - item_roles = menu_item.roles or [] - if len(item_roles) <= 0 or any(map(user_session_service.user_has_role, item_roles)): + item_permission = menu_item.permission or "view" + if session.auth.user_has_permission(item_permission): filtered_items.append(menu_item) return dataclasses.replace(self, items=filtered_items) diff --git a/testgen/ui/navigation/page.py b/testgen/ui/navigation/page.py index 489a0fc1..d80bee29 100644 --- a/testgen/ui/navigation/page.py +++ b/testgen/ui/navigation/page.py @@ -9,6 +9,7 @@ import testgen.ui.navigation.router from testgen.common.models.project import Project +from testgen.ui.auth import Permission from testgen.ui.navigation.menu import MenuItem from testgen.ui.session import session @@ -19,6 +20,7 @@ class Page(abc.ABC): path: str menu_item: MenuItem | None = None + permission: Permission | None = "view" can_activate: typing.ClassVar[list[CanActivateGuard] | None] = None def __init__(self, router: testgen.ui.navigation.router.Router) -> None: @@ -31,7 +33,8 @@ def __init__(self, router: testgen.ui.navigation.router.Router) -> None: def _navigate(self) -> None: self.router.navigate_to_pending() - for guard in self.can_activate or []: + permission_guard = lambda: session.auth.user_has_permission(self.permission) if self.permission else True + for guard in [ permission_guard, *(self.can_activate or []) ]: can_activate = guard() if can_activate != True: session.sidebar_project = session.sidebar_project or Project.select_where()[0].project_code @@ -42,7 +45,7 @@ def _navigate(self) -> None: session.page_pending_login = self.path session.page_args_pending_login = st.query_params.to_dict() - default_page = session.user_default_page or "" + default_page = session.auth.default_page or "" with_args = { "project_code": session.sidebar_project } if default_page else {} return self.router.navigate(to=default_page, with_args=with_args) diff --git a/testgen/ui/navigation/router.py b/testgen/ui/navigation/router.py index c754aab6..ab968569 100644 --- a/testgen/ui/navigation/router.py +++ b/testgen/ui/navigation/router.py @@ -32,10 +32,10 @@ def run(self) -> None: # This hack is needed because the auth cookie is not set if navigation happens immediately after login # We have to navigate on the next run - if session.logging_in: - session.logging_in = False + if session.auth.logging_in: + session.auth.logging_in = False - pending_route = session.page_pending_login or session.user_default_page or "" + pending_route = session.page_pending_login or session.auth.default_page or "" pending_args = ( (session.page_args_pending_login or {}) if session.page_pending_login @@ -43,10 +43,9 @@ def run(self) -> None: ) session.page_pending_login = None 
session.page_args_pending_login = None - self.navigate(to=pending_route, with_args=pending_args) - if session.cookies_ready: + if session.auth.cookies_ready: current_page = session.page_pending_cookies or current_page session.page_pending_cookies = None diff --git a/testgen/ui/session.py b/testgen/ui/session.py index cb8d028a..e5cd7ebb 100644 --- a/testgen/ui/session.py +++ b/testgen/ui/session.py @@ -1,9 +1,10 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, ClassVar, Literal, TypeVar +from typing import TYPE_CHECKING, Any, ClassVar, TypeVar if TYPE_CHECKING: from testgen.common.version_service import Version + from testgen.ui.auth import Authentication from collections.abc import Callable @@ -18,21 +19,16 @@ class TestgenSession(Singleton): - cookies_ready: bool - logging_in: bool - logging_out: bool + auth: Authentication + # streamlit_authenticator sets this attribute implicitly + authentication_status: bool + page_pending_cookies: st.Page # type: ignore page_pending_login: str page_args_pending_login: dict page_args_pending_router: dict current_page: str - name: str - username: str - authentication_status: bool - auth_role: Literal["admin", "data_quality", "analyst", "business", "catalog"] - user_default_page: str - sidebar_project: str add_project: bool version: Version | None diff --git a/testgen/ui/views/connections.py b/testgen/ui/views/connections.py index 546b2bb7..6ece4ce7 100644 --- a/testgen/ui/views/connections.py +++ b/testgen/ui/views/connections.py @@ -24,7 +24,6 @@ from testgen.ui.components import widgets as testgen from testgen.ui.navigation.menu import MenuItem from testgen.ui.navigation.page import Page -from testgen.ui.services import user_session_service from testgen.ui.session import session, temp_value LOG = logging.getLogger("testgen") @@ -35,8 +34,7 @@ class ConnectionsPage(Page): path = "connections" can_activate: typing.ClassVar = [ - lambda: session.authentication_status, - lambda: not user_session_service.user_has_catalog_role(), + lambda: session.auth.is_logged_in, lambda: "project_code" in st.query_params, ] menu_item = MenuItem( @@ -44,7 +42,6 @@ class ConnectionsPage(Page): label=PAGE_TITLE, section="Data Configuration", order=1, - roles=[ role for role in typing.get_args(user_session_service.RoleType) if role != "catalog" ], ) trim_fields: typing.ClassVar[list[str]] = [ "project_host", @@ -66,7 +63,7 @@ def render(self, project_code: str, **_kwargs) -> None: has_table_groups = ( len(TableGroup.select_minimal_where(TableGroup.connection_id == connection.connection_id) or []) > 0 ) - user_is_admin = user_session_service.user_is_admin() + user_is_admin = session.auth.user_has_permission("administer") should_check_status, set_check_status = temp_value( "connections:status_check", default=False, diff --git a/testgen/ui/views/data_catalog.py b/testgen/ui/views/data_catalog.py index decb83dd..26791ccc 100644 --- a/testgen/ui/views/data_catalog.py +++ b/testgen/ui/views/data_catalog.py @@ -33,7 +33,6 @@ get_tables_by_id, get_tables_by_table_group, ) -from testgen.ui.services import user_session_service from testgen.ui.services.database_service import execute_db_query, fetch_all_from_db from testgen.ui.session import session, temp_value from testgen.ui.views.dialogs.column_history_dialog import column_history_dialog @@ -48,8 +47,9 @@ class DataCatalogPage(Page): path = "data-catalog" + permission = "catalog" can_activate: typing.ClassVar = [ - lambda: session.authentication_status, + lambda: session.auth.is_logged_in, lambda: 
"project_code" in st.query_params, ] menu_item = MenuItem(icon=PAGE_ICON, label=PAGE_TITLE, section="Data Profiling", order=0) @@ -70,7 +70,7 @@ def render(self, project_code: str, table_group_id: str | None = None, selected: # Enclosing the loading logic in a Streamlit container also fixes it project_summary = Project.get_summary(project_code) - user_can_navigate = not user_session_service.user_has_catalog_role() + user_can_navigate = session.auth.user_has_permission("view") table_groups = TableGroup.select_minimal_where(TableGroup.project_code == project_code) if not table_group_id or table_group_id not in [ str(item.id) for item in table_groups ]: @@ -105,7 +105,7 @@ def render(self, project_code: str, table_group_id: str | None = None, selected: "tag_values": get_tag_values(), "last_saved_timestamp": st.session_state.get("data_catalog:last_saved_timestamp"), "permissions": { - "can_edit": user_session_service.user_can_disposition(), + "can_edit": session.auth.user_has_permission("disposition"), "can_navigate": user_can_navigate, }, }, @@ -417,7 +417,7 @@ def get_table_group_columns(table_group_id: str) -> list[dict]: AND column_chars.column_name = profile_results.column_name ) WHERE column_chars.table_groups_id = :table_group_id - ORDER BY table_name, ordinal_position; + ORDER BY LOWER(table_chars.table_name), ordinal_position; """ params = {"table_group_id": table_group_id} @@ -523,21 +523,24 @@ def get_related_test_suites(table_group_id: str, table_name: str, column_name: s def get_tag_values() -> dict[str, list[str]]: quote = lambda v: f"'{v}'" query = f""" - SELECT DISTINCT - UNNEST(array[{', '.join([quote(t) for t in TAG_FIELDS])}]) as tag, - UNNEST(array[{', '.join(TAG_FIELDS)}]) AS value - FROM data_column_chars - UNION - SELECT DISTINCT - UNNEST(array[{', '.join([quote(t) for t in TAG_FIELDS])}]) as tag, - UNNEST(array[{', '.join(TAG_FIELDS)}]) AS value - FROM data_table_chars - UNION - SELECT DISTINCT - UNNEST(array[{', '.join([quote(t) for t in TAG_FIELDS if t != 'aggregation_level'])}]) as tag, - UNNEST(array[{', '.join([ t for t in TAG_FIELDS if t != 'aggregation_level'])}]) AS value - FROM table_groups - ORDER BY value; + SELECT * + FROM ( + SELECT DISTINCT + UNNEST(array[{', '.join([quote(t) for t in TAG_FIELDS])}]) as tag, + UNNEST(array[{', '.join(TAG_FIELDS)}]) AS value + FROM data_column_chars + UNION + SELECT DISTINCT + UNNEST(array[{', '.join([quote(t) for t in TAG_FIELDS])}]) as tag, + UNNEST(array[{', '.join(TAG_FIELDS)}]) AS value + FROM data_table_chars + UNION + SELECT DISTINCT + UNNEST(array[{', '.join([quote(t) for t in TAG_FIELDS if t != 'aggregation_level'])}]) as tag, + UNNEST(array[{', '.join([ t for t in TAG_FIELDS if t != 'aggregation_level'])}]) AS value + FROM table_groups + ) tag_values + ORDER BY LOWER(value); """ results = fetch_all_from_db(query) diff --git a/testgen/ui/views/dialogs/manage_schedules.py b/testgen/ui/views/dialogs/manage_schedules.py index 3260eca4..702e41b4 100644 --- a/testgen/ui/views/dialogs/manage_schedules.py +++ b/testgen/ui/views/dialogs/manage_schedules.py @@ -12,7 +12,7 @@ from testgen.common.models.scheduler import JobSchedule from testgen.ui.components import widgets as testgen from testgen.ui.components.widgets import tz_select -from testgen.ui.services import user_session_service +from testgen.ui.session import session class ScheduleDialog: @@ -72,17 +72,18 @@ def on_delete_sched(item): db_session.commit() st.rerun(scope="fragment") + user_can_edit = session.auth.user_has_permission("edit") testgen.testgen_component( 
"schedule_list", props={ "items": json.dumps(scheduled_jobs_json), "arg_abel": self.arg_label, - "permissions": {"can_edit": user_session_service.user_can_edit()}, + "permissions": {"can_edit": user_can_edit}, }, event_handlers={"DeleteSchedule": on_delete_sched} ) - if user_session_service.user_can_edit(): + if user_can_edit: with st.container(border=True): self.add_schedule_form() diff --git a/testgen/ui/views/hygiene_issues.py b/testgen/ui/views/hygiene_issues.py index a46f2185..3dd75fb0 100644 --- a/testgen/ui/views/hygiene_issues.py +++ b/testgen/ui/views/hygiene_issues.py @@ -23,7 +23,6 @@ from testgen.ui.navigation.page import Page from testgen.ui.pdf.hygiene_issue_report import create_report from testgen.ui.queries.source_data_queries import get_hygiene_issue_source_data -from testgen.ui.services import user_session_service from testgen.ui.services.database_service import ( execute_db_query, fetch_df_from_db, @@ -37,8 +36,7 @@ class HygieneIssuesPage(Page): path = "profiling-runs:hygiene" can_activate: typing.ClassVar = [ - lambda: session.authentication_status, - lambda: not user_session_service.user_has_catalog_role(), + lambda: session.auth.is_logged_in, lambda: "run_id" in st.query_params or "profiling-runs", ] @@ -332,7 +330,7 @@ def open_download_dialog(data: pd.DataFrame | None = None) -> None: { "icon": "↩︎", "help": "Clear action", "status": "No Decision" }, ] - if user_session_service.user_can_disposition(): + if session.auth.user_has_permission("disposition"): disposition_translator = {"No Decision": None} # Need to render toolbar buttons after grid, so selection status is maintained for d_action in disposition_actions: diff --git a/testgen/ui/views/login.py b/testgen/ui/views/login.py index d59ab817..a77235d7 100644 --- a/testgen/ui/views/login.py +++ b/testgen/ui/views/login.py @@ -7,7 +7,6 @@ from testgen.common.mixpanel_service import MixpanelService from testgen.ui.components import widgets as testgen from testgen.ui.navigation.page import Page -from testgen.ui.services import javascript_service, user_session_service from testgen.ui.session import session LOG = logging.getLogger("testgen") @@ -15,42 +14,43 @@ class LoginPage(Page): path = "" + permission = None can_activate: typing.ClassVar = [ - lambda: not session.authentication_status or session.logging_in, + lambda: not session.auth.is_logged_in or session.auth.logging_in, ] def render(self, **_kwargs) -> None: - auth_data = user_session_service.get_auth_data() - - authenticator = stauth.Authenticate( - auth_data["credentials"], - auth_data["cookie"]["name"], - auth_data["cookie"]["key"], - auth_data["cookie"]["expiry_days"], - ) - _, login_column, links_column = st.columns([0.25, 0.5, 0.25]) with links_column: testgen.help_menu() with login_column: - st.html(""" -
-                <!-- [welcome banner markup elided] -->
-                Welcome to DataKitchen DataOps TestGen
- """) - name, authentication_status, username = authenticator.login("Login") - - if authentication_status is False: - st.error("Username or password is incorrect.") - MixpanelService().send_event("login-denied", username=username) + self.render_login_form(**_kwargs) + + def render_login_form(self, **_kwargs) -> None: + st.html(""" +
+                <!-- [welcome banner markup elided] -->
+                Welcome to DataKitchen DataOps TestGen
+ """) + + authenticator = stauth.Authenticate( + session.auth.get_credentials(), + session.auth.jwt_cookie_name, + session.auth.get_jwt_hashing_key(), + session.auth.jwt_cookie_expiry_days, + ) + + _name, authentication_status, username = authenticator.login("Login") - if authentication_status is None: - javascript_service.clear_component_states() + if authentication_status is False: + st.error("Username or password is incorrect.") + MixpanelService().send_event("login-denied", username=username) - session.authentication_status = authentication_status + if authentication_status is None: + session.auth.end_user_session() - if authentication_status: - user_session_service.start_user_session(name, username) - session.logging_in = True - MixpanelService().send_event("login", include_usage=True) + if authentication_status: + session.auth.logging_in = True + session.auth.logging_out = False + session.auth.login_user(username) diff --git a/testgen/ui/views/profiling_results.py b/testgen/ui/views/profiling_results.py index faff90f5..c1139616 100644 --- a/testgen/ui/views/profiling_results.py +++ b/testgen/ui/views/profiling_results.py @@ -21,7 +21,6 @@ from testgen.ui.components.widgets.page import css_class, flex_row_end from testgen.ui.components.widgets.testgen_component import testgen_component from testgen.ui.navigation.page import Page -from testgen.ui.services import user_session_service from testgen.ui.services.database_service import fetch_df_from_db from testgen.ui.session import session from testgen.ui.views.dialogs.data_preview_dialog import data_preview_dialog @@ -32,8 +31,7 @@ class ProfilingResultsPage(Page): path = "profiling-runs:results" can_activate: typing.ClassVar = [ - lambda: session.authentication_status, - lambda: not user_session_service.user_has_catalog_role(), + lambda: session.auth.is_logged_in, lambda: "run_id" in st.query_params or "profiling-runs", ] diff --git a/testgen/ui/views/profiling_runs.py b/testgen/ui/views/profiling_runs.py index 540623b8..01760874 100644 --- a/testgen/ui/views/profiling_runs.py +++ b/testgen/ui/views/profiling_runs.py @@ -16,7 +16,6 @@ from testgen.ui.components.widgets import testgen_component from testgen.ui.navigation.menu import MenuItem from testgen.ui.navigation.page import Page -from testgen.ui.services import user_session_service from testgen.ui.session import session, temp_value from testgen.ui.views.dialogs.manage_schedules import ScheduleDialog from testgen.ui.views.dialogs.run_profiling_dialog import run_profiling_dialog @@ -32,8 +31,7 @@ class DataProfilingPage(Page): path = "profiling-runs" can_activate: typing.ClassVar = [ - lambda: session.authentication_status, - lambda: not user_session_service.user_has_catalog_role(), + lambda: session.auth.is_logged_in, lambda: "project_code" in st.query_params, ] menu_item = MenuItem( @@ -41,7 +39,6 @@ class DataProfilingPage(Page): label=PAGE_TITLE, section="Data Profiling", order=1, - roles=[ role for role in typing.get_args(user_session_service.RoleType) if role != "catalog" ], ) def render(self, project_code: str, table_group_id: str | None = None, **_kwargs) -> None: @@ -50,7 +47,7 @@ def render(self, project_code: str, table_group_id: str | None = None, **_kwargs "investigate-profiling", ) - user_can_run = user_session_service.user_can_edit() + user_can_run = session.auth.user_has_permission("edit") if render_empty_state(project_code, user_can_run): return diff --git a/testgen/ui/views/project_dashboard.py b/testgen/ui/views/project_dashboard.py index 44586419..6f7fe37b 100644 
--- a/testgen/ui/views/project_dashboard.py +++ b/testgen/ui/views/project_dashboard.py @@ -8,7 +8,6 @@ from testgen.ui.components import widgets as testgen from testgen.ui.navigation.menu import MenuItem from testgen.ui.navigation.page import Page -from testgen.ui.services import user_session_service from testgen.ui.session import session from testgen.utils import friendly_score, make_json_safe, score @@ -19,15 +18,13 @@ class ProjectDashboardPage(Page): path = "project-dashboard" can_activate: typing.ClassVar = [ - lambda: session.authentication_status, - lambda: not user_session_service.user_has_catalog_role(), + lambda: session.auth.is_logged_in, lambda: "project_code" in st.query_params, ] menu_item = MenuItem( icon=PAGE_ICON, label=PAGE_TITLE, order=0, - roles=[ role for role in typing.get_args(user_session_service.RoleType) if role != "catalog" ], ) def render(self, project_code: str, **_kwargs): diff --git a/testgen/ui/views/project_settings.py b/testgen/ui/views/project_settings.py index 06a93fc8..5a05b3f8 100644 --- a/testgen/ui/views/project_settings.py +++ b/testgen/ui/views/project_settings.py @@ -11,7 +11,6 @@ from testgen.ui.components import widgets as testgen from testgen.ui.navigation.menu import MenuItem from testgen.ui.navigation.page import Page -from testgen.ui.services import user_session_service from testgen.ui.session import session PAGE_TITLE = "Project Settings" @@ -19,9 +18,9 @@ class ProjectSettingsPage(Page): path = "settings" + permission = "administer" can_activate: typing.ClassVar = [ - lambda: session.authentication_status, - lambda: user_session_service.user_is_admin(), + lambda: session.auth.is_logged_in, lambda: "project_code" in st.query_params, ] menu_item = MenuItem( @@ -29,7 +28,6 @@ class ProjectSettingsPage(Page): label=PAGE_TITLE, section="Settings", order=0, - roles=[ "admin" ], ) project: Project | None = None diff --git a/testgen/ui/views/quality_dashboard.py b/testgen/ui/views/quality_dashboard.py index d66f746b..e2b7e0b9 100644 --- a/testgen/ui/views/quality_dashboard.py +++ b/testgen/ui/views/quality_dashboard.py @@ -1,4 +1,4 @@ -from typing import ClassVar, get_args +from typing import ClassVar import streamlit as st @@ -7,7 +7,6 @@ from testgen.ui.navigation.menu import MenuItem from testgen.ui.navigation.page import Page from testgen.ui.queries.scoring_queries import get_all_score_cards -from testgen.ui.services import user_session_service from testgen.ui.session import session from testgen.utils import format_score_card @@ -17,15 +16,13 @@ class QualityDashboardPage(Page): path = "quality-dashboard" can_activate: ClassVar = [ - lambda: session.authentication_status, - lambda: not user_session_service.user_has_catalog_role(), + lambda: session.auth.is_logged_in, lambda: "project_code" in st.query_params, ] menu_item = MenuItem( icon="readiness_score", label=PAGE_TITLE, order=1, - roles=[ role for role in get_args(user_session_service.RoleType) if role != "catalog" ], ) def render(self, *, project_code: str, **_kwargs) -> None: diff --git a/testgen/ui/views/score_details.py b/testgen/ui/views/score_details.py index 70877754..25a19c25 100644 --- a/testgen/ui/views/score_details.py +++ b/testgen/ui/views/score_details.py @@ -23,7 +23,6 @@ from testgen.ui.navigation.router import Router from testgen.ui.pdf import hygiene_issue_report, test_result_report from testgen.ui.queries.scoring_queries import get_all_score_cards, get_score_card_issue_reports -from testgen.ui.services import user_session_service from testgen.ui.session import session, 
temp_value from testgen.ui.views.dialogs.profiling_results_dialog import profiling_results_dialog from testgen.utils import format_score_card, format_score_card_breakdown, format_score_card_issues @@ -35,8 +34,7 @@ class ScoreDetailsPage(Page): path = PAGE_PATH can_activate: ClassVar = [ - lambda: session.authentication_status, - lambda: not user_session_service.user_has_catalog_role(), + lambda: session.auth.is_logged_in, lambda: "definition_id" in st.query_params or "quality-dashboard", ] @@ -81,7 +79,7 @@ def render( score_breakdown = None issues = None with st.spinner(text="Loading data :gray[:small[(This might take a few minutes)]] ..."): - user_can_edit = user_session_service.user_can_edit() + user_can_edit = session.auth.user_has_permission("edit") score_card = format_score_card(score_definition.as_cached_score_card()) if score_type not in typing.get_args(ScoreTypes): score_type = None diff --git a/testgen/ui/views/score_explorer.py b/testgen/ui/views/score_explorer.py index f90e3786..1522967b 100644 --- a/testgen/ui/views/score_explorer.py +++ b/testgen/ui/views/score_explorer.py @@ -26,7 +26,6 @@ get_score_card_issue_reports, get_score_category_values, ) -from testgen.ui.services import user_session_service from testgen.ui.session import session, temp_value from testgen.utils import format_score_card, format_score_card_breakdown, format_score_card_issues, try_json @@ -35,8 +34,7 @@ class ScoreExplorerPage(Page): path = PAGE_PATH can_activate: ClassVar = [ - lambda: session.authentication_status, - lambda: not user_session_service.user_has_catalog_role(), + lambda: session.auth.is_logged_in, lambda: "definition_id" in st.query_params or "project_code" in st.query_params or "quality-dashboard", ] @@ -85,7 +83,7 @@ def render( issues = None filter_values = {} with st.spinner(text="Loading data :gray[:small[(This might take a few minutes)]] ..."): - user_can_edit = user_session_service.user_can_edit() + user_can_edit = session.auth.user_has_permission("edit") filter_values = get_score_category_values(project_code) score_definition: ScoreDefinition = ScoreDefinition( diff --git a/testgen/ui/views/table_groups.py b/testgen/ui/views/table_groups.py index 1c1dd1bc..3ee2523b 100644 --- a/testgen/ui/views/table_groups.py +++ b/testgen/ui/views/table_groups.py @@ -16,7 +16,6 @@ from testgen.ui.navigation.menu import MenuItem from testgen.ui.navigation.page import Page from testgen.ui.queries import table_group_queries -from testgen.ui.services import user_session_service from testgen.ui.session import session, temp_value from testgen.ui.views.connections import FLAVOR_OPTIONS, format_connection from testgen.ui.views.profiling_runs import ProfilingScheduleDialog @@ -28,8 +27,7 @@ class TableGroupsPage(Page): path = "table-groups" can_activate: typing.ClassVar = [ - lambda: session.authentication_status, - lambda: not user_session_service.user_has_catalog_role(), + lambda: session.auth.is_logged_in, lambda: "project_code" in st.query_params, ] menu_item = MenuItem( @@ -37,7 +35,6 @@ class TableGroupsPage(Page): label=PAGE_TITLE, section="Data Configuration", order=0, - roles=[ role for role in typing.get_args(user_session_service.RoleType) if role != "catalog" ], ) def render( @@ -49,7 +46,7 @@ def render( ) -> None: testgen.page_header(PAGE_TITLE, "create-a-table-group") - user_can_edit = user_session_service.user_can_edit() + user_can_edit = session.auth.user_has_permission("edit") project_summary = Project.get_summary(project_code) if connection_id and not connection_id.isdigit(): 
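             # query params arrive as strings; discard non-numeric connection ids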
connection_id = None diff --git a/testgen/ui/views/test_definitions.py b/testgen/ui/views/test_definitions.py index 5fc082df..850f4096 100644 --- a/testgen/ui/views/test_definitions.py +++ b/testgen/ui/views/test_definitions.py @@ -27,7 +27,6 @@ ) from testgen.ui.components.widgets.page import css_class, flex_row_end from testgen.ui.navigation.page import Page -from testgen.ui.services import user_session_service from testgen.ui.services.database_service import fetch_all_from_db, fetch_df_from_db, fetch_from_target_db from testgen.ui.services.string_service import empty_if_null, snake_case_to_title_case from testgen.ui.session import session, temp_value @@ -41,8 +40,7 @@ class TestDefinitionsPage(Page): path = "test-suites:definitions" can_activate: typing.ClassVar = [ - lambda: session.authentication_status, - lambda: not user_session_service.user_has_catalog_role(), + lambda: session.auth.is_logged_in, lambda: "test_suite_id" in st.query_params or "test-suites", ] @@ -57,8 +55,8 @@ def render(self, test_suite_id: str, table_name: str | None = None, column_name: table_group = TableGroup.get_minimal(test_suite.table_groups_id) project_code = table_group.project_code session.set_sidebar_project(project_code) - user_can_edit = user_session_service.user_can_edit() - user_can_disposition = user_session_service.user_can_disposition() + user_can_edit = session.auth.user_has_permission("edit") + user_can_disposition = session.auth.user_has_permission("disposition") testgen.page_header( "Test Definitions", diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py index 6f03fe83..8a0ed3d8 100644 --- a/testgen/ui/views/test_results.py +++ b/testgen/ui/views/test_results.py @@ -32,7 +32,6 @@ from testgen.ui.pdf.test_result_report import create_report from testgen.ui.queries import test_result_queries from testgen.ui.queries.source_data_queries import get_test_issue_source_data, get_test_issue_source_data_custom -from testgen.ui.services import user_session_service from testgen.ui.services.database_service import execute_db_query, fetch_df_from_db, fetch_one_from_db from testgen.ui.services.string_service import empty_if_null, snake_case_to_title_case from testgen.ui.session import session @@ -46,8 +45,7 @@ class TestResultsPage(Page): path = PAGE_PATH can_activate: typing.ClassVar = [ - lambda: session.authentication_status, - lambda: not user_session_service.user_has_catalog_role(), + lambda: session.auth.is_logged_in, lambda: "run_id" in st.query_params or "test-runs", ] @@ -189,6 +187,7 @@ def render( run_date, run.test_suite_id, export_button_column, + session.auth.user_has_permission("edit"), status, test_type, table_name, @@ -208,7 +207,7 @@ def render( { "icon": "↩︎", "help": "Clear action", "status": "No Decision" }, ] - if user_session_service.user_can_disposition(): + if session.auth.user_has_permission("disposition"): disable_all_dispo = not selected or status == "'Passed'" or all(sel["result_status"] == "Passed" for sel in selected) disposition_translator = {"No Decision": None} for action in disposition_actions: @@ -424,6 +423,7 @@ def show_result_detail( run_date: str, test_suite_id: UUID, export_container: DeltaGenerator, + user_can_edit: bool, test_statuses: list[str] | None = None, test_type_id: str | None = None, table_name: str | None = None, @@ -517,7 +517,7 @@ def open_download_dialog(data: pd.DataFrame | None = None) -> None: with pg_col2: v_col1, v_col2, v_col3, v_col4 = st.columns([.25, .25, .25, .25]) - if user_session_service.user_can_edit(): + if 
user_can_edit: view_edit_test(v_col1, selected_row["test_definition_id_current"]) if selected_row["test_scope"] == "column": diff --git a/testgen/ui/views/test_runs.py b/testgen/ui/views/test_runs.py index 8f2ebd59..72312c8c 100644 --- a/testgen/ui/views/test_runs.py +++ b/testgen/ui/views/test_runs.py @@ -17,7 +17,6 @@ from testgen.ui.components.widgets import testgen_component from testgen.ui.navigation.menu import MenuItem from testgen.ui.navigation.page import Page -from testgen.ui.services import user_session_service from testgen.ui.session import session, temp_value from testgen.ui.views.dialogs.manage_schedules import ScheduleDialog from testgen.ui.views.dialogs.run_tests_dialog import run_tests_dialog @@ -32,8 +31,7 @@ class TestRunsPage(Page): path = "test-runs" can_activate: typing.ClassVar = [ - lambda: session.authentication_status, - lambda: not user_session_service.user_has_catalog_role(), + lambda: session.auth.is_logged_in, lambda: "project_code" in st.query_params, ] menu_item = MenuItem( @@ -41,7 +39,6 @@ class TestRunsPage(Page): label=PAGE_TITLE, section="Data Quality Testing", order=0, - roles=[ role for role in typing.get_args(user_session_service.RoleType) if role != "catalog" ], ) def render(self, project_code: str, table_group_id: str | None = None, test_suite_id: str | None = None, **_kwargs) -> None: @@ -50,7 +47,7 @@ def render(self, project_code: str, table_group_id: str | None = None, test_suit "test-results", ) - user_can_run = user_session_service.user_can_edit() + user_can_run = session.auth.user_has_permission("edit") if render_empty_state(project_code, user_can_run): return diff --git a/testgen/ui/views/test_suites.py b/testgen/ui/views/test_suites.py index 83169174..5efa1f41 100644 --- a/testgen/ui/views/test_suites.py +++ b/testgen/ui/views/test_suites.py @@ -14,7 +14,6 @@ from testgen.ui.navigation.menu import MenuItem from testgen.ui.navigation.page import Page from testgen.ui.navigation.router import Router -from testgen.ui.services import user_session_service from testgen.ui.services.string_service import empty_if_null from testgen.ui.session import session from testgen.ui.views.dialogs.generate_tests_dialog import generate_tests_dialog @@ -28,8 +27,7 @@ class TestSuitesPage(Page): path = "test-suites" can_activate: typing.ClassVar = [ - lambda: session.authentication_status, - lambda: not user_session_service.user_has_catalog_role(), + lambda: session.auth.is_logged_in, lambda: "project_code" in st.query_params, ] menu_item = MenuItem( @@ -37,7 +35,6 @@ class TestSuitesPage(Page): label=PAGE_TITLE, section="Data Quality Testing", order=1, - roles=[ role for role in typing.get_args(user_session_service.RoleType) if role != "catalog" ], ) def render(self, project_code: str, table_group_id: str | None = None, **_kwargs) -> None: @@ -47,7 +44,7 @@ def render(self, project_code: str, table_group_id: str | None = None, **_kwargs ) table_groups = TableGroup.select_minimal_where(TableGroup.project_code == project_code) - user_can_edit = user_session_service.user_can_edit() + user_can_edit = session.auth.user_has_permission("edit") test_suites = TestSuite.select_summary(project_code, table_group_id) project_summary = Project.get_summary(project_code) diff --git a/testgen/utils/plugins.py b/testgen/utils/plugins.py index 6dc2563b..6d5596a0 100644 --- a/testgen/utils/plugins.py +++ b/testgen/utils/plugins.py @@ -8,6 +8,7 @@ from typing import ClassVar from testgen.ui.assets import get_asset_path +from testgen.ui.auth import Authentication from 
testgen.ui.navigation.page import Page PLUGIN_PREFIX = "testgen_" @@ -85,6 +86,7 @@ def _read_ui_plugin_spec() -> dict: class PluginSpec: + auth: ClassVar[type[Authentication] | None] = None page: ClassVar[type[Page] | None] = None logo: ClassVar[type[Logo] | None] = None component: ClassVar[ComponentSpec | None] = None @@ -97,23 +99,28 @@ class Plugin: def load(self) -> PluginSpec: plugin_page = None + plugin_auth = None plugin_logo = None plugin_component_spec = None module = importlib.import_module(self.package) for property_name in dir(module): if ((maybe_class := getattr(module, property_name, None)) and inspect.isclass(maybe_class)): - if issubclass(maybe_class, PluginSpec): + if issubclass(maybe_class, PluginSpec) and maybe_class != PluginSpec: return maybe_class if issubclass(maybe_class, Page): plugin_page = maybe_class + elif issubclass(maybe_class, Authentication): + plugin_auth = maybe_class + elif issubclass(maybe_class, Logo): plugin_logo = maybe_class return type("AnyPlugin", (PluginSpec,), { "page": plugin_page, + "auth": plugin_auth, "logo": plugin_logo, "component": plugin_component_spec, }) From 1a5568181c17ae4cdec34f7dc6a90845e9979889 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Mon, 18 Aug 2025 23:23:27 -0400 Subject: [PATCH 13/28] refactor: remove dead code --- testgen/ui/assets/scripts.js | 9 -- testgen/ui/services/javascript_service.py | 12 --- testgen/ui/services/user_session_service.py | 110 -------------------- 3 files changed, 131 deletions(-) delete mode 100644 testgen/ui/services/user_session_service.py diff --git a/testgen/ui/assets/scripts.js b/testgen/ui/assets/scripts.js index 575dfdf5..52b4e520 100644 --- a/testgen/ui/assets/scripts.js +++ b/testgen/ui/assets/scripts.js @@ -13,15 +13,6 @@ window.addEventListener('message', async function(event) { } }); -function removeElements(selectors) { - for (const selector of selectors) { - const element = window.top.document.querySelector(selector); - if (element) { - element.remove(); - } - } -} - async function copyToClipboard(text) { if (navigator.clipboard && window.isSecureContext) { await navigator.clipboard.writeText(text || ''); diff --git a/testgen/ui/services/javascript_service.py b/testgen/ui/services/javascript_service.py index 73c87bfc..add2de45 100644 --- a/testgen/ui/services/javascript_service.py +++ b/testgen/ui/services/javascript_service.py @@ -2,21 +2,9 @@ from streamlit_javascript import st_javascript -from testgen.ui.services.user_session_service import AUTH_TOKEN_COOKIE_NAME - LOG = logging.getLogger("testgen") -def clear_component_states(): - execute_javascript( - f"""await (async function () {{ - window.parent.postMessage({{ type: 'TestgenLogout', cookie: '{AUTH_TOKEN_COOKIE_NAME}' }}, '*'); - return 0; - }})() - """ - ) - - def execute_javascript(script): return_value = st_javascript(script) if return_value != 0: diff --git a/testgen/ui/services/user_session_service.py b/testgen/ui/services/user_session_service.py deleted file mode 100644 index 8bbb1000..00000000 --- a/testgen/ui/services/user_session_service.py +++ /dev/null @@ -1,110 +0,0 @@ -import base64 -import datetime -import logging - -import extra_streamlit_components as stx -import jwt -import streamlit as st - -from testgen import settings -from testgen.common.models.user import RoleType, User -from testgen.ui.session import session - -AUTH_TOKEN_COOKIE_NAME = "dk_cookie_name" # noqa: S105 -AUTH_TOKEN_EXPIRATION_DAYS = 1 -DISABLED_ACTION_TEXT = "You do not have permissions to perform this action. 
Contact your administrator." - -LOG = logging.getLogger("testgen") - - -def _get_jwt_hashing_key() -> bytes: - try: - return base64.b64decode(settings.JWT_HASHING_KEY_B64.encode("ascii")) - except Exception as e: - st.error( - "Error reading the JWT signing key from settings.\n\n Make sure you have a valid " - "base64 string assigned to the TG_JWT_HASHING_KEY environment variable." - ) - st.stop() - - -def load_user_session() -> None: - # Replacing this with st.context.cookies does not work - # Because it does not update when cookies are deleted on logout - cookies = stx.CookieManager(key="testgen.cookies.get") - if cookies.cookies: - session.cookies_ready = True - - token = cookies.get(AUTH_TOKEN_COOKIE_NAME) - if token is not None: - try: - token = jwt.decode(token, _get_jwt_hashing_key(), algorithms=["HS256"]) - if token["exp_date"] > datetime.datetime.utcnow().timestamp(): - start_user_session(token["name"], token["username"]) - except Exception: - LOG.debug("Invalid auth token found on cookies", exc_info=True, stack_info=True) - - -def start_user_session(name: str, username: str) -> None: - session.name = name - session.username = username - session.auth_role = get_auth_data()["credentials"]["usernames"][username]["role"] - session.authentication_status = True - session.logging_out = False - if user_has_catalog_role(): - session.user_default_page = "data-catalog" - st.rerun() - else: - session.user_default_page = "project-dashboard" - - -def end_user_session() -> None: - session.auth_role = None - session.authentication_status = None - session.logging_out = True - session.user_default_page = "" - - del session.name - del session.username - - -def get_auth_data(): - users = User.select_where() - usernames = {} - - for item in users: - usernames[item.username.lower()] = { - "email": item.email, - "name": item.name, - "password": item.password, - "role": item.role, - } - - return { - "credentials": {"usernames": usernames}, - "cookie": { - "expiry_days": AUTH_TOKEN_EXPIRATION_DAYS, - "key": _get_jwt_hashing_key(), - "name": AUTH_TOKEN_COOKIE_NAME, - }, - } - - -def user_is_admin(): - return session.auth_role == "admin" - - -def user_can_edit(): - return session.auth_role in ("admin", "data_quality") - - -def user_can_disposition(): - return session.auth_role in ("admin", "data_quality", "analyst") - - -def user_has_catalog_role(): - return session.auth_role == "catalog" - - -def user_has_role(role: RoleType) -> bool: - return session.auth_role == role From e4fbf51c5b98a6ffd167e366c360df8e68d737f5 Mon Sep 17 00:00:00 2001 From: "Chip.Bloche" Date: Mon, 28 Jul 2025 13:48:34 -0400 Subject: [PATCH 14/28] Initial history update/Stale_Table test --- .../commands/queries/execute_tests_query.py | 8 +- testgen/commands/run_execute_tests.py | 5 + .../030_initialize_new_schema_structure.sql | 3 + .../050_populate_new_schema_metadata.sql | 11 +- .../dbupgrade/0500_incremental_upgrade.sql | 8 + .../ex_update_history_threshold_last_n.sql | 50 ++++++ .../ex_aggregate_match_no_drops_generic.sql | 1 + .../ex_aggregate_match_percent_generic.sql | 1 + .../ex_aggregate_match_range_generic.sql | 1 + .../ex_aggregate_match_same_generic.sql | 1 + .../ex_custom_query_generic.sql | 1 + .../ex_data_match_2way_generic.sql | 1 + .../ex_data_match_generic.sql | 1 + .../exec_query_tests/ex_dupe_rows_generic.sql | 1 + .../ex_prior_match_generic.sql | 1 + .../ex_relative_entropy_generic.sql | 1 + .../ex_table_changed_generic.sql | 33 ++++ .../ex_window_match_no_drops_generic.sql | 1 + 
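For readers following the auth refactor in the patches above: the deleted role helpers (user_is_admin, user_can_edit, user_can_disposition, user_has_catalog_role) are replaced throughout this series by permission checks on session.auth. The sketch below illustrates how the old roles could map onto the permission strings seen in the diffs ("administer", "edit", "disposition"); the mapping table and class body are assumptions inferred from the deleted code, not the actual Authentication implementation.

    # Hedged sketch only: a role-to-permission mapping consistent with the
    # removed helpers. The real Authentication class in this series may
    # differ; names below are illustrative.
    ROLE_PERMISSIONS: dict[str, set[str]] = {
        "admin": {"administer", "edit", "disposition"},
        "data_quality": {"edit", "disposition"},
        "analyst": {"disposition"},
        "catalog": set(),  # catalog users were excluded from most pages
    }

    class AuthenticationSketch:
        def __init__(self, role: str | None) -> None:
            self.role = role

        @property
        def is_logged_in(self) -> bool:
            return self.role is not None

        def user_has_permission(self, permission: str) -> bool:
            return self.role is not None and permission in ROLE_PERMISSIONS.get(self.role, set())

This keeps page code free of role names: pages ask for a capability ("edit") rather than enumerating roles, which is why the roles= lists on MenuItem could be dropped.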
.../ex_window_match_same_generic.sql | 1 + .../gen_table_changed_test.sql | 143 ++++++++++++++++++ 20 files changed, 268 insertions(+), 5 deletions(-) create mode 100644 testgen/template/dbupgrade/0500_incremental_upgrade.sql create mode 100644 testgen/template/execution/ex_update_history_threshold_last_n.sql create mode 100644 testgen/template/flavors/generic/exec_query_tests/ex_table_changed_generic.sql create mode 100644 testgen/template/gen_query_tests/gen_table_changed_test.sql diff --git a/testgen/commands/queries/execute_tests_query.py b/testgen/commands/queries/execute_tests_query.py index 23a3e492..daa38932 100644 --- a/testgen/commands/queries/execute_tests_query.py +++ b/testgen/commands/queries/execute_tests_query.py @@ -143,9 +143,11 @@ def GetTestsNonCAT(self) -> tuple[str, dict]: query = CleanSQL(query) return query, params - def AddTestRecordtoTestRunTable(self) -> tuple[str, dict]: - # Runs on App database - return self._get_query("ex_write_test_record_to_testrun_table.sql") + def GetHistoricThresholdUpdate(self) -> tuple[str, dict]: + query = self._get_query("ex_update_history_threshold_last_n.sql") + if self._use_clean: + query = CleanSQL(query) + return query def PushTestRunStatusUpdateSQL(self) -> tuple[str, dict]: # Runs on App database diff --git a/testgen/commands/run_execute_tests.py b/testgen/commands/run_execute_tests.py index 1c899d57..9e35dca5 100644 --- a/testgen/commands/run_execute_tests.py +++ b/testgen/commands/run_execute_tests.py @@ -68,6 +68,11 @@ def run_test_queries( clsExecute.process_id = process_service.get_current_process_id() try: + # Update Historic Test Thresholds + LOG.info("CurrentStep: Updating Historic Test Thresholds") + strQuery = clsExecute.GetHistoricThresholdUpdate(booClean) + ExecuteDBQuery("DKTG", strQuery) + # Retrieve non-CAT Queries LOG.info("CurrentStep: Retrieve Non-CAT Queries") lstTestSet = fetch_dict_from_db(*clsExecute.GetTestsNonCAT()) diff --git a/testgen/template/dbsetup/030_initialize_new_schema_structure.sql b/testgen/template/dbsetup/030_initialize_new_schema_structure.sql index d945a4e1..37414d72 100644 --- a/testgen/template/dbsetup/030_initialize_new_schema_structure.sql +++ b/testgen/template/dbsetup/030_initialize_new_schema_structure.sql @@ -200,6 +200,8 @@ CREATE TABLE test_definitions ( match_subset_condition VARCHAR(500), match_groupby_names VARCHAR, match_having_condition VARCHAR(500), + history_calculation VARCHAR(20), + history_lookback INTEGER, test_mode VARCHAR(20), custom_query VARCHAR, test_active VARCHAR(10) DEFAULT 'Y':: CHARACTER VARYING, @@ -538,6 +540,7 @@ CREATE TABLE test_results ( severity VARCHAR(10), result_status VARCHAR(10), result_message VARCHAR(1000), + result_signal VARCHAR(1000), result_measure VARCHAR(1000), threshold_value VARCHAR(1000), result_error_data VARCHAR(4000), diff --git a/testgen/template/dbsetup/050_populate_new_schema_metadata.sql b/testgen/template/dbsetup/050_populate_new_schema_metadata.sql index d12b0212..ddeae231 100644 --- a/testgen/template/dbsetup/050_populate_new_schema_metadata.sql +++ b/testgen/template/dbsetup/050_populate_new_schema_metadata.sql @@ -156,7 +156,8 @@ VALUES ('1004', 'Alpha_Trunc', 'Alpha Truncation', 'Maximum character count con ('1505', 'Aggregate_Balance_Range', 'Aggregate Balance Range', 'Aggregate measure per group within hard range of reference', 'Tests that aggregate measure for each set of column values fall within a hard range above or below the measure for reference dataset', 'Aggregate measure per set of column values is outside 
expected range of reference dataset.', 'Mismatched measures', NULL, NULL, '1', '1.0', 'Aggregate Expression', 'Specify an aggregate column expression: one of `SUM([column_name])` or `COUNT([column_name])`', 'subset_condition,groupby_names,having_condition,match_schema_name,match_table_name,match_column_names,match_subset_condition,match_groupby_names,match_having_condition,lower_tolerance,upper_tolerance', NULL, 'Record Subset Condition,Grouping Columns,Group Subset Condition,Matching Schema Name,Matching Table Name,Matching Aggregate Expression,Matching Record Subset Condition,Matching Grouping Columns,Matching Group Subset Condition,Lower Tolerance Constant,Upper Tolerance Constant', 'Condition defining a subset of records in main table, written like a condition within a SQL WHERE clause - OPTIONAL|Category columns in main table separated by commas (e.g. GROUP BY columns)|Condition defining a subset of aggregate records in main table (e.g. HAVING clause) - OPTIONAL|Schema location of matching table|Matching table name|Aggregate column expression in matching table: one of `SUM([column_name])` or `COUNT([column_name])`|Condition defining a subset of records in matching table, written like a condition within a SQL WHERE clause - OPTIONAL|Category columns in matching table separated by commas (e.g. GROUP BY columns)|Condition defining a subset of aggregate records in matching table (e.g. HAVING clause) - OPTIONAL|Allowable tolerance below the reference measure expressed as a constant value|Allowable tolerance above the reference measure expressed as a constant value', 'Fail', 'QUERY', 'referential', 'Consistency', 'Data Drift', 'Expected count of group totals not matching aggregate value', 'This test compares sums or counts of a column rolled up to one or more category combinations across two different tables. Both tables must be accessible at the same time. Use it to confirm that two datasets closely match within the tolerances you define as specific values above or below the aggregate measure for the same categories in the reference dataset -- that the sum of a measure or count of a value remains sufficiently consistent between categories. For instance, you can use this test to compare sales per product within one month to another, when you want to be alerted if the difference for any product falls outside of the range defined as 10000 dollars above or below the prior week. An error here means that one or more value combinations fail to match within the set tolerances. New categories or combinations will cause failure.', 'Y'), ('1508', 'Timeframe_Combo_Gain', 'Timeframe No Drops', 'Latest timeframe has at least all value combinations from prior period', 'Tests that column values in most recent time-window include at least same as prior time window', 'Column values in most recent time-window don''t include all values in prior window.', 'Mismatched values', NULL, NULL, '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', 'Categorical Column List', 'Specify one or more Categorical columns, separated by commas. Make sure not to use continuous measurements here. Do not use numeric values unless they represent discrete categories.', 'window_date_column,window_days,subset_condition', NULL, 'Date Column for Time Windows,Time Window in Days,Record Subset Condition', 'The date column used to define the time windows. This must be a DATE or DATETIME type.|Length in days of the time window.
The test will compare the most recent period of days to the prior period of the same duration.|Condition defining a subset of records in main table to evaluate, written like a condition within a SQL WHERE clause - OPTIONAL', 'Fail', 'QUERY', 'referential', 'Consistency', 'Data Drift', 'Expected count of missing value combinations', 'This test checks a single transactional table to verify that categorical values or combinations that are present in the most recent time window you define include at least all those found in the prior time window of the same duration. Missing values in the latest time window will trigger the test to fail. New values are permitted. Use this test to confirm that codes or categories are not lost across successive time periods in a transactional table.', 'Y'), ('1509', 'Timeframe_Combo_Match', 'Timeframe Match', 'Column value combinations from latest timeframe same as prior period', 'Tests for presence of same column values in most recent time-window vs. prior time window', 'Column values don''t match in most recent time-windows.', 'Mismatched values', NULL, NULL, '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', 'Categorical Column List', 'Specify one or more Categorical columns, separated by commas. Do not use continuous measurements here. Do not use numeric values unless they represent discrete categories.', 'window_date_column,window_days,subset_condition', NULL, 'Date Column for Time Windows,Time Window in Days,Record Subset Condition', NULL, 'Fail', 'QUERY', 'referential', 'Consistency', 'Data Drift', 'Expected count of non-matching value combinations', 'This test checks a single transactional table (such as a fact table) to verify that categorical values or combinations that are present in the most recent time window you define match those found in the prior time window of the same duration. New or missing values in the latest time window will trigger the test to fail. Use this test to confirm the consistency in the occurrence of codes or categories across successive time periods in a transactional table.', 'Y'), - ('1510', 'Dupe_Rows', 'Duplicate Rows', 'Rows are not duplicated in table', 'Tests for the absence of duplicate rows based on unique combination of column values', 'Column value combinations are duplicated in the table.', 'Duplicate records', NULL, NULL, '(({RESULT_MEASURE}-{THRESHOLD_VALUE}))::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', 'null', 'null', 'groupby_names', NULL, 'Columns to Compare', 'List of columns in the table that define a duplicate record when the combination of values is repeated on multiple rows', 'Fail', 'QUERY', 'table', 'Uniqueness', 'Schema Drift', 'Expected count of duplicate value combinations', 'This test verifies that combinations of values are not repeated within the table. By default when auto-generated, the test considers all columns to protect against duplication of entire rows. If you know the minimum columns that should constitute a unique record, such as a set of ID''s, you should use those to make the test as sensitive as possible. 
Alternatively, if you know of columns you can always exclude, such as file_date or refresh_snapshot_id, remove them to tighten the test somewhat.', 'Y') + ('1510', 'Dupe_Rows', 'Duplicate Rows', 'Rows are not duplicated in table', 'Tests for the absence of duplicate rows based on unique combination of column values', 'Column value combinations are duplicated in the table.', 'Duplicate records', NULL, NULL, '(({RESULT_MEASURE}-{THRESHOLD_VALUE}))::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', 'null', 'null', 'groupby_names', NULL, 'Columns to Compare', 'List of columns in the table that define a duplicate record when the combination of values is repeated on multiple rows', 'Fail', 'QUERY', 'table', 'Uniqueness', 'Schema Drift', 'Expected count of duplicate value combinations', 'This test verifies that combinations of values are not repeated within the table. By default when auto-generated, the test considers all columns to protect against duplication of entire rows. If you know the minimum columns that should constitute a unique record, such as a set of ID''s, you should use those to make the test as sensitive as possible. Alternatively, if you know of columns you can always exclude, such as file_date or refresh_snapshot_id, remove them to tighten the test somewhat.', 'Y'), + ('1511', 'Stale_Table', 'Stale Table', 'Stale Table Not Updated', 'Confirms whether table has been updated based on data fingerprint', 'Table has not been updated.', 'Update detected', NULL, 'TEMPLATE', '(({RESULT_MEASURE}-{THRESHOLD_VALUE}))::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '0.0', 'null', 'null', 'custom_query,subset_condition', NULL, 'Fingerprint Expression,Record Subset Condition', 'String expression combining key column measures into a distinct representation of table state', 'Log', 'QUERY', 'table', 'Recency', 'Recency', 'Most recent prior table fingerprint', 'This test compares the current table fingerprint, calculated signature of column contents, to confirm that the table has been updated. The table fingerprint is derived from a set of values and aggregates from columns most likely to change. 
This test allows you to track the schedule and frequency of updates and refreshes to the table.', 'Y') ; @@ -227,7 +228,13 @@ VALUES ('2001', 'Combo_Match', 'redshift', 'ex_data_match_generic.sql'), ('2408', 'Timeframe_Combo_Match', 'databricks', 'ex_window_match_same_databricks.sql'), ('2409', 'Aggregate_Balance_Percent', 'databricks', 'ex_aggregate_match_percent_generic.sql'), ('2410', 'Aggregate_Balance_Range', 'databricks', 'ex_aggregate_match_range_generic.sql'), - ('2411', 'Dupe_Rows', 'databricks', 'ex_dupe_rows_generic.sql') + ('2411', 'Dupe_Rows', 'databricks', 'ex_dupe_rows_generic.sql'), + + ('2012', 'Stale_Table', 'redshift', 'ex_table_changed_generic.sql'), + ('2112', 'Stale_Table', 'snowflake', 'ex_table_changed_generic.sql'), + ('2212', 'Stale_Table', 'mssql', 'ex_table_changed_generic.sql'), + ('2312', 'Stale_Table', 'postgresql', 'ex_table_changed_generic.sql'), + ('2412', 'Stale_Table', 'databricks', 'ex_table_changed_generic.sql') ; TRUNCATE TABLE cat_test_conditions; diff --git a/testgen/template/dbupgrade/0500_incremental_upgrade.sql b/testgen/template/dbupgrade/0500_incremental_upgrade.sql new file mode 100644 index 00000000..7bb0f165 --- /dev/null +++ b/testgen/template/dbupgrade/0500_incremental_upgrade.sql @@ -0,0 +1,8 @@ +SET SEARCH_PATH TO {SCHEMA_NAME}; + +ALTER TABLE test_definitions + ADD COLUMN history_calculation VARCHAR(20), + ADD COLUMN history_lookback INTEGER; + +ALTER TABLE test_results + ADD COLUMN result_signal VARCHAR(1000); \ No newline at end of file diff --git a/testgen/template/execution/ex_update_history_threshold_last_n.sql b/testgen/template/execution/ex_update_history_threshold_last_n.sql new file mode 100644 index 00000000..5049696e --- /dev/null +++ b/testgen/template/execution/ex_update_history_threshold_last_n.sql @@ -0,0 +1,50 @@ +WITH stats AS ( + SELECT + d.id AS test_definition_id, + CASE d.history_calculation + WHEN 'Value' THEN MIN(r.result_signal::NUMERIC)::VARCHAR + WHEN 'Minimum' THEN MIN(r.result_signal::NUMERIC)::VARCHAR + WHEN 'Maximum' THEN MAX(r.result_signal::NUMERIC)::VARCHAR + WHEN 'Sum' THEN SUM(r.result_signal::NUMERIC)::VARCHAR + WHEN 'Average' THEN AVG(r.result_signal::NUMERIC)::VARCHAR + END AS calc_signal + FROM test_definitions d + INNER JOIN LATERAL ( + SELECT result_signal + FROM test_results tr + WHERE tr.test_definition_id = d.id + ORDER BY tr.test_time DESC + LIMIT d.history_lookback + ) AS r ON TRUE + WHERE d.test_suite_id = '{TEST_SUITE_ID}' + AND d.test_active = 'Y' + AND d.history_lookback IS NOT NULL + GROUP BY d.id, d.history_calculation, d.history_lookback +) +UPDATE test_definitions t +SET baseline_value = s.calc_signal +FROM stats s +WHERE t.id = s.test_definition_id; + +/* +UPDATE test_definitions du + SET baseline_value = stats.calc_signal + FROM LATERAL ( + SELECT CASE du.history_calculation + WHEN 'Average' THEN AVG(r.result_signal) + WHEN 'Minimum' THEN MIN(r.result_signal) + WHEN 'Maximum' THEN MAX(r.result_signal) + WHEN 'Sum' THEN SUM(r.result_signal) + WHEN 'Value' THEN MAX(r.result_signal) -- MAX of 1 value + END AS calc_signal + FROM ( SELECT result_signal + FROM test_results tr + WHERE tr.test_definition_id = du.id + ORDER BY tr.test_time DESC + LIMIT du.history_lookback -- dynamically bound per row + ) AS r + ) AS stats + WHERE du.test_suite_id = '{TEST_SUITE_ID}' + AND du.test_active = 'Y' + AND du.history_lookback IS NOT NULL; +*/ \ No newline at end of file diff --git a/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_no_drops_generic.sql 
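The new ex_update_history_threshold_last_n.sql template above recomputes each active test's baseline_value from its most recent result signals. As a plain-Python restatement of the same aggregation (toy data; the real computation is the LATERAL subquery in the SQL, and signals are assumed numeric):

    # Restatement of the last-N aggregation: take the `history_lookback`
    # most recent result signals (newest first, mirroring ORDER BY
    # tr.test_time DESC LIMIT d.history_lookback) and aggregate them with
    # the configured `history_calculation`.
    def calc_baseline(signals: list[float], calculation: str, lookback: int) -> float | None:
        last_n = signals[:lookback]
        if not last_n:
            return None
        aggregates = {
            "Value": min,  # the SQL also uses MIN here; with lookback=1 it is just the latest value
            "Minimum": min,
            "Maximum": max,
            "Sum": sum,
            "Average": lambda s: sum(s) / len(s),
        }
        return aggregates[calculation](last_n)

    assert calc_baseline([4.0, 2.0, 9.0], "Average", 2) == 3.0
    assert calc_baseline([4.0, 2.0, 9.0], "Value", 1) == 4.0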
b/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_no_drops_generic.sql index e376ef71..b8db09e4 100644 --- a/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_no_drops_generic.sql +++ b/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_no_drops_generic.sql @@ -11,6 +11,7 @@ SELECT '{TEST_TYPE}' as test_type, '{SKIP_ERRORS}' as threshold_value, {SKIP_ERRORS} as skip_errors, '{INPUT_PARAMETERS}' as input_parameters, + NULL as result_signal, CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code, CASE WHEN COUNT(*) > 0 THEN diff --git a/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_generic.sql b/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_generic.sql index 62a92d40..872acfe5 100644 --- a/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_generic.sql +++ b/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_generic.sql @@ -11,6 +11,7 @@ SELECT '{TEST_TYPE}' as test_type, '{SKIP_ERRORS}' as threshold_value, {SKIP_ERRORS} as skip_errors, '{INPUT_PARAMETERS}' as input_parameters, + NULL as result_signal, CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code, CASE WHEN COUNT(*) > 0 THEN diff --git a/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_range_generic.sql b/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_range_generic.sql index 9ab77d10..d97df8a8 100644 --- a/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_range_generic.sql +++ b/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_range_generic.sql @@ -11,6 +11,7 @@ SELECT '{TEST_TYPE}' as test_type, '{SKIP_ERRORS}' as threshold_value, {SKIP_ERRORS} as skip_errors, '{INPUT_PARAMETERS}' as input_parameters, + NULL as result_signal, CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code, CASE WHEN COUNT(*) > 0 THEN diff --git a/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_same_generic.sql b/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_same_generic.sql index 8a4c4cdf..7fb3787e 100644 --- a/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_same_generic.sql +++ b/testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_same_generic.sql @@ -11,6 +11,7 @@ SELECT '{TEST_TYPE}' as test_type, '{SKIP_ERRORS}' as threshold_value, {SKIP_ERRORS} as skip_errors, '{INPUT_PARAMETERS}' as input_parameters, + NULL as result_signal, CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code, CASE WHEN COUNT(*) > 0 THEN diff --git a/testgen/template/flavors/generic/exec_query_tests/ex_custom_query_generic.sql b/testgen/template/flavors/generic/exec_query_tests/ex_custom_query_generic.sql index 096dc351..19d0c515 100644 --- a/testgen/template/flavors/generic/exec_query_tests/ex_custom_query_generic.sql +++ b/testgen/template/flavors/generic/exec_query_tests/ex_custom_query_generic.sql @@ -15,6 +15,7 @@ SELECT '{TEST_TYPE}' as test_type, {SKIP_ERRORS} as skip_errors, /* TODO: 'custom_query= {CUSTOM_QUERY_ESCAPED}' as input_parameters, */ 'Skip_Errors={SKIP_ERRORS}' as input_parameters, + NULL as result_signal, CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code, CASE WHEN COUNT(*) > 0 THEN diff --git a/testgen/template/flavors/generic/exec_query_tests/ex_data_match_2way_generic.sql b/testgen/template/flavors/generic/exec_query_tests/ex_data_match_2way_generic.sql 
index a52f4a36..0640907e 100644 --- a/testgen/template/flavors/generic/exec_query_tests/ex_data_match_2way_generic.sql +++ b/testgen/template/flavors/generic/exec_query_tests/ex_data_match_2way_generic.sql @@ -11,6 +11,7 @@ SELECT '{TEST_TYPE}' as test_type, '{SKIP_ERRORS}' as threshold_value, {SKIP_ERRORS} as skip_errors, '{INPUT_PARAMETERS}' as input_parameters, + NULL as result_signal, CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code, CASE WHEN COUNT(*) > 0 THEN diff --git a/testgen/template/flavors/generic/exec_query_tests/ex_data_match_generic.sql b/testgen/template/flavors/generic/exec_query_tests/ex_data_match_generic.sql index 9d6702b5..84317e8a 100644 --- a/testgen/template/flavors/generic/exec_query_tests/ex_data_match_generic.sql +++ b/testgen/template/flavors/generic/exec_query_tests/ex_data_match_generic.sql @@ -11,6 +11,7 @@ SELECT '{TEST_TYPE}' as test_type, '{SKIP_ERRORS}' as threshold_value, {SKIP_ERRORS} as skip_errors, '{INPUT_PARAMETERS}' as input_parameters, + NULL as result_signal, CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code, CASE WHEN COUNT(*) > 0 THEN diff --git a/testgen/template/flavors/generic/exec_query_tests/ex_dupe_rows_generic.sql b/testgen/template/flavors/generic/exec_query_tests/ex_dupe_rows_generic.sql index 2ec939c1..4cdbf875 100644 --- a/testgen/template/flavors/generic/exec_query_tests/ex_dupe_rows_generic.sql +++ b/testgen/template/flavors/generic/exec_query_tests/ex_dupe_rows_generic.sql @@ -11,6 +11,7 @@ SELECT '{TEST_TYPE}' as test_type, '{SKIP_ERRORS}' as threshold_value, {SKIP_ERRORS} as skip_errors, '{INPUT_PARAMETERS}' as input_parameters, + NULL as result_signal, CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code, CASE WHEN COUNT(*) > 0 THEN diff --git a/testgen/template/flavors/generic/exec_query_tests/ex_prior_match_generic.sql b/testgen/template/flavors/generic/exec_query_tests/ex_prior_match_generic.sql index 654f7a1a..d7030589 100644 --- a/testgen/template/flavors/generic/exec_query_tests/ex_prior_match_generic.sql +++ b/testgen/template/flavors/generic/exec_query_tests/ex_prior_match_generic.sql @@ -6,6 +6,7 @@ SELECT '{TEST_TYPE}' as test_type, {SKIP_ERRORS} as skip_errors, 'schema_name = {SCHEMA_NAME}, prior_schema = {MATCH_SCHEMA_NAME}, table_name = {TABLE_NAME}, column_name = {COLUMN_NAME}, subset_condition = {SUBSET_CONDITION}, mode = {MODE}' as input_parameters, + NULL as result_signal, CASE WHEN COUNT(*) > COALESCE(skip_errors, 0) THEN 0 ELSE 1 END as result_code, CONCAT( CONCAT( 'Mismatched measures: ', CAST( COALESCE(COUNT(*), 0) AS {VARCHAR_TYPE}) ), diff --git a/testgen/template/flavors/generic/exec_query_tests/ex_relative_entropy_generic.sql b/testgen/template/flavors/generic/exec_query_tests/ex_relative_entropy_generic.sql index 9a5e5d79..0e6e0647 100644 --- a/testgen/template/flavors/generic/exec_query_tests/ex_relative_entropy_generic.sql +++ b/testgen/template/flavors/generic/exec_query_tests/ex_relative_entropy_generic.sql @@ -39,6 +39,7 @@ SELECT '{TEST_TYPE}' as test_type, '{THRESHOLD_VALUE}' as threshold_value, NULL as skip_errors, '{INPUT_PARAMETERS}' as input_parameters, + NULL as result_signal, CASE WHEN js_divergence > {THRESHOLD_VALUE} THEN 0 ELSE 1 END as result_code, CONCAT('Divergence Level: ', CONCAT(CAST(js_divergence AS {VARCHAR_TYPE}), diff --git a/testgen/template/flavors/generic/exec_query_tests/ex_table_changed_generic.sql b/testgen/template/flavors/generic/exec_query_tests/ex_table_changed_generic.sql new file mode 100644 index 
00000000..da660156 --- /dev/null +++ b/testgen/template/flavors/generic/exec_query_tests/ex_table_changed_generic.sql @@ -0,0 +1,33 @@ +SELECT '{TEST_TYPE}' as test_type, + '{TEST_DEFINITION_ID}' as test_definition_id, + '{TEST_SUITE_ID}' as test_suite_id, + '{TEST_RUN_ID}' as test_run_id, + '{RUN_DATE}' as test_time, + '{START_TIME}' as starttime, + CURRENT_TIMESTAMP as endtime, + '{SCHEMA_NAME}' as schema_name, + '{TABLE_NAME}' as table_name, + '{COLUMN_NAME_NO_QUOTES}' as column_names, + '{SKIP_ERRORS}' as threshold_value, + {SKIP_ERRORS} as skip_errors, + '{INPUT_PARAMETERS}' as input_parameters, + fingerprint as result_signal, + /* Fails if table is the same */ + CASE WHEN fingerprint = '{BASELINE_VALUE}' THEN 0 ELSE 1 END as result_code, + + CASE + WHEN fingerprint = '{BASELINE_VALUE}' + THEN 'No table change detected.' + ELSE 'Table change detected.' + END AS result_message, + CASE + WHEN fingerprint = '{BASELINE_VALUE}' + THEN 0 + ELSE 1 + END as result_measure, + '{SUBSET_DISPLAY}' as subset_condition, + NULL as result_query + FROM ( SELECT {CUSTOM_QUERY} as fingerprint + FROM {SCHEMA_NAME}.{TABLE_NAME} + WHERE {SUBSET_CONDITION} + ) test; diff --git a/testgen/template/flavors/generic/exec_query_tests/ex_window_match_no_drops_generic.sql b/testgen/template/flavors/generic/exec_query_tests/ex_window_match_no_drops_generic.sql index 19ccf9b8..3993e1a4 100644 --- a/testgen/template/flavors/generic/exec_query_tests/ex_window_match_no_drops_generic.sql +++ b/testgen/template/flavors/generic/exec_query_tests/ex_window_match_no_drops_generic.sql @@ -11,6 +11,7 @@ SELECT '{TEST_TYPE}' as test_type, '{SKIP_ERRORS}' as threshold_value, {SKIP_ERRORS} as skip_errors, '{INPUT_PARAMETERS}' as input_parameters, + NULL as result_signal, CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code, CASE WHEN COUNT(*) > 0 THEN diff --git a/testgen/template/flavors/generic/exec_query_tests/ex_window_match_same_generic.sql b/testgen/template/flavors/generic/exec_query_tests/ex_window_match_same_generic.sql index 3825f8b7..42e603be 100644 --- a/testgen/template/flavors/generic/exec_query_tests/ex_window_match_same_generic.sql +++ b/testgen/template/flavors/generic/exec_query_tests/ex_window_match_same_generic.sql @@ -11,6 +11,7 @@ SELECT '{TEST_TYPE}' as test_type, '{SKIP_ERRORS}' as threshold_value, {SKIP_ERRORS} as skip_errors, '{INPUT_PARAMETERS}' as input_parameters, + NULL as result_signal, CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code, CASE WHEN COUNT(*) > 0 THEN diff --git a/testgen/template/gen_query_tests/gen_table_changed_test.sql b/testgen/template/gen_query_tests/gen_table_changed_test.sql new file mode 100644 index 00000000..2e4c1b72 --- /dev/null +++ b/testgen/template/gen_query_tests/gen_table_changed_test.sql @@ -0,0 +1,143 @@ +INSERT INTO test_definitions (table_groups_id, profile_run_id, test_type, test_suite_id, + schema_name, table_name, + skip_errors, test_active, last_auto_gen_date, profiling_as_of_date, + lock_refresh, custom_query ) +WITH last_run AS (SELECT r.table_groups_id, MAX(run_date) AS last_run_date + FROM profile_results p + INNER JOIN profiling_runs r + ON (p.profile_run_id = r.id) + INNER JOIN test_suites ts + ON p.project_code = ts.project_code + AND p.connection_id = ts.connection_id + WHERE p.project_code = '{PROJECT_CODE}' + AND r.table_groups_id = '{TABLE_GROUPS_ID}'::UUID + AND ts.id = '{TEST_SUITE_ID}' + AND p.run_date::DATE <= '{AS_OF_DATE}' + GROUP BY r.table_groups_id), +curprof AS (SELECT p.profile_run_id, schema_name, 
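The execution template just added (ex_table_changed_generic.sql) evaluates the fingerprint in the target database and compares it to the stored baseline. A loose Python sketch of that comparison follows; the fetch_one helper is hypothetical and stands in for fetch_from_target_db, since the template does all of this in one SQL statement:

    # Loose sketch of the stale-table check: result_code 1 (pass) means the
    # fingerprint changed, i.e. the table was updated; result_code 0 flags
    # a stale table, matching the "Fails if table is the same" comment.
    def stale_table_result_code(fetch_one, schema: str, table: str,
                                fingerprint_expr: str, subset: str,
                                baseline: str | None) -> int:
        row = fetch_one(
            f"SELECT {fingerprint_expr} AS fingerprint "
            f"FROM {schema}.{table} WHERE {subset}"
        )
        return 0 if row["fingerprint"] == baseline else 1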
table_name, column_name, functional_data_type, general_type, + distinct_value_ct, record_ct, max_value, min_value, avg_value, stdev_value, null_value_ct + FROM last_run lr + INNER JOIN profile_results p + ON (lr.table_groups_id = p.table_groups_id + AND lr.last_run_date = p.run_date) ), +locked AS (SELECT schema_name, table_name + FROM test_definitions + WHERE table_groups_id = '{TABLE_GROUPS_ID}'::UUID + AND test_suite_id = '{TEST_SUITE_ID}' + AND test_type = '{TEST_TYPE}' + AND lock_refresh = 'Y'), +-- IDs - TOP 2 +id_cols + AS ( SELECT profile_run_id, schema_name, table_name, column_name, functional_data_type, general_type, + distinct_value_ct, + ROW_NUMBER() OVER (PARTITION BY schema_name, table_name + ORDER BY + CASE + WHEN functional_data_type ILIKE 'ID-Unique%' THEN 1 + WHEN functional_data_type = 'ID-Secondary' THEN 2 + ELSE 3 + END, distinct_value_ct DESC) AS rank + FROM curprof + WHERE functional_data_type ILIKE 'ID%'), +-- Process Date - TOP 1 +process_date_cols + AS (SELECT profile_run_id, schema_name, table_name, column_name, functional_data_type, general_type, + distinct_value_ct, + ROW_NUMBER() OVER (PARTITION BY schema_name, table_name + ORDER BY + CASE + WHEN column_name ILIKE '%mod%' THEN 1 + WHEN column_name ILIKE '%up%' THEN 1 + WHEN column_name ILIKE '%cr%' THEN 2 + WHEN column_name ILIKE '%in%' THEN 2 + END , distinct_value_ct DESC) AS rank + FROM curprof + WHERE functional_data_type ILIKE 'process%'), +-- Transaction Date - TOP 1 +tran_date_cols + AS ( SELECT profile_run_id, schema_name, table_name, column_name, functional_data_type, general_type, + distinct_value_ct, + ROW_NUMBER() OVER (PARTITION BY schema_name, table_name + ORDER BY + distinct_value_ct DESC) AS rank + FROM curprof + WHERE functional_data_type ILIKE 'transactional date%' OR functional_data_type ILIKE 'period%' + OR functional_data_type = 'timestamp' ), + +-- Numeric Measures +numeric_cols + AS ( SELECT profile_run_id, schema_name, table_name, column_name, functional_data_type, general_type, +/* + -- Subscores + distinct_value_ct * 1.0 / NULLIF(record_ct, 0) AS cardinality_score, + (max_value - min_value) / NULLIF(ABS(NULLIF(avg_value, 0)), 1) AS range_score, + LEAST(1, LOG(GREATEST(distinct_value_ct, 2))) / LOG(GREATEST(record_ct, 2)) AS nontriviality_score, + stdev_value / NULLIF(ABS(NULLIF(avg_value, 0)), 1) AS variability_score, + 1.0 - (null_value_ct * 1.0 / NULLIF(NULLIF(record_ct, 0), 1)) AS null_penalty, +*/ + -- Weighted score + ( + 0.25 * (distinct_value_ct * 1.0 / NULLIF(record_ct, 0)) + + 0.15 * ((max_value - min_value) / NULLIF(ABS(NULLIF(avg_value, 0)), 1)) + + 0.10 * (LEAST(1, LOG(GREATEST(distinct_value_ct, 2))) / LOG(GREATEST(record_ct, 2))) + + 0.40 * (stdev_value / NULLIF(ABS(NULLIF(avg_value, 0)), 1)) + + 0.10 * (1.0 - (null_value_ct * 1.0 / NULLIF(NULLIF(record_ct, 0), 1))) + ) AS change_detection_score + FROM curprof + WHERE general_type = 'N' + AND (functional_data_type ILIKE 'Measure%' OR functional_data_type IN ('Sequence', 'Constant')) + ), +numeric_cols_ranked + AS ( SELECT *, + ROW_NUMBER() OVER (PARTITION BY schema_name, table_name + ORDER BY change_detection_score DESC) as rank + FROM numeric_cols + WHERE change_detection_score IS NOT NULL), +combined + AS ( SELECT profile_run_id, schema_name, table_name, column_name, 'ID' AS element_type, general_type, rank AS fingerprint_order + FROM id_cols + WHERE rank <= 2 + UNION ALL + SELECT profile_run_id, schema_name, table_name, column_name, 'DATE_P' AS element_type, general_type, rank AS fingerprint_order + FROM 
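The generation template ranks numeric columns by a weighted change-detection score before folding the top-ranked one into the fingerprint. Below is an approximate Python paraphrase of that weighting; the weights come from the template, but the divide-by-zero guards are simplified relative to the SQL's NULLIF handling, and the sample values are invented:

    import math

    # Approximate paraphrase of change_detection_score: 25% cardinality,
    # 15% value range, 10% non-triviality, 40% variability, 10% null
    # penalty. Higher scores mean a column is more likely to move when the
    # table is refreshed.
    def change_detection_score(distinct_ct: int, record_ct: int,
                               max_v: float, min_v: float, avg_v: float,
                               stdev_v: float, null_ct: int) -> float:
        denom = abs(avg_v) if avg_v else 1.0
        cardinality = distinct_ct / record_ct
        value_range = (max_v - min_v) / denom
        nontriviality = min(1.0, math.log10(max(distinct_ct, 2))) / math.log10(max(record_ct, 2))
        variability = stdev_v / denom
        null_penalty = 1.0 - null_ct / record_ct
        return (0.25 * cardinality + 0.15 * value_range
                + 0.10 * nontriviality + 0.40 * variability
                + 0.10 * null_penalty)

    # A high-churn amount column scores well above a near-constant one:
    print(change_detection_score(9500, 10000, 5000.0, 0.01, 350.0, 120.0, 50))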
process_date_cols + WHERE rank = 1 + UNION ALL + SELECT profile_run_id, schema_name, table_name, column_name, 'DATE_T' AS element_type, general_type, rank AS fingerprint_order + FROM tran_date_cols + WHERE rank = 1 + UNION ALL + SELECT profile_run_id, schema_name, table_name, column_name, 'MEAS' AS element_type, general_type, rank AS fingerprint_order + FROM numeric_cols_ranked + WHERE rank = 1 ), +newtests + AS (SELECT profile_run_id, schema_name, table_name, + 'COUNT(*)::VARCHAR || ''|'' || ' || + STRING_AGG( + REPLACE( + CASE + WHEN general_type = 'D' THEN 'MIN(@@@)::VARCHAR || ''|'' || MAX(@@@::VARCHAR) || ''|'' || COUNT(DISTINCT @@@)::VARCHAR' + WHEN general_type = 'A' THEN 'MIN(@@@)::VARCHAR || ''|'' || MAX(@@@::VARCHAR) || ''|'' || COUNT(DISTINCT @@@)::VARCHAR || ''|'' || SUM(LENGTH(@@@))::VARCHAR' + WHEN general_type = 'N' THEN 'MIN(@@@)::VARCHAR || ''|'' || MAX(@@@::VARCHAR) || ''|'' || SUM(@@@)::VARCHAR || ''|'' || ROUND(AVG(@@@), 5)::VARCHAR || ''|'' || ROUND(STDDEV(@@@), 5)::VARCHAR' + END, + '@@@', '"' || column_name || '"'), + ' || ''|'' || ' + ORDER BY element_type, fingerprint_order) as fingerprint + FROM combined + GROUP BY profile_run_id, schema_name, table_name) +SELECT '{TABLE_GROUPS_ID}'::UUID as table_groups_id, + n.profile_run_id, + 'Stale_Table' AS test_type, + '{TEST_SUITE_ID}' AS test_suite_id, + n.schema_name, n.table_name, + 0 as skip_errors, 'Y' as test_active, + + '{RUN_DATE}'::TIMESTAMP as last_auto_gen_date, + '{AS_OF_DATE}'::TIMESTAMP as profiling_as_of_date, + 'N' as lock_refresh, + fingerprint as custom_query +FROM newtests n +LEFT JOIN locked l + ON (n.schema_name = l.schema_name + AND n.table_name = l.table_name) +WHERE l.schema_name IS NULL; + From 51dba8c5df4192ab7a5cd66185cbdbb26c7447af Mon Sep 17 00:00:00 2001 From: "Chip.Bloche" Date: Wed, 30 Jul 2025 11:35:03 -0400 Subject: [PATCH 15/28] Test logic fixes --- .../gen_funny_cat_tests/gen_test_row_ct_pct.sql | 2 +- .../gen_query_tests/gen_dupe_rows_test.sql | 14 +++++++++++--- .../gen_query_tests/gen_table_changed_test.sql | 12 ++++++++++-- 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/testgen/template/gen_funny_cat_tests/gen_test_row_ct_pct.sql b/testgen/template/gen_funny_cat_tests/gen_test_row_ct_pct.sql index a338a2e2..656ad687 100644 --- a/testgen/template/gen_funny_cat_tests/gen_test_row_ct_pct.sql +++ b/testgen/template/gen_funny_cat_tests/gen_test_row_ct_pct.sql @@ -50,7 +50,7 @@ SELECT n.table_groups_id, n.profile_run_id, :RUN_DATE ::TIMESTAMP as last_auto_gen_date, :AS_OF_DATE ::TIMESTAMP as profiling_as_of_date, 'Y' as test_active, - record_ct as baseline_ct, 0.5 AS threshold_value + record_ct as baseline_ct, 10 AS threshold_value FROM newtests n LEFT JOIN locked l ON (n.schema_name = l.schema_name diff --git a/testgen/template/gen_query_tests/gen_dupe_rows_test.sql b/testgen/template/gen_query_tests/gen_dupe_rows_test.sql index e28164a8..5f416ec8 100644 --- a/testgen/template/gen_query_tests/gen_dupe_rows_test.sql +++ b/testgen/template/gen_query_tests/gen_dupe_rows_test.sql @@ -25,11 +25,19 @@ WITH last_run AS (SELECT r.table_groups_id, MAX(run_date) AS last_run_date FROM test_definitions WHERE table_groups_id = '{TABLE_GROUPS_ID}'::UUID AND test_suite_id = '{TEST_SUITE_ID}' - AND test_type = '{TEST_TYPE}' + AND test_type = 'Dupe_Rows' AND lock_refresh = 'Y'), newtests AS (SELECT * - FROM curprof - WHERE schema_name = '{DATA_SCHEMA}') + FROM curprof p + INNER JOIN test_types t + ON ('Dupe_Rows' = t.test_type + AND 'Y' = t.active) + LEFT JOIN generation_sets s + ON 
(t.test_type = s.test_type + AND '{GENERATION_SET}' = s.generation_set) + WHERE p.schema_name = '{DATA_SCHEMA}' + AND (s.generation_set IS NOT NULL + OR '{GENERATION_SET}' = '') ) SELECT '{TABLE_GROUPS_ID}'::UUID as table_groups_id, n.profile_run_id, 'Dupe_Rows' AS test_type, diff --git a/testgen/template/gen_query_tests/gen_table_changed_test.sql b/testgen/template/gen_query_tests/gen_table_changed_test.sql index 2e4c1b72..836a65fd 100644 --- a/testgen/template/gen_query_tests/gen_table_changed_test.sql +++ b/testgen/template/gen_query_tests/gen_table_changed_test.sql @@ -24,7 +24,7 @@ locked AS (SELECT schema_name, table_name FROM test_definitions WHERE table_groups_id = '{TABLE_GROUPS_ID}'::UUID AND test_suite_id = '{TEST_SUITE_ID}' - AND test_type = '{TEST_TYPE}' + AND test_type = 'Stale_Table' AND lock_refresh = 'Y'), -- IDs - TOP 2 id_cols @@ -136,8 +136,16 @@ SELECT '{TABLE_GROUPS_ID}'::UUID as table_groups_id, 'N' as lock_refresh, fingerprint as custom_query FROM newtests n +INNER JOIN test_types t + ON ('Stale_Table' = t.test_type + AND 'Y' = t.active) +LEFT JOIN generation_sets s + ON (t.test_type = s.test_type + AND '{GENERATION_SET}' = s.generation_set) LEFT JOIN locked l ON (n.schema_name = l.schema_name AND n.table_name = l.table_name) -WHERE l.schema_name IS NULL; +WHERE (s.generation_set IS NOT NULL + OR '{GENERATION_SET}' = '') + AND l.schema_name IS NULL; From b7f13784b34835b6116857aa25e11c031ec886d5 Mon Sep 17 00:00:00 2001 From: "Chip.Bloche" Date: Wed, 30 Jul 2025 15:39:45 -0400 Subject: [PATCH 16/28] Test SQL tweaks --- .../ex_finalize_test_run_results.sql | 1 + .../ex_update_history_threshold_last_n.sql | 23 ------------------- 2 files changed, 1 insertion(+), 23 deletions(-) diff --git a/testgen/template/execution/ex_finalize_test_run_results.sql b/testgen/template/execution/ex_finalize_test_run_results.sql index b2070b94..707b99ab 100644 --- a/testgen/template/execution/ex_finalize_test_run_results.sql +++ b/testgen/template/execution/ex_finalize_test_run_results.sql @@ -2,6 +2,7 @@ UPDATE test_results SET test_description = COALESCE(r.test_description, d.test_description, tt.test_description), severity = COALESCE(d.severity, s.severity, tt.default_severity), threshold_value = COALESCE(r.threshold_value, d.threshold_value), + result_signal = COALESCE(r.result_signal, r.result_measure), result_status = CASE WHEN r.result_status = 'Error' THEN 'Error' WHEN r.result_code = 1 THEN 'Passed' diff --git a/testgen/template/execution/ex_update_history_threshold_last_n.sql b/testgen/template/execution/ex_update_history_threshold_last_n.sql index 5049696e..d3e89284 100644 --- a/testgen/template/execution/ex_update_history_threshold_last_n.sql +++ b/testgen/template/execution/ex_update_history_threshold_last_n.sql @@ -25,26 +25,3 @@ UPDATE test_definitions t SET baseline_value = s.calc_signal FROM stats s WHERE t.id = s.test_definition_id; - -/* -UPDATE test_definitions du - SET baseline_value = stats.calc_signal - FROM LATERAL ( - SELECT CASE du.history_calculation - WHEN 'Average' THEN AVG(r.result_signal) - WHEN 'Minimum' THEN MIN(r.result_signal) - WHEN 'Maximum' THEN MAX(r.result_signal) - WHEN 'Sum' THEN SUM(r.result_signal) - WHEN 'Value' THEN MAX(r.result_signal) -- MAX of 1 value - END AS calc_signal - FROM ( SELECT result_signal - FROM test_results tr - WHERE tr.test_definition_id = du.id - ORDER BY tr.test_time DESC - LIMIT du.history_lookback -- dynamically bound per row - ) AS r - ) AS stats - WHERE du.test_suite_id = '{TEST_SUITE_ID}' - AND du.test_active = 'Y' - 
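The fix above also makes both new generation templates honor generation sets. The WHERE condition reads naturally as: generate the test when the caller requested no generation set at all, or when the test type is a member of the requested set. In plain Python, with illustrative membership data:

    # Restatement of the generation-set filter added to
    # gen_dupe_rows_test.sql and gen_table_changed_test.sql.
    def should_generate(test_type: str, generation_set: str,
                        members: set[tuple[str, str]]) -> bool:
        return generation_set == "" or (test_type, generation_set) in members

    members = {("Stale_Table", "Monitoring"), ("Dupe_Rows", "Monitoring")}
    assert should_generate("Stale_Table", "", members)             # no set requested
    assert should_generate("Stale_Table", "Monitoring", members)   # in the set
    assert not should_generate("Stale_Table", "Quality", members)  # not in that set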
AND du.history_lookback IS NOT NULL; -*/ \ No newline at end of file From 35120c4eecd63212a6a8e9923e05d8c953f6acad Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Fri, 18 Jul 2025 17:02:48 -0400 Subject: [PATCH 17/28] refactor: database service and param replace for process execution --- testgen/commands/queries/execute_tests_query.py | 4 ++-- testgen/commands/run_execute_tests.py | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/testgen/commands/queries/execute_tests_query.py b/testgen/commands/queries/execute_tests_query.py index daa38932..93010829 100644 --- a/testgen/commands/queries/execute_tests_query.py +++ b/testgen/commands/queries/execute_tests_query.py @@ -144,10 +144,10 @@ def GetTestsNonCAT(self) -> tuple[str, dict]: return query, params def GetHistoricThresholdUpdate(self) -> tuple[str, dict]: - query = self._get_query("ex_update_history_threshold_last_n.sql") + query, params = self._get_query("ex_update_history_threshold_last_n.sql") if self._use_clean: query = CleanSQL(query) - return query + return query, params def PushTestRunStatusUpdateSQL(self) -> tuple[str, dict]: # Runs on App database diff --git a/testgen/commands/run_execute_tests.py b/testgen/commands/run_execute_tests.py index 9e35dca5..4e2996fb 100644 --- a/testgen/commands/run_execute_tests.py +++ b/testgen/commands/run_execute_tests.py @@ -70,8 +70,7 @@ def run_test_queries( try: # Update Historic Test Thresholds LOG.info("CurrentStep: Updating Historic Test Thresholds") - strQuery = clsExecute.GetHistoricThresholdUpdate(booClean) - ExecuteDBQuery("DKTG", strQuery) + execute_db_queries([clsExecute.GetHistoricThresholdUpdate()]) # Retrieve non-CAT Queries LOG.info("CurrentStep: Retrieve Non-CAT Queries") From 2cdffe6199d2dde18e59b723f19167ed92e36fe4 Mon Sep 17 00:00:00 2001 From: Luis Date: Tue, 5 Aug 2025 14:23:48 -0400 Subject: [PATCH 18/28] feat: add history related fields to test definitions --- testgen/common/models/test_definition.py | 4 ++ .../050_populate_new_schema_metadata.sql | 2 +- testgen/ui/views/test_definitions.py | 37 ++++++++++++++++--- 3 files changed, 37 insertions(+), 6 deletions(-) diff --git a/testgen/common/models/test_definition.py b/testgen/common/models/test_definition.py index 936e6b65..445cdd94 100644 --- a/testgen/common/models/test_definition.py +++ b/testgen/common/models/test_definition.py @@ -65,6 +65,8 @@ class TestDefinitionSummary(EntityMinimal): match_groupby_names: str match_having_condition: str custom_query: str + history_calculation: str + history_lookback: int test_active: str test_definition_status: str severity: str @@ -177,6 +179,8 @@ class TestDefinition(Entity): match_subset_condition: str = Column(NullIfEmptyString) match_groupby_names: str = Column(NullIfEmptyString) match_having_condition: str = Column(NullIfEmptyString) + history_calculation: str = Column(NullIfEmptyString) + history_lookback: int = Column(ZeroIfEmptyInteger, default=0) test_mode: str = Column(String) custom_query: str = Column(QueryString) test_active: bool = Column(YNString, default="Y") diff --git a/testgen/template/dbsetup/050_populate_new_schema_metadata.sql b/testgen/template/dbsetup/050_populate_new_schema_metadata.sql index ddeae231..51456c10 100644 --- a/testgen/template/dbsetup/050_populate_new_schema_metadata.sql +++ b/testgen/template/dbsetup/050_populate_new_schema_metadata.sql @@ -157,7 +157,7 @@ VALUES ('1004', 'Alpha_Trunc', 'Alpha Truncation', 'Maximum character count con ('1508', 'Timeframe_Combo_Gain', 'Timeframe No Drops', 'Latest timeframe has at 
least all value combinations from prior period', 'Tests that column values in most recent time-window include at least same as prior time window', 'Column values in most recent time-window don''t include all values in prior window.', 'Mismatched values', NULL, NULL, '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', 'Categorical Column List', 'Specify one or more Categorical columns, separated by commas. Make sure not to use continuous measurements here. Do not use numeric values unless they represent discrete categories.', 'window_date_column,window_days,subset_condition', NULL, 'Date Column for Time Windows,Time Window in Days,Record Subset Condition', 'The date column used to define the time windows. This must be a DATE or DATETIME type.|Length in days of the time window. The test will compare the most recent period of days to the prior period of the same duration.|Condition defining a subset of records in main table to evaluate, written like a condition within a SQL WHERE clause - OPTIONAL', 'Fail', 'QUERY', 'referential', 'Consistency', 'Data Drift', 'Expected count of missing value combinations', 'This test checks a single transactional table to verify that categorical values or combinations that are present in the most recent time window you define include at least all those found in the prior time window of the same duration. Missing values in the latest time window will trigger the test to fail. New values are permitted. Use this test to confirm that codes or categories are not lost across successive time periods in a transactional table.', 'Y'), ('1509', 'Timeframe_Combo_Match', 'Timeframe Match', 'Column value combinations from latest timeframe same as prior period', 'Tests for presence of same column values in most recent time-window vs. prior time window', 'Column values don''t match in most recent time-windows.', 'Mismatched values', NULL, NULL, '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', 'Categorical Column List', 'Specify one or more Categorical columns, separated by commas. Do not use continuous measurements here. Do not use numeric values unless they represent discrete categories.', 'window_date_column,window_days,subset_condition', NULL, 'Date Column for Time Windows,Time Window in Days,Record Subset Condition', NULL, 'Fail', 'QUERY', 'referential', 'Consistency', 'Data Drift', 'Expected count of non-matching value combinations', 'This test checks a single transactional table (such as a fact table) to verify that categorical values or combinations that are present in the most recent time window you define match those found in the prior time window of the same duration. New or missing values in the latest time window will trigger the test to fail. 
Use this test to confirm the consistency in the occurrence of codes or categories across successive time periods in a transactional table.', 'Y'), ('1510', 'Dupe_Rows', 'Duplicate Rows', 'Rows are not duplicated in table', 'Tests for the absence of duplicate rows based on unique combination of column values', 'Column value combinations are duplicated in the table.', 'Duplicate records', NULL, NULL, '(({RESULT_MEASURE}-{THRESHOLD_VALUE}))::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', 'null', 'null', 'groupby_names', NULL, 'Columns to Compare', 'List of columns in the table that define a duplicate record when the combination of values is repeated on multiple rows', 'Fail', 'QUERY', 'table', 'Uniqueness', 'Schema Drift', 'Expected count of duplicate value combinations', 'This test verifies that combinations of values are not repeated within the table. By default when auto-generated, the test considers all columns to protect against duplication of entire rows. If you know the minimum columns that should constitute a unique record, such as a set of ID''s, you should use those to make the test as sensitive as possible. Alternatively, if you know of columns you can always exclude, such as file_date or refresh_snapshot_id, remove them to tighten the test somewhat.', 'Y'), - ('1511', 'Stale_Table', 'Stale Table', 'Stale Table Not Updated', 'Confirms whether table has been updated based on data fingerprint', 'Table has not been updated.', 'Update detected', NULL, 'TEMPLATE', '(({RESULT_MEASURE}-{THRESHOLD_VALUE}))::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '0.0', 'null', 'null', 'custom_query,subset_condition', NULL, 'Fingerprint Expression,Record Subset Condition', 'String expression combining key column measures into a distinct representation of table state', 'Log', 'QUERY', 'table', 'Recency', 'Recency', 'Most recent prior table fingerprint', 'This test compares the current table fingerprint, calculated signature of column contents, to confirm that the table has been updated. The table fingerprint is derived from a set of values and aggregates from columns most likely to change. This test allows you to track the schedule and frequency of updates and refreshes to the table.', 'Y') + ('1511', 'Stale_Table', 'Stale Table', 'Stale Table Not Updated', 'Confirms whether table has been updated based on data fingerprint', 'Table has not been updated.', 'Update detected', NULL, 'TEMPLATE', '(({RESULT_MEASURE}-{THRESHOLD_VALUE}))::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '0.0', 'null', 'null', 'history_calculation,history_lookback,subset_condition,custom_query', NULL, 'History Aggregate,History Lookback,Record Subset Condition,Fingerprint Expression', 'Aggregate calculation to be performed on the N lookback results|Last N tests to use for history aggregate calculation|Condition defining a subset of records in main table|String expression combining key column measures into a distinct representation of table state', 'Log', 'QUERY', 'table', 'Recency', 'Recency', 'Most recent prior table fingerprint', 'This test compares the current table fingerprint, calculated signature of column contents, to confirm that the table has been updated. The table fingerprint is derived from a set of values and aggregates from columns most likely to change. 
This test allows you to track the schedule and frequency of updates and refreshes to the table.', 'Y') ; diff --git a/testgen/ui/views/test_definitions.py b/testgen/ui/views/test_definitions.py index 850f4096..b13b77bc 100644 --- a/testgen/ui/views/test_definitions.py +++ b/testgen/ui/views/test_definitions.py @@ -303,6 +303,8 @@ def show_test_form( match_groupby_names = empty_if_null(selected_test_def["match_groupby_names"]) if mode == "edit" else "" match_having_condition = empty_if_null(selected_test_def["match_having_condition"]) if mode == "edit" else "" window_days = selected_test_def["window_days"] or 0 if mode == "edit" else 0 + history_calculation = empty_if_null(selected_test_def["history_calculation"]) if mode == "edit" else "" + history_lookback = empty_if_null(selected_test_def["history_lookback"]) if mode == "edit" else "" # export_to_observability inherited_export_to_observability = "Yes" if test_suite.export_to_observability else "No" @@ -397,6 +399,8 @@ def show_test_form( "match_groupby_names": match_groupby_names, "match_having_condition": match_having_condition, "window_days": window_days, + "history_calculation": history_calculation, + "history_lookback": history_lookback, } # test_definition_status @@ -517,10 +521,18 @@ def render_dynamic_attribute(attribute: str, container: DeltaGenerator): if not attribute in dynamic_attributes: return - numeric_attributes = ["threshold_value", "lower_tolerance", "upper_tolerance"] - - default_value = 0 if attribute in numeric_attributes else "" - value = selected_test_def[attribute] if mode == "edit" and selected_test_def[attribute] is not None else default_value + choice_fields = { + "history_calculation": ["Value", "Minimum", "Maximum", "Sum", "Average"], + } + float_numeric_attributes = ["threshold_value", "lower_tolerance", "upper_tolerance"] + int_numeric_attributes = ["history_lookback"] + + default_value = 0 if attribute in [*float_numeric_attributes, *int_numeric_attributes] else "" + value = ( + selected_test_def[attribute] + if mode == "edit" and selected_test_def[attribute] is not None + else default_value + ) index = dynamic_attributes.index(attribute) leftover_attributes.remove(attribute) @@ -550,13 +562,28 @@ def render_dynamic_attribute(attribute: str, container: DeltaGenerator): height=150 if test_type == "CUSTOM" else 75, help=help_text, ) - elif attribute in numeric_attributes: + elif attribute in float_numeric_attributes: test_definition[attribute] = container.number_input( label=label_text, value=float(value), step=1.0, help=help_text, ) + elif attribute in int_numeric_attributes: + test_definition[attribute] = container.number_input( + label=label_text, + value=int(value), + step=1, + help=help_text, + ) + elif attribute in choice_fields: + with container: + test_definition[attribute] = testgen.select( + label_text, + choice_fields[attribute], + required=True, + default_value=value, + ) else: test_definition[attribute] = container.text_input( label=label_text, From 614747e1e6af826ee0fa7fae85b0578b59a1c7dc Mon Sep 17 00:00:00 2001 From: Luis Date: Mon, 11 Aug 2025 13:03:46 -0400 Subject: [PATCH 19/28] feat(tests): add log severity --- testgen/common/models/test_run.py | 10 +++ testgen/common/models/test_suite.py | 9 +++ .../030_initialize_new_schema_structure.sql | 1 + .../dbupgrade/0501_incremental_upgrade.sql | 3 + .../ex_finalize_test_run_results.sql | 63 ++++++++++--------- .../ex_update_history_threshold_last_n.sql | 16 ++--- ...ex_update_test_record_in_testrun_table.sql | 2 + 
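The history_calculation choices added to the form above (Value, Minimum, Maximum, Sum, Average), together with history_lookback, drive the last-N baseline update that the rewritten ex_update_history_threshold_last_n.sql performs in the hunks below. A minimal Python sketch of those aggregation semantics, assuming signals are ordered newest-first; the function name and signature are illustrative, not TestGen's actual API:

from statistics import mean

# Hypothetical sketch -- not TestGen code. "signals" are prior result_signal
# values for one test definition, newest first; "lookback" plays the role of
# history_lookback and "calculation" is one of the history_calculation choices.
def aggregate_history(signals: list[float], calculation: str, lookback: int) -> float:
    window = signals[:lookback]  # keep only the last N results
    if not window:
        raise ValueError("no prior results within the lookback window")
    if calculation == "Value":
        return window[0]         # most recent signal, unchanged
    if calculation == "Minimum":
        return min(window)
    if calculation == "Maximum":
        return max(window)
    if calculation == "Sum":
        return sum(window)
    if calculation == "Average":
        return mean(window)
    raise ValueError(f"unknown history_calculation: {calculation!r}")

# The aggregate becomes the definition's new baseline_value:
assert aggregate_history([10.0, 12.0, 14.0, 99.0], "Average", 3) == 12.0

Note that the SQL below implements 'Value' as MIN over the window, which matches "most recent" only when history_lookback is 1, presumably the intended configuration for that choice; with a lookback of 1, all five calculations collapse to the newest signal anyway.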
.../get_test_results_for_run_cli.sql | 4 +- testgen/ui/components/frontend/css/shared.css | 1 + .../frontend/js/data_profiling/data_issues.js | 1 + .../components/frontend/js/display_utils.js | 1 + .../frontend/js/pages/project_dashboard.js | 1 + .../components/frontend/js/pages/test_runs.js | 2 + .../frontend/js/pages/test_suites.js | 1 + testgen/ui/components/frontend/js/types.js | 1 + testgen/ui/pdf/test_result_report.py | 1 + testgen/ui/queries/source_data_queries.py | 2 +- testgen/ui/queries/test_result_queries.py | 4 ++ testgen/ui/services/form_service.py | 48 +++++++++++--- testgen/ui/views/data_catalog.py | 2 +- testgen/ui/views/test_definitions.py | 7 ++- testgen/ui/views/test_results.py | 27 +++++--- testgen/ui/views/test_suites.py | 2 +- 23 files changed, 149 insertions(+), 60 deletions(-) create mode 100644 testgen/template/dbupgrade/0501_incremental_upgrade.sql diff --git a/testgen/common/models/test_run.py b/testgen/common/models/test_run.py index 63872a4a..ed1a01fe 100644 --- a/testgen/common/models/test_run.py +++ b/testgen/common/models/test_run.py @@ -44,6 +44,7 @@ class TestRunSummary(EntityMinimal): warning_ct: int failed_ct: int error_ct: int + log_ct: int dismissed_ct: int dq_score_testing: float @@ -68,6 +69,7 @@ class TestRun(Entity): failed_ct: int = Column(Integer) warning_ct: int = Column(Integer) error_ct: int = Column(Integer) + log_ct: int = Column(Integer) table_ct: int = Column(Integer) column_ct: int = Column(Integer) column_failed_ct: int = Column(Integer) @@ -163,6 +165,13 @@ def select_summary( ELSE 0 END ) AS error_ct, + SUM( + CASE + WHEN COALESCE(disposition, 'Confirmed') = 'Confirmed' + AND result_status = 'Log' THEN 1 + ELSE 0 + END + ) AS log_ct, SUM( CASE WHEN COALESCE(disposition, 'Confirmed') IN ('Dismissed', 'Inactive') THEN 1 @@ -185,6 +194,7 @@ def select_summary( run_results.warning_ct, run_results.failed_ct, run_results.error_ct, + run_results.log_ct, run_results.dismissed_ct, test_runs.dq_score_test_run AS dq_score_testing FROM test_runs diff --git a/testgen/common/models/test_suite.py b/testgen/common/models/test_suite.py index 02eccdec..95935b8d 100644 --- a/testgen/common/models/test_suite.py +++ b/testgen/common/models/test_suite.py @@ -43,6 +43,7 @@ class TestSuiteSummary(EntityMinimal): last_run_warning_ct: int last_run_failed_ct: int last_run_error_ct: int + last_run_log_ct: int last_run_dismissed_ct: int @@ -122,6 +123,13 @@ def select_summary(cls, project_code: str, table_group_id: str | UUID | None = N ELSE 0 END ) AS error_ct, + SUM( + CASE + WHEN COALESCE(test_results.disposition, 'Confirmed') = 'Confirmed' + AND test_results.result_status = 'Log' THEN 1 + ELSE 0 + END + ) AS log_ct, SUM( CASE WHEN COALESCE(test_results.disposition, 'Confirmed') IN ('Dismissed', 'Inactive') THEN 1 @@ -161,6 +169,7 @@ def select_summary(cls, project_code: str, table_group_id: str | UUID | None = N last_run.warning_ct AS last_run_warning_ct, last_run.failed_ct AS last_run_failed_ct, last_run.error_ct AS last_run_error_ct, + last_run.log_ct AS last_run_log_ct, last_run.dismissed_ct AS last_run_dismissed_ct FROM test_suites AS suites LEFT JOIN last_run diff --git a/testgen/template/dbsetup/030_initialize_new_schema_structure.sql b/testgen/template/dbsetup/030_initialize_new_schema_structure.sql index 37414d72..f442f4db 100644 --- a/testgen/template/dbsetup/030_initialize_new_schema_structure.sql +++ b/testgen/template/dbsetup/030_initialize_new_schema_structure.sql @@ -507,6 +507,7 @@ CREATE TABLE test_runs ( failed_ct INTEGER, warning_ct INTEGER, 
error_ct INTEGER, + log_ct INTEGER, table_ct INTEGER, column_ct INTEGER, column_failed_ct INTEGER, diff --git a/testgen/template/dbupgrade/0501_incremental_upgrade.sql b/testgen/template/dbupgrade/0501_incremental_upgrade.sql new file mode 100644 index 00000000..57b13840 --- /dev/null +++ b/testgen/template/dbupgrade/0501_incremental_upgrade.sql @@ -0,0 +1,3 @@ +SET SEARCH_PATH TO {SCHEMA_NAME}; + +ALTER TABLE test_runs ADD COLUMN log_ct INTEGER; diff --git a/testgen/template/execution/ex_finalize_test_run_results.sql b/testgen/template/execution/ex_finalize_test_run_results.sql index 707b99ab..2e210c1a 100644 --- a/testgen/template/execution/ex_finalize_test_run_results.sql +++ b/testgen/template/execution/ex_finalize_test_run_results.sql @@ -1,33 +1,38 @@ UPDATE test_results - SET test_description = COALESCE(r.test_description, d.test_description, tt.test_description), - severity = COALESCE(d.severity, s.severity, tt.default_severity), - threshold_value = COALESCE(r.threshold_value, d.threshold_value), - result_signal = COALESCE(r.result_signal, r.result_measure), - result_status = CASE - WHEN r.result_status = 'Error' THEN 'Error' - WHEN r.result_code = 1 THEN 'Passed' - WHEN r.result_code = 0 - AND COALESCE(d.severity, s.severity, tt.default_severity) = 'Warning' THEN 'Warning' - WHEN r.result_code = 0 - AND COALESCE(d.severity, s.severity, tt.default_severity) = 'Fail' THEN 'Failed' - WHEN r.result_code = 0 THEN 'Warning' - END, - observability_status = CASE - WHEN r.observability_status = 'Sent' THEN 'Sent' - WHEN COALESCE(d.export_to_observability, s.export_to_observability) = 'Y' THEN 'Queued' - WHEN COALESCE(d.export_to_observability, s.export_to_observability) = 'N' THEN 'Ignore' - END, - result_message = COALESCE(r.result_message, - tt.measure_uom || ': ' || r.result_measure::VARCHAR - || ', Threshold: ' || d.threshold_value::VARCHAR - || CASE - WHEN r.skip_errors > 0 THEN 'Errors Ignored: ' || r.skip_errors::VARCHAR - ELSE '' - END), - table_groups_id = d.table_groups_id, - test_suite_id = s.id, - auto_gen = d.last_auto_gen_date IS NOT NULL - FROM test_results r +SET test_description = COALESCE(r.test_description, d.test_description, tt.test_description), + severity = COALESCE(d.severity, s.severity, tt.default_severity), + threshold_value = COALESCE(r.threshold_value, d.threshold_value), + result_signal = COALESCE(r.result_signal, r.result_measure), + result_status = ( + CASE + WHEN r.result_status = 'Error' THEN 'Error' + WHEN COALESCE(d.severity, s.severity, tt.default_severity) = 'Log' THEN 'Log' + WHEN r.result_code = 1 THEN 'Passed' + WHEN r.result_code = 0 AND COALESCE(d.severity, s.severity, tt.default_severity) = 'Warning' THEN 'Warning' + WHEN r.result_code = 0 AND COALESCE(d.severity, s.severity, tt.default_severity) = 'Fail' THEN 'Failed' + WHEN r.result_code = 0 THEN 'Warning' + END + ), + observability_status = ( + CASE + WHEN r.observability_status = 'Sent' THEN 'Sent' + WHEN COALESCE(d.export_to_observability, s.export_to_observability) = 'Y' THEN 'Queued' + WHEN COALESCE(d.export_to_observability, s.export_to_observability) = 'N' THEN 'Ignore' + END + ), + result_message = COALESCE( + r.result_message, + tt.measure_uom || ': ' || r.result_measure::VARCHAR || ', Threshold: ' || d.threshold_value::VARCHAR || ( + CASE + WHEN r.skip_errors > 0 THEN 'Errors Ignored: ' || r.skip_errors::VARCHAR + ELSE '' + END + ) + ), + table_groups_id = d.table_groups_id, + test_suite_id = s.id, + auto_gen = d.last_auto_gen_date IS NOT NULL +FROM test_results r INNER JOIN 
test_suites s ON r.test_suite_id = s.id INNER JOIN test_definitions d ON r.test_definition_id = d.id INNER JOIN test_types tt ON r.test_type = tt.test_type diff --git a/testgen/template/execution/ex_update_history_threshold_last_n.sql b/testgen/template/execution/ex_update_history_threshold_last_n.sql index d3e89284..414de787 100644 --- a/testgen/template/execution/ex_update_history_threshold_last_n.sql +++ b/testgen/template/execution/ex_update_history_threshold_last_n.sql @@ -1,13 +1,13 @@ WITH stats AS ( SELECT d.id AS test_definition_id, - CASE d.history_calculation - WHEN 'Value' THEN MIN(r.result_signal::NUMERIC)::VARCHAR - WHEN 'Minimum' THEN MIN(r.result_signal::NUMERIC)::VARCHAR - WHEN 'Maximum' THEN MAX(r.result_signal::NUMERIC)::VARCHAR - WHEN 'Sum' THEN SUM(r.result_signal::NUMERIC)::VARCHAR - WHEN 'Average' THEN AVG(r.result_signal::NUMERIC)::VARCHAR - END AS calc_signal + COALESCE( + MIN(r.result_signal) FILTER (WHERE d.history_calculation = 'Value'), + MIN(r.result_signal::NUMERIC) FILTER (WHERE d.history_calculation = 'Minimum')::VARCHAR, + MAX(r.result_signal::NUMERIC) FILTER (WHERE d.history_calculation = 'Maximum')::VARCHAR, + SUM(r.result_signal::NUMERIC) FILTER (WHERE d.history_calculation = 'Sum')::VARCHAR, + AVG(r.result_signal::NUMERIC) FILTER (WHERE d.history_calculation = 'Average')::VARCHAR + ) as calc_signal FROM test_definitions d INNER JOIN LATERAL ( SELECT result_signal @@ -24,4 +24,4 @@ WITH stats AS ( UPDATE test_definitions t SET baseline_value = s.calc_signal FROM stats s -WHERE t.id = s.test_definition_id; +WHERE t.id = s.test_definition_id; \ No newline at end of file diff --git a/testgen/template/execution/ex_update_test_record_in_testrun_table.sql b/testgen/template/execution/ex_update_test_record_in_testrun_table.sql index 43ef1146..53137157 100644 --- a/testgen/template/execution/ex_update_test_record_in_testrun_table.sql +++ b/testgen/template/execution/ex_update_test_record_in_testrun_table.sql @@ -4,6 +4,7 @@ WITH stats SUM(result_code) AS passed_ct, COALESCE(SUM(CASE WHEN tr.result_status = 'Failed' THEN 1 END), 0) AS failed_ct, COALESCE(SUM(CASE WHEN tr.result_status = 'Warning' THEN 1 END), 0) AS warning_ct, + COALESCE(SUM(CASE WHEN tr.result_status = 'Log' THEN 1 END), 0) AS log_ct, COALESCE(SUM(CASE WHEN tr.result_message ILIKE 'ERROR%' THEN 1 ELSE 0 END), 0) AS error_ct FROM test_runs r INNER JOIN test_results tr @@ -19,6 +20,7 @@ UPDATE test_runs passed_ct = s.passed_ct, failed_ct = s.failed_ct, warning_ct = s.warning_ct, + log_ct = s.log_ct, error_ct = s.error_ct FROM test_runs r LEFT JOIN stats s diff --git a/testgen/template/get_entities/get_test_results_for_run_cli.sql b/testgen/template/get_entities/get_test_results_for_run_cli.sql index dd96a337..083d65e4 100644 --- a/testgen/template/get_entities/get_test_results_for_run_cli.sql +++ b/testgen/template/get_entities/get_test_results_for_run_cli.sql @@ -3,6 +3,7 @@ SELECT ts.test_suite as test_suite_key, column_names as column_name, r.test_type, CASE + WHEN COALESCE(td.severity, ts.severity, tt.default_severity) = 'Log' THEN 'Log' WHEN result_code = 1 THEN 'Passed' WHEN result_code = 0 AND r.severity = 'Warning' THEN 'Warning' WHEN result_code = 0 AND r.severity = 'Fail' THEN 'Failed' @@ -13,6 +14,7 @@ SELECT ts.test_suite as test_suite_key, FROM test_results r INNER JOIN test_types tt ON r.test_type = tt.test_type INNER JOIN test_suites ts ON r.test_suite_id = ts.id - WHERE test_run_id = :TEST_RUN_ID +INNER JOIN test_definitions td ON td.id = r.test_definition_id +WHERE test_run_id = 
:TEST_RUN_ID {ERRORS_ONLY} ORDER BY r.schema_name, r.table_name, r.column_names, r.test_type; diff --git a/testgen/ui/components/frontend/css/shared.css b/testgen/ui/components/frontend/css/shared.css index a3d18383..2f0a153c 100644 --- a/testgen/ui/components/frontend/css/shared.css +++ b/testgen/ui/components/frontend/css/shared.css @@ -20,6 +20,7 @@ body { --blue: #42A5F5; --brown: #8D6E63; --grey: #BDBDBD; + --darkGrey: #9E9E9E; --empty: #EEEEEE; --empty-light: #FAFAFA; --empty-teal: #E7F1F0; diff --git a/testgen/ui/components/frontend/js/data_profiling/data_issues.js b/testgen/ui/components/frontend/js/data_profiling/data_issues.js index c0b1d568..e0beea80 100644 --- a/testgen/ui/components/frontend/js/data_profiling/data_issues.js +++ b/testgen/ui/components/frontend/js/data_profiling/data_issues.js @@ -37,6 +37,7 @@ const STATUS_COLORS = { Failed: 'red', Warning: 'yellow', Error: 'brown', + Log: 'darkGrey', }; const PotentialPIICard = (/** @type Properties */ props, /** @type Table | Column */ item) => { diff --git a/testgen/ui/components/frontend/js/display_utils.js b/testgen/ui/components/frontend/js/display_utils.js index 9eb5e8c1..058c415e 100644 --- a/testgen/ui/components/frontend/js/display_utils.js +++ b/testgen/ui/components/frontend/js/display_utils.js @@ -91,6 +91,7 @@ const colorMap = { brownLight: '#D7CCC8', // Brown 100 brownDark: '#4E342E', // Brown 800 grey: '#BDBDBD', // Gray 400 + darkGrey: '#9E9E9E', // Gray 500 empty: 'var(--empty)', // Light: Gray 200, Dark: Gray 800 emptyLight: 'var(--empty-light)', // Light: Gray 50, Dark: Gray 900 emptyTeal: 'var(--empty-teal)', diff --git a/testgen/ui/components/frontend/js/pages/project_dashboard.js b/testgen/ui/components/frontend/js/pages/project_dashboard.js index 1a90c92a..a1e29e69 100644 --- a/testgen/ui/components/frontend/js/pages/project_dashboard.js +++ b/testgen/ui/components/frontend/js/pages/project_dashboard.js @@ -239,6 +239,7 @@ const TableGroupTestSuiteSummary = (/** @type TestSuiteSummary[] */testSuites) = { label: 'Warning', 'value': parseInt(suite.last_run_warning_ct), color: 'yellow' }, { label: 'Failed', 'value': parseInt(suite.last_run_failed_ct), color: 'red' }, { label: 'Error', 'value': parseInt(suite.last_run_error_ct), color: 'brown' }, + { label: 'Log', 'value': parseInt(suite.last_run_log_ct), color: 'darkGrey' }, { label: 'Dismissed', 'value': parseInt(suite.last_run_dismissed_ct), color: 'grey' }, ], width: 350, diff --git a/testgen/ui/components/frontend/js/pages/test_runs.js b/testgen/ui/components/frontend/js/pages/test_runs.js index 03b2aa47..3c7cb160 100644 --- a/testgen/ui/components/frontend/js/pages/test_runs.js +++ b/testgen/ui/components/frontend/js/pages/test_runs.js @@ -14,6 +14,7 @@ * @property {number} warning_ct * @property {number} failed_ct * @property {number} error_ct + * @property {number} log_ct * @property {number} dismissed_ct * @property {string} dq_score_testing * @@ -210,6 +211,7 @@ const TestRunItem = ( { label: 'Warning', value: item.warning_ct, color: 'yellow' }, { label: 'Failed', value: item.failed_ct, color: 'red' }, { label: 'Error', value: item.error_ct, color: 'brown' }, + { label: 'Log', value: item.log_ct, color: 'darkGrey' }, { label: 'Dismissed', value: item.dismissed_ct, color: 'grey' }, ], height: 8, diff --git a/testgen/ui/components/frontend/js/pages/test_suites.js b/testgen/ui/components/frontend/js/pages/test_suites.js index ce34cba5..5e6828cb 100644 --- a/testgen/ui/components/frontend/js/pages/test_suites.js +++ 
b/testgen/ui/components/frontend/js/pages/test_suites.js @@ -162,6 +162,7 @@ const TestSuites = (/** @type Properties */ props) => { { label: 'Warning', value: parseInt(testSuite.last_run_warning_ct), color: 'yellow' }, { label: 'Failed', value: parseInt(testSuite.last_run_failed_ct), color: 'red' }, { label: 'Error', value: parseInt(testSuite.last_run_error_ct), color: 'brown' }, + { label: 'Log', value: parseInt(testSuite.last_run_log_ct), color: 'darkGrey' }, { label: 'Dismissed', value: parseInt(testSuite.last_run_dismissed_ct), color: 'grey' }, ], height: 20, diff --git a/testgen/ui/components/frontend/js/types.js b/testgen/ui/components/frontend/js/types.js index b5066483..0155396e 100644 --- a/testgen/ui/components/frontend/js/types.js +++ b/testgen/ui/components/frontend/js/types.js @@ -30,5 +30,6 @@ * @property {number} last_run_warning_ct * @property {number} last_run_failed_ct * @property {number} last_run_error_ct + * @property {number} last_run_log_ct * @property {number} last_run_dismissed_ct */ diff --git a/testgen/ui/pdf/test_result_report.py b/testgen/ui/pdf/test_result_report.py index c79b9be2..9fd471f5 100644 --- a/testgen/ui/pdf/test_result_report.py +++ b/testgen/ui/pdf/test_result_report.py @@ -40,6 +40,7 @@ "Warning": HexColor(0xFBC02D), "Failed": HexColor(0xEF5350), "Error": HexColor(0x8D6E63), + "Log": HexColor(0x9E9E9E), } diff --git a/testgen/ui/queries/source_data_queries.py b/testgen/ui/queries/source_data_queries.py index ce4cd9b6..9bc5965b 100644 --- a/testgen/ui/queries/source_data_queries.py +++ b/testgen/ui/queries/source_data_queries.py @@ -185,7 +185,7 @@ def get_test_issue_source_data_custom( @dataclass class LookupData: lookup_query: str - sql_flavor: SQLFlavor | None + sql_flavor: SQLFlavor | None = None def _get_lookup_data( diff --git a/testgen/ui/queries/test_result_queries.py b/testgen/ui/queries/test_result_queries.py index 4636bb4c..991969ab 100644 --- a/testgen/ui/queries/test_result_queries.py +++ b/testgen/ui/queries/test_result_queries.py @@ -50,6 +50,10 @@ def get_test_results( WHEN result_status = 'Failed' AND result_message NOT ILIKE 'Inactivated%%' THEN 1 END::INTEGER as failed_ct, + CASE + WHEN result_status = 'Log' + AND result_message NOT ILIKE 'Inactivated%%' THEN 1 + END::INTEGER as log_ct, CASE WHEN result_message ILIKE 'Inactivated%%' THEN 1 END as execution_error_ct, diff --git a/testgen/ui/services/form_service.py b/testgen/ui/services/form_service.py index b7112f4c..43d30cc6 100644 --- a/testgen/ui/services/form_service.py +++ b/testgen/ui/services/form_service.py @@ -102,17 +102,18 @@ def render_html_list(dct_row, lst_columns, str_section_header=None, int_data_wid } .dk-text-value { display: <>; - width: <>px; + width: <>; background-color: var(--dk-text-value-background); text-align: left; font-family: 'Courier New', monospace; padding-left: 10px; padding-right: 10px; box-sizing: border-box; + overflow-wrap: break-word; } .dk-num-value { display: <>; - width: <>px; + width: <>; background-color: var(--dk-text-value-background); text-align: right; font-family: 'Courier New', monospace; @@ -122,7 +123,7 @@ def render_html_list(dct_row, lst_columns, str_section_header=None, int_data_wid } """ - str_data_width = "100%" if int_data_width == 0 else str(int_data_width) + str_data_width = "100%" if int_data_width == 0 else f"{int_data_width}px" str_markdown = str_markdown.replace("<>", str_data_width) str_markdown = str_markdown.replace("<>", str_block) @@ -164,7 +165,6 @@ def render_grid_select( :param key: Streamlit cache key for 
the grid. required when binding selection to query. """ - show_prompt(str_prompt) # Set grid formatting @@ -181,19 +181,36 @@ def render_grid_select( if (['Failed', 'Error'].includes(params.value)) { style.color = 'black'; - style.borderColor = 'mistyrose'; + style.borderColor = 'var(--ag-odd-row-background-color)'; style.backgroundColor = "mistyrose"; style.fontWeight = 'bolder'; + style.display = 'flex'; + style.alignItems = 'center'; + style.justifyContent = 'center'; return style; } else if (params.value === 'Warning') { style.color = 'black'; - style.borderColor = 'seashell'; + style.borderColor = 'var(--ag-odd-row-background-color)'; style.backgroundColor = "seashell"; + style.display = 'flex'; + style.alignItems = 'center'; + style.justifyContent = 'center'; return style; } else if (params.value === 'Passed') { style.color = 'black'; - style.borderColor = 'honeydew'; + style.borderColor = 'var(--ag-odd-row-background-color)'; style.backgroundColor = "honeydew"; + style.display = 'flex'; + style.alignItems = 'center'; + style.justifyContent = 'center'; + return style; + } else if (params.value === 'Log') { + style.color = 'black'; + style.borderColor = 'var(--ag-odd-row-background-color)'; + style.backgroundColor = "#9E9E9E"; + style.display = 'flex'; + style.alignItems = 'center'; + style.justifyContent = 'center'; return style; } else if (params.value === '✓') { return { @@ -280,7 +297,14 @@ def render_grid_select( "headerCheckboxSelection": selection_mode_ == "multiple" and column == show_columns[0], "headerCheckboxSelectionFilteredOnly": selection_mode_ == "multiple" and column == show_columns[0], } - highlight_kwargs = {"cellStyle": cellstyle_jscode} + highlight_kwargs = { + "cellStyle": cellstyle_jscode, + "cellClassRules": { + "status-tag": JsCode( + "function(params) { return ['Failed', 'Error', 'Warning', 'Passed', 'Log'].includes(params.value); }", + ), + }, + } # Check if the column is a date-time column if is_datetime64_any_dtype(df[column]): @@ -320,7 +344,13 @@ def render_grid_select( custom_css={ "#gridToolBar": { "padding-bottom": "0px !important", - } + }, + ".ag-row-hover .ag-cell.status-tag": { + "border-color": "var(--ag-row-hover-color) !important", + }, + ".ag-row-selected .ag-cell.status-tag": { + "border-color": "var(--ag-selected-row-background-color) !important", + }, }, key=f"{key}_{selection_mode_}_{rendering_counter}", reload_data=data_changed, diff --git a/testgen/ui/views/data_catalog.py b/testgen/ui/views/data_catalog.py index 26791ccc..1df6c2d9 100644 --- a/testgen/ui/views/data_catalog.py +++ b/testgen/ui/views/data_catalog.py @@ -472,7 +472,7 @@ def get_latest_test_issues(table_group_id: str, table_name: str, column_name: st WHERE test_suites.table_groups_id = :table_group_id AND table_name = :table_name {"AND column_names = :column_name" if column_name else ""} - AND result_status <> 'Passed' + AND result_status NOT IN ('Passed', 'Log') AND COALESCE(disposition, 'Confirmed') = 'Confirmed' ORDER BY CASE result_status diff --git a/testgen/ui/views/test_definitions.py b/testgen/ui/views/test_definitions.py index b13b77bc..e21c9a2c 100644 --- a/testgen/ui/views/test_definitions.py +++ b/testgen/ui/views/test_definitions.py @@ -259,7 +259,12 @@ def show_test_form( test_types_severity = selected_test_type_row["default_severity"] inherited_severity = test_suite_severity if test_suite_severity else test_types_severity - severity_options = [f"Inherited ({inherited_severity})", "Warning", "Fail"] + severity_options = [ + f"Inherited ({inherited_severity})", 
+ "Log", + "Warning", + "Fail", + ] if mode == "add" or selected_test_def["severity"] is None: severity_index = 0 else: diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py index 8a0ed3d8..7991d88d 100644 --- a/testgen/ui/views/test_results.py +++ b/testgen/ui/views/test_results.py @@ -98,6 +98,7 @@ def render( "Warning", "Passed", "Error", + "Log", ] status = testgen.select( options=status_options, @@ -170,16 +171,12 @@ def render( do_multi_select = st.toggle("Multi-Select", help=str_help) match status: + case None: + status = [] case "Failed + Warning": status = ["Failed", "Warning"] - case "Failed": - status = ["Failed"] - case "Warning": - status = ["Warning"] - case "Passed": - status = ["Passed"] - case "Error": - status = ["Error"] + case _: + status = [status] # Display main grid and retrieve selection selected = show_result_detail( @@ -330,6 +327,13 @@ def get_test_result_summary(test_run_id: str) -> list[dict]: ELSE 0 END ) as error_ct, + SUM( + CASE + WHEN COALESCE(test_results.disposition, 'Confirmed') = 'Confirmed' + AND test_results.result_status = 'Log' THEN 1 + ELSE 0 + END + ) as log_ct, SUM( CASE WHEN COALESCE(test_results.disposition, 'Confirmed') IN ('Dismissed', 'Inactive') THEN 1 @@ -349,6 +353,7 @@ def get_test_result_summary(test_run_id: str) -> list[dict]: { "label": "Warning", "value": result.warning_ct, "color": "yellow" }, { "label": "Failed", "value": result.failed_ct, "color": "red" }, { "label": "Error", "value": result.error_ct, "color": "brown" }, + { "label": "Log", "value": result.error_ct, "color": "darkGrey" }, { "label": "Dismissed", "value": result.dismissed_ct, "color": "grey" }, ] @@ -734,7 +739,11 @@ def source_data_dialog(selected_row): # Show detail fm.render_html_list( - selected_row, ["input_parameters", "result_message"], None, 700, ["Test Parameters", "Result Detail"] + selected_row, + lst_columns=["input_parameters", "result_message"], + str_section_header=None, + int_data_width=0, + lst_labels=["Test Parameters", "Result Detail"], ) with st.spinner("Retrieving source data..."): diff --git a/testgen/ui/views/test_suites.py b/testgen/ui/views/test_suites.py index 5efa1f41..820c92a0 100644 --- a/testgen/ui/views/test_suites.py +++ b/testgen/ui/views/test_suites.py @@ -95,7 +95,7 @@ def edit_test_suite_dialog(project_code, table_groups, test_suite_id: str) -> No def show_test_suite(mode, project_code, table_groups: Iterable[TableGroupMinimal], selected: TestSuite | None = None): - severity_options = ["Inherit", "Failed", "Warning"] + severity_options = ["Inherit", "Log", "Failed", "Warning"] selected_test_suite = selected if mode == "edit" else None table_groups_df = to_dataframe(table_groups, TableGroupMinimal.columns()) From bc83dcc88aab79f0a82c7b27b622b64883a2da01 Mon Sep 17 00:00:00 2001 From: Luis Date: Fri, 15 Aug 2025 13:41:18 -0400 Subject: [PATCH 20/28] feat(tests): customize visualization for stale table test type --- .../030_initialize_new_schema_structure.sql | 56 ++++----- .../050_populate_new_schema_metadata.sql | 106 +++++++++--------- .../dbsetup/060_create_standard_views.sql | 2 + .../dbupgrade/0501_incremental_upgrade.sql | 5 + .../ex_update_history_threshold_last_n.sql | 2 +- .../gen_table_changed_test.sql | 4 +- testgen/ui/queries/test_result_queries.py | 4 +- testgen/ui/views/test_definitions.py | 14 ++- testgen/ui/views/test_results.py | 65 ++++++++++- 9 files changed, 172 insertions(+), 86 deletions(-) diff --git a/testgen/template/dbsetup/030_initialize_new_schema_structure.sql 
b/testgen/template/dbsetup/030_initialize_new_schema_structure.sql index f442f4db..469e2776 100644 --- a/testgen/template/dbsetup/030_initialize_new_schema_structure.sql +++ b/testgen/template/dbsetup/030_initialize_new_schema_structure.sql @@ -445,33 +445,35 @@ CREATE TABLE data_column_chars ( ); CREATE TABLE test_types ( - id VARCHAR, - test_type VARCHAR(200) NOT NULL - CONSTRAINT cat_tests_test_type_pk - PRIMARY KEY, - test_name_short VARCHAR(30), - test_name_long VARCHAR(100), - test_description VARCHAR(1000), - except_message VARCHAR(1000), - measure_uom VARCHAR(100), - measure_uom_description VARCHAR(200), - selection_criteria TEXT, - dq_score_prevalence_formula TEXT, - dq_score_risk_factor TEXT, - column_name_prompt TEXT, - column_name_help TEXT, - default_parm_columns TEXT, - default_parm_values TEXT, - default_parm_prompts TEXT, - default_parm_help TEXT, - default_severity VARCHAR(10), - run_type VARCHAR(10), - test_scope VARCHAR, - dq_dimension VARCHAR(50), - health_dimension VARCHAR(50), - threshold_description VARCHAR(200), - usage_notes VARCHAR, - active VARCHAR + id VARCHAR, + test_type VARCHAR(200) NOT NULL + CONSTRAINT cat_tests_test_type_pk + PRIMARY KEY, + test_name_short VARCHAR(30), + test_name_long VARCHAR(100), + test_description VARCHAR(1000), + except_message VARCHAR(1000), + measure_uom VARCHAR(100), + measure_uom_description VARCHAR(200), + selection_criteria TEXT, + dq_score_prevalence_formula TEXT, + dq_score_risk_factor TEXT, + column_name_prompt TEXT, + column_name_help TEXT, + default_parm_columns TEXT, + default_parm_values TEXT, + default_parm_prompts TEXT, + default_parm_help TEXT, + default_severity VARCHAR(10), + run_type VARCHAR(10), + test_scope VARCHAR, + dq_dimension VARCHAR(50), + health_dimension VARCHAR(50), + threshold_description VARCHAR(200), + result_visualization VARCHAR(50) DEFAULT 'line_chart', + result_visualization_params TEXT DEFAULT NULL, + usage_notes VARCHAR, + active VARCHAR ); CREATE TABLE test_templates ( diff --git a/testgen/template/dbsetup/050_populate_new_schema_metadata.sql b/testgen/template/dbsetup/050_populate_new_schema_metadata.sql index 51456c10..f5af0ad5 100644 --- a/testgen/template/dbsetup/050_populate_new_schema_metadata.sql +++ b/testgen/template/dbsetup/050_populate_new_schema_metadata.sql @@ -106,58 +106,59 @@ n controls over data ingested and to make values more efficient, consistent and TRUNCATE TABLE test_types; + INSERT INTO test_types - (id, test_type, test_name_short, test_name_long, test_description, except_message, measure_uom, measure_uom_description, selection_criteria, dq_score_prevalence_formula, dq_score_risk_factor, column_name_prompt, column_name_help, default_parm_columns, default_parm_values, default_parm_prompts, default_parm_help, default_severity, run_type, test_scope, dq_dimension, health_dimension, threshold_description, usage_notes, active) -VALUES ('1004', 'Alpha_Trunc', 'Alpha Truncation', 'Maximum character count consistent', 'Tests that the maximum count of characters in a column value has not dropped vs. 
baseline data', 'Maximum length of values has dropped from prior expected length.', 'Values over max', NULL, 'general_type =''A'' AND max_length > 0 AND ( (min_length = avg_length AND max_length = avg_length) OR (numeric_ct <> value_ct ) ) AND functional_table_type NOT LIKE ''%window%'' /* The conditions below are to eliminate overlap with : LOV_Match (excluded selection criteria for this test_type), Pattern_Match (excluded selection criteria for this test_type), Constant (excluded functional_data_type Constant and Boolean) */ AND ( (distinct_value_ct NOT BETWEEN 2 AND 10 AND functional_data_type NOT IN ( ''Constant'', ''Boolean'') ) AND NOT ( fn_charcount(top_patterns, E'' \| '' ) = 1 AND fn_charcount(top_patterns, E'' \| '' ) IS NOT NULL AND REPLACE(SPLIT_PART(top_patterns, ''|'' , 2), ''N'' , '''' ) > ''''))', '{VALUE_CT}::FLOAT * (FN_NORMAL_CDF(({MAX_LENGTH}::FLOAT - {AVG_LENGTH}::FLOAT) / (NULLIF({MAX_LENGTH}::FLOAT, 0) / 3)) - FN_NORMAL_CDF(({RESULT_MEASURE}::FLOAT - {AVG_LENGTH}::FLOAT) / (NULLIF({MAX_LENGTH}::FLOAT, 0) / 3)) ) /NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value', 'FLOOR(0.95 * max_length::FLOAT)', 'Maximum String Length at Baseline', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Maximum length expected', 'Alpha Truncation tests that the longest text value in a column hasn''t become shorter than the defined threshold, initially 95% of the longest value at baseline. This could indicate a problem in a cumulative dataset, where prior values should still exist unchanged. A failure here would suggest that some process changed data that you would still expect to be present and matching its value when the column was profiled. This test would not be appropriate for an incremental or windowed dataset.', 'Y'), - ('1005', 'Avg_Shift', 'Average Shift', 'Column mean is consistent with reference', 'Tests for statistically-significant shift in mean value for column from average calculated at baseline.', 'Standardized difference between averages is over the selected threshold level.', 'Difference Measure', 'Cohen''s D Difference (0.20 small, 0.5 mod, 0.8 large, 1.2 very large, 2.0 huge)', 'general_type=''N'' AND distinct_value_ct > 10 AND functional_data_type ilike ''Measure%'' AND functional_data_type <> ''Measurement Spike'' AND column_name NOT ilike ''%latitude%'' AND column_name NOT ilike ''%longitude%''', '2.0 * (1.0 - fn_normal_cdf(ABS({RESULT_MEASURE}::FLOAT) / 2.0))', '0.75', NULL, NULL, 'baseline_value_ct,baseline_avg,baseline_sd,threshold_value', 'value_ct,avg_value,stdev_value,0.5::VARCHAR', 'Value Ct at Baseline,Mean at Baseline,Std Deviation at Baseline,Threshold Difference Measure ', NULL, 'Warning', 'CAT', 'column', 'Consistency', 'Data Drift', 'Standardized Difference Measure', 'Average Shift tests that the average of a numeric column has not significantly changed since baseline, when profiling was done. A significant shift may indicate errors in processing, differences in source data, or valid changes that may nevertheless impact assumptions in downstream data products. The test uses Cohen''s D, a statistical technique to identify significant shifts in a value. Cohen''s D measures the difference between the two averages, reporting results on a standardized scale, which can be interpreted via a rule-of-thumb from small to huge. Depending on your data, some difference may be expected, so it''s reasonable to adjust the threshold value that triggers test failure. 
This test works well for measures, or even for identifiers if you expect them to increment consistently. You may want to periodically adjust the expected threshold, or even the expected average value if you expect shifting over time. Consider this test along with Variability Increase. If variability rises too, process or measurement flaws could be at work. If variability remains consistent, the issue is more likely to be with the source data itself. ', 'Y'), - ('1007', 'Constant', 'Constant Match', 'All column values match constant value', 'Tests that all values in the column match the constant value identified in baseline data', 'A constant value is expected for this column.', 'Mismatched values', NULL, 'TEMPLATE', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'baseline_value,threshold_value', NULL, 'Constant Value at Baseline,Threshold Error Count', 'The single, unchanging value of the column, per baseline|The number of errors that are acceptable before test fails.', 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Count of records with unexpected values', 'Constant Match tests that a single value determined to be a constant in baseline profiling is still the only value for the column that appears in subsequent versions of the dataset. Sometimes new data or business knowledge may reveal that the value is not a constant at all, even though only one value was present at profiling. In this case, you will want to disable this test. Alternatively, you can use the Value Match test to provide a limited number of valid values for the column.', 'Y'), - ('1009', 'Daily_Record_Ct', 'Daily Records', 'All dates present within date range', 'Tests for presence of every calendar date within min/max date range, per baseline data', 'Not every date value between min and max dates is present, unlike at baseline.', 'Missing dates', NULL, 'general_type= ''D'' AND date_days_present > 21 AND date_days_present - (DATEDIFF(''day'', ''1800-01-05''::DATE, max_date) - DATEDIFF(''day'', ''1800-01-05''::DATE, min_date) + 1) = 0 AND future_date_ct::FLOAT / NULLIF(value_ct, 0) <= 0.75', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT*{PRO_RECORD_CT}::FLOAT/NULLIF({DATE_DAYS_PRESENT}::FLOAT, 0)/NULLIF({RECORD_CT}::FLOAT, 0)', '0.75', NULL, NULL, 'threshold_value', '0', 'Threshold Missing Calendar Days', NULL, 'Warning', 'CAT', 'column', 'Completeness', 'Volume', 'Missing calendar days within min/max range', 'Daily Records tests that at least one record is present for every day within the minimum and maximum date range for the column. The test is relevant for transactional data, where you would expect at least one transaction to be recorded each day. A failure here would suggest missing records for the number of days identified without data. You can adjust the threshold to accept a number of days that you know legitimately have no records. 
', 'Y'), - ('1011', 'Dec_Trunc', 'Decimal Truncation', 'Sum of fractional values at or above reference', 'Tests for decimal truncation by confirming that the sum of fractional values in data is no less than the sum at baseline', 'The sum of fractional values is under baseline, which may indicate decimal truncation', 'Fractional sum', 'The sum of all decimal values from all data for this column', 'fractional_sum > 0 AND functional_table_type LIKE''%cumulative%''', '1', '1.0', NULL, NULL, 'threshold_value', 'ROUND(fractional_sum, 0)', 'Sum of Fractional Values at Baseline', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Minimum expected sum of all fractional values', 'Decimal Truncation tests that the fractional (decimal) part of a numeric column has not been truncated since Baseline. This works by summing all the fractional values after the decimal point and confirming that the total is at least equal to the fractional total at baseline. This could indicate a problem in a cumulative dataset, where prior values should still exist unchanged. A failure here would suggest that some process changed data that you would still expect to be present and matching its value when the column was profiled. This test would not be appropriate for an incremental or windowed dataset.', 'Y'), - ('1012', 'Distinct_Date_Ct', 'Date Count', 'Count of distinct dates at or above reference', 'Tests that the count of distinct dates referenced in the column has not dropped vs. baseline data', 'Drop in count of unique dates recorded in column.', 'Unique dates', 'Count of unique dates in transactional date column', 'functional_data_type ILIKE ''Transactional Date%'' AND date_days_present > 1 AND functional_table_type ILIKE ''%cumulative%''', '(({RECORD_CT}-{PRO_RECORD_CT})::FLOAT*{DISTINCT_VALUE_CT}::FLOAT/NULLIF({PRO_RECORD_CT}::FLOAT, 0))/NULLIF({PRO_RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'baseline_value,threshold_value', 'date_days_present,date_days_present', 'Distinct Date Count at Baseline,Min Expected Date Count', NULL, 'Fail', 'CAT', 'column', 'Timeliness', 'Recency', 'Minimum distinct date count expected', 'Date Count tests that the count of distinct dates present in the column has not dropped since baseline. The test is relevant for cumulative datasets, where old records are retained. A failure here would indicate missing records, which could be caused by a processing error or changed upstream data sources.', 'Y'), - ('1013', 'Distinct_Value_Ct', 'Value Count', 'Count of distinct values has not dropped', 'Tests that the count of unique values in the column has not changed from baseline.', 'Count of unique values in column has changed from baseline.', 'Unique Values', NULL, 'distinct_value_ct between 2 and 10 AND value_ct > 50 AND functional_data_type IN (''Code'', ''Category'', ''Attribute'', ''Description'') AND NOT coalesce(top_freq_values,'''') > ''''', 'ABS({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT*{PRO_RECORD_CT}::FLOAT/NULLIF({DISTINCT_VALUE_CT}::FLOAT, 0)/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'baseline_value_ct,threshold_value', 'distinct_value_ct,distinct_value_ct', 'Distinct Value Count at Baseline,Min Expected Value Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected distinct value count', 'Value Count tests that the count of unique values present in the column has not dropped since baseline. 
The test is relevant for cumulative datasets, where old records are retained, or for any dataset where you would expect a set number of distinct values should be present. A failure here would indicate missing records or a change in categories or value assignment.', 'Y'), - ('1014', 'Email_Format', 'Email Format', 'Email is correctly formatted', 'Tests that non-blank, non-empty email addresses match the standard format', 'Invalid email address formats found.', 'Invalid emails', 'Number of emails that do not match standard format', 'std_pattern_match=''EMAIL''', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value', '0', 'Maximum Invalid Email Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of invalid email addresses', NULL, 'Y'), - ('1015', 'Future_Date', 'Past Dates', 'Latest date is prior to test run date', 'Tests that the maximum date referenced in the column is no greater than the test date, consistent with baseline data', 'Future date found when absent in baseline data.', 'Future dates', NULL, 'general_type=''D''AND future_date_ct = 0', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value', '0', 'Maximum Future Date Count', NULL, 'Fail', 'CAT', 'column', 'Timeliness', 'Recency', 'Expected count of future dates', NULL, 'Y'), - ('1016', 'Future_Date_1Y', 'Future Year', 'Future dates within year of test run date', 'Tests that the maximum date referenced in the column is no greater than one year beyond the test date, consistent with baseline data', 'Future date beyond one-year found when absent in baseline.', 'Future dates post 1 year', NULL, 'general_type=''D''AND future_date_ct > 0 AND max_date <=''{AS_OF_DATE}''::DATE + INTERVAL''365 DAYS''', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value', '0', 'Maximum Post 1-Year Future Date Count', NULL, 'Fail', 'CAT', 'column', 'Timeliness', 'Recency', 'Expected count of future dates beyond one year', 'Future Year looks for date values in the column that extend beyond one year after the test date. This would be appropriate for transactional dates where you would expect to find dates in the near future, but not beyond one year ahead. Errors could indicate invalid entries or possibly dummy dates representing blank values.', 'Y'), - ('1017', 'Incr_Avg_Shift', 'New Shift', 'New record mean is consistent with reference', 'Tests for statistically-significant shift in mean of new values for column compared to average calculated at baseline.', 'Significant shift in average of new values vs. 
baseline avg', 'Z-score of mean shift', 'Absolute Z-score (number of SD''s outside mean) of prior avg - incremental avg', 'general_type=''N'' AND distinct_value_ct > 10 AND functional_data_type ilike ''Measure%'' AND functional_data_type <> ''Measurement Spike'' AND column_name NOT ilike ''%latitude%'' AND column_name NOT ilike ''%longitude%''', '{RECORD_CT}::FLOAT*(1-FN_NORMAL_CDF({RESULT_MEASURE}::FLOAT))/NULLIF({RECORD_CT}::FLOAT, 0)', '0.75', NULL, NULL, 'baseline_value_ct,baseline_sum,baseline_avg,baseline_sd,threshold_value', 'value_ct,(avg_value * value_ct)::FLOAT,avg_value,stdev_value,2', 'Value Count at Baseline,Sum at Baseline,Mean Value at Baseline,Std Deviation at Baseline,Threshold Max Z-Score', NULL, 'Warning', 'CAT', 'column', 'Accuracy', 'Data Drift', 'Maximum Z-Score (number of SD''s beyond mean) expected', 'This is a more sensitive test than Average Shift, because it calculates an incremental difference in the average of new values compared to the average of values at baseline. This is appropriate for a cumulative dataset only, because it calculates the average of new entries based on the assumption that the count and average of records present at baseline are still present at the time of the test. This test compares the mean of new values with the standard deviation of the baseline average to calculate a Z-score. If the new mean falls outside the Z-score threshold, a shift is detected. Potential Z-score thresholds may range from 0 to 3, depending on the sensitivity you prefer. A failed test could indicate a quality issue or a legitimate shift in new data that should be noted and assessed by business users. Consider this test along with Variability Increase. If variability rises too, process, methodology or measurement flaws could be at issue. If variability remains consistent, the problem is more likely to be with the source data itself.', 'Y'), - ('1018', 'LOV_All', 'Value Match All', 'List of expected values all present in column', 'Tests that all values match a pipe-delimited list of expected values and that all expected values are present', 'Column values found don''t exactly match the expected list of values', 'Values found', NULL, NULL, '1', '1.0', NULL, NULL, 'threshold_value', NULL, 'List of Expected Values', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'List of values expected, in form (''Val1'',''Val2)', 'This is a more restrictive form of Value Match, testing that all values in the dataset match the list provided, and also that all values present in the list appear at least once in the dataset. 
This would be appropriate for tables where all category values in the column are represented at least once.', 'Y'), - ('1019', 'LOV_Match', 'Value Match', 'All column values present in expected list', 'Tests that all values in the column match the list-of-values identified in baseline data.', 'Values not matching expected List-of-Values from baseline.', 'Non-matching records', NULL, 'functional_data_type IN (''Boolean'', ''Code'', ''Category'') AND top_freq_values > '''' AND distinct_value_ct BETWEEN 2 and 10 AND value_ct > 5', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'baseline_value,threshold_value', '''('' || SUBSTRING( CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 2) > '''' THEN '','''''' || TRIM( REPLACE ( SPLIT_PART(top_freq_values, ''|'' , 2), '''''''' , '''''''''''' ) ) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 4) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 4), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 6) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 6), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 8) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 8), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 10) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 10), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 12) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 12), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 14) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 14), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 16) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 16), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 18) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 18), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 20) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 20), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END, 2, 999) || '')'',0', 'List of Expected Values,Threshold Error Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'List of values expected, in form (''Val1'',''Val2)', 'This tests that all values in the column match the hard-coded list provided. This is relevant when the list of allowable values is small and not expected to change often. 
Even if new values might occasionally be added, this test is useful for downstream data products to provide warning that assumptions and logic may need to change.', 'Y'), - ('1020', 'Min_Date', 'Minimum Date', 'All dates on or after set minimum', 'Tests that the earliest date referenced in the column is no earlier than baseline data', 'The earliest date value found is before the earliest value at baseline.', 'Dates prior to limit', NULL, 'general_type=''D''and min_date IS NOT NULL AND distinct_value_ct > 1', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'baseline_value,threshold_value', 'min_date,0', 'Minimum Date at Baseline,Threshold Error Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of dates prior to minimum', 'This test is appropriate for a cumulative dataset only, because it assumes all prior values are still present. It''s appropriate where new records are added with more recent dates, but old dates dates do not change.', 'Y'), - ('1021', 'Min_Val', 'Minimum Value', 'All values at or above set minimum', 'Tests that the minimum value present in the column is no lower than the minimum value in baseline data', 'Minimum column value less than baseline.', 'Values under limit', NULL, 'general_type=''N'' AND functional_data_type ILIKE ''Measure%'' AND min_value IS NOT NULL AND (distinct_value_ct >= 2 OR (distinct_value_ct=2 and min_value<>0 and max_value<>1))', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'baseline_value,threshold_value', 'min_value,0', 'Minimum Value at Baseline,Threshold Error Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of values under limit', 'This test is appropriate for a cumulative dataset only, assuming all prior values are still present. It is also appropriate for any measure that has an absolute, definable minimum value, or a heuristic that makes senes for valid data.', 'Y'), - ('1022', 'Missing_Pct', 'Percent Missing', 'Consistent ratio of missing values', 'Tests for statistically-significant shift in percentage of missing values in column vs. baseline data', 'Significant shift in percent of missing values vs. baseline.', 'Difference measure', 'Cohen''s H Difference (0.20 small, 0.5 mod, 0.8 large, 1.2 very large, 2.0 huge)', 'record_ct <> value_ct', '2.0 * (1.0 - fn_normal_cdf(ABS({RESULT_MEASURE}::FLOAT) / 2.0))', '0.75', NULL, NULL, 'baseline_ct,baseline_value_ct,threshold_value', 'record_ct,value_ct,2::VARCHAR(10)', 'Baseline Record Count,Baseline Value Count,Standardized Difference Measure', NULL, 'Warning', 'CAT', 'column', 'Completeness', 'Data Drift', 'Expected maximum Cohen''s H Difference', 'This test uses Cohen''s H, a statistical test to identify a significant difference between two ratios. Results are reported on a standardized scale, which can be interpreted via a rule-of-thumb from small to huge. An uptick in missing data may indicate a collection issue at the source. A larger change may indicate a processing failure. A drop in missing data may also be significant, if it affects assumptions built into analytic products downstream. 
You can refine the expected threshold value as you view legitimate results of the measure over time.', 'Y'), - ('1023', 'Monthly_Rec_Ct', 'Monthly Records', 'At least one date per month present within date range', 'Tests for presence of at least one date per calendar month within min/max date range, per baseline data', 'At least one date per month expected in min/max date range.', 'Missing months', 'Calendar months without date values present', 'functional_data_type ILIKE ''Transactional Date%'' AND date_days_present > 1 AND functional_table_type ILIKE ''%cumulative%'' AND date_months_present > 2 AND date_months_present - (datediff( ''MON'' , min_date, max_date) + 1) = 0 AND future_date_ct::FLOAT / NULLIF(value_ct, 0) <= 0.75', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT*{PRO_RECORD_CT}::FLOAT/NULLIF({DATE_MONTHS_PRESENT}::FLOAT, 0)/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value', '0', 'Threshold Count of Months without Dates', NULL, 'Fail', 'CAT', 'column', 'Completeness', 'Volume', 'Expected maximum count of calendar months without dates present', 'Monthly Records tests that at least one record is present for every calendar month within the minimum and maximum date range for the column. The test is relevant for transactional data, where you would expect at least one transaction to be recorded each month. A failure here would suggest missing records for the number of months identified without data. You can adjust the threshold to accept a number of month that you know legitimately have no records.', 'Y'), - ('1024', 'Outlier_Pct_Above', 'Outliers Above', 'Consistent outlier counts over 2 SD above mean', 'Tests that percent of outliers over 2 SD above Mean doesn''t exceed threshold', 'Percent of outliers exceeding 2 SD above the mean is greater than expected threshold.', 'Pct records over limit', NULL, 'functional_data_type = ''Measurement'' AND distinct_value_ct > 30 AND NOT distinct_value_ct = max_value - min_value + 1 AND distinct_value_ct::FLOAT/value_ct::FLOAT > 0.1 AND stdev_value::FLOAT/avg_value::FLOAT > 0.01 AND column_name NOT ILIKE ''%latitude%'' AND column_name NOT ilike ''%longitude%''', 'GREATEST(0, {RESULT_MEASURE}::FLOAT-{THRESHOLD_VALUE}::FLOAT)', '0.75', NULL, NULL, 'baseline_avg,baseline_sd,threshold_value', 'avg_value,stdev_value,0.05', 'Baseline Mean, Baseline Std Deviation, Pct Records over 2 SD', NULL, 'Warning', 'CAT', 'column', 'Accuracy', 'Data Drift', 'Expected maximum pct records over upper 2 SD limit', 'This test counts the number of data points that may be considered as outliers, determined by whether their value exceeds 2 standard deviations above the mean at baseline. Assuming a normal distribution, a small percentage (defaulted to 5%) of outliers is expected. The actual number may vary for different distributions. The expected threshold reflects the maximum percentage of outliers you expect to see. This test uses the baseline mean rather than the mean for the latest dataset to capture systemic shift as well as individual outliers. 
', 'Y'), - ('1025', 'Outlier_Pct_Below', 'Outliers Below', 'Consistent outlier counts under 2 SD below mean', 'Tests that percent of outliers over 2 SD below Mean doesn''t exceed threshold', 'Percent of outliers exceeding 2 SD below the mean is greater than expected threshold.', 'Pct records under limit', NULL, 'functional_data_type = ''Measurement'' AND distinct_value_ct > 30 AND NOT distinct_value_ct = max_value - min_value + 1 AND distinct_value_ct::FLOAT/value_ct::FLOAT > 0.1 AND stdev_value::FLOAT/avg_value::FLOAT > 0.01 AND column_name NOT ILIKE ''%latitude%'' AND column_name NOT ilike ''%longitude%''', 'GREATEST(0, {RESULT_MEASURE}::FLOAT-{THRESHOLD_VALUE}::FLOAT)', '0.75', NULL, NULL, 'baseline_avg,baseline_sd,threshold_value', 'avg_value,stdev_value,0.05', 'Baseline Mean, Baseline Std Deviation, Pct Records over 2 SD', NULL, 'Warning', 'CAT', 'column', 'Accuracy', 'Data Drift', 'Expected maximum pct records over lower 2 SD limit', 'This test counts the number of data points that may be considered as outliers, determined by whether their value exceeds 2 standard deviations below the mean at baseline. Assuming a normal distribution, a small percentage (defaulted to 5%) of outliers is expected. The actual number may vary for different distributions. The expected threshold reflects the maximum percentage of outliers you expect to see. This test uses the baseline mean rather than the mean for the latest dataset to capture systemic shift as well as individual outliers. ', 'Y'), - ('1026', 'Pattern_Match', 'Pattern Match', 'Column values match alpha-numeric pattern', 'Tests that all values in the column match the same alpha-numeric pattern identified in baseline data', 'Alpha values do not match consistent pattern in baseline.', 'Pattern Mismatches', NULL, '(functional_data_type IN (''Attribute'', ''DateTime Stamp'', ''Phone'') OR functional_data_type ILIKE ''ID%'' OR functional_data_type ILIKE ''Period%'') AND fn_charcount(top_patterns, E'' \| '' ) = 1 AND REPLACE(SPLIT_PART(top_patterns, ''|'' , 2), ''N'' , '''' ) > '''' AND distinct_value_ct > 10', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'baseline_value,threshold_value', 'TRIM(REPLACE(REPLACE(REPLACE(REGEXP_REPLACE(SPLIT_PART(top_patterns, '' | '', 2), ''([*+\-%_])'', ''[\1]'', ''g''), ''A'', ''[A-Z]''), ''N'', ''[0-9]''), ''a'', ''[a-z]'')),0', 'Pattern at Baseline,Threshold Error Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of pattern mismatches', 'This test is appropriate for character fields that are expected to appear in a consistent format. It uses pattern matching syntax as appropriate for your database: REGEX matching if available, otherwise LIKE expressions. 
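The default_parm_values expression for Pattern_Match above derives a regex from the profiled pattern by bracketing special characters and then substituting character classes (A to [A-Z], N to [0-9], a to [a-z]). A minimal Python equivalent of that transformation, for illustration only:

    import re

    def pattern_to_regex(top_pattern: str) -> str:
        # Bracket special characters, per the REGEXP_REPLACE in the seed SQL
        escaped = re.sub(r"([*+\-%_])", r"[\1]", top_pattern)
        # Same substitution order as the nested REPLACE calls: A, then N, then a
        return (escaped.replace("A", "[A-Z]")
                       .replace("N", "[0-9]")
                       .replace("a", "[a-z]"))

    print(pattern_to_regex("AAA-NN"))  # [A-Z][A-Z][A-Z][-][0-9][0-9]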
The expected threshold is the number of records that fail to match the defined pattern.', 'Y'), - ('1028', 'Recency', 'Recency', 'Latest date within expected range of test date', 'Tests that the latest date in column is within a set number of days of the test date', 'Most recent date value not within expected days of test date.', 'Days before test', 'Number of days that most recent date precedes the date of test', 'general_type= ''D'' AND max_date <= run_date AND NOT column_name IN ( ''filedate'' , ''file_date'' ) AND NOT functional_data_type IN (''Future Date'', ''Schedule Date'') AND DATEDIFF( ''DAY'' , max_date, run_date) <= 62', '(ABS({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT*{PRO_RECORD_CT}::FLOAT/(1.0+DATEDIFF(''DAY'', ''{MIN_DATE}'', ''{MAX_DATE}''))::FLOAT)/NULLIF({RECORD_CT}::FLOAT, 0)', '0.75', NULL, NULL, 'threshold_value', 'CASE WHEN DATEDIFF( ''DAY'' , max_date, run_date) <= 3 THEN DATEDIFF(''DAY'', max_date, run_date) + 3 WHEN DATEDIFF(''DAY'', max_date, run_date) <= 7 then DATEDIFF(''DAY'', max_date, run_date) + 7 WHEN DATEDIFF( ''DAY'' , max_date, run_date) <= 31 THEN CEILING( DATEDIFF( ''DAY'' , max_date, run_date)::FLOAT / 7.0) * 7 WHEN DATEDIFF( ''DAY'' , max_date, run_date) > 31 THEN CEILING( DATEDIFF( ''DAY'' , max_date, run_date)::FLOAT / 30.0) * 30 END', 'Threshold Maximum Days before Test', NULL, 'Warning', 'CAT', 'column', 'Timeliness', 'Recency', 'Expected maximum count of days preceding test date', 'This test evaluates recency based on the latest referenced dates in the column. The test is appropriate for transactional dates and timestamps. The test can be especially valuable because timely data deliveries themselves may not assure that the most recent data is present. You can adjust the expected threshold to the maximum number of days that you expect the data to age before the dataset is refreshed. ', 'Y'), - ('1030', 'Required', 'Required Entry', 'Required non-null value present', 'Tests that a non-null value is present in each record for the column, consistent with baseline data', 'Every record for this column is expected to be filled, but some are missing.', 'Missing values', NULL, 'record_ct = value_ct AND record_ct > 10', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value', '0', 'Threshold Missing Value Count', NULL, 'Fail', 'CAT', 'column', 'Completeness', 'Schema Drift', 'Expected count of missing values', NULL, 'Y'), - ('1033', 'Street_Addr_Pattern', 'Street Address', 'Enough street address entries match defined pattern', 'Tests for percent of records matching standard street address pattern.', 'Percent of values matching standard street address format is under expected threshold.', 'Percent matches', 'Percent of records that match street address pattern', '(std_pattern_match=''STREET_ADDR'') AND (avg_length <> round(avg_length)) AND (avg_embedded_spaces BETWEEN 2 AND 6) AND (avg_length < 35)', '({VALUE_CT}::FLOAT * ({RESULT_MEASURE}::FLOAT - {THRESHOLD_VALUE}::FLOAT)/100.0)/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value', '75', 'Threshold Pct that Match Address Pattern', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected percent of records that match standard street address pattern', 'The street address pattern used in this test should match the vast majority of USA addresses. 
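The Recency test (1028) above derives its default threshold from the number of days between the latest date and the profiling run, per the CASE expression in its default parameter values. A small Python sketch of that ladder, with an illustrative function name:

    import math

    def default_recency_threshold(days_since_max_date: int) -> int:
        d = days_since_max_date
        if d <= 3:
            return d + 3
        if d <= 7:
            return d + 7
        if d <= 31:
            return math.ceil(d / 7.0) * 7    # round up to whole weeks
        return math.ceil(d / 30.0) * 30      # round up to whole months

    print(default_recency_threshold(10))  # 14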
You can adjust the threshold percent of matches based on the results you are getting -- you may well want to tighten it to make the test more sensitive to invalid entries.', 'Y'), - ('1034', 'Unique', 'Unique Values', 'Each column value is unique', 'Tests that no values for the column are repeated in multiple records.', 'Column values should be unique per row.', 'Duplicate values', 'Count of non-unique values', 'record_ct > 500 and record_ct = distinct_value_ct and value_ct > 0', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value', '0', 'Threshold Duplicate Value Count', NULL, 'Fail', 'CAT', 'column', 'Uniqueness', 'Schema Drift', 'Expected count of duplicate values', 'This test is ideal when the database itself does not enforce a primary key constraint on the table. It serves as an independent check on uniqueness. It''s also useful when there are a small number of exceptions to uniqueness, which can be reflected in the expected threshold count of duplicates.', 'Y'), - ('1035', 'Unique_Pct', 'Percent Unique', 'Consistent ratio of unique values', 'Tests for statistically-significant shift in percentage of unique values vs. baseline data.', 'Significant shift in percent of unique values vs. baseline.', 'Difference measure', 'Cohen''s H Difference (0.20 small, 0.5 mod, 0.8 large, 1.2 very large, 2.0 huge)', 'distinct_value_ct > 10 AND functional_data_type NOT ILIKE ''Measurement%''', '2.0 * (1.0 - fn_normal_cdf(ABS({RESULT_MEASURE}::FLOAT) / 2.0))', '0.75', NULL, NULL, 'baseline_value_ct,baseline_unique_ct,threshold_value', 'value_ct,distinct_value_ct,0.5', 'Value Count at Baseline,Distinct Value Count at Baseline,Standardized Difference Measure (0 to 1)', NULL, 'Warning', 'CAT', 'column', 'Uniqueness', 'Data Drift', 'Expected maximum Cohen''s H Difference', 'You can think of this as a test of similarity that measures whether the percentage of unique values is consistent with the percentage at baseline. A significant change might indicate duplication or a telling shift in cardinality between entities. The test uses Cohen''s H, a statistical test to identify a significant difference between two ratios. Results are reported on a standardized scale, which can be interpreted via a rule-of-thumb from small to huge.
You can refine the expected threshold value as you view legitimate results of the measure over time.', 'Y'), - ('1036', 'US_State', 'US State', 'Column value is two-letter US state code', 'Tests that the recorded column value is a valid US state.', 'Column Value is not a valid US state.', 'Not US States', 'Values that do not match 2-character US state abbreviations.', 'general_type= ''A'' AND column_name ILIKE ''%state%'' AND distinct_value_ct < 70 AND max_length = 2', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value', '0', 'Threshold Count not Matching State Abbreviations', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of values that are not US state abbreviations', 'This test validates entries against a fixed list of two-character US state codes and related Armed Forces codes.', 'Y'), - ('1037', 'Weekly_Rec_Ct', 'Weekly Records', 'At least one date per week present within date range', 'Tests for presence of at least one date per calendar week within min/max date range, per baseline data', 'At least one date per week expected in min/max date range.', 'Missing weeks', 'Calendar weeks without date values present', 'functional_data_type ILIKE ''Transactional Date%'' AND date_days_present > 1 AND functional_table_type ILIKE ''%cumulative%'' AND date_weeks_present > 3 AND date_weeks_present - (DATEDIFF(''week'', ''1800-01-05''::DATE, max_date) - DATEDIFF(''week'', ''1800-01-05''::DATE, min_date) + 1) = 0 AND future_date_ct::FLOAT / NULLIF(value_ct, 0) <= 0.75', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT*{PRO_RECORD_CT}::FLOAT/NULLIF({DATE_WEEKS_PRESENT}::FLOAT, 0)/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value', '0', 'Threshold Weeks without Dates', NULL, 'Fail', 'CAT', 'column', 'Completeness', 'Volume', 'Expected maximum count of calendar weeks without dates present', 'Weekly Records tests that at least one record is present for every calendar week within the minimum and maximum date range for the column. The test is relevant for transactional data, where you would expect at least one transaction to be recorded each week. A failure here would suggest missing records for the number of weeks identified without data. You can adjust the threshold to accept a number of weeks that you know legitimately have no records.', 'Y'), - ('1040', 'Variability_Increase', 'Variability Increase', 'Variability has increased above threshold', 'Tests that the spread or dispersion of column values has increased significantly over baseline, indicating a drop in stability of the measure.', 'The Standard Deviation of the measure has increased beyond the defined threshold.
This could signal a change in a process or a data quality issue.', 'Pct SD shift', 'Percent of baseline Standard Deviation', 'general_type = ''N'' AND functional_data_type ilike ''Measure%'' AND functional_data_type <> ''Measurement Spike'' AND column_name NOT ilike ''%latitude%'' AND column_name NOT ilike ''%longitude%'' AND value_ct <> distinct_value_ct AND distinct_value_ct > 10 AND stdev_value > 0 AND avg_value IS NOT NULL AND NOT (distinct_value_ct = max_value - min_value + 1 AND distinct_value_ct > 2)', '1', '0.75', NULL, NULL, 'baseline_sd,threshold_value', 'stdev_value,120', 'Std Deviation at Baseline,Expected Maximum Percent', NULL, 'Warning', 'CAT', 'column', 'Accuracy', 'Data Drift', 'Expected maximum pct of baseline Standard Deviation (SD)', 'This test looks for percent shifts in standard deviation as a measure of the stability of a measure over time. A significant change could indicate that new values are erroneous, or that the cohort being evaluated is significantly different from baseline. An increase in particular could mark new problems in measurement, a more heterogeneous cohort, or that significant outliers have been introduced. Consider this test along with Average Shift and New Shift. If the average shifts as well, there may be a fundamental shift in the dataset or process used to collect the data point. This might suggest a data shift that should be noted and assessed by business users. If the average does not shift, this may point to a data quality or data collection problem. ', 'Y'), - ('1041', 'Variability_Decrease', 'Variability Decrease', 'Variability has decreased below threshold', 'Tests that the spread or dispersion of column values has decreased significantly over baseline, indicating a shift in stability of the measure. This could signal a change in a process or a data quality issue.', 'The Standard Deviation of the measure has decreased below the defined threshold. This could signal a change in a process or a data quality issue.', 'Pct SD shift', 'Percent of baseline Standard Deviation', 'general_type = ''N'' AND functional_data_type ilike ''Measure%'' AND functional_data_type <> ''Measurement Spike'' AND column_name NOT ilike ''%latitude%'' AND column_name NOT ilike ''%longitude%'' AND value_ct <> distinct_value_ct AND distinct_value_ct > 10 AND stdev_value > 0 AND avg_value IS NOT NULL AND NOT (distinct_value_ct = max_value - min_value + 1 AND distinct_value_ct > 2)', '1', '0.75', NULL, NULL, 'baseline_sd,threshold_value', 'stdev_value, 80', 'Std Deviation at Baseline,Expected Minimum Percent', NULL, 'Warning', 'CAT', 'column', 'Accuracy', 'Data Drift', 'Expected minimum pct of baseline Standard Deviation (SD)', 'This test looks for percent shifts in standard deviation as a measure of the stability of a measure over time. A significant change could indicate that new values are erroneous, or that the cohort being evaluated is significantly different from baseline. A decrease in particular could indicate an improved process, better precision in measurement, the elimination of outliers, or a more homogeneous cohort. 
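The Variability Increase/Decrease pair (tests 1040/1041) compares the current standard deviation to the baseline SD as a percentage, against default limits of 120% and 80% per their default parameter values. A minimal Python sketch of that comparison, with illustrative names:

    def sd_percent_of_baseline(current_sd: float, baseline_sd: float) -> float:
        # Current SD expressed as a percent of the baseline SD
        return 100.0 * current_sd / baseline_sd if baseline_sd else 0.0

    pct = sd_percent_of_baseline(current_sd=5.4, baseline_sd=4.0)  # 135.0
    variability_increase_failed = pct > 120.0   # default threshold for test 1040
    variability_decrease_failed = pct < 80.0    # default threshold for test 1041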
', 'Y'), - ('1042', 'Valid_Month', 'Valid Month', 'Valid calendar month in expected format', 'Tests for the presence of a valid representation of a calendar month consistent with the format at baseline.', 'Column values are not a valid representation of a calendar month consistent with the format at baseline.', 'Invalid months', NULL, 'functional_data_type = ''Period Month''', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value,baseline_value', '0,CASE WHEN max_length > 3 AND initcap(min_text) = min_text THEN ''''''January'''',''''February'''',''''March'''',''''April'''',''''May'''',''''June'''',''''July'''',''''August'''',''''September'''',''''October'''',''''November'''',''''December'''''' WHEN max_length > 3 AND upper(min_text) = min_text THEN ''''''JANUARY'''',''''FEBRUARY'''',''''MARCH'''',''''APRIL'''',''''MAY'''',''''JUNE'''',''''JULY'''',''''AUGUST'''',''''SEPTEMBER'''',''''OCTOBER'''',''''NOVEMBER'''',''''DECEMBER'''''' WHEN max_length > 3 AND lower(min_text) = min_text THEN ''''''january'''',''''february'''',''''march'''',''''april'''',''''may'''',''''june'''',''''july'''',''''august'''',''''september'''',''''october'''',''''november'''',''''december'''''' WHEN max_length = 3 AND initcap(min_text) = min_text THEN ''''''Jan'''',''''Feb'''',''''Mar'''',''''Apr'''',''''May'''',''''Jun'''',''''Jul'''',''''Aug'''',''''Sep'''',''''Oct'''',''''Nov'''',''''Dec'''''' WHEN max_length = 3 AND upper(min_text) = min_text THEN ''''''JAN'''',''''FEB'''',''''MAR'''',''''APR'''',''''MAY'''',''''JUN'''',''''JUL'''',''''AUG'''',''''SEP'''',''''OCT'''',''''NOV'''',''''DEC'''''' WHEN max_length = 3 AND lower(min_text) = min_text THEN ''''''jan'''',''''feb'''',''''mar'''',''''apr'''',''''may'''',''''jun'''',''''jul'''',''''aug'''',''''sep'''',''''oct'''',''''nov'''',''''dec'''''' WHEN max_length = 2 AND min_text = ''01'' THEN ''''''01'''',''''02'''',''''03'''',''''04'''',''''05'''',''''06'''',''''07'''',''''08'''',''''09'''',''''10'''',''''11'''',''''12'''''' WHEN max_length = 2 AND min_text = ''1'' THEN ''''''1'''',''''2'''',''''3'''',''''4'''',''''5'''',''''6'''',''''7'''',''''8'''',''''9'''',''''10'''',''''11'''',''''12'''''' WHEN min_value = 1 THEN ''1,2,3,4,5,6,7,8,9,10,11,12'' ELSE ''NULL'' END', 'Threshold Invalid Months,Valid Month List', 'The acceptable number of records with invalid months present.|List of valid month values for this field, in quotes if field is numeric, separated by commas.', 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of invalid months', NULL, 'N'), - ('1043', 'Valid_Characters', 'Valid Characters', 'Column contains no invalid characters', 'Tests for the presence of non-printing characters, leading spaces, or surrounding quotes.', 'Invalid characters, such as non-printing characters, leading spaces, or surrounding quotes, were found.', 'Invalid records', 'Expected count of values with invalid characters', 'general_type = ''A''', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '0.75', NULL, NULL, 'threshold_value', '0', NULL, 'The acceptable number of records with invalid character values present.', 'Warning', 'CAT', 'column', 'Validity', 'Schema Drift', 'Threshold Invalid Value Count', 'This test looks for the presence of non-printing ASCII characters that are considered non-standard in basic text processing. It also identifies leading spaces and values enclosed in quotes. 
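Illustratively, the conditions that Valid_Characters (test 1043) describes can be sketched in Python as follows; the exact character set the product checks is not spelled out here, so this is an assumption-laden approximation, not the actual rule set.

    def has_invalid_characters(value: str) -> bool:
        # Non-printing ASCII control characters (assumed range for illustration)
        non_printing = any(ord(ch) < 32 or ord(ch) == 127 for ch in value)
        leading_space = value.startswith(" ")
        quoted = len(value) >= 2 and value[0] == value[-1] and value[0] in ("'", '"')
        return non_printing or leading_space or quoted

    print(has_invalid_characters(" padded"))    # True: leading space
    print(has_invalid_characters('"quoted"'))   # True: surrounding quotes
    print(has_invalid_characters("clean"))      # False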
Values that fail this test may be artifacts of data conversion, or just more difficult to process or analyze downstream.', 'N'), - ('1044', 'Valid_US_Zip', 'Valid US Zip', 'Valid USA Postal Codes', 'Tests that postal codes match the 5 or 9 digit standard US format', 'Invalid US Zip Code formats found.', 'Invalid Zip Codes', 'Expected count of values with invalid Zip Codes', 'functional_data_type = ''Zip''', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '0.75', NULL, NULL, 'threshold_value', '0', NULL, NULL, 'Warning', 'CAT', 'column', 'Validity', 'Schema Drift', 'Threshold Invalid Value Count', NULL, 'Y'), - ('1045', 'Valid_US_Zip3', 'Valid US Zip-3 ', 'Valid USA Zip-3 Prefix', 'Tests that postal codes match the 3 digit format of a regional prefix.', 'Invalid 3-digit US Zip Code regional prefix formats found.', 'Invalid Zip-3 Prefix', 'Expected count of values with invalid Zip-3 Prefix Codes', 'functional_data_type = ''Zip3''', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '0.75', NULL, NULL, 'threshold_value', '0', NULL, NULL, 'Warning', 'CAT', 'column', 'Validity', 'Schema Drift', 'Threshold Invalid Zip3 Count', 'This test looks for the presence of values that fail to match the three-digit numeric code expected for US Zip Code regional prefixes. These prefixes are often used to roll up Zip Code data to a regional level, and may be critical to anonymize detailed data and protect PID. Depending on your needs and regulatory requirements, longer zip codes could place PID at risk.', 'Y'), - ('1006', 'Condition_Flag', 'Custom Condition', 'Column values match pre-defined condition', 'Tests that each record in the table matches a pre-defined, custom condition', 'Value(s) found not matching defined condition.', 'Values Failing', NULL, NULL, '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', 'Test Focus', 'Specify a brief descriptor of the focus of this test that is unique within this Test Suite for the Table and Test Type. This distinguishes this test from others of the same type on the same table. Example: `Quantity Consistency` if you are testing that quantity ordered matches quantity shipped.', 'threshold_value,custom_query', NULL, 'Threshold Error Count,Custom SQL Expression (TRUE on error)', 'The number of errors that are acceptable before test fails.|Expression should evaluate to TRUE to register an error or FALSE if no error. An expression can reference only columns in the selected table.', 'Fail', 'CAT', 'custom', 'Validity', 'Schema Drift', 'Count of records that don''t meet test condition', 'Custom Condition is a business-rule test for a user-defined error condition based on the value of one or more columns. The condition is applied to each record within the table, and the count of records failing the condition is added up. If that count exceeds a threshold of errors, the test as a whole is failed. This test is ideal for error conditions that TestGen cannot automatically infer, and any condition that involves the values of more than one column in the same record. Performance of this test is fast, since it is performed together with other aggregate tests. 
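The counting logic that the Custom Condition (1006) usage note describes -- evaluate the expression per record and total the failures in one aggregate pass -- can be sketched as a query builder like the one below. Schema, table, and condition are placeholders; this is an illustration of the shape, not the generated SQL itself.

    def condition_flag_query(schema: str, table: str, condition: str) -> str:
        # Count rows where the user-defined error condition evaluates TRUE
        return (
            f"SELECT SUM(CASE WHEN {condition} THEN 1 ELSE 0 END) AS error_ct "
            f"FROM {schema}.{table}"
        )

    print(condition_flag_query("sales", "orders", "qty_ordered <> qty_shipped"))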
Interpretation is based on the user-defined meaning of the test.', 'Y'), - - ('1031', 'Row_Ct', 'Row Count', 'Number of rows is at or above threshold', 'Tests that the count of records has not decreased from the baseline count.', 'Row count less than baseline count.', 'Row count', NULL, 'TEMPLATE', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({THRESHOLD_VALUE}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value', NULL, 'Threshold Minimum Record Count', NULL, 'Fail', 'CAT', 'table', 'Completeness', 'Volume', 'Expected minimum row count', 'Because this tests the row count against a constant minimum threshold, it''s appropriate for any dataset, as long as the number of rows doesn''t radically change from refresh to refresh. But it''s not responsive to change over time. You may want to adjust the threshold periodically if you are dealing with a cumulative dataset.', 'Y'), - ('1032', 'Row_Ct_Pct', 'Row Range', 'Number of rows within percent range of threshold', 'Tests that the count of records is within a percentage above or below the baseline count.', 'Row Count is outside of threshold percent of baseline count.', 'Percent of baseline', 'Row count percent above or below baseline', 'TEMPLATE', '(100.0 - {RESULT_MEASURE}::FLOAT)/100.0', '1.0', NULL, NULL, 'baseline_ct,threshold_value', NULL, 'Baseline Record Count,Threshold Pct Above or Below Baseline', NULL, 'Fail', 'CAT', 'table', 'Completeness', 'Volume', 'Expected percent window below or above baseline', 'This test is better than Row Count for an incremental or windowed dataset where you would expect the row count to range within a percentage of baseline.', 'Y'), - - ('1008', 'CUSTOM', 'Custom Test', 'Custom-defined business rule', 'Custom SQL Query Test', 'Errors were detected according to test definition.', 'Errors found', 'Count of errors identified by query', NULL, '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', 'Test Focus', 'Specify a brief descriptor of the focus of this test that is unique within this Test Suite for the Table and Test Type. This distinguishes this test from others of the same type on the same table. Example: `Order Total Matches Detail` if you are testing that the total in one table matches the sum of lines in another.', 'custom_query', NULL, 'Custom SQL Query Returning Error Records', 'Query should return records indicating one or more errors. The test passes if no records are returned. Results of the query will be shown when you click `Review Source Data` for a failed test, so be sure to include enough data in your results to follow-up. \n\nA query can refer to any tables in the database. You must hard-code the schema or use `{DATA_SCHEMA}` to represent the schema defined for the Table Group.', 'Fail', 'QUERY', 'custom', 'Accuracy', 'Data Drift', 'Expected count of errors found by custom query', 'This business-rule test is highly flexible, covering any error state that can be expressed by a SQL query against one or more tables in the database. In operation, the user-defined query is embedded within a parent query returning the count of error rows identified. Any row returned by the query is interpreted as a single error condition in the test. Note that this query is run independently of other tests, and that performance will be slower, depending in large part on the efficiency of the query you write. Interpretation is based on the user-defined meaning of the test. Your query might be written to return errors in individual rows identified by joining tables. 
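Per the CUSTOM (1008) note above, the user-defined query is embedded in a parent query that counts the error rows it returns, and `{DATA_SCHEMA}` is resolved to the Table Group's schema. A hedged Python sketch of that flow, with hypothetical names:

    def custom_test_query(user_query: str, schema: str) -> str:
        # Resolve the schema placeholder, then wrap in a counting parent query
        resolved = user_query.replace("{DATA_SCHEMA}", schema)
        return f"SELECT COUNT(*) AS error_ct FROM ({resolved}) errs"

    q = custom_test_query(
        "SELECT o.id FROM {DATA_SCHEMA}.orders o "
        "LEFT JOIN {DATA_SCHEMA}.order_lines l ON l.order_id = o.id "
        "WHERE l.order_id IS NULL",
        "sales",
    )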
Or it might return an error based on a multi-column aggregate condition returning a single row if an error is found. This query is run separately when you click `Review Source Data` from Test Results, so be sure to include enough data in your results to follow-up. Interpretation is based on the user-defined meaning of the test.', 'Y'), - - ('1500', 'Aggregate_Balance', 'Aggregate Balance', 'Aggregate values per group match reference', 'Tests for exact match in aggregate values for each set of column values vs. reference dataset', 'Aggregate measure per set of column values does not exactly match reference dataset.', 'Mismatched measures', NULL, NULL, '1', '1.0', 'Aggregate Expression', 'Specify an aggregate column expression: one of `SUM([column_name])` or `COUNT([column_name])`', 'subset_condition,groupby_names,having_condition,match_schema_name,match_table_name,match_column_names,match_subset_condition,match_groupby_names,match_having_condition', NULL, 'Record Subset Condition,Grouping Columns,Group Subset Condition,Matching Schema Name,Matching Table Name,Matching Aggregate Expression,Matching Record Subset Condition,Matching Grouping Columns,Matching Group Subset Condition', 'Condition defining a subset of records in main table, written like a condition within a SQL WHERE clause - OPTIONAL|Category columns in main table separated by commas (e.g. GROUP BY columns)|Condition defining a subset of aggregate records in main table (e.g. HAVING clause) - OPTIONAL|Schema location of matching table|Matching table name|Aggregate column expression in matching table: one of `SUM([column_name])` or `COUNT([column_name])`|Condition defining a subset of records in matching table, written like a condition within a SQL WHERE clause - OPTIONAL|Category columns in matching table separated by commas (e.g. GROUP BY columns)|Condition defining a subset of aggregate records in matching table (e.g. HAVING clause) - OPTIONAL', 'Fail', 'QUERY', 'referential', 'Consistency', 'Data Drift', 'Expected count of group totals not matching aggregate value', 'This test compares sums or counts of a column rolled up to one or more category combinations across two different tables. Both tables must be accessible at the same time. It''s ideal for confirming that two datasets exactly match -- that the sum of a measure or count of a value hasn''t changed or shifted between categories. Use this test to compare a raw and processed version of the same dataset, or to confirm that an aggregated table exactly matches the detail table that it''s built from. An error here means that one or more value combinations fail to match.
New categories or combinations will cause failure.', 'Y'), - ('1501', 'Aggregate_Minimum', 'Aggregate Minimum', 'Aggregate values per group are at or above reference', 'Tests that aggregate values for each set of column values are at least the same as reference dataset', 'Aggregate measure per set of column values is not at least the same as reference dataset.', 'Mismatched measures', NULL, NULL, '1', '1.0', 'Aggregate Expression', 'Specify an aggregate column expression: one of `SUM([column_name])` or `COUNT([column_name])`', 'subset_condition,groupby_names,having_condition,match_schema_name,match_table_name,match_column_names,match_subset_condition,match_groupby_names,match_having_condition', NULL, 'Record Subset Condition,Grouping Columns,Group Subset Condition,Matching Schema Name,Matching Table Name,Matching Aggregate Expression,Matching Record Subset Condition,Matching Grouping Columns,Matching Group Subset Condition', 'Condition defining a subset of records in main table, written like a condition within a SQL WHERE clause - OPTIONAL|Category columns in main table separated by commas (e.g. GROUP BY columns)|Condition defining a subset of aggregate records in main table (e.g. HAVING clause) - OPTIONAL|Schema location of reference table|Reference table name|Aggregate column expression in reference table (e.g. `SUM(sales)`)|Condition defining a subset of records in reference table, written like a condition within a SQL WHERE clause - OPTIONAL|Category columns in reference table separated by commas (e.g. GROUP BY columns)|Condition defining a subset of aggregate records in reference table (e.g. HAVING clause) - OPTIONAL', 'Fail', 'QUERY', 'referential', 'Accuracy', 'Data Drift', 'Expected count of group totals below aggregate value', 'This test compares sums or counts of a column rolled up to one or more category combinations, but requires a match or increase in the aggregate value, rather than an exact match, across two different tables. Both tables must be accessible at the same time. Use this to confirm that aggregate values have not dropped for any set of categories, even if some values may rise. This test is useful to compare an older and newer version of a cumulative dataset. An error here means that one or more values per category set fail to match or exceed the prior dataset. New categories or combinations are allowed (but can be restricted independently with a Combo_Match test). Both tables must be present to run this test.', 'Y'), - ('1502', 'Combo_Match', 'Reference Match', 'Column values or combinations found in reference', 'Tests for the presence of one or a set of column values in a reference table', 'Column value combinations are not found in reference table values.', 'Missing values', NULL, NULL, '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', 'Categorical Column List', 'Specify one or more Categorical columns, separated by commas. \n\nDo not use continuous measurements here.
Do not use numeric values unless they represent discrete categories.', 'subset_condition,having_condition,match_schema_name,match_table_name,match_groupby_names,match_subset_condition,match_having_condition', NULL, 'Record Subset Condition,Group Subset Condition,Reference Schema Name,Reference Table Name,Matching Columns,Matching Record Subset Condition,Matching Group Subset Condition', 'Condition defining a subset of records in main table to evaluate, written like a condition within a SQL WHERE clause - OPTIONAL|Condition based on aggregate expression used to exclude value combinations in source table, written like a condition within a SQL HAVING clause (e.g. `SUM(sales) < 100`) - OPTIONAL|Schema location of matching table|Matching table name|Column Names in reference table used to validate source table values (separated by commas)|Condition defining a subset of records in reference table to match against, written like a condition within a SQL WHERE clause - OPTIONAL|Condition based on aggregate expression used to exclude value combinations in reference table, written like a condition within a SQL HAVING clause (e.g. `SUM(sales) < 100`) - OPTIONAL', 'Fail', 'QUERY', 'referential', 'Validity', 'Schema Drift', 'Expected count of non-matching value combinations', 'This test verifies that values, or combinations of values, that are present in the main table are also found in a reference table. This is a useful test for referential integrity between fact and dimension tables. You can also use it to confirm the validity of a code or category, or of combinations of values that should only be found together within each record, such as product/size/color. An error here means that one or more category combinations in the main table are not found in the reference table. Both tables must be present to run this test.', 'Y'), - ('1503', 'Distribution_Shift', 'Distribution Shift', 'Probability distribution consistent with reference', 'Tests the closeness of match between two distributions of aggregate measures across combinations of column values, using Jensen-Shannon Divergence test', 'Divergence between two distributions exceeds specified threshold.', 'Divergence level (0-1)', 'Jensen-Shannon Divergence, from 0 (identical distributions), to 1.0 (max divergence)', NULL, '1', '0.75', 'Categorical Column List', 'Specify one or more Categorical columns, separated by commas. Do not use continuous measurements here. Do not use numeric values unless they represent discrete categories.', 'subset_condition,match_schema_name,match_table_name,match_groupby_names,match_subset_condition', NULL, 'Record Subset Condition,Reference Schema Name,Reference Table Name,Matching Columns to Compare,Matching Record Subset Condition', 'Condition defining a subset of records in main table to evaluate, written like a condition within a SQL WHERE clause - OPTIONAL|Schema location of matching table|Matching table name|Column Names in reference table used to compare counts with source table values (separated by commas)|Condition defining a subset of records in reference table to match against, written like a condition within a SQL WHERE clause - OPTIONAL', 'Warning', 'QUERY', 'referential', 'Consistency', 'Data Drift', 'Expected maximum divergence level between 0 and 1', 'This test measures the similarity of two sets of counts per categories, by using their proportional counts as probability distributions.
Using Jensen-Shannon divergence, a measure of relative entropy or difference between two distributions, the test assigns a score ranging from 0, meaning that the distributions are identical, to 1, meaning that the distributions are completely unrelated. This test can be used to compare datasets that may not match exactly, but should have similar distributions. For example, it is a useful sanity check for data from different sources that you would expect to have a consistent spread, such as shipment of building materials per state and construction projects by state. Scores can be compared over time even if the distributions are not identical -- a dataset can be expected to maintain a comparable divergence score with a reference dataset over time. Both tables must be present to run this test.', 'Y'), - ('1504', 'Aggregate_Balance_Percent', 'Aggregate Balance Percent', 'Aggregate measure per group within percent of reference', 'Tests that aggregate measure for each set of column values fall within a percent range above or below the measure for reference dataset', 'Aggregate measure per set of column values is outside percent range of reference dataset.', 'Mismatched measures', NULL, NULL, '1', '1.0', 'Aggregate Expression', 'Specify an aggregate column expression: one of `SUM([column_name])` or `COUNT([column_name])`', 'subset_condition,groupby_names,having_condition,match_schema_name,match_table_name,match_column_names,match_subset_condition,match_groupby_names,match_having_condition,lower_tolerance,upper_tolerance', NULL, 'Record Subset Condition,Grouping Columns,Group Subset Condition,Matching Schema Name,Matching Table Name,Matching Aggregate Expression,Matching Record Subset Condition,Matching Grouping Columns,Matching Group Subset Condition,Lower Tolerance Percent,Upper Tolerance Percent', 'Condition defining a subset of records in main table, written like a condition within a SQL WHERE clause - OPTIONAL|Category columns in main table separated by commas (e.g. GROUP BY columns)|Condition defining a subset of aggregate records in main table (e.g. HAVING clause) - OPTIONAL|Schema location of matching table|Matching table name|Aggregate column expression in matching table: one of `SUM([column_name])` or `COUNT([column_name])`|Condition defining a subset of records in matching table, written like a condition within a SQL WHERE clause - OPTIONAL|Category columns in matching table separated by commas (e.g. GROUP BY columns)|Condition defining a subset of aggregate records in matching table (e.g. HAVING clause) - OPTIONAL|Allowable tolerance below the reference measure expressed as a percent|Allowable tolerance above the reference measure expressed as a percent', 'Fail', 'QUERY', 'referential', 'Consistency', 'Data Drift', 'Expected count of group totals not matching aggregate value', 'This test compares sums or counts of a column rolled up to one or more category combinations across two different tables. Both tables must be accessible at the same time. Use it to confirm that two datasets closely match within the tolerance you set -- that the sum of a measure or count of a value remains sufficiently consistent between categories. You could use this test to compare sales per product within one month to another, when you want to be alerted if the difference for any product falls outside of the range defined as 5% below to 10% above the prior month. An error here means that one or more value combinations fail to match within the set tolerances.
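A minimal Python sketch of the Jensen-Shannon divergence scoring that Distribution_Shift (1503) describes, computed from category counts; with log base 2 the score is bounded between 0 (identical) and 1 (unrelated), matching the test's 0-1 scale. Function and data names are illustrative.

    import math

    def js_divergence(counts_a: dict, counts_b: dict) -> float:
        # Normalize the two count sets into probability distributions
        keys = set(counts_a) | set(counts_b)
        total_a, total_b = sum(counts_a.values()), sum(counts_b.values())
        p = {k: counts_a.get(k, 0) / total_a for k in keys}
        q = {k: counts_b.get(k, 0) / total_b for k in keys}
        m = {k: 0.5 * (p[k] + q[k]) for k in keys}

        def kl(x, y):
            # Kullback-Leibler divergence in bits (0 * log 0 treated as 0)
            return sum(x[k] * math.log2(x[k] / y[k]) for k in keys if x[k] > 0)

        return 0.5 * kl(p, m) + 0.5 * kl(q, m)

    score = js_divergence({"NY": 120, "CA": 80}, {"NY": 115, "CA": 85})
    # compare `score` against the configured maximum divergence threshold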
New categories or combinations will cause failure.', 'Y'), - ('1505', 'Aggregate_Balance_Range', 'Aggregate Balance Range', 'Aggregate measure per group within hard range of reference', 'Tests that aggregate measure for each set of column values fall within a hard range above or below the measure for reference dataset', 'Aggregate measure per set of column values is outside expected range of reference dataset.', 'Mismatched measures', NULL, NULL, '1', '1.0', 'Aggregate Expression', 'Specify an aggregate column expression: one of `SUM([column_name])` or `COUNT([column_name])`', 'subset_condition,groupby_names,having_condition,match_schema_name,match_table_name,match_column_names,match_subset_condition,match_groupby_names,match_having_condition,lower_tolerance,upper_tolerance', NULL, 'Record Subset Condition,Grouping Columns,Group Subset Condition,Matching Schema Name,Matching Table Name,Matching Aggregate Expression,Matching Record Subset Condition,Matching Grouping Columns,Matching Group Subset Condition,Lower Tolerance Constant,Upper Tolerance Constant', 'Condition defining a subset of records in main table, written like a condition within a SQL WHERE clause - OPTIONAL|Category columns in main table separated by commas (e.g. GROUP BY columns)|Condition defining a subset of aggregate records in main table (e.g. HAVING clause) - OPTIONAL|Schema location of matching table|Matching table name|Aggregate column expression in matching table: one of `SUM([column_name])` or `COUNT([column_name])`|Condition defining a subset of records in matching table, written like a condition within a SQL WHERE clause - OPTIONAL|Category columns in matching table separated by commas (e.g. GROUP BY columns)|Condition defining a subset of aggregate records in matching table (e.g. HAVING clause) - OPTIONAL|Allowable tolerance below the reference measure expressed as a constant value|Allowable tolerance above the reference measure expressed as a constant value', 'Fail', 'QUERY', 'referential', 'Consistency', 'Data Drift', 'Expected count of group totals not matching aggregate value', 'This test compares sums or counts of a column rolled up to one or more category combinations across two different tables. Both tables must be accessible at the same time. Use it to confirm that two datasets closely match within the tolerances you define as specific values above or below the aggregate measure for the same categories in the reference dataset -- that the sum of a measure or count of a value remains sufficiently consistent between categories. For instance, you can use this test to compare sales per product within one month to another, when you want to be alerted if the difference for any product falls outside of the range defined as 10000 dollars above or below the prior month. An error here means that one or more value combinations fail to match within the set tolerances. New categories or combinations will cause failure.', 'Y'), - ('1508', 'Timeframe_Combo_Gain', 'Timeframe No Drops', 'Latest timeframe has at least all value combinations from prior period', 'Tests that column values in most recent time-window include at least same as prior time window', 'Column values in most recent time-window don''t include all values in prior window.', 'Mismatched values', NULL, NULL, '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', 'Categorical Column List', 'Specify one or more Categorical columns, separated by commas. Make sure not to use continuous measurements here.
Do not use numeric values unless they represent discrete categories.', 'window_date_column,window_days,subset_condition', NULL, 'Date Column for Time Windows,Time Window in Days,Record Subset Condition', 'The date column used to define the time windows. This must be a DATE or DATETIME type.|Length in days of the time window. The test will compare the most recent period of days to the prior period of the same duration.|Condition defining a subset of records in main table to evaluate, written like a condition within a SQL WHERE clause - OPTIONAL', 'Fail', 'QUERY', 'referential', 'Consistency', 'Data Drift', 'Expected count of missing value combinations', 'This test checks a single transactional table to verify that categorical values or combinations that are present in the most recent time window you define include at least all those found in the prior time window of the same duration. Missing values in the latest time window will trigger the test to fail. New values are permitted. Use this test to confirm that codes or categories are not lost across successive time periods in a transactional table.', 'Y'), - ('1509', 'Timeframe_Combo_Match', 'Timeframe Match', 'Column value combinations from latest timeframe same as prior period', 'Tests for presence of same column values in most recent time-window vs. prior time window', 'Column values don''t match in most recent time-windows.', 'Mismatched values', NULL, NULL, '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', 'Categorical Column List', 'Specify one or more Categorical columns, separated by commas. Do not use continuous measurements here. Do not use numeric values unless they represent discrete categories.', 'window_date_column,window_days,subset_condition', NULL, 'Date Column for Time Windows,Time Window in Days,Record Subset Condition', NULL, 'Fail', 'QUERY', 'referential', 'Consistency', 'Data Drift', 'Expected count of non-matching value combinations', 'This test checks a single transactional table (such as a fact table) to verify that categorical values or combinations that are present in the most recent time window you define match those found in the prior time window of the same duration. New or missing values in the latest time window will trigger the test to fail. Use this test to confirm the consistency in the occurrence of codes or categories across successive time periods in a transactional table.', 'Y'), - ('1510', 'Dupe_Rows', 'Duplicate Rows', 'Rows are not duplicated in table', 'Tests for the absence of duplicate rows based on unique combination of column values', 'Column value combinations are duplicated in the table.', 'Duplicate records', NULL, NULL, '(({RESULT_MEASURE}-{THRESHOLD_VALUE}))::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', 'null', 'null', 'groupby_names', NULL, 'Columns to Compare', 'List of columns in the table that define a duplicate record when the combination of values is repeated on multiple rows', 'Fail', 'QUERY', 'table', 'Uniqueness', 'Schema Drift', 'Expected count of duplicate value combinations', 'This test verifies that combinations of values are not repeated within the table. By default when auto-generated, the test considers all columns to protect against duplication of entire rows. If you know the minimum columns that should constitute a unique record, such as a set of ID''s, you should use those to make the test as sensitive as possible. 
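The Dupe_Rows (1510) check described above -- group by the configured columns and flag combinations appearing more than once -- reduces to a standard GROUP BY/HAVING shape. A hedged Python sketch with placeholder identifiers:

    def dupe_rows_query(schema: str, table: str, groupby_names: str) -> str:
        # Count value combinations that repeat across multiple rows
        return (
            f"SELECT {groupby_names}, COUNT(*) AS dupe_ct "
            f"FROM {schema}.{table} "
            f"GROUP BY {groupby_names} HAVING COUNT(*) > 1"
        )

    print(dupe_rows_query("sales", "orders", "order_id, order_line"))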
Alternatively, if you know of columns you can always exclude, such as file_date or refresh_snapshot_id, remove them to tighten the test somewhat.', 'Y'), - ('1511', 'Stale_Table', 'Stale Table', 'Stale Table Not Updated', 'Confirms whether table has been updated based on data fingerprint', 'Table has not been updated.', 'Update detected', NULL, 'TEMPLATE', '(({RESULT_MEASURE}-{THRESHOLD_VALUE}))::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '0.0', 'null', 'null', 'history_calculation,history_lookback,subset_condition,custom_query', NULL, 'History Aggregate,History Lookback,Record Subset Condition,Fingerprint Expression', 'Aggregate calculation to be performed on the N lookback results|Last N tests to use for history aggregate calculation|Condition defining a subset of records in main table|String expression combining key column measures into a distinct representation of table state', 'Log', 'QUERY', 'table', 'Recency', 'Recency', 'Most recent prior table fingerprint', 'This test compares the current table fingerprint, calculated signature of column contents, to confirm that the table has been updated. The table fingerprint is derived from a set of values and aggregates from columns most likely to change. This test allows you to track the schedule and frequency of updates and refreshes to the table.', 'Y') + (id, test_type, test_name_short, test_name_long, test_description, except_message, measure_uom, measure_uom_description, selection_criteria, dq_score_prevalence_formula, dq_score_risk_factor, column_name_prompt, column_name_help, default_parm_columns, default_parm_values, default_parm_prompts, default_parm_help, default_severity, run_type, test_scope, dq_dimension, health_dimension, threshold_description, usage_notes, active, result_visualization, result_visualization_params) +VALUES ('1004', 'Alpha_Trunc', 'Alpha Truncation', 'Maximum character count consistent', 'Tests that the maximum count of characters in a column value has not dropped vs. baseline data', 'Maximum length of values has dropped from prior expected length.', 'Values over max', NULL, 'general_type =''A'' AND max_length > 0 AND ( (min_length = avg_length AND max_length = avg_length) OR (numeric_ct <> value_ct ) ) AND functional_table_type NOT LIKE ''%window%'' /* The conditions below are to eliminate overlap with : LOV_Match (excluded selection criteria for this test_type), Pattern_Match (excluded selection criteria for this test_type), Constant (excluded functional_data_type Constant and Boolean) */ AND ( (distinct_value_ct NOT BETWEEN 2 AND 10 AND functional_data_type NOT IN ( ''Constant'', ''Boolean'') ) AND NOT ( fn_charcount(top_patterns, E'' \| '' ) = 1 AND fn_charcount(top_patterns, E'' \| '' ) IS NOT NULL AND REPLACE(SPLIT_PART(top_patterns, ''|'' , 2), ''N'' , '''' ) > ''''))', '{VALUE_CT}::FLOAT * (FN_NORMAL_CDF(({MAX_LENGTH}::FLOAT - {AVG_LENGTH}::FLOAT) / (NULLIF({MAX_LENGTH}::FLOAT, 0) / 3)) - FN_NORMAL_CDF(({RESULT_MEASURE}::FLOAT - {AVG_LENGTH}::FLOAT) / (NULLIF({MAX_LENGTH}::FLOAT, 0) / 3)) ) /NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value', 'FLOOR(0.95 * max_length::FLOAT)', 'Maximum String Length at Baseline', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Maximum length expected', 'Alpha Truncation tests that the longest text value in a column hasn''t become shorter than the defined threshold, initially 95% of the longest value at baseline. This could indicate a problem in a cumulative dataset, where prior values should still exist unchanged. 
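The Alpha Truncation (1004) comparison above uses a default threshold of FLOOR(0.95 * max_length) from baseline, failing when the longest current value drops below it. A minimal Python sketch, with illustrative names:

    import math

    def alpha_trunc_failed(current_values, baseline_max_length: int) -> bool:
        # Default threshold: 95% of the longest value seen at baseline
        threshold = math.floor(0.95 * baseline_max_length)
        return max(len(v) for v in current_values) < threshold

    print(alpha_trunc_failed(["abcd", "abcdefgh"], baseline_max_length=12))  # True: 8 < 11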
A failure here would suggest that some process changed data that you would still expect to be present and matching its value when the column was profiled. This test would not be appropriate for an incremental or windowed dataset.', 'Y', 'line_chart', NULL), + ('1005', 'Avg_Shift', 'Average Shift', 'Column mean is consistent with reference', 'Tests for statistically-significant shift in mean value for column from average calculated at baseline.', 'Standardized difference between averages is over the selected threshold level.', 'Difference Measure', 'Cohen''s D Difference (0.20 small, 0.5 mod, 0.8 large, 1.2 very large, 2.0 huge)', 'general_type=''N'' AND distinct_value_ct > 10 AND functional_data_type ilike ''Measure%'' AND functional_data_type <> ''Measurement Spike'' AND column_name NOT ilike ''%latitude%'' AND column_name NOT ilike ''%longitude%''', '2.0 * (1.0 - fn_normal_cdf(ABS({RESULT_MEASURE}::FLOAT) / 2.0))', '0.75', NULL, NULL, 'baseline_value_ct,baseline_avg,baseline_sd,threshold_value', 'value_ct,avg_value,stdev_value,0.5::VARCHAR', 'Value Ct at Baseline,Mean at Baseline,Std Deviation at Baseline,Threshold Difference Measure ', NULL, 'Warning', 'CAT', 'column', 'Consistency', 'Data Drift', 'Standardized Difference Measure', 'Average Shift tests that the average of a numeric column has not significantly changed since baseline, when profiling was done. A significant shift may indicate errors in processing, differences in source data, or valid changes that may nevertheless impact assumptions in downstream data products. The test uses Cohen''s D, a statistical technique to identify significant shifts in a value. Cohen''s D measures the difference between the two averages, reporting results on a standardized scale, which can be interpreted via a rule-of-thumb from small to huge. Depending on your data, some difference may be expected, so it''s reasonable to adjust the threshold value that triggers test failure. This test works well for measures, or even for identifiers if you expect them to increment consistently. You may want to periodically adjust the expected threshold, or even the expected average value if you expect shifting over time. Consider this test along with Variability Increase. If variability rises too, process or measurement flaws could be at work. If variability remains consistent, the issue is more likely to be with the source data itself.', 'Y', 'line_chart', NULL), + ('1007', 'Constant', 'Constant Match', 'All column values match constant value', 'Tests that all values in the column match the constant value identified in baseline data', 'A constant value is expected for this column.', 'Mismatched values', NULL, 'TEMPLATE', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'baseline_value,threshold_value', NULL, 'Constant Value at Baseline,Threshold Error Count', 'The single, unchanging value of the column, per baseline|The number of errors that are acceptable before test fails.', 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Count of records with unexpected values', 'Constant Match tests that a single value determined to be a constant in baseline profiling is still the only value for the column that appears in subsequent versions of the dataset. Sometimes new data or business knowledge may reveal that the value is not a constant at all, even though only one value was present at profiling. In this case, you will want to disable this test. 
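For the Average Shift test (1005) above, Cohen's D standardizes the gap between the current and baseline means, and the row's formula scores it with the same normal-CDF expression as the Cohen's H example earlier. A sketch using the common pooled-SD form of Cohen's D; the exact pooling TestGen applies is not shown here, so treat this as an approximation.

    import math

    def cohens_d(mean_a, sd_a, n_a, mean_b, sd_b, n_b):
        # Pooled standard deviation across the two samples (textbook form)
        pooled_var = ((n_a - 1) * sd_a**2 + (n_b - 1) * sd_b**2) / (n_a + n_b - 2)
        return (mean_a - mean_b) / math.sqrt(pooled_var)

    d = cohens_d(mean_a=105.0, sd_a=9.5, n_a=980, mean_b=100.0, sd_b=10.0, n_b=1000)
    failed = abs(d) > 0.5   # default threshold difference measure from the row above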
Alternatively, you can use the Value Match test to provide a limited number of valid values for the column.', 'Y', 'line_chart', NULL), + ('1009', 'Daily_Record_Ct', 'Daily Records', 'All dates present within date range', 'Tests for presence of every calendar date within min/max date range, per baseline data', 'Not every date value between min and max dates is present, unlike at baseline.', 'Missing dates', NULL, 'general_type= ''D'' AND date_days_present > 21 AND date_days_present - (DATEDIFF(''day'', ''1800-01-05''::DATE, max_date) - DATEDIFF(''day'', ''1800-01-05''::DATE, min_date) + 1) = 0 AND future_date_ct::FLOAT / NULLIF(value_ct, 0) <= 0.75', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT*{PRO_RECORD_CT}::FLOAT/NULLIF({DATE_DAYS_PRESENT}::FLOAT, 0)/NULLIF({RECORD_CT}::FLOAT, 0)', '0.75', NULL, NULL, 'threshold_value', '0', 'Threshold Missing Calendar Days', NULL, 'Warning', 'CAT', 'column', 'Completeness', 'Volume', 'Missing calendar days within min/max range', 'Daily Records tests that at least one record is present for every day within the minimum and maximum date range for the column. The test is relevant for transactional data, where you would expect at least one transaction to be recorded each day. A failure here would suggest missing records for the number of days identified without data. You can adjust the threshold to accept a number of days that you know legitimately have no records. ', 'Y', 'line_chart', NULL), + ('1011', 'Dec_Trunc', 'Decimal Truncation', 'Sum of fractional values at or above reference', 'Tests for decimal truncation by confirming that the sum of fractional values in data is no less than the sum at baseline', 'The sum of fractional values is under baseline, which may indicate decimal truncation', 'Fractional sum', 'The sum of all decimal values from all data for this column', 'fractional_sum > 0 AND functional_table_type LIKE''%cumulative%''', '1', '1.0', NULL, NULL, 'threshold_value', 'ROUND(fractional_sum, 0)', 'Sum of Fractional Values at Baseline', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Minimum expected sum of all fractional values', 'Decimal Truncation tests that the fractional (decimal) part of a numeric column has not been truncated since Baseline. This works by summing all the fractional values after the decimal point and confirming that the total is at least equal to the fractional total at baseline. This could indicate a problem in a cumulative dataset, where prior values should still exist unchanged. A failure here would suggest that some process changed data that you would still expect to be present and matching its value when the column was profiled. This test would not be appropriate for an incremental or windowed dataset.', 'Y', 'line_chart', NULL), + ('1012', 'Distinct_Date_Ct', 'Date Count', 'Count of distinct dates at or above reference', 'Tests that the count of distinct dates referenced in the column has not dropped vs. 
baseline data', 'Drop in count of unique dates recorded in column.', 'Unique dates', 'Count of unique dates in transactional date column', 'functional_data_type ILIKE ''Transactional Date%'' AND date_days_present > 1 AND functional_table_type ILIKE ''%cumulative%''', '(({RECORD_CT}-{PRO_RECORD_CT})::FLOAT*{DISTINCT_VALUE_CT}::FLOAT/NULLIF({PRO_RECORD_CT}::FLOAT, 0))/NULLIF({PRO_RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'baseline_value,threshold_value', 'date_days_present,date_days_present', 'Distinct Date Count at Baseline,Min Expected Date Count', NULL, 'Fail', 'CAT', 'column', 'Timeliness', 'Recency', 'Minimum distinct date count expected', 'Date Count tests that the count of distinct dates present in the column has not dropped since baseline. The test is relevant for cumulative datasets, where old records are retained. A failure here would indicate missing records, which could be caused by a processing error or changed upstream data sources.', 'Y', 'line_chart', NULL), + ('1013', 'Distinct_Value_Ct', 'Value Count', 'Count of distinct values has not dropped', 'Tests that the count of unique values in the column has not changed from baseline.', 'Count of unique values in column has changed from baseline.', 'Unique Values', NULL, 'distinct_value_ct between 2 and 10 AND value_ct > 50 AND functional_data_type IN (''Code'', ''Category'', ''Attribute'', ''Description'') AND NOT coalesce(top_freq_values,'''') > ''''', 'ABS({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT*{PRO_RECORD_CT}::FLOAT/NULLIF({DISTINCT_VALUE_CT}::FLOAT, 0)/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'baseline_value_ct,threshold_value', 'distinct_value_ct,distinct_value_ct', 'Distinct Value Count at Baseline,Min Expected Value Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected distinct value count', 'Value Count tests that the count of unique values present in the column has not dropped since baseline. The test is relevant for cumulative datasets, where old records are retained, or for any dataset where you would expect a set number of distinct values should be present. 
A failure here would indicate missing records or a change in categories or value assignment.', 'Y', 'line_chart', NULL), + ('1014', 'Email_Format', 'Email Format', 'Email is correctly formatted', 'Tests that non-blank, non-empty email addresses match the standard format', 'Invalid email address formats found.', 'Invalid emails', 'Number of emails that do not match standard format', 'std_pattern_match=''EMAIL''', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value', '0', 'Maximum Invalid Email Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of invalid email addresses', NULL, 'Y', 'line_chart', NULL), + ('1015', 'Future_Date', 'Past Dates', 'Latest date is prior to test run date', 'Tests that the maximum date referenced in the column is no greater than the test date, consistent with baseline data', 'Future date found when absent in baseline data.', 'Future dates', NULL, 'general_type=''D''AND future_date_ct = 0', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value', '0', 'Maximum Future Date Count', NULL, 'Fail', 'CAT', 'column', 'Timeliness', 'Recency', 'Expected count of future dates', NULL, 'Y', 'line_chart', NULL), + ('1016', 'Future_Date_1Y', 'Future Year', 'Future dates within year of test run date', 'Tests that the maximum date referenced in the column is no greater than one year beyond the test date, consistent with baseline data', 'Future date beyond one-year found when absent in baseline.', 'Future dates post 1 year', NULL, 'general_type=''D''AND future_date_ct > 0 AND max_date <=''{AS_OF_DATE}''::DATE + INTERVAL''365 DAYS''', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value', '0', 'Maximum Post 1-Year Future Date Count', NULL, 'Fail', 'CAT', 'column', 'Timeliness', 'Recency', 'Expected count of future dates beyond one year', 'Future Year looks for date values in the column that extend beyond one year after the test date. This would be appropriate for transactional dates where you would expect to find dates in the near future, but not beyond one year ahead. Errors could indicate invalid entries or possibly dummy dates representing blank values.', 'Y', 'line_chart', NULL), + ('1017', 'Incr_Avg_Shift', 'New Shift', 'New record mean is consistent with reference', 'Tests for statistically-significant shift in mean of new values for column compared to average calculated at baseline.', 'Significant shift in average of new values vs. 
baseline avg', 'Z-score of mean shift', 'Absolute Z-score (number of SD''s outside mean) of prior avg - incremental avg', 'general_type=''N'' AND distinct_value_ct > 10 AND functional_data_type ilike ''Measure%'' AND functional_data_type <> ''Measurement Spike'' AND column_name NOT ilike ''%latitude%'' AND column_name NOT ilike ''%longitude%''', '{RECORD_CT}::FLOAT*(1-FN_NORMAL_CDF({RESULT_MEASURE}::FLOAT))/NULLIF({RECORD_CT}::FLOAT, 0)', '0.75', NULL, NULL, 'baseline_value_ct,baseline_sum,baseline_avg,baseline_sd,threshold_value', 'value_ct,(avg_value * value_ct)::FLOAT,avg_value,stdev_value,2', 'Value Count at Baseline,Sum at Baseline,Mean Value at Baseline,Std Deviation at Baseline,Threshold Max Z-Score', NULL, 'Warning', 'CAT', 'column', 'Accuracy', 'Data Drift', 'Maximum Z-Score (number of SD''s beyond mean) expected', 'This is a more sensitive test than Average Shift, because it calculates an incremental difference in the average of new values compared to the average of values at baseline. This is appropriate for a cumulative dataset only, because it calculates the average of new entries based on the assumption that the count and average of records present at baseline are still present at the time of the test. This test compares the mean of new values with the standard deviation of the baseline average to calculate a Z-score. If the new mean falls outside the Z-score threshold, a shift is detected. Potential Z-score thresholds may range from 0 to 3, depending on the sensitivity you prefer. A failed test could indicate a quality issue or a legitimate shift in new data that should be noted and assessed by business users. Consider this test along with Variability Increase. If variability rises too, process, methodology or measurement flaws could be at issue. If variability remains consistent, the problem is more likely to be with the source data itself.', 'Y', 'line_chart', NULL),
+    ('1018', 'LOV_All', 'Value Match All', 'List of expected values all present in column', 'Tests that all values match a pipe-delimited list of expected values and that all expected values are present', 'Column values found don''t exactly match the expected list of values', 'Values found', NULL, NULL, '1', '1.0', NULL, NULL, 'threshold_value', NULL, 'List of Expected Values', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'List of values expected, in form (''Val1'',''Val2'')', 'This is a more restrictive form of Value Match, testing that all values in the dataset match the list provided, and also that all values present in the list appear at least once in the dataset. 
This would be appropriate for tables where all category values in the column are represented at least once.', 'Y', 'line_chart', NULL),
+    ('1019', 'LOV_Match', 'Value Match', 'All column values present in expected list', 'Tests that all values in the column match the list-of-values identified in baseline data.', 'Values not matching expected List-of-Values from baseline.', 'Non-matching records', NULL, 'functional_data_type IN (''Boolean'', ''Code'', ''Category'') AND top_freq_values > '''' AND distinct_value_ct BETWEEN 2 and 10 AND value_ct > 5', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'baseline_value,threshold_value', '''('' || SUBSTRING( CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 2) > '''' THEN '','''''' || TRIM( REPLACE ( SPLIT_PART(top_freq_values, ''|'' , 2), '''''''' , '''''''''''' ) ) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 4) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 4), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 6) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 6), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 8) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 8), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 10) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 10), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 12) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 12), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 14) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 14), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 16) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 16), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 18) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 18), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 20) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 20), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END, 2, 999) || '')'',0', 'List of Expected Values,Threshold Error Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'List of values expected, in form (''Val1'',''Val2'')', 'This tests that all values in the column match the hard-coded list provided. This is relevant when the list of allowable values is small and not expected to change often. 
Even if new values might occasionally be added, this test is useful for downstream data products to provide warning that assumptions and logic may need to change.', 'Y', 'line_chart', NULL),
+    ('1020', 'Min_Date', 'Minimum Date', 'All dates on or after set minimum', 'Tests that the earliest date referenced in the column is no earlier than baseline data', 'The earliest date value found is before the earliest value at baseline.', 'Dates prior to limit', NULL, 'general_type=''D''and min_date IS NOT NULL AND distinct_value_ct > 1', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'baseline_value,threshold_value', 'min_date,0', 'Minimum Date at Baseline,Threshold Error Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of dates prior to minimum', 'This test is appropriate for a cumulative dataset only, because it assumes all prior values are still present. It''s appropriate where new records are added with more recent dates, but old dates do not change.', 'Y', 'line_chart', NULL),
+    ('1021', 'Min_Val', 'Minimum Value', 'All values at or above set minimum', 'Tests that the minimum value present in the column is no lower than the minimum value in baseline data', 'Minimum column value less than baseline.', 'Values under limit', NULL, 'general_type=''N'' AND functional_data_type ILIKE ''Measure%'' AND min_value IS NOT NULL AND (distinct_value_ct >= 2 OR (distinct_value_ct=2 and min_value<>0 and max_value<>1))', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'baseline_value,threshold_value', 'min_value,0', 'Minimum Value at Baseline,Threshold Error Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of values under limit', 'This test is appropriate for a cumulative dataset only, assuming all prior values are still present. It is also appropriate for any measure that has an absolute, definable minimum value, or a heuristic that makes sense for valid data.', 'Y', 'line_chart', NULL),
+    ('1022', 'Missing_Pct', 'Percent Missing', 'Consistent ratio of missing values', 'Tests for statistically-significant shift in percentage of missing values in column vs. baseline data', 'Significant shift in percent of missing values vs. baseline.', 'Difference measure', 'Cohen''s H Difference (0.20 small, 0.5 mod, 0.8 large, 1.2 very large, 2.0 huge)', 'record_ct <> value_ct', '2.0 * (1.0 - fn_normal_cdf(ABS({RESULT_MEASURE}::FLOAT) / 2.0))', '0.75', NULL, NULL, 'baseline_ct,baseline_value_ct,threshold_value', 'record_ct,value_ct,2::VARCHAR(10)', 'Baseline Record Count,Baseline Value Count,Standardized Difference Measure', NULL, 'Warning', 'CAT', 'column', 'Completeness', 'Data Drift', 'Expected maximum Cohen''s H Difference', 'This test uses Cohen''s H, a statistical test to identify a significant difference between two ratios. Results are reported on a standardized scale, which can be interpreted via a rule-of-thumb from small to huge. An uptick in missing data may indicate a collection issue at the source. A larger change may indicate a processing failure. A drop in missing data may also be significant, if it affects assumptions built into analytic products downstream. 
You can refine the expected threshold value as you view legitimate results of the measure over time.', 'Y', 'line_chart', NULL),
+    ('1023', 'Monthly_Rec_Ct', 'Monthly Records', 'At least one date per month present within date range', 'Tests for presence of at least one date per calendar month within min/max date range, per baseline data', 'At least one date per month expected in min/max date range.', 'Missing months', 'Calendar months without date values present', 'functional_data_type ILIKE ''Transactional Date%'' AND date_days_present > 1 AND functional_table_type ILIKE ''%cumulative%'' AND date_months_present > 2 AND date_months_present - (datediff( ''MON'' , min_date, max_date) + 1) = 0 AND future_date_ct::FLOAT / NULLIF(value_ct, 0) <= 0.75', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT*{PRO_RECORD_CT}::FLOAT/NULLIF({DATE_MONTHS_PRESENT}::FLOAT, 0)/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value', '0', 'Threshold Count of Months without Dates', NULL, 'Fail', 'CAT', 'column', 'Completeness', 'Volume', 'Expected maximum count of calendar months without dates present', 'Monthly Records tests that at least one record is present for every calendar month within the minimum and maximum date range for the column. The test is relevant for transactional data, where you would expect at least one transaction to be recorded each month. A failure here would suggest missing records for the number of months identified without data. You can adjust the threshold to accept a number of months that you know legitimately have no records.', 'Y', 'line_chart', NULL),
+    ('1024', 'Outlier_Pct_Above', 'Outliers Above', 'Consistent outlier counts over 2 SD above mean', 'Tests that percent of outliers over 2 SD above Mean doesn''t exceed threshold', 'Percent of outliers exceeding 2 SD above the mean is greater than expected threshold.', 'Pct records over limit', NULL, 'functional_data_type = ''Measurement'' AND distinct_value_ct > 30 AND NOT distinct_value_ct = max_value - min_value + 1 AND distinct_value_ct::FLOAT/value_ct::FLOAT > 0.1 AND stdev_value::FLOAT/avg_value::FLOAT > 0.01 AND column_name NOT ILIKE ''%latitude%'' AND column_name NOT ilike ''%longitude%''', 'GREATEST(0, {RESULT_MEASURE}::FLOAT-{THRESHOLD_VALUE}::FLOAT)', '0.75', NULL, NULL, 'baseline_avg,baseline_sd,threshold_value', 'avg_value,stdev_value,0.05', 'Baseline Mean, Baseline Std Deviation, Pct Records over 2 SD', NULL, 'Warning', 'CAT', 'column', 'Accuracy', 'Data Drift', 'Expected maximum pct records over upper 2 SD limit', 'This test counts the number of data points that may be considered as outliers, determined by whether their value exceeds 2 standard deviations above the mean at baseline. Assuming a normal distribution, a small percentage (defaulted to 5%) of outliers is expected. The actual number may vary for different distributions. The expected threshold reflects the maximum percentage of outliers you expect to see. This test uses the baseline mean rather than the mean for the latest dataset to capture systemic shift as well as individual outliers. 
', 'Y', 'line_chart', NULL), + ('1025', 'Outlier_Pct_Below', 'Outliers Below', 'Consistent outlier counts under 2 SD below mean', 'Tests that percent of outliers over 2 SD below Mean doesn''t exceed threshold', 'Percent of outliers exceeding 2 SD below the mean is greater than expected threshold.', 'Pct records under limit', NULL, 'functional_data_type = ''Measurement'' AND distinct_value_ct > 30 AND NOT distinct_value_ct = max_value - min_value + 1 AND distinct_value_ct::FLOAT/value_ct::FLOAT > 0.1 AND stdev_value::FLOAT/avg_value::FLOAT > 0.01 AND column_name NOT ILIKE ''%latitude%'' AND column_name NOT ilike ''%longitude%''', 'GREATEST(0, {RESULT_MEASURE}::FLOAT-{THRESHOLD_VALUE}::FLOAT)', '0.75', NULL, NULL, 'baseline_avg,baseline_sd,threshold_value', 'avg_value,stdev_value,0.05', 'Baseline Mean, Baseline Std Deviation, Pct Records over 2 SD', NULL, 'Warning', 'CAT', 'column', 'Accuracy', 'Data Drift', 'Expected maximum pct records over lower 2 SD limit', 'This test counts the number of data points that may be considered as outliers, determined by whether their value exceeds 2 standard deviations below the mean at baseline. Assuming a normal distribution, a small percentage (defaulted to 5%) of outliers is expected. The actual number may vary for different distributions. The expected threshold reflects the maximum percentage of outliers you expect to see. This test uses the baseline mean rather than the mean for the latest dataset to capture systemic shift as well as individual outliers. ', 'Y', 'line_chart', NULL), + ('1026', 'Pattern_Match', 'Pattern Match', 'Column values match alpha-numeric pattern', 'Tests that all values in the column match the same alpha-numeric pattern identified in baseline data', 'Alpha values do not match consistent pattern in baseline.', 'Pattern Mismatches', NULL, '(functional_data_type IN (''Attribute'', ''DateTime Stamp'', ''Phone'') OR functional_data_type ILIKE ''ID%'' OR functional_data_type ILIKE ''Period%'') AND fn_charcount(top_patterns, E'' \| '' ) = 1 AND REPLACE(SPLIT_PART(top_patterns, ''|'' , 2), ''N'' , '''' ) > '''' AND distinct_value_ct > 10', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'baseline_value,threshold_value', 'TRIM(REPLACE(REPLACE(REPLACE(REGEXP_REPLACE(SPLIT_PART(top_patterns, '' | '', 2), ''([*+\-%_])'', ''[\1]'', ''g''), ''A'', ''[A-Z]''), ''N'', ''[0-9]''), ''a'', ''[a-z]'')),0', 'Pattern at Baseline,Threshold Error Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of pattern mismatches', 'This test is appropriate for character fields that are expected to appear in a consistent format. It uses pattern matching syntax as appropriate for your database: REGEX matching if available, otherwise LIKE expressions. 
The expected threshold is the number of records that fail to match the defined pattern.', 'Y', 'line_chart', NULL), + ('1028', 'Recency', 'Recency', 'Latest date within expected range of test date', 'Tests that the latest date in column is within a set number of days of the test date', 'Most recent date value not within expected days of test date.', 'Days before test', 'Number of days that most recent date precedes the date of test', 'general_type= ''D'' AND max_date <= run_date AND NOT column_name IN ( ''filedate'' , ''file_date'' ) AND NOT functional_data_type IN (''Future Date'', ''Schedule Date'') AND DATEDIFF( ''DAY'' , max_date, run_date) <= 62', '(ABS({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT*{PRO_RECORD_CT}::FLOAT/(1.0+DATEDIFF(''DAY'', ''{MIN_DATE}'', ''{MAX_DATE}''))::FLOAT)/NULLIF({RECORD_CT}::FLOAT, 0)', '0.75', NULL, NULL, 'threshold_value', 'CASE WHEN DATEDIFF( ''DAY'' , max_date, run_date) <= 3 THEN DATEDIFF(''DAY'', max_date, run_date) + 3 WHEN DATEDIFF(''DAY'', max_date, run_date) <= 7 then DATEDIFF(''DAY'', max_date, run_date) + 7 WHEN DATEDIFF( ''DAY'' , max_date, run_date) <= 31 THEN CEILING( DATEDIFF( ''DAY'' , max_date, run_date)::FLOAT / 7.0) * 7 WHEN DATEDIFF( ''DAY'' , max_date, run_date) > 31 THEN CEILING( DATEDIFF( ''DAY'' , max_date, run_date)::FLOAT / 30.0) * 30 END', 'Threshold Maximum Days before Test', NULL, 'Warning', 'CAT', 'column', 'Timeliness', 'Recency', 'Expected maximum count of days preceding test date', 'This test evaluates recency based on the latest referenced dates in the column. The test is appropriate for transactional dates and timestamps. The test can be especially valuable because timely data deliveries themselves may not assure that the most recent data is present. You can adjust the expected threshold to the maximum number of days that you expect the data to age before the dataset is refreshed.', 'Y', 'line_chart', NULL), + ('1030', 'Required', 'Required Entry', 'Required non-null value present', 'Tests that a non-null value is present in each record for the column, consistent with baseline data', 'Every record for this column is expected to be filled, but some are missing.', 'Missing values', NULL, 'record_ct = value_ct AND record_ct > 10', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value', '0', 'Threshold Missing Value Count', NULL, 'Fail', 'CAT', 'column', 'Completeness', 'Schema Drift', 'Expected count of missing values', NULL, 'Y', 'line_chart', NULL), + ('1033', 'Street_Addr_Pattern', 'Street Address', 'Enough street address entries match defined pattern', 'Tests for percent of records matching standard street address pattern.', 'Percent of values matching standard street address format is under expected threshold.', 'Percent matches', 'Percent of records that match street address pattern', '(std_pattern_match=''STREET_ADDR'') AND (avg_length <> round(avg_length)) AND (avg_embedded_spaces BETWEEN 2 AND 6) AND (avg_length < 35)', '({VALUE_CT}::FLOAT * ({RESULT_MEASURE}::FLOAT - {THRESHOLD_VALUE}::FLOAT)/100.0)/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value', '75', 'Threshold Pct that Match Address Pattern', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected percent of records that match standard street address pattern', 'The street address pattern used in this test should match the vast majority of USA addresses. 
You can adjust the threshold percent of matches based on the results you are getting -- you may well want to tighten it to make the test more sensitive to invalid entries.', 'Y', 'line_chart', NULL),
+    ('1034', 'Unique', 'Unique Values', 'Each column value is unique', 'Tests that no values for the column are repeated in multiple records.', 'Column values should be unique per row.', 'Duplicate values', 'Count of non-unique values', 'record_ct > 500 and record_ct = distinct_value_ct and value_ct > 0', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value', '0', 'Threshold Duplicate Value Count', NULL, 'Fail', 'CAT', 'column', 'Uniqueness', 'Schema Drift', 'Expected count of duplicate values', 'This test is ideal when the database itself does not enforce a primary key constraint on the table. It serves as an independent check on uniqueness. It''s also useful when there are a small number of exceptions to uniqueness, which can be reflected in the expected threshold count of duplicates.', 'Y', 'line_chart', NULL),
+    ('1035', 'Unique_Pct', 'Percent Unique', 'Consistent ratio of unique values', 'Tests for statistically-significant shift in percentage of unique values vs. baseline data.', 'Significant shift in percent of unique values vs. baseline.', 'Difference measure', 'Cohen''s H Difference (0.20 small, 0.5 mod, 0.8 large, 1.2 very large, 2.0 huge)', 'distinct_value_ct > 10 AND functional_data_type NOT ILIKE ''Measurement%''', '2.0 * (1.0 - fn_normal_cdf(ABS({RESULT_MEASURE}::FLOAT) / 2.0))', '0.75', NULL, NULL, 'baseline_value_ct,baseline_unique_ct,threshold_value', 'value_ct,distinct_value_ct,0.5', 'Value Count at Baseline,Distinct Value Count at Baseline,Standardized Difference Measure (0 to 1)', NULL, 'Warning', 'CAT', 'column', 'Uniqueness', 'Data Drift', 'Expected maximum Cohen''s H Difference', 'You can think of this as a test of similarity that measures whether the percentage of unique values is consistent with the percentage at baseline. A significant change might indicate duplication or a telling shift in cardinality between entities. The test uses Cohen''s H, a statistical test to identify a significant difference between two ratios. Results are reported on a standardized scale, which can be interpreted via a rule-of-thumb from small to huge. 
You can refine the expected threshold value as you view legitimate results of the measure over time.', 'Y', 'line_chart', NULL),
+    ('1036', 'US_State', 'US State', 'Column value is two-letter US state code', 'Tests that the recorded column value is a valid US state.', 'Column Value is not a valid US state.', 'Not US States', 'Values that do not match 2-character US state abbreviations.', 'general_type= ''A'' AND column_name ILIKE ''%state%'' AND distinct_value_ct < 70 AND max_length = 2', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value', '0', 'Threshold Count not Matching State Abbreviations', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of values that are not US state abbreviations', 'This test validates entries against a fixed list of two-character US state codes and related Armed Forces codes.', 'Y', 'line_chart', NULL),
+    ('1037', 'Weekly_Rec_Ct', 'Weekly Records', 'At least one date per week present within date range', 'Tests for presence of at least one date per calendar week within min/max date range, per baseline data', 'At least one date per week expected in min/max date range.', 'Missing weeks', 'Calendar weeks without date values present', 'functional_data_type ILIKE ''Transactional Date%'' AND date_days_present > 1 AND functional_table_type ILIKE ''%cumulative%'' AND date_weeks_present > 3 AND date_weeks_present - (DATEDIFF(''week'', ''1800-01-05''::DATE, max_date) - DATEDIFF(''week'', ''1800-01-05''::DATE, min_date) + 1) = 0 AND future_date_ct::FLOAT / NULLIF(value_ct, 0) <= 0.75', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT*{PRO_RECORD_CT}::FLOAT/NULLIF({DATE_WEEKS_PRESENT}::FLOAT, 0)/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value', '0', 'Threshold Weeks without Dates', NULL, 'Fail', 'CAT', 'column', 'Completeness', 'Volume', 'Expected maximum count of calendar weeks without dates present', 'Weekly Records tests that at least one record is present for every calendar week within the minimum and maximum date range for the column. The test is relevant for transactional data, where you would expect at least one transaction to be recorded each week. A failure here would suggest missing records for the number of weeks identified without data. You can adjust the threshold to accept a number of weeks that you know legitimately have no records.', 'Y', 'line_chart', NULL),
+    ('1040', 'Variability_Increase', 'Variability Increase', 'Variability has increased above threshold', 'Tests that the spread or dispersion of column values has increased significantly over baseline, indicating a drop in stability of the measure.', 'The Standard Deviation of the measure has increased beyond the defined threshold. 
This could signal a change in a process or a data quality issue.', 'Pct SD shift', 'Percent of baseline Standard Deviation', 'general_type = ''N'' AND functional_data_type ilike ''Measure%'' AND functional_data_type <> ''Measurement Spike'' AND column_name NOT ilike ''%latitude%'' AND column_name NOT ilike ''%longitude%'' AND value_ct <> distinct_value_ct AND distinct_value_ct > 10 AND stdev_value > 0 AND avg_value IS NOT NULL AND NOT (distinct_value_ct = max_value - min_value + 1 AND distinct_value_ct > 2)', '1', '0.75', NULL, NULL, 'baseline_sd,threshold_value', 'stdev_value,120', 'Std Deviation at Baseline,Expected Maximum Percent', NULL, 'Warning', 'CAT', 'column', 'Accuracy', 'Data Drift', 'Expected maximum pct of baseline Standard Deviation (SD)', 'This test looks for percent shifts in standard deviation as a measure of the stability of a measure over time. A significant change could indicate that new values are erroneous, or that the cohort being evaluated is significantly different from baseline. An increase in particular could mark new problems in measurement, a more heterogeneous cohort, or that significant outliers have been introduced. Consider this test along with Average Shift and New Shift. If the average shifts as well, there may be a fundamental shift in the dataset or process used to collect the data point. This might suggest a data shift that should be noted and assessed by business users. If the average does not shift, this may point to a data quality or data collection problem. ', 'Y', 'line_chart', NULL), + ('1041', 'Variability_Decrease', 'Variability Decrease', 'Variability has decreased below threshold', 'Tests that the spread or dispersion of column values has decreased significantly over baseline, indicating a shift in stability of the measure. This could signal a change in a process or a data quality issue.', 'The Standard Deviation of the measure has decreased below the defined threshold. This could signal a change in a process or a data quality issue.', 'Pct SD shift', 'Percent of baseline Standard Deviation', 'general_type = ''N'' AND functional_data_type ilike ''Measure%'' AND functional_data_type <> ''Measurement Spike'' AND column_name NOT ilike ''%latitude%'' AND column_name NOT ilike ''%longitude%'' AND value_ct <> distinct_value_ct AND distinct_value_ct > 10 AND stdev_value > 0 AND avg_value IS NOT NULL AND NOT (distinct_value_ct = max_value - min_value + 1 AND distinct_value_ct > 2)', '1', '0.75', NULL, NULL, 'baseline_sd,threshold_value', 'stdev_value, 80', 'Std Deviation at Baseline,Expected Minimum Percent', NULL, 'Warning', 'CAT', 'column', 'Accuracy', 'Data Drift', 'Expected minimum pct of baseline Standard Deviation (SD)', 'This test looks for percent shifts in standard deviation as a measure of the stability of a measure over time. A significant change could indicate that new values are erroneous, or that the cohort being evaluated is significantly different from baseline. A decrease in particular could indicate an improved process, better precision in measurement, the elimination of outliers, or a more homogeneous cohort. 
', 'Y', 'line_chart', NULL), + ('1042', 'Valid_Month', 'Valid Month', 'Valid calendar month in expected format', 'Tests for the presence of a valid representation of a calendar month consistent with the format at baseline.', 'Column values are not a valid representation of a calendar month consistent with the format at baseline.', 'Invalid months', NULL, 'functional_data_type = ''Period Month''', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value,baseline_value', '0,CASE WHEN max_length > 3 AND initcap(min_text) = min_text THEN ''''''January'''',''''February'''',''''March'''',''''April'''',''''May'''',''''June'''',''''July'''',''''August'''',''''September'''',''''October'''',''''November'''',''''December'''''' WHEN max_length > 3 AND upper(min_text) = min_text THEN ''''''JANUARY'''',''''FEBRUARY'''',''''MARCH'''',''''APRIL'''',''''MAY'''',''''JUNE'''',''''JULY'''',''''AUGUST'''',''''SEPTEMBER'''',''''OCTOBER'''',''''NOVEMBER'''',''''DECEMBER'''''' WHEN max_length > 3 AND lower(min_text) = min_text THEN ''''''january'''',''''february'''',''''march'''',''''april'''',''''may'''',''''june'''',''''july'''',''''august'''',''''september'''',''''october'''',''''november'''',''''december'''''' WHEN max_length = 3 AND initcap(min_text) = min_text THEN ''''''Jan'''',''''Feb'''',''''Mar'''',''''Apr'''',''''May'''',''''Jun'''',''''Jul'''',''''Aug'''',''''Sep'''',''''Oct'''',''''Nov'''',''''Dec'''''' WHEN max_length = 3 AND upper(min_text) = min_text THEN ''''''JAN'''',''''FEB'''',''''MAR'''',''''APR'''',''''MAY'''',''''JUN'''',''''JUL'''',''''AUG'''',''''SEP'''',''''OCT'''',''''NOV'''',''''DEC'''''' WHEN max_length = 3 AND lower(min_text) = min_text THEN ''''''jan'''',''''feb'''',''''mar'''',''''apr'''',''''may'''',''''jun'''',''''jul'''',''''aug'''',''''sep'''',''''oct'''',''''nov'''',''''dec'''''' WHEN max_length = 2 AND min_text = ''01'' THEN ''''''01'''',''''02'''',''''03'''',''''04'''',''''05'''',''''06'''',''''07'''',''''08'''',''''09'''',''''10'''',''''11'''',''''12'''''' WHEN max_length = 2 AND min_text = ''1'' THEN ''''''1'''',''''2'''',''''3'''',''''4'''',''''5'''',''''6'''',''''7'''',''''8'''',''''9'''',''''10'''',''''11'''',''''12'''''' WHEN min_value = 1 THEN ''1,2,3,4,5,6,7,8,9,10,11,12'' ELSE ''NULL'' END', 'Threshold Invalid Months,Valid Month List', 'The acceptable number of records with invalid months present.|List of valid month values for this field, in quotes if field is numeric, separated by commas.', 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of invalid months', NULL, 'N', 'line_chart', NULL), + ('1043', 'Valid_Characters', 'Valid Characters', 'Column contains no invalid characters', 'Tests for the presence of non-printing characters, leading spaces, or surrounding quotes.', 'Invalid characters, such as non-printing characters, leading spaces, or surrounding quotes, were found.', 'Invalid records', 'Expected count of values with invalid characters', 'general_type = ''A''', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '0.75', NULL, NULL, 'threshold_value', '0', NULL, 'The acceptable number of records with invalid character values present.', 'Warning', 'CAT', 'column', 'Validity', 'Schema Drift', 'Threshold Invalid Value Count', 'This test looks for the presence of non-printing ASCII characters that are considered non-standard in basic text processing. It also identifies leading spaces and values enclosed in quotes. 
Values that fail this test may be artifacts of data conversion, or just more difficult to process or analyze downstream.', 'N', 'line_chart', NULL), + ('1044', 'Valid_US_Zip', 'Valid US Zip', 'Valid USA Postal Codes', 'Tests that postal codes match the 5 or 9 digit standard US format', 'Invalid US Zip Code formats found.', 'Invalid Zip Codes', 'Expected count of values with invalid Zip Codes', 'functional_data_type = ''Zip''', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '0.75', NULL, NULL, 'threshold_value', '0', NULL, NULL, 'Warning', 'CAT', 'column', 'Validity', 'Schema Drift', 'Threshold Invalid Value Count', NULL, 'Y', 'line_chart', NULL), + ('1045', 'Valid_US_Zip3', 'Valid US Zip-3 ', 'Valid USA Zip-3 Prefix', 'Tests that postal codes match the 3 digit format of a regional prefix.', 'Invalid 3-digit US Zip Code regional prefix formats found.', 'Invalid Zip-3 Prefix', 'Expected count of values with invalid Zip-3 Prefix Codes', 'functional_data_type = ''Zip3''', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '0.75', NULL, NULL, 'threshold_value', '0', NULL, NULL, 'Warning', 'CAT', 'column', 'Validity', 'Schema Drift', 'Threshold Invalid Zip3 Count', 'This test looks for the presence of values that fail to match the three-digit numeric code expected for US Zip Code regional prefixes. These prefixes are often used to roll up Zip Code data to a regional level, and may be critical to anonymize detailed data and protect PID. Depending on your needs and regulatory requirements, longer zip codes could place PID at risk.', 'Y', 'line_chart', NULL), + ('1006', 'Condition_Flag', 'Custom Condition', 'Column values match pre-defined condition', 'Tests that each record in the table matches a pre-defined, custom condition', 'Value(s) found not matching defined condition.', 'Values Failing', NULL, NULL, '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', 'Test Focus', 'Specify a brief descriptor of the focus of this test that is unique within this Test Suite for the Table and Test Type. This distinguishes this test from others of the same type on the same table. Example: `Quantity Consistency` if you are testing that quantity ordered matches quantity shipped.', 'threshold_value,custom_query', NULL, 'Threshold Error Count,Custom SQL Expression (TRUE on error)', 'The number of errors that are acceptable before test fails.|Expression should evaluate to TRUE to register an error or FALSE if no error. An expression can reference only columns in the selected table.', 'Fail', 'CAT', 'custom', 'Validity', 'Schema Drift', 'Count of records that don''t meet test condition', 'Custom Condition is a business-rule test for a user-defined error condition based on the value of one or more columns. The condition is applied to each record within the table, and the count of records failing the condition is added up. If that count exceeds a threshold of errors, the test as a whole is failed. This test is ideal for error conditions that TestGen cannot automatically infer, and any condition that involves the values of more than one column in the same record. Performance of this test is fast, since it is performed together with other aggregate tests. 
Interpretation is based on the user-defined meaning of the test.', 'Y', 'line_chart', NULL), + + ('1031', 'Row_Ct', 'Row Count', 'Number of rows is at or above threshold', 'Tests that the count of records has not decreased from the baseline count.', 'Row count less than baseline count.', 'Row count', NULL, 'TEMPLATE', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({THRESHOLD_VALUE}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value', NULL, 'Threshold Minimum Record Count', NULL, 'Fail', 'CAT', 'table', 'Completeness', 'Volume', 'Expected minimum row count', 'Because this tests the row count against a constant minimum threshold, it''s appropriate for any dataset, as long as the number of rows doesn''t radically change from refresh to refresh. But it''s not responsive to change over time. You may want to adjust the threshold periodically if you are dealing with a cumulative dataset.', 'Y', 'line_chart', NULL), + ('1032', 'Row_Ct_Pct', 'Row Range', 'Number of rows within percent range of threshold', 'Tests that the count of records is within a percentage above or below the baseline count.', 'Row Count is outside of threshold percent of baseline count.', 'Percent of baseline', 'Row count percent above or below baseline', 'TEMPLATE', '(100.0 - {RESULT_MEASURE}::FLOAT)/100.0', '1.0', NULL, NULL, 'baseline_ct,threshold_value', NULL, 'Baseline Record Count,Threshold Pct Above or Below Baseline', NULL, 'Fail', 'CAT', 'table', 'Completeness', 'Volume', 'Expected percent window below or above baseline', 'This test is better than Row Count for an incremental or windowed dataset where you would expect the row count to range within a percentage of baseline.', 'Y', 'line_chart', NULL), + + ('1008', 'CUSTOM', 'Custom Test', 'Custom-defined business rule', 'Custom SQL Query Test', 'Errors were detected according to test definition.', 'Errors found', 'Count of errors identified by query', NULL, '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', 'Test Focus', 'Specify a brief descriptor of the focus of this test that is unique within this Test Suite for the Table and Test Type. This distinguishes this test from others of the same type on the same table. Example: `Order Total Matches Detail` if you are testing that the total in one table matches the sum of lines in another.', 'custom_query', NULL, 'Custom SQL Query Returning Error Records', 'Query should return records indicating one or more errors. The test passes if no records are returned. Results of the query will be shown when you click `Review Source Data` for a failed test, so be sure to include enough data in your results to follow-up. \n\nA query can refer to any tables in the database. You must hard-code the schema or use `{DATA_SCHEMA}` to represent the schema defined for the Table Group.', 'Fail', 'QUERY', 'custom', 'Accuracy', 'Data Drift', 'Expected count of errors found by custom query', 'This business-rule test is highly flexible, covering any error state that can be expressed by a SQL query against one or more tables in the database. In operation, the user-defined query is embedded within a parent query returning the count of error rows identified. Any row returned by the query is interpreted as a single error condition in the test. Note that this query is run independently of other tests, and that performance will be slower, depending in large part on the efficiency of the query you write. Interpretation is based on the user-defined meaning of the test. 
Your query might be written to return errors in individual rows identified by joining tables. Or it might return an error based on a multi-column aggregate condition returning a single row if an error is found. This query is run separately when you click `Review Source Data` from Test Results, so be sure to include enough data in your results to follow-up.', 'Y', 'line_chart', NULL),
+
+    ('1500', 'Aggregate_Balance', 'Aggregate Balance', 'Aggregate values per group match reference', 'Tests for exact match in aggregate values for each set of column values vs. reference dataset', 'Aggregate measure per set of column values does not exactly match reference dataset.', 'Mismatched measures', NULL, NULL, '1', '1.0', 'Aggregate Expression', 'Specify an aggregate column expression: one of `SUM([column_name])` or `COUNT([column_name])`', 'subset_condition,groupby_names,having_condition,match_schema_name,match_table_name,match_column_names,match_subset_condition,match_groupby_names,match_having_condition', NULL, 'Record Subset Condition,Grouping Columns,Group Subset Condition,Matching Schema Name,Matching Table Name,Matching Aggregate Expression,Matching Record Subset Condition,Matching Grouping Columns,Matching Group Subset Condition', 'Condition defining a subset of records in main table, written like a condition within a SQL WHERE clause - OPTIONAL|Category columns in main table separated by commas (e.g. GROUP BY columns)|Condition defining a subset of aggregate records in main table (e.g. HAVING clause) - OPTIONAL|Schema location of matching table|Matching table name|Aggregate column expression in matching table: one of `SUM([column_name])` or `COUNT([column_name])`|Condition defining a subset of records in matching table, written like a condition within a SQL WHERE clause - OPTIONAL|Category columns in matching table separated by commas (e.g. GROUP BY columns)|Condition defining a subset of aggregate records in matching table (e.g. HAVING clause) - OPTIONAL', 'Fail', 'QUERY', 'referential', 'Consistency', 'Data Drift', 'Expected count of group totals not matching aggregate value', 'This test compares sums or counts of a column rolled up to one or more category combinations across two different tables. Both tables must be accessible at the same time. It''s ideal for confirming that two datasets exactly match -- that the sum of a measure or count of a value hasn''t changed or shifted between categories. Use this test to compare a raw and processed version of the same dataset, or to confirm that an aggregated table exactly matches the detail table that it''s built from. An error here means that one or more value combinations fail to match. 
New categories or combinations will cause failure.', 'Y', 'line_chart', NULL),
+    ('1501', 'Aggregate_Minimum', 'Aggregate Minimum', 'Aggregate values per group are at or above reference', 'Tests that aggregate values for each set of column values are at least the same as reference dataset', 'Aggregate measure per set of column values is not at least the same as reference dataset.', 'Mismatched measures', NULL, NULL, '1', '1.0', 'Aggregate Expression', 'Specify an aggregate column expression: one of `SUM([column_name])` or `COUNT([column_name])`', 'subset_condition,groupby_names,having_condition,match_schema_name,match_table_name,match_column_names,match_subset_condition,match_groupby_names,match_having_condition', NULL, 'Record Subset Condition,Grouping Columns,Group Subset Condition,Matching Schema Name,Matching Table Name,Matching Aggregate Expression,Matching Record Subset Condition,Matching Grouping Columns,Matching Group Subset Condition', 'Condition defining a subset of records in main table, written like a condition within a SQL WHERE clause - OPTIONAL|Category columns in main table separated by commas (e.g. GROUP BY columns)|Condition defining a subset of aggregate records in main table (e.g. HAVING clause) - OPTIONAL|Schema location of reference table|Reference table name|Aggregate column expression in reference table (e.g. `SUM(sales)`)|Condition defining a subset of records in reference table, written like a condition within a SQL WHERE clause - OPTIONAL|Category columns in reference table separated by commas (e.g. GROUP BY columns)|Condition defining a subset of aggregate records in reference table (e.g. HAVING clause) - OPTIONAL', 'Fail', 'QUERY', 'referential', 'Accuracy', 'Data Drift', 'Expected count of group totals below aggregate value', 'This test compares sums or counts of a column rolled up to one or more category combinations, but requires a match or increase in the aggregate value, rather than an exact match, across two different tables. Both tables must be accessible at the same time. Use this to confirm that aggregate values have not dropped for any set of categories, even if some values may rise. This test is useful to compare an older and newer version of a cumulative dataset. An error here means that one or more values per category set fail to match or exceed the prior dataset. New categories or combinations are allowed (but can be restricted independently with a Combo_Match test). Both tables must be present to run this test.', 'Y', 'line_chart', NULL),
+    ('1502', 'Combo_Match', 'Reference Match', 'Column values or combinations found in reference', 'Tests for the presence of one or a set of column values in a reference table', 'Column value combinations are not found in reference table values.', 'Missing values', NULL, NULL, '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', 'Categorical Column List', 'Specify one or more Categorical columns, separated by commas. \n\nDo not use continuous measurements here. 
Do not use numeric values unless they represent discrete categories.', 'subset_condition,having_condition,match_schema_name,match_table_name,match_groupby_names,match_subset_condition,match_having_condition', NULL, 'Record Subset Condition,Group Subset Condition,Reference Schema Name,Reference Table Name,Matching Columns,Matching Record Subset Condition,Matching Group Subset Condition', 'Condition defining a subset of records in main table to evaluate, written like a condition within a SQL WHERE clause - OPTIONAL|Condition based on aggregate expression used to exclude value combinations in source table, written like a condition within a SQL HAVING clause (e.g. `SUM(sales) < 100`) - OPTIONAL|Schema location of matching table|Matching table name|Column Names in reference table used to validate source table values (separated by commas)|Condition defining a subset of records in reference table to match against, written like a condition within a SQL WHERE clause - OPTIONAL|Condition based on aggregate expression used to exclude value combinations in reference table, written like a condition within a SQL HAVING clause (e.g. `SUM(sales) < 100`) - OPTIONAL', 'Fail', 'QUERY', 'referential', 'Validity', 'Schema Drift', 'Expected count of non-matching value combinations', 'This test verifies that values, or combinations of values, that are present in the main table are also found in a reference table. This is a useful test for referential integrity between fact and dimension tables. You can also use it to confirm the validity of a code or category, or of combinations of values that should only be found together within each record, such as product/size/color. An error here means that one or more category combinations in the main table are not found in the reference table. Both tables must be present to run this test.', 'Y', 'line_chart', NULL),
+    ('1503', 'Distribution_Shift', 'Distribution Shift', 'Probability distribution consistent with reference', 'Tests the closeness of match between two distributions of aggregate measures across combinations of column values, using Jensen-Shannon Divergence test', 'Divergence between two distributions exceeds specified threshold.', 'Divergence level (0-1)', 'Jensen-Shannon Divergence, from 0 (identical distributions), to 1.0 (max divergence)', NULL, '1', '0.75', 'Categorical Column List', 'Specify one or more Categorical columns, separated by commas. Do not use continuous measurements here. Do not use numeric values unless they represent discrete categories.', 'subset_condition,match_schema_name,match_table_name,match_groupby_names,match_subset_condition', NULL, 'Record Subset Condition,Reference Schema Name,Reference Table Name,Matching Columns to Compare,Matching Record Subset Condition', 'Condition defining a subset of records in main table to evaluate, written like a condition within a SQL WHERE clause - OPTIONAL|Schema location of matching table|Matching table name|Column Names in reference table used to compare counts with source table values (separated by commas)|Condition defining a subset of records in reference table to match against, written like a condition within a SQL WHERE clause - OPTIONAL', 'Warning', 'QUERY', 'referential', 'Consistency', 'Data Drift', 'Expected maximum divergence level between 0 and 1', 'This test measures the similarity of two sets of counts per categories, by using their proportional counts as probability distributions. 
Using Jensen-Shannon divergence, a measure of relative entropy or difference between two distributions, the test assigns a score ranging from 0, meaning that the distributions are identical, to 1, meaning that the distributions are completely unrelated. This test can be used to compare datasets that may not match exactly, but should have similar distributions. For example, it is a useful sanity check for data from different sources that you would expect to have a consistent spread, such as shipment of building materials per state and construction projects by state. Scores can be compared over time even if the distributions are not identical -- a dataset can be expected to maintain a comparable divergence score with a reference dataset over time. Both tables must be present to run this test.', 'Y', 'line_chart', NULL),
+    ('1504', 'Aggregate_Balance_Percent', 'Aggregate Balance Percent', 'Aggregate measure per group within percent of reference', 'Tests that aggregate measure for each set of column values fall within a percent range above or below the measure for reference dataset', 'Aggregate measure per set of column values is outside percent range of reference dataset.', 'Mismatched measures', NULL, NULL, '1', '1.0', 'Aggregate Expression', 'Specify an aggregate column expression: one of `SUM([column_name])` or `COUNT([column_name])`', 'subset_condition,groupby_names,having_condition,match_schema_name,match_table_name,match_column_names,match_subset_condition,match_groupby_names,match_having_condition,lower_tolerance,upper_tolerance', NULL, 'Record Subset Condition,Grouping Columns,Group Subset Condition,Matching Schema Name,Matching Table Name,Matching Aggregate Expression,Matching Record Subset Condition,Matching Grouping Columns,Matching Group Subset Condition,Lower Tolerance Percent,Upper Tolerance Percent', 'Condition defining a subset of records in main table, written like a condition within a SQL WHERE clause - OPTIONAL|Category columns in main table separated by commas (e.g. GROUP BY columns)|Condition defining a subset of aggregate records in main table (e.g. HAVING clause) - OPTIONAL|Schema location of matching table|Matching table name|Aggregate column expression in matching table: one of `SUM([column_name])` or `COUNT([column_name])`|Condition defining a subset of records in matching table, written like a condition within a SQL WHERE clause - OPTIONAL|Category columns in matching table separated by commas (e.g. GROUP BY columns)|Condition defining a subset of aggregate records in matching table (e.g. HAVING clause) - OPTIONAL|Allowable tolerance below the reference measure expressed as a percent|Allowable tolerance above the reference measure expressed as a percent', 'Fail', 'QUERY', 'referential', 'Consistency', 'Data Drift', 'Expected count of group totals not matching aggregate value', 'This test compares sums or counts of a column rolled up to one or more category combinations across two different tables. Both tables must be accessible at the same time. Use it to confirm that two datasets closely match within the tolerance you set -- that the sum of a measure or count of a value remains sufficiently consistent between categories. You could use this test to compare sales per product within one month to another, when you want to be alerted if the difference for any product falls outside of the range defined as 5% below to 10% above the prior month. An error here means that one or more value combinations fail to match within the set tolerances. 
New categories or combinations will cause failure.', 'Y', 'line_chart', NULL),
+    ('1505', 'Aggregate_Balance_Range', 'Aggregate Balance Range', 'Aggregate measure per group within hard range of reference', 'Tests that aggregate measure for each set of column values fall within a hard range above or below the measure for reference dataset', 'Aggregate measure per set of column values is outside expected range of reference dataset.', 'Mismatched measures', NULL, NULL, '1', '1.0', 'Aggregate Expression', 'Specify an aggregate column expression: one of `SUM([column_name])` or `COUNT([column_name])`', 'subset_condition,groupby_names,having_condition,match_schema_name,match_table_name,match_column_names,match_subset_condition,match_groupby_names,match_having_condition,lower_tolerance,upper_tolerance', NULL, 'Record Subset Condition,Grouping Columns,Group Subset Condition,Matching Schema Name,Matching Table Name,Matching Aggregate Expression,Matching Record Subset Condition,Matching Grouping Columns,Matching Group Subset Condition,Lower Tolerance Constant,Upper Tolerance Constant', 'Condition defining a subset of records in main table, written like a condition within a SQL WHERE clause - OPTIONAL|Category columns in main table separated by commas (e.g. GROUP BY columns)|Condition defining a subset of aggregate records in main table (e.g. HAVING clause) - OPTIONAL|Schema location of matching table|Matching table name|Aggregate column expression in matching table: one of `SUM([column_name])` or `COUNT([column_name])`|Condition defining a subset of records in matching table, written like a condition within a SQL WHERE clause - OPTIONAL|Category columns in matching table separated by commas (e.g. GROUP BY columns)|Condition defining a subset of aggregate records in matching table (e.g. HAVING clause) - OPTIONAL|Allowable tolerance below the reference measure expressed as a constant value|Allowable tolerance above the reference measure expressed as a constant value', 'Fail', 'QUERY', 'referential', 'Consistency', 'Data Drift', 'Expected count of group totals not matching aggregate value', 'This test compares sums or counts of a column rolled up to one or more category combinations across two different tables. Both tables must be accessible at the same time. Use it to confirm that two datasets closely match within the tolerances you define as specific values above or below the aggregate measure for the same categories in the reference dataset -- that the sum of a measure or count of a value remains sufficiently consistent between categories. For instance, you can use this test to compare sales per product within one month to another, when you want to be alerted if the difference for any product falls outside of the range defined as 10000 dollars above or below the prior week. An error here means that one or more value combinations fail to match within the set tolerances. New categories or combinations will cause failure.', 'Y', 'line_chart', NULL),
+    ('1508', 'Timeframe_Combo_Gain', 'Timeframe No Drops', 'Latest timeframe has at least all value combinations from prior period', 'Tests that column values in most recent time-window include at least same as prior time window', 'Column values in most recent time-window don''t include all values in prior window.', 'Mismatched values', NULL, NULL, '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', 'Categorical Column List', 'Specify one or more Categorical columns, separated by commas. 
Make sure not to use continuous measurements here. Do not use numeric values unless they represent discrete categories.', 'window_date_column,window_days,subset_condition', NULL, 'Date Column for Time Windows,Time Window in Days,Record Subset Condition', 'The date column used to define the time windows. This must be a DATE or DATETIME type.|Length in days of the time window. The test will compare the most recent period of days to the prior period of the same duration.|Condition defining a subset of records in main table to evaluate, written like a condition within a SQL WHERE clause - OPTIONAL', 'Fail', 'QUERY', 'referential', 'Consistency', 'Data Drift', 'Expected count of missing value combinations', 'This test checks a single transactional table to verify that categorical values or combinations that are present in the most recent time window you define include at least all those found in the prior time window of the same duration. Missing values in the latest time window will trigger the test to fail. New values are permitted. Use this test to confirm that codes or categories are not lost across successive time periods in a transactional table.', 'Y', 'line_chart', NULL), + ('1509', 'Timeframe_Combo_Match', 'Timeframe Match', 'Column value combinations from latest timeframe same as prior period', 'Tests for presence of same column values in most recent time-window vs. prior time window', 'Column values don''t match in most recent time-windows.', 'Mismatched values', NULL, NULL, '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', 'Categorical Column List', 'Specify one or more Categorical columns, separated by commas. Do not use continuous measurements here. Do not use numeric values unless they represent discrete categories.', 'window_date_column,window_days,subset_condition', NULL, 'Date Column for Time Windows,Time Window in Days,Record Subset Condition', NULL, 'Fail', 'QUERY', 'referential', 'Consistency', 'Data Drift', 'Expected count of non-matching value combinations', 'This test checks a single transactional table (such as a fact table) to verify that categorical values or combinations that are present in the most recent time window you define match those found in the prior time window of the same duration. New or missing values in the latest time window will trigger the test to fail. Use this test to confirm the consistency in the occurrence of codes or categories across successive time periods in a transactional table.', 'Y', 'line_chart', NULL), + ('1510', 'Dupe_Rows', 'Duplicate Rows', 'Rows are not duplicated in table', 'Tests for the absence of duplicate rows based on unique combination of column values', 'Column value combinations are duplicated in the table.', 'Duplicate records', NULL, NULL, '(({RESULT_MEASURE}-{THRESHOLD_VALUE}))::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', 'null', 'null', 'groupby_names', NULL, 'Columns to Compare', 'List of columns in the table that define a duplicate record when the combination of values is repeated on multiple rows', 'Fail', 'QUERY', 'table', 'Uniqueness', 'Schema Drift', 'Expected count of duplicate value combinations', 'This test verifies that combinations of values are not repeated within the table. By default when auto-generated, the test considers all columns to protect against duplication of entire rows. If you know the minimum columns that should constitute a unique record, such as a set of ID''s, you should use those to make the test as sensitive as possible. 
Alternatively, if you know of columns you can always exclude, such as file_date or refresh_snapshot_id, remove them to tighten the test somewhat.', 'Y', 'line_chart', NULL), + ('1511', 'Stale_Table', 'Stale Table', 'Stale Table Not Updated', 'Confirms whether table has been updated based on data fingerprint', 'Table has not been updated.', 'Was Change Detected', NULL, 'TEMPLATE', '(({RESULT_MEASURE}-{THRESHOLD_VALUE}))::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '0.0', 'null', 'null', 'history_calculation,history_lookback,subset_condition,custom_query', NULL, 'History Aggregate,History Lookback,Record Subset Condition,Fingerprint Expression', 'Aggregate calculation to be performed on the N lookback results|Last N tests to use for history aggregate calculation|Condition defining a subset of records in main table|String expression combining key column measures into a distinct representation of table state', 'Log', 'QUERY', 'table', 'Recency', 'Recency', 'Most recent prior table fingerprint', 'This test compares the current table fingerprint, calculated signature of column contents, to confirm that the table has been updated. The table fingerprint is derived from a set of values and aggregates from columns most likely to change. This test allows you to track the schedule and frequency of updates and refreshes to the table.', 'Y', 'binary_chart', '{"legend":{"labels":{"0":"Stale","1":"Updated"}}}') ; @@ -169,7 +170,8 @@ VALUES ('Monitor', 'Recency'), ('Monitor', 'Row_Ct_Pct'), ('Monitor', 'Daily_Record_Ct'), ('Monitor', 'Monthly_Rec_Ct'), - ('Monitor', 'Weekly_Rec_Ct'); + ('Monitor', 'Weekly_Rec_Ct'), + ('Monitor', 'Stale_Table'); TRUNCATE TABLE test_templates; diff --git a/testgen/template/dbsetup/060_create_standard_views.sql b/testgen/template/dbsetup/060_create_standard_views.sql index 563e5224..0eea3855 100644 --- a/testgen/template/dbsetup/060_create_standard_views.sql +++ b/testgen/template/dbsetup/060_create_standard_views.sql @@ -128,6 +128,8 @@ SELECT p.project_name, r.result_status, r.input_parameters, r.result_message, + tt.result_visualization, + tt.result_visualization_params, CASE WHEN result_code <> 1 THEN r.severity END as severity, CASE WHEN result_code <> 1 THEN r.disposition diff --git a/testgen/template/dbupgrade/0501_incremental_upgrade.sql b/testgen/template/dbupgrade/0501_incremental_upgrade.sql index 57b13840..49653f83 100644 --- a/testgen/template/dbupgrade/0501_incremental_upgrade.sql +++ b/testgen/template/dbupgrade/0501_incremental_upgrade.sql @@ -1,3 +1,8 @@ SET SEARCH_PATH TO {SCHEMA_NAME}; ALTER TABLE test_runs ADD COLUMN log_ct INTEGER; + +DROP VIEW IF EXISTS v_test_results; + +ALTER TABLE test_types ADD COLUMN result_visualization VARCHAR(50) DEFAULT 'line_chart'; +ALTER TABLE test_types ADD COLUMN result_visualization_params TEXT DEFAULT NULL; diff --git a/testgen/template/execution/ex_update_history_threshold_last_n.sql b/testgen/template/execution/ex_update_history_threshold_last_n.sql index 414de787..b8b9d532 100644 --- a/testgen/template/execution/ex_update_history_threshold_last_n.sql +++ b/testgen/template/execution/ex_update_history_threshold_last_n.sql @@ -14,7 +14,7 @@ WITH stats AS ( FROM test_results tr WHERE tr.test_definition_id = d.id ORDER BY tr.test_time DESC - LIMIT d.history_lookback + LIMIT CASE WHEN d.history_calculation = 'Value' THEN 1 ELSE d.history_lookback END ) AS r ON TRUE WHERE d.test_suite_id = '{TEST_SUITE_ID}' AND d.test_active = 'Y' diff --git a/testgen/template/gen_query_tests/gen_table_changed_test.sql 
b/testgen/template/gen_query_tests/gen_table_changed_test.sql index 836a65fd..d87bf21e 100644 --- a/testgen/template/gen_query_tests/gen_table_changed_test.sql +++ b/testgen/template/gen_query_tests/gen_table_changed_test.sql @@ -1,7 +1,7 @@ INSERT INTO test_definitions (table_groups_id, profile_run_id, test_type, test_suite_id, schema_name, table_name, skip_errors, test_active, last_auto_gen_date, profiling_as_of_date, - lock_refresh, custom_query ) + lock_refresh, history_calculation, history_lookback, custom_query ) WITH last_run AS (SELECT r.table_groups_id, MAX(run_date) AS last_run_date FROM profile_results p INNER JOIN profiling_runs r @@ -134,6 +134,8 @@ SELECT '{TABLE_GROUPS_ID}'::UUID as table_groups_id, '{RUN_DATE}'::TIMESTAMP as last_auto_gen_date, '{AS_OF_DATE}'::TIMESTAMP as profiling_as_of_date, 'N' as lock_refresh, + 'Value' as history_calculation, + 1 as history_lookback, fingerprint as custom_query FROM newtests n INNER JOIN test_types t diff --git a/testgen/ui/queries/test_result_queries.py b/testgen/ui/queries/test_result_queries.py index 991969ab..52a51767 100644 --- a/testgen/ui/queries/test_result_queries.py +++ b/testgen/ui/queries/test_result_queries.py @@ -136,7 +136,9 @@ def get_test_result_history(tr_data, limit: int | None = None): test_operator, threshold_value::NUMERIC, result_measure::NUMERIC, - result_status + result_status, + result_visualization, + result_visualization_params FROM v_test_results WHERE {f""" test_suite_id = :test_suite_id diff --git a/testgen/ui/views/test_definitions.py b/testgen/ui/views/test_definitions.py index e21c9a2c..0331f1f5 100644 --- a/testgen/ui/views/test_definitions.py +++ b/testgen/ui/views/test_definitions.py @@ -575,10 +575,22 @@ def render_dynamic_attribute(attribute: str, container: DeltaGenerator): help=help_text, ) elif attribute in int_numeric_attributes: + max_value = None + if ( + attribute == "history_lookback" + and int(value) <= 1 + and ( + not test_definition.get("history_calculation") + or test_definition.get("history_calculation") == "Value" + ) + ): + max_value = 1 test_definition[attribute] = container.number_input( label=label_text, - value=int(value), step=1, + value=int(value), + max_value=max_value, + min_value=0, help=help_text, ) elif attribute in choice_fields: diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py index 7991d88d..d1011c92 100644 --- a/testgen/ui/views/test_results.py +++ b/testgen/ui/views/test_results.py @@ -1,4 +1,6 @@ +import json import typing +from datetime import datetime, timedelta from functools import partial from io import BytesIO from itertools import zip_longest @@ -353,7 +355,7 @@ def get_test_result_summary(test_run_id: str) -> list[dict]: { "label": "Warning", "value": result.warning_ct, "color": "yellow" }, { "label": "Failed", "value": result.failed_ct, "color": "red" }, { "label": "Error", "value": result.error_ct, "color": "brown" }, - { "label": "Log", "value": result.error_ct, "color": "darkGrey" }, + { "label": "Log", "value": result.log_ct, "color": "darkGrey" }, { "label": "Dismissed", "value": result.dismissed_ct, "color": "grey" }, ] @@ -638,10 +640,21 @@ def get_excel_report_data( ) -def write_history_graph(dfh): +def write_history_graph(data: pd.DataFrame): + chart_type = data.at[0, "result_visualization"] + chart_params = json.loads(data.at[0, "result_visualization_params"] or "{}") + + match chart_type: + case "binary_chart": + render_binary_chart(data, **chart_params) + case _: render_line_chart(data, **chart_params) + + +def 
render_line_chart(dfh: pd.DataFrame, **_params: dict) -> None: + str_uom = dfh.at[0, "measure_uom"] + y_min = min(dfh["result_measure"].min(), dfh["threshold_value"].min()) y_max = max(dfh["result_measure"].max(), dfh["threshold_value"].max()) - str_uom = dfh.at[0, "measure_uom"] fig = px.line( dfh, @@ -716,6 +729,52 @@ def write_history_graph(dfh): st.plotly_chart(fig) +def render_binary_chart(data: pd.DataFrame, **params: dict) -> None: + history = data.copy(deep=True) + legend_labels = params.get("legend", {}).get("labels") or {"0": "0", "1": "1"} + + history["test_start"] = history["test_date"].apply(datetime.fromisoformat) + history["test_end"] = history["test_start"].apply(lambda start: start + timedelta(seconds=30)) + history["formatted_test_date"] = history["test_date"].apply(lambda date_str: datetime.fromisoformat(date_str).strftime("%I:%M:%S %p, %d/%m/%Y")) + history["result_measure_with_status"] = history.apply(lambda row: f"{legend_labels[str(int(row['result_measure']))]} ({row['result_status']})", axis=1) + + fig = px.timeline( + history, + x_start="test_start", + x_end="test_end", + y="measure_uom", + color="result_measure_with_status", + color_discrete_map={ + f"{legend_labels['0']} (Failed)": "#EF5350", + f"{legend_labels['0']} (Warning)": "#FF9800", + f"{legend_labels['0']} (Log)": "#BDBDBD", + f"{legend_labels['1']} (Passed)": "#9CCC65", + f"{legend_labels['1']} (Log)": "#42A5F5", + }, + hover_name="formatted_test_date", + hover_data={ + "test_start": False, + "test_end": False, + "result_measure": False, + "result_measure_with_status": False, + "measure_uom": False, + }, + labels={ + "result_measure_with_status": "", + }, + ) + fig.update_layout( + yaxis_visible=False, + xaxis_showline=True, + paper_bgcolor="rgba(0,0,0,0)", + plot_bgcolor="rgba(0,0,0,0)", + legend={"x": 0.5, "y": 1.1, "xanchor": "center", "yanchor": "top", "orientation": "h"}, + width=500, + ) + + st.plotly_chart(fig) + + def do_disposition_update(selected, str_new_status): str_result = None if selected: From 3b3d3f33ad15592fff63412b0a63a932c26386d9 Mon Sep 17 00:00:00 2001 From: Luis Date: Mon, 18 Aug 2025 14:47:08 -0400 Subject: [PATCH 21/28] fix(tests): add missing result_signal column --- .../ex_window_match_no_drops_databricks.sql | 1 + .../ex_window_match_same_databricks.sql | 1 + .../ex_relative_entropy_mssql.sql | 1 + .../ex_window_match_no_drops_postgresql.sql | 1 + .../ex_window_match_same_postgresql.sql | 1 + .../get_entities/get_test_generation_list.sql | 26 +++++++++---------- .../get_test_results_for_run_cli.sql | 4 +-- 7 files changed, 20 insertions(+), 15 deletions(-) diff --git a/testgen/template/flavors/databricks/exec_query_tests/ex_window_match_no_drops_databricks.sql b/testgen/template/flavors/databricks/exec_query_tests/ex_window_match_no_drops_databricks.sql index 5ca11540..ad1f7581 100644 --- a/testgen/template/flavors/databricks/exec_query_tests/ex_window_match_no_drops_databricks.sql +++ b/testgen/template/flavors/databricks/exec_query_tests/ex_window_match_no_drops_databricks.sql @@ -11,6 +11,7 @@ SELECT '{TEST_TYPE}' as test_type, '{SKIP_ERRORS}' as threshold_value, {SKIP_ERRORS} as skip_errors, '{INPUT_PARAMETERS}' as input_parameters, + NULL as result_signal, CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code, CASE WHEN COUNT(*) > 0 THEN diff --git a/testgen/template/flavors/databricks/exec_query_tests/ex_window_match_same_databricks.sql b/testgen/template/flavors/databricks/exec_query_tests/ex_window_match_same_databricks.sql index 80953b2c..2fe39587 
100644 --- a/testgen/template/flavors/databricks/exec_query_tests/ex_window_match_same_databricks.sql +++ b/testgen/template/flavors/databricks/exec_query_tests/ex_window_match_same_databricks.sql @@ -11,6 +11,7 @@ SELECT '{TEST_TYPE}' as test_type, '{SKIP_ERRORS}' as threshold_value, {SKIP_ERRORS} as skip_errors, '{INPUT_PARAMETERS}' as input_parameters, + NULL as result_signal, CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code, CASE WHEN COUNT(*) > 0 THEN diff --git a/testgen/template/flavors/mssql/exec_query_tests/ex_relative_entropy_mssql.sql b/testgen/template/flavors/mssql/exec_query_tests/ex_relative_entropy_mssql.sql index e6196752..be5fa577 100644 --- a/testgen/template/flavors/mssql/exec_query_tests/ex_relative_entropy_mssql.sql +++ b/testgen/template/flavors/mssql/exec_query_tests/ex_relative_entropy_mssql.sql @@ -39,6 +39,7 @@ SELECT '{TEST_TYPE}' as test_type, '{THRESHOLD_VALUE}' as threshold_value, NULL as skip_errors, '{INPUT_PARAMETERS}' as input_parameters, + NULL as result_signal, CASE WHEN js_divergence > {THRESHOLD_VALUE} THEN 0 ELSE 1 END as result_code, CONCAT('Divergence Level: ', CONCAT(CAST(js_divergence AS VARCHAR), diff --git a/testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_no_drops_postgresql.sql b/testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_no_drops_postgresql.sql index 13856572..178998b0 100644 --- a/testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_no_drops_postgresql.sql +++ b/testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_no_drops_postgresql.sql @@ -11,6 +11,7 @@ SELECT '{TEST_TYPE}' as test_type, '{SKIP_ERRORS}' as threshold_value, {SKIP_ERRORS} as skip_errors, '{INPUT_PARAMETERS}' as input_parameters, + NULL as result_signal, CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code, CASE WHEN COUNT(*) > 0 THEN diff --git a/testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_same_postgresql.sql b/testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_same_postgresql.sql index 20a07913..4a6aaee4 100644 --- a/testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_same_postgresql.sql +++ b/testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_same_postgresql.sql @@ -11,6 +11,7 @@ SELECT '{TEST_TYPE}' as test_type, '{SKIP_ERRORS}' as threshold_value, {SKIP_ERRORS} as skip_errors, '{INPUT_PARAMETERS}' as input_parameters, + NULL as result_signal, CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code, CASE WHEN COUNT(*) > 0 THEN diff --git a/testgen/template/get_entities/get_test_generation_list.sql b/testgen/template/get_entities/get_test_generation_list.sql index 95600b7e..b4322693 100644 --- a/testgen/template/get_entities/get_test_generation_list.sql +++ b/testgen/template/get_entities/get_test_generation_list.sql @@ -2,18 +2,18 @@ Output: list all test generation runs based on last_auto_run_date Optional: n/a*/ - SELECT ts.test_suite AS test_suite_key, - ts.table_groups_id, - td.last_auto_gen_date, - td.profiling_as_of_date, - td.lock_refresh, - COUNT(DISTINCT td.schema_name || '.' || td.table_name) as tables, - COUNT(DISTINCT td.schema_name || '.' || td.table_name || '.' 
|| td.column_name) as columns, - COUNT(*) as tests - FROM test_definitions td - JOIN test_suites ts ON td.test_suite_id = ts.id - WHERE ts.project_code = :PROJECT_CODE - AND ts.test_suite = :TEST_SUITE - AND td.last_auto_gen_date IS NOT NULL +SELECT ts.test_suite AS test_suite_key, + ts.table_groups_id, + td.last_auto_gen_date, + td.profiling_as_of_date, + td.lock_refresh, + COUNT(DISTINCT td.schema_name || '.' || td.table_name) as tables, + COUNT(DISTINCT td.schema_name || '.' || td.table_name || '.' || td.column_name) as columns, + COUNT(*) as tests +FROM test_definitions td +JOIN test_suites ts ON td.test_suite_id = ts.id +WHERE ts.project_code = :PROJECT_CODE + AND ts.test_suite = :TEST_SUITE + AND td.last_auto_gen_date IS NOT NULL GROUP BY ts.id, td.last_auto_gen_date, td.profiling_as_of_date, td.lock_refresh ORDER BY td.last_auto_gen_date desc; diff --git a/testgen/template/get_entities/get_test_results_for_run_cli.sql b/testgen/template/get_entities/get_test_results_for_run_cli.sql index 083d65e4..127ce3d3 100644 --- a/testgen/template/get_entities/get_test_results_for_run_cli.sql +++ b/testgen/template/get_entities/get_test_results_for_run_cli.sql @@ -1,6 +1,6 @@ SELECT ts.test_suite as test_suite_key, - table_name, - column_names as column_name, + r.table_name, + r.column_names as column_name, r.test_type, CASE WHEN COALESCE(td.severity, ts.severity, tt.default_severity) = 'Log' THEN 'Log' From 80c644b86480b7566b71d29e3987dafad04e9a02 Mon Sep 17 00:00:00 2001 From: Luis Date: Tue, 19 Aug 2025 11:03:21 -0400 Subject: [PATCH 22/28] fix(tests): add flavor specific stale table test generation --- .../commands/queries/generate_tests_query.py | 37 +++- ...grade.sql => 0147_incremental_upgrade.sql} | 7 + .../dbupgrade/0500_incremental_upgrade.sql | 8 - .../gen_table_changed_test.sql | 153 +++++++++++++++++ .../gen_table_changed_test.sql | 158 ++++++++++++++++++ .../gen_table_changed_test.sql | 18 +- testgen/ui/components/frontend/css/shared.css | 1 - .../frontend/js/data_profiling/data_issues.js | 2 +- .../components/frontend/js/display_utils.js | 1 - .../frontend/js/pages/project_dashboard.js | 2 +- .../components/frontend/js/pages/test_runs.js | 2 +- .../frontend/js/pages/test_suites.js | 2 +- testgen/ui/services/form_service.py | 2 +- testgen/ui/views/test_results.py | 2 +- testgen/ui/views/test_suites.py | 3 +- 15 files changed, 366 insertions(+), 32 deletions(-) rename testgen/template/dbupgrade/{0501_incremental_upgrade.sql => 0147_incremental_upgrade.sql} (60%) delete mode 100644 testgen/template/dbupgrade/0500_incremental_upgrade.sql create mode 100644 testgen/template/flavors/databricks/gen_query_tests/gen_table_changed_test.sql create mode 100644 testgen/template/flavors/mssql/gen_query_tests/gen_table_changed_test.sql diff --git a/testgen/commands/queries/generate_tests_query.py b/testgen/commands/queries/generate_tests_query.py index 5f0b1ce2..4f887af4 100644 --- a/testgen/commands/queries/generate_tests_query.py +++ b/testgen/commands/queries/generate_tests_query.py @@ -2,7 +2,8 @@ from typing import ClassVar, TypedDict from testgen.common import CleanSQL, date_service, read_template_sql_file -from testgen.common.database.database_service import get_queries_for_command, replace_params +from testgen.common.database.database_service import replace_params +from testgen.common.read_file import get_template_files LOG = logging.getLogger("testgen") @@ -67,11 +68,35 @@ def GetTestTypesSQL(self) -> tuple[str, dict]: def GetTestDerivationQueriesAsList(self, template_directory: str) 
-> list[tuple[str, dict]]:
         # Runs on App database
-        params = self._get_params()
-        queries = get_queries_for_command(template_directory, params)
-        if self._use_clean:
-            queries = [ CleanSQL(query) for query in queries ]
-        return [ (query, params) for query in queries ]
+        generic_template_directory = template_directory
+        flavor_template_directory = f"flavors.{self.sql_flavor}.{template_directory}"
+
+        query_templates = {}
+        try:
+            for query_file in get_template_files(r"^.*sql$", generic_template_directory):
+                query_templates[query_file.name] = generic_template_directory
+        except Exception:
+            LOG.debug(
+                f"query template '{generic_template_directory}' directory does not exist",
+                exc_info=True,
+                stack_info=True,
+            )
+
+        try:
+            for query_file in get_template_files(r"^.*sql$", flavor_template_directory):
+                query_templates[query_file.name] = flavor_template_directory
+        except Exception:
+            LOG.debug(
+                f"query template '{flavor_template_directory}' directory does not exist",
+                exc_info=True,
+                stack_info=True,
+            )
+
+        queries = []
+        for filename, sub_directory in query_templates.items():
+            queries.append(self._get_query(filename, sub_directory=sub_directory))
+
+        return queries
 
     def GetTestQueriesFromGenericFile(self) -> tuple[str, dict]:
         # Runs on App database
diff --git a/testgen/template/dbupgrade/0501_incremental_upgrade.sql b/testgen/template/dbupgrade/0147_incremental_upgrade.sql
similarity index 60%
rename from testgen/template/dbupgrade/0501_incremental_upgrade.sql
rename to testgen/template/dbupgrade/0147_incremental_upgrade.sql
index 49653f83..3e2f043a 100644
--- a/testgen/template/dbupgrade/0501_incremental_upgrade.sql
+++ b/testgen/template/dbupgrade/0147_incremental_upgrade.sql
@@ -1,5 +1,12 @@
 SET SEARCH_PATH TO {SCHEMA_NAME};
 
+ALTER TABLE test_definitions
+    ADD COLUMN history_calculation VARCHAR(20),
+    ADD COLUMN history_lookback INTEGER;
+
+ALTER TABLE test_results
+    ADD COLUMN result_signal VARCHAR(1000);
+
 ALTER TABLE test_runs ADD COLUMN log_ct INTEGER;
 
 DROP VIEW IF EXISTS v_test_results;
diff --git a/testgen/template/dbupgrade/0500_incremental_upgrade.sql b/testgen/template/dbupgrade/0500_incremental_upgrade.sql
deleted file mode 100644
index 7bb0f165..00000000
--- a/testgen/template/dbupgrade/0500_incremental_upgrade.sql
+++ /dev/null
@@ -1,8 +0,0 @@
-SET SEARCH_PATH TO {SCHEMA_NAME};
-
-ALTER TABLE test_definitions
-    ADD COLUMN history_calculation VARCHAR(20),
-    ADD COLUMN history_lookback INTEGER;
-
-ALTER TABLE test_results
-    ADD COLUMN result_signal VARCHAR(1000);
\ No newline at end of file
diff --git a/testgen/template/flavors/databricks/gen_query_tests/gen_table_changed_test.sql b/testgen/template/flavors/databricks/gen_query_tests/gen_table_changed_test.sql
new file mode 100644
index 00000000..d113ab2e
--- /dev/null
+++ b/testgen/template/flavors/databricks/gen_query_tests/gen_table_changed_test.sql
@@ -0,0 +1,153 @@
+INSERT INTO test_definitions (table_groups_id, profile_run_id, test_type, test_suite_id,
+                              schema_name, table_name,
+                              skip_errors, test_active, last_auto_gen_date, profiling_as_of_date,
+                              lock_refresh, history_calculation, history_lookback, custom_query )
+WITH last_run AS (SELECT r.table_groups_id, MAX(run_date) AS last_run_date
+                    FROM profile_results p
+                  INNER JOIN profiling_runs r
+                     ON (p.profile_run_id = r.id)
+                  INNER JOIN test_suites ts
+                     ON p.project_code = ts.project_code
+                    AND p.connection_id = ts.connection_id
+                   WHERE p.project_code = '{PROJECT_CODE}'
+                     AND r.table_groups_id = '{TABLE_GROUPS_ID}'::UUID
+                     AND ts.id = '{TEST_SUITE_ID}'
+                     AND p.run_date::DATE <= 
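+                     -- {AS_OF_DATE} and the other {PLACEHOLDERS} are substituted at generation
+                     -- time; last_run keeps only the latest profiling run at or before that date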
'{AS_OF_DATE}' + GROUP BY r.table_groups_id), +curprof AS (SELECT p.profile_run_id, schema_name, table_name, column_name, functional_data_type, general_type, + distinct_value_ct, record_ct, max_value, min_value, avg_value, stdev_value, null_value_ct + FROM last_run lr + INNER JOIN profile_results p + ON (lr.table_groups_id = p.table_groups_id + AND lr.last_run_date = p.run_date) ), +locked AS (SELECT schema_name, table_name + FROM test_definitions + WHERE table_groups_id = '{TABLE_GROUPS_ID}'::UUID + AND test_suite_id = '{TEST_SUITE_ID}' + AND test_type = 'Stale_Table' + AND lock_refresh = 'Y'), +-- IDs - TOP 2 +id_cols + AS ( SELECT profile_run_id, schema_name, table_name, column_name, functional_data_type, general_type, + distinct_value_ct, + ROW_NUMBER() OVER (PARTITION BY schema_name, table_name + ORDER BY + CASE + WHEN functional_data_type ILIKE 'ID-Unique%' THEN 1 + WHEN functional_data_type = 'ID-Secondary' THEN 2 + ELSE 3 + END, distinct_value_ct, column_name DESC) AS rank + FROM curprof + WHERE functional_data_type ILIKE 'ID%'), +-- Process Date - TOP 1 +process_date_cols + AS (SELECT profile_run_id, schema_name, table_name, column_name, functional_data_type, general_type, + distinct_value_ct, + ROW_NUMBER() OVER (PARTITION BY schema_name, table_name + ORDER BY + CASE + WHEN column_name ILIKE '%mod%' THEN 1 + WHEN column_name ILIKE '%up%' THEN 1 + WHEN column_name ILIKE '%cr%' THEN 2 + WHEN column_name ILIKE '%in%' THEN 2 + END , distinct_value_ct DESC, column_name) AS rank + FROM curprof + WHERE functional_data_type ILIKE 'process%'), +-- Transaction Date - TOP 1 +tran_date_cols + AS ( SELECT profile_run_id, schema_name, table_name, column_name, functional_data_type, general_type, + distinct_value_ct, + ROW_NUMBER() OVER (PARTITION BY schema_name, table_name + ORDER BY + distinct_value_ct DESC, column_name) AS rank + FROM curprof + WHERE functional_data_type ILIKE 'transactional date%' OR functional_data_type ILIKE 'period%' + OR functional_data_type = 'timestamp' ), + +-- Numeric Measures +numeric_cols + AS ( SELECT profile_run_id, schema_name, table_name, column_name, functional_data_type, general_type, +/* + -- Subscores + distinct_value_ct * 1.0 / NULLIF(record_ct, 0) AS cardinality_score, + (max_value - min_value) / NULLIF(ABS(NULLIF(avg_value, 0)), 1) AS range_score, + LEAST(1, LOG(GREATEST(distinct_value_ct, 2))) / LOG(GREATEST(record_ct, 2)) AS nontriviality_score, + stdev_value / NULLIF(ABS(NULLIF(avg_value, 0)), 1) AS variability_score, + 1.0 - (null_value_ct * 1.0 / NULLIF(NULLIF(record_ct, 0), 1)) AS null_penalty, +*/ + -- Weighted score + ( + 0.25 * (distinct_value_ct * 1.0 / NULLIF(record_ct, 0)) + + 0.15 * ((max_value - min_value) / NULLIF(ABS(NULLIF(avg_value, 0)), 1)) + + 0.10 * (LEAST(1, LOG(GREATEST(distinct_value_ct, 2))) / LOG(GREATEST(record_ct, 2))) + + 0.40 * (stdev_value / NULLIF(ABS(NULLIF(avg_value, 0)), 1)) + + 0.10 * (1.0 - (null_value_ct * 1.0 / NULLIF(NULLIF(record_ct, 0), 1))) + ) AS change_detection_score + FROM curprof + WHERE general_type = 'N' + AND (functional_data_type ILIKE 'Measure%' OR functional_data_type IN ('Sequence', 'Constant')) + ), +numeric_cols_ranked + AS ( SELECT *, + ROW_NUMBER() OVER (PARTITION BY schema_name, table_name + ORDER BY change_detection_score DESC, column_name) as rank + FROM numeric_cols + WHERE change_detection_score IS NOT NULL), +combined + AS ( SELECT profile_run_id, schema_name, table_name, column_name, 'ID' AS element_type, general_type, 10 + rank AS fingerprint_order + FROM id_cols + WHERE rank <= 2 + UNION 
ALL + SELECT profile_run_id, schema_name, table_name, column_name, 'DATE_P' AS element_type, general_type, 20 + rank AS fingerprint_order + FROM process_date_cols + WHERE rank = 1 + UNION ALL + SELECT profile_run_id, schema_name, table_name, column_name, 'DATE_T' AS element_type, general_type, 30 + rank AS fingerprint_order + FROM tran_date_cols + WHERE rank = 1 + UNION ALL + SELECT profile_run_id, schema_name, table_name, column_name, 'MEAS' AS element_type, general_type, 40 + rank AS fingerprint_order + FROM numeric_cols_ranked + WHERE rank = 1 ), +newtests + AS (SELECT profile_run_id, schema_name, table_name, + 'COUNT(*)::STRING || ''|'' || ' || + STRING_AGG( + REPLACE( + CASE + WHEN general_type = 'D' THEN 'MIN(@@@)::STRING || ''|'' || MAX(@@@::STRING) || ''|'' || COUNT(DISTINCT @@@)::STRING' + WHEN general_type = 'A' THEN 'MIN(@@@)::STRING || ''|'' || MAX(@@@::STRING) || ''|'' || COUNT(DISTINCT @@@)::STRING || ''|'' || SUM(LENGTH(@@@))::STRING' + WHEN general_type = 'N' THEN 'MIN(@@@)::STRING || ''|'' || MAX(@@@::STRING) || ''|'' || SUM(@@@)::STRING || ''|'' || ROUND(AVG(@@@), 5)::STRING || ''|'' || ROUND(STDDEV(@@@), 5)::STRING' + END, + '@@@', '"' || column_name || '"'), + ' || ''|'' || ' + ORDER BY element_type, fingerprint_order, column_name) as fingerprint + FROM combined + GROUP BY profile_run_id, schema_name, table_name) +SELECT '{TABLE_GROUPS_ID}'::UUID as table_groups_id, + n.profile_run_id, + 'Stale_Table' AS test_type, + '{TEST_SUITE_ID}' AS test_suite_id, + n.schema_name, n.table_name, + 0 as skip_errors, 'Y' as test_active, + + '{RUN_DATE}'::TIMESTAMP as last_auto_gen_date, + '{AS_OF_DATE}'::TIMESTAMP as profiling_as_of_date, + 'N' as lock_refresh, + 'Value' as history_calculation, + 1 as history_lookback, + fingerprint as custom_query +FROM newtests n +INNER JOIN test_types t + ON ('Stale_Table' = t.test_type + AND 'Y' = t.active) +LEFT JOIN generation_sets s + ON (t.test_type = s.test_type + AND '{GENERATION_SET}' = s.generation_set) +LEFT JOIN locked l + ON (n.schema_name = l.schema_name + AND n.table_name = l.table_name) +WHERE (s.generation_set IS NOT NULL + OR '{GENERATION_SET}' = '') + AND l.schema_name IS NULL; + diff --git a/testgen/template/flavors/mssql/gen_query_tests/gen_table_changed_test.sql b/testgen/template/flavors/mssql/gen_query_tests/gen_table_changed_test.sql new file mode 100644 index 00000000..4e332853 --- /dev/null +++ b/testgen/template/flavors/mssql/gen_query_tests/gen_table_changed_test.sql @@ -0,0 +1,158 @@ +INSERT INTO test_definitions (table_groups_id, profile_run_id, test_type, test_suite_id, + schema_name, table_name, + skip_errors, test_active, last_auto_gen_date, profiling_as_of_date, + lock_refresh, history_calculation, history_lookback, custom_query ) +WITH last_run AS (SELECT r.table_groups_id, MAX(run_date) AS last_run_date + FROM profile_results p + INNER JOIN profiling_runs r + ON (p.profile_run_id = r.id) + INNER JOIN test_suites ts + ON p.project_code = ts.project_code + AND p.connection_id = ts.connection_id + WHERE p.project_code = '{PROJECT_CODE}' + AND r.table_groups_id = '{TABLE_GROUPS_ID}'::UUID + AND ts.id = '{TEST_SUITE_ID}' + AND p.run_date::DATE <= '{AS_OF_DATE}' + GROUP BY r.table_groups_id), +curprof AS (SELECT p.profile_run_id, schema_name, table_name, column_name, functional_data_type, general_type, + distinct_value_ct, record_ct, max_value, min_value, avg_value, stdev_value, null_value_ct + FROM last_run lr + INNER JOIN profile_results p + ON (lr.table_groups_id = p.table_groups_id + AND lr.last_run_date = 
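+                         -- curprof: column-level metrics from that latest profiling run,
+                         -- the candidate pool for the fingerprint columns chosen below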
p.run_date) ), +locked AS (SELECT schema_name, table_name + FROM test_definitions + WHERE table_groups_id = '{TABLE_GROUPS_ID}'::UUID + AND test_suite_id = '{TEST_SUITE_ID}' + AND test_type = 'Stale_Table' + AND lock_refresh = 'Y'), +-- IDs - TOP 2 +id_cols + AS ( SELECT profile_run_id, schema_name, table_name, column_name, functional_data_type, general_type, + distinct_value_ct, + ROW_NUMBER() OVER (PARTITION BY schema_name, table_name + ORDER BY + CASE + WHEN functional_data_type ILIKE 'ID-Unique%' THEN 1 + WHEN functional_data_type = 'ID-Secondary' THEN 2 + ELSE 3 + END, distinct_value_ct, column_name DESC) AS rank + FROM curprof + WHERE functional_data_type ILIKE 'ID%'), +-- Process Date - TOP 1 +process_date_cols + AS (SELECT profile_run_id, schema_name, table_name, column_name, functional_data_type, general_type, + distinct_value_ct, + ROW_NUMBER() OVER (PARTITION BY schema_name, table_name + ORDER BY + CASE + WHEN column_name ILIKE '%mod%' THEN 1 + WHEN column_name ILIKE '%up%' THEN 1 + WHEN column_name ILIKE '%cr%' THEN 2 + WHEN column_name ILIKE '%in%' THEN 2 + END , distinct_value_ct DESC, column_name) AS rank + FROM curprof + WHERE functional_data_type ILIKE 'process%'), +-- Transaction Date - TOP 1 +tran_date_cols + AS ( SELECT profile_run_id, schema_name, table_name, column_name, functional_data_type, general_type, + distinct_value_ct, + ROW_NUMBER() OVER (PARTITION BY schema_name, table_name + ORDER BY + distinct_value_ct DESC, column_name) AS rank + FROM curprof + WHERE functional_data_type ILIKE 'transactional date%' OR functional_data_type ILIKE 'period%' + OR functional_data_type = 'timestamp' ), + +-- Numeric Measures +numeric_cols + AS ( SELECT profile_run_id, schema_name, table_name, column_name, functional_data_type, general_type, +/* + -- Subscores + distinct_value_ct * 1.0 / NULLIF(record_ct, 0) AS cardinality_score, + (max_value - min_value) / NULLIF(ABS(NULLIF(avg_value, 0)), 1) AS range_score, + LEAST(1, LOG(GREATEST(distinct_value_ct, 2))) / LOG(GREATEST(record_ct, 2)) AS nontriviality_score, + stdev_value / NULLIF(ABS(NULLIF(avg_value, 0)), 1) AS variability_score, + 1.0 - (null_value_ct * 1.0 / NULLIF(NULLIF(record_ct, 0), 1)) AS null_penalty, +*/ + -- Weighted score + ( + 0.25 * (distinct_value_ct * 1.0 / NULLIF(record_ct, 0)) + + 0.15 * ((max_value - min_value) / NULLIF(ABS(NULLIF(avg_value, 0)), 1)) + + 0.10 * (LEAST(1, LOG(GREATEST(distinct_value_ct, 2))) / LOG(GREATEST(record_ct, 2))) + + 0.40 * (stdev_value / NULLIF(ABS(NULLIF(avg_value, 0)), 1)) + + 0.10 * (1.0 - (null_value_ct * 1.0 / NULLIF(NULLIF(record_ct, 0), 1))) + ) AS change_detection_score + FROM curprof + WHERE general_type = 'N' + AND (functional_data_type ILIKE 'Measure%' OR functional_data_type IN ('Sequence', 'Constant')) + ), +numeric_cols_ranked + AS ( SELECT *, + ROW_NUMBER() OVER (PARTITION BY schema_name, table_name + ORDER BY change_detection_score DESC, column_name) as rank + FROM numeric_cols + WHERE change_detection_score IS NOT NULL), +combined + AS ( SELECT profile_run_id, schema_name, table_name, column_name, 'ID' AS element_type, general_type, 10 + rank AS fingerprint_order + FROM id_cols + WHERE rank <= 2 + UNION ALL + SELECT profile_run_id, schema_name, table_name, column_name, 'DATE_P' AS element_type, general_type, 20 + rank AS fingerprint_order + FROM process_date_cols + WHERE rank = 1 + UNION ALL + SELECT profile_run_id, schema_name, table_name, column_name, 'DATE_T' AS element_type, general_type, 30 + rank AS fingerprint_order + FROM tran_date_cols + WHERE rank = 1 + 
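+      -- the 10/20/30/40 offsets on fingerprint_order keep the concatenated
+      -- fingerprint parts in a stable order: IDs, process date, transaction date,
+      -- then the single highest-scoring measure column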
UNION ALL + SELECT profile_run_id, schema_name, table_name, column_name, 'MEAS' AS element_type, general_type, 40 + rank AS fingerprint_order + FROM numeric_cols_ranked + WHERE rank = 1 ), +newtests AS ( + SELECT + profile_run_id, + schema_name, + table_name, + 'CAST(COUNT(*) AS varchar) + ''|'' + ' || STRING_AGG( + REPLACE( + CASE + WHEN general_type = 'D' THEN 'CAST(MIN(@@@) AS varchar) + ''|'' + MAX(CAST(@@@ AS varchar)) + ''|'' + CAST(COUNT(DISTINCT @@@) AS varchar)' + WHEN general_type = 'A' THEN 'CAST(MIN(@@@) AS varchar) + ''|'' + MAX(CAST(@@@ AS varchar)) + ''|'' + CAST(COUNT(DISTINCT @@@) AS varchar) + ''|'' + CAST(SUM(LEN(@@@)) AS varchar)' + WHEN general_type = 'N' THEN 'CAST(MIN(@@@) AS varchar) + ''|'' + MAX(CAST(@@@ AS varchar)) + ''|'' + CAST(SUM(@@@) AS varchar) + ''|'' + CAST(ROUND(AVG(@@@), 5) AS varchar) + ''|'' + CAST(ROUND(STDEV(@@@), 5) AS varchar)' + END, + '@@@', '"' || column_name || '"' + ), + ' + ''|'' + ' + ORDER BY element_type, fingerprint_order, column_name + ) as fingerprint + FROM combined + GROUP BY profile_run_id, schema_name, table_name +) +SELECT '{TABLE_GROUPS_ID}'::UUID as table_groups_id, + n.profile_run_id, + 'Stale_Table' AS test_type, + '{TEST_SUITE_ID}' AS test_suite_id, + n.schema_name, n.table_name, + 0 as skip_errors, 'Y' as test_active, + + '{RUN_DATE}'::TIMESTAMP as last_auto_gen_date, + '{AS_OF_DATE}'::TIMESTAMP as profiling_as_of_date, + 'N' as lock_refresh, + 'Value' as history_calculation, + 1 as history_lookback, + fingerprint as custom_query +FROM newtests n +INNER JOIN test_types t + ON ('Stale_Table' = t.test_type + AND 'Y' = t.active) +LEFT JOIN generation_sets s + ON (t.test_type = s.test_type + AND '{GENERATION_SET}' = s.generation_set) +LEFT JOIN locked l + ON (n.schema_name = l.schema_name + AND n.table_name = l.table_name) +WHERE (s.generation_set IS NOT NULL + OR '{GENERATION_SET}' = '') + AND l.schema_name IS NULL; + diff --git a/testgen/template/gen_query_tests/gen_table_changed_test.sql b/testgen/template/gen_query_tests/gen_table_changed_test.sql index d87bf21e..18a85dbe 100644 --- a/testgen/template/gen_query_tests/gen_table_changed_test.sql +++ b/testgen/template/gen_query_tests/gen_table_changed_test.sql @@ -36,7 +36,7 @@ id_cols WHEN functional_data_type ILIKE 'ID-Unique%' THEN 1 WHEN functional_data_type = 'ID-Secondary' THEN 2 ELSE 3 - END, distinct_value_ct DESC) AS rank + END, distinct_value_ct, column_name DESC) AS rank FROM curprof WHERE functional_data_type ILIKE 'ID%'), -- Process Date - TOP 1 @@ -50,7 +50,7 @@ process_date_cols WHEN column_name ILIKE '%up%' THEN 1 WHEN column_name ILIKE '%cr%' THEN 2 WHEN column_name ILIKE '%in%' THEN 2 - END , distinct_value_ct DESC) AS rank + END , distinct_value_ct DESC, column_name) AS rank FROM curprof WHERE functional_data_type ILIKE 'process%'), -- Transaction Date - TOP 1 @@ -59,7 +59,7 @@ tran_date_cols distinct_value_ct, ROW_NUMBER() OVER (PARTITION BY schema_name, table_name ORDER BY - distinct_value_ct DESC) AS rank + distinct_value_ct DESC, column_name) AS rank FROM curprof WHERE functional_data_type ILIKE 'transactional date%' OR functional_data_type ILIKE 'period%' OR functional_data_type = 'timestamp' ), @@ -90,23 +90,23 @@ numeric_cols numeric_cols_ranked AS ( SELECT *, ROW_NUMBER() OVER (PARTITION BY schema_name, table_name - ORDER BY change_detection_score DESC) as rank + ORDER BY change_detection_score DESC, column_name) as rank FROM numeric_cols WHERE change_detection_score IS NOT NULL), combined - AS ( SELECT profile_run_id, schema_name, table_name, 
column_name, 'ID' AS element_type, general_type, rank AS fingerprint_order + AS ( SELECT profile_run_id, schema_name, table_name, column_name, 'ID' AS element_type, general_type, 10 + rank AS fingerprint_order FROM id_cols WHERE rank <= 2 UNION ALL - SELECT profile_run_id, schema_name, table_name, column_name, 'DATE_P' AS element_type, general_type, rank AS fingerprint_order + SELECT profile_run_id, schema_name, table_name, column_name, 'DATE_P' AS element_type, general_type, 20 + rank AS fingerprint_order FROM process_date_cols WHERE rank = 1 UNION ALL - SELECT profile_run_id, schema_name, table_name, column_name, 'DATE_T' AS element_type, general_type, rank AS fingerprint_order + SELECT profile_run_id, schema_name, table_name, column_name, 'DATE_T' AS element_type, general_type, 30 + rank AS fingerprint_order FROM tran_date_cols WHERE rank = 1 UNION ALL - SELECT profile_run_id, schema_name, table_name, column_name, 'MEAS' AS element_type, general_type, rank AS fingerprint_order + SELECT profile_run_id, schema_name, table_name, column_name, 'MEAS' AS element_type, general_type, 40 + rank AS fingerprint_order FROM numeric_cols_ranked WHERE rank = 1 ), newtests @@ -121,7 +121,7 @@ newtests END, '@@@', '"' || column_name || '"'), ' || ''|'' || ' - ORDER BY element_type, fingerprint_order) as fingerprint + ORDER BY element_type, fingerprint_order, column_name) as fingerprint FROM combined GROUP BY profile_run_id, schema_name, table_name) SELECT '{TABLE_GROUPS_ID}'::UUID as table_groups_id, diff --git a/testgen/ui/components/frontend/css/shared.css b/testgen/ui/components/frontend/css/shared.css index 2f0a153c..a3d18383 100644 --- a/testgen/ui/components/frontend/css/shared.css +++ b/testgen/ui/components/frontend/css/shared.css @@ -20,7 +20,6 @@ body { --blue: #42A5F5; --brown: #8D6E63; --grey: #BDBDBD; - --darkGrey: #9E9E9E; --empty: #EEEEEE; --empty-light: #FAFAFA; --empty-teal: #E7F1F0; diff --git a/testgen/ui/components/frontend/js/data_profiling/data_issues.js b/testgen/ui/components/frontend/js/data_profiling/data_issues.js index e0beea80..40265317 100644 --- a/testgen/ui/components/frontend/js/data_profiling/data_issues.js +++ b/testgen/ui/components/frontend/js/data_profiling/data_issues.js @@ -37,7 +37,7 @@ const STATUS_COLORS = { Failed: 'red', Warning: 'yellow', Error: 'brown', - Log: 'darkGrey', + Log: 'blue', }; const PotentialPIICard = (/** @type Properties */ props, /** @type Table | Column */ item) => { diff --git a/testgen/ui/components/frontend/js/display_utils.js b/testgen/ui/components/frontend/js/display_utils.js index 058c415e..9eb5e8c1 100644 --- a/testgen/ui/components/frontend/js/display_utils.js +++ b/testgen/ui/components/frontend/js/display_utils.js @@ -91,7 +91,6 @@ const colorMap = { brownLight: '#D7CCC8', // Brown 100 brownDark: '#4E342E', // Brown 800 grey: '#BDBDBD', // Gray 400 - darkGrey: '#9E9E9E', // Gray 500 empty: 'var(--empty)', // Light: Gray 200, Dark: Gray 800 emptyLight: 'var(--empty-light)', // Light: Gray 50, Dark: Gray 900 emptyTeal: 'var(--empty-teal)', diff --git a/testgen/ui/components/frontend/js/pages/project_dashboard.js b/testgen/ui/components/frontend/js/pages/project_dashboard.js index a1e29e69..5ebdb89e 100644 --- a/testgen/ui/components/frontend/js/pages/project_dashboard.js +++ b/testgen/ui/components/frontend/js/pages/project_dashboard.js @@ -239,7 +239,7 @@ const TableGroupTestSuiteSummary = (/** @type TestSuiteSummary[] */testSuites) = { label: 'Warning', 'value': parseInt(suite.last_run_warning_ct), color: 'yellow' }, { label: 
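             // per-status counts from the suite's latest run, rendered as a summary bar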
'Failed', 'value': parseInt(suite.last_run_failed_ct), color: 'red' }, { label: 'Error', 'value': parseInt(suite.last_run_error_ct), color: 'brown' }, - { label: 'Log', 'value': parseInt(suite.last_run_log_ct), color: 'darkGrey' }, + { label: 'Log', 'value': parseInt(suite.last_run_log_ct), color: 'blue' }, { label: 'Dismissed', 'value': parseInt(suite.last_run_dismissed_ct), color: 'grey' }, ], width: 350, diff --git a/testgen/ui/components/frontend/js/pages/test_runs.js b/testgen/ui/components/frontend/js/pages/test_runs.js index 3c7cb160..b778cf96 100644 --- a/testgen/ui/components/frontend/js/pages/test_runs.js +++ b/testgen/ui/components/frontend/js/pages/test_runs.js @@ -211,7 +211,7 @@ const TestRunItem = ( { label: 'Warning', value: item.warning_ct, color: 'yellow' }, { label: 'Failed', value: item.failed_ct, color: 'red' }, { label: 'Error', value: item.error_ct, color: 'brown' }, - { label: 'Log', value: item.log_ct, color: 'darkGrey' }, + { label: 'Log', value: item.log_ct, color: 'blue' }, { label: 'Dismissed', value: item.dismissed_ct, color: 'grey' }, ], height: 8, diff --git a/testgen/ui/components/frontend/js/pages/test_suites.js b/testgen/ui/components/frontend/js/pages/test_suites.js index 5e6828cb..837b23bf 100644 --- a/testgen/ui/components/frontend/js/pages/test_suites.js +++ b/testgen/ui/components/frontend/js/pages/test_suites.js @@ -162,7 +162,7 @@ const TestSuites = (/** @type Properties */ props) => { { label: 'Warning', value: parseInt(testSuite.last_run_warning_ct), color: 'yellow' }, { label: 'Failed', value: parseInt(testSuite.last_run_failed_ct), color: 'red' }, { label: 'Error', value: parseInt(testSuite.last_run_error_ct), color: 'brown' }, - { label: 'Log', value: parseInt(testSuite.last_run_log_ct), color: 'darkGrey' }, + { label: 'Log', value: parseInt(testSuite.last_run_log_ct), color: 'blue' }, { label: 'Dismissed', value: parseInt(testSuite.last_run_dismissed_ct), color: 'grey' }, ], height: 20, diff --git a/testgen/ui/services/form_service.py b/testgen/ui/services/form_service.py index 43d30cc6..d9e7223c 100644 --- a/testgen/ui/services/form_service.py +++ b/testgen/ui/services/form_service.py @@ -207,7 +207,7 @@ def render_grid_select( } else if (params.value === 'Log') { style.color = 'black'; style.borderColor = 'var(--ag-odd-row-background-color)'; - style.backgroundColor = "#9E9E9E"; + style.backgroundColor = "#2196F3"; style.display = 'flex'; style.alignItems = 'center'; style.justifyContent = 'center'; diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py index d1011c92..a661b8cd 100644 --- a/testgen/ui/views/test_results.py +++ b/testgen/ui/views/test_results.py @@ -355,7 +355,7 @@ def get_test_result_summary(test_run_id: str) -> list[dict]: { "label": "Warning", "value": result.warning_ct, "color": "yellow" }, { "label": "Failed", "value": result.failed_ct, "color": "red" }, { "label": "Error", "value": result.error_ct, "color": "brown" }, - { "label": "Log", "value": result.log_ct, "color": "darkGrey" }, + { "label": "Log", "value": result.log_ct, "color": "blue" }, { "label": "Dismissed", "value": result.dismissed_ct, "color": "grey" }, ] diff --git a/testgen/ui/views/test_suites.py b/testgen/ui/views/test_suites.py index 820c92a0..b95a7e04 100644 --- a/testgen/ui/views/test_suites.py +++ b/testgen/ui/views/test_suites.py @@ -95,7 +95,7 @@ def edit_test_suite_dialog(project_code, table_groups, test_suite_id: str) -> No def show_test_suite(mode, project_code, table_groups: Iterable[TableGroupMinimal], selected: 
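     # a severity of None is displayed as "Inherit" via format_func below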
TestSuite | None = None): - severity_options = ["Inherit", "Log", "Failed", "Warning"] + severity_options = [None, "Log", "Failed", "Warning"] selected_test_suite = selected if mode == "edit" else None table_groups_df = to_dataframe(table_groups, TableGroupMinimal.columns()) @@ -144,6 +144,7 @@ def show_test_suite(mode, project_code, table_groups: Iterable[TableGroupMinimal "severity": right_column.selectbox( label="Severity", options=severity_options, + format_func=lambda value: "Inherit" if value is None else value, index=severity_index, help="Overrides the default severity in 'Test Definition' and/or 'Test Run'.", ), From b76bc224270303b93326f318cc0a5be02bba9496 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Thu, 21 Aug 2025 16:06:19 -0400 Subject: [PATCH 23/28] fix(test definitions): id missing for user-defined tests --- testgen/common/models/test_definition.py | 4 ++-- testgen/template/dbupgrade/0148_incremental_upgrade.sql | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) create mode 100644 testgen/template/dbupgrade/0148_incremental_upgrade.sql diff --git a/testgen/common/models/test_definition.py b/testgen/common/models/test_definition.py index 445cdd94..a938e12f 100644 --- a/testgen/common/models/test_definition.py +++ b/testgen/common/models/test_definition.py @@ -2,7 +2,7 @@ from dataclasses import dataclass from datetime import datetime from typing import Literal -from uuid import UUID +from uuid import UUID, uuid4 import streamlit as st from sqlalchemy import ( @@ -146,7 +146,7 @@ class TestType(Entity): class TestDefinition(Entity): __tablename__ = "test_definitions" - id: UUID = Column(postgresql.UUID(as_uuid=True)) + id: UUID = Column(postgresql.UUID(as_uuid=True), default=uuid4) cat_test_id: int = Column(BigInteger, Identity(), primary_key=True) table_groups_id: UUID = Column(postgresql.UUID(as_uuid=True)) profile_run_id: UUID = Column(postgresql.UUID(as_uuid=True)) diff --git a/testgen/template/dbupgrade/0148_incremental_upgrade.sql b/testgen/template/dbupgrade/0148_incremental_upgrade.sql new file mode 100644 index 00000000..b69d2b1d --- /dev/null +++ b/testgen/template/dbupgrade/0148_incremental_upgrade.sql @@ -0,0 +1,6 @@ +SET SEARCH_PATH TO {SCHEMA_NAME}; + +UPDATE test_definitions + SET id = gen_random_uuid() + WHERE id IS NULL; + \ No newline at end of file From 49ac8375030d509fcd529be73296f7bd24935641 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Mon, 25 Aug 2025 01:49:01 -0400 Subject: [PATCH 24/28] fix(test types): rename "stale table" to "table freshness" --- .../dbsetup/050_populate_new_schema_metadata.sql | 14 +++++++------- .../gen_query_tests/gen_table_changed_test.sql | 6 +++--- .../gen_query_tests/gen_table_changed_test.sql | 6 +++--- .../gen_query_tests/gen_table_changed_test.sql | 6 +++--- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/testgen/template/dbsetup/050_populate_new_schema_metadata.sql b/testgen/template/dbsetup/050_populate_new_schema_metadata.sql index f5af0ad5..5c7e3873 100644 --- a/testgen/template/dbsetup/050_populate_new_schema_metadata.sql +++ b/testgen/template/dbsetup/050_populate_new_schema_metadata.sql @@ -158,7 +158,7 @@ VALUES ('1004', 'Alpha_Trunc', 'Alpha Truncation', 'Maximum character count con ('1508', 'Timeframe_Combo_Gain', 'Timeframe No Drops', 'Latest timeframe has at least all value combinations from prior period', 'Tests that column values in most recent time-window include at least same as prior time window', 'Column values in most recent time-window don''t include all values in 
prior window.', 'Mismatched values', NULL, NULL, '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', 'Categorical Column List', 'Specify one or more Categorical columns, separated by commas. Make sure not to use continuous measurements here. Do not use numeric values unless they represent discrete categories.', 'window_date_column,window_days,subset_condition', NULL, 'Date Column for Time Windows,Time Window in Days,Record Subset Condition', 'The date column used to define the time windows. This must be a DATE or DATETIME type.|Length in days of the time window. The test will compare the most recent period of days to the prior period of the same duration.|Condition defining a subset of records in main table to evaluate, written like a condition within a SQL WHERE clause - OPTIONAL', 'Fail', 'QUERY', 'referential', 'Consistency', 'Data Drift', 'Expected count of missing value combinations', 'This test checks a single transactional table to verify that categorical values or combinations that are present in the most recent time window you define include at least all those found in the prior time window of the same duration. Missing values in the latest time window will trigger the test to fail. New values are permitted. Use this test to confirm that codes or categories are not lost across successive time periods in a transactional table.', 'Y', 'line_chart', NULL), ('1509', 'Timeframe_Combo_Match', 'Timeframe Match', 'Column value combinations from latest timeframe same as prior period', 'Tests for presence of same column values in most recent time-window vs. prior time window', 'Column values don''t match in most recent time-windows.', 'Mismatched values', NULL, NULL, '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', 'Categorical Column List', 'Specify one or more Categorical columns, separated by commas. Do not use continuous measurements here. Do not use numeric values unless they represent discrete categories.', 'window_date_column,window_days,subset_condition', NULL, 'Date Column for Time Windows,Time Window in Days,Record Subset Condition', NULL, 'Fail', 'QUERY', 'referential', 'Consistency', 'Data Drift', 'Expected count of non-matching value combinations', 'This test checks a single transactional table (such as a fact table) to verify that categorical values or combinations that are present in the most recent time window you define match those found in the prior time window of the same duration. New or missing values in the latest time window will trigger the test to fail. Use this test to confirm the consistency in the occurrence of codes or categories across successive time periods in a transactional table.', 'Y', 'line_chart', NULL), ('1510', 'Dupe_Rows', 'Duplicate Rows', 'Rows are not duplicated in table', 'Tests for the absence of duplicate rows based on unique combination of column values', 'Column value combinations are duplicated in the table.', 'Duplicate records', NULL, NULL, '(({RESULT_MEASURE}-{THRESHOLD_VALUE}))::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', 'null', 'null', 'groupby_names', NULL, 'Columns to Compare', 'List of columns in the table that define a duplicate record when the combination of values is repeated on multiple rows', 'Fail', 'QUERY', 'table', 'Uniqueness', 'Schema Drift', 'Expected count of duplicate value combinations', 'This test verifies that combinations of values are not repeated within the table. 
By default when auto-generated, the test considers all columns to protect against duplication of entire rows. If you know the minimum columns that should constitute a unique record, such as a set of ID''s, you should use those to make the test as sensitive as possible. Alternatively, if you know of columns you can always exclude, such as file_date or refresh_snapshot_id, remove them to tighten the test somewhat.', 'Y', 'line_chart', NULL), - ('1511', 'Stale_Table', 'Stale Table', 'Stale Table Not Updated', 'Confirms whether table has been updated based on data fingerprint', 'Table has not been updated.', 'Was Change Detected', NULL, 'TEMPLATE', '(({RESULT_MEASURE}-{THRESHOLD_VALUE}))::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '0.0', 'null', 'null', 'history_calculation,history_lookback,subset_condition,custom_query', NULL, 'History Aggregate,History Lookback,Record Subset Condition,Fingerprint Expression', 'Aggregate calculation to be performed on the N lookback results|Last N tests to use for history aggregate calculation|Condition defining a subset of records in main table|String expression combining key column measures into a distinct representation of table state', 'Log', 'QUERY', 'table', 'Recency', 'Recency', 'Most recent prior table fingerprint', 'This test compares the current table fingerprint, calculated signature of column contents, to confirm that the table has been updated. The table fingerprint is derived from a set of values and aggregates from columns most likely to change. This test allows you to track the schedule and frequency of updates and refreshes to the table.', 'Y', 'binary_chart', '{"legend":{"labels":{"0":"Stale","1":"Updated"}}}') + ('1511', 'Table_Freshness', 'Table Freshness', 'Stale Table Not Updated', 'Confirms whether table has been updated based on data fingerprint', 'Table has not been updated.', 'Was Change Detected', NULL, 'TEMPLATE', '(({RESULT_MEASURE}-{THRESHOLD_VALUE}))::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '0.0', 'null', 'null', 'history_calculation,history_lookback,subset_condition,custom_query', NULL, 'History Aggregate,History Lookback,Record Subset Condition,Fingerprint Expression', 'Aggregate calculation to be performed on the N lookback results|Last N tests to use for history aggregate calculation|Condition defining a subset of records in main table|String expression combining key column measures into a distinct representation of table state', 'Log', 'QUERY', 'table', 'Recency', 'Recency', 'Most recent prior table fingerprint', 'This test compares the current table fingerprint, calculated signature of column contents, to confirm that the table has been updated. The table fingerprint is derived from a set of values and aggregates from columns most likely to change. 
This test allows you to track the schedule and frequency of updates and refreshes to the table.', 'Y', 'binary_chart', '{"legend":{"labels":{"0":"Stale","1":"Updated"}}}') ; @@ -171,7 +171,7 @@ VALUES ('Monitor', 'Recency'), ('Monitor', 'Daily_Record_Ct'), ('Monitor', 'Monthly_Rec_Ct'), ('Monitor', 'Weekly_Rec_Ct'), - ('Monitor', 'Stale_Table'); + ('Monitor', 'Table_Freshness'); TRUNCATE TABLE test_templates; @@ -232,11 +232,11 @@ VALUES ('2001', 'Combo_Match', 'redshift', 'ex_data_match_generic.sql'), ('2410', 'Aggregate_Balance_Range', 'databricks', 'ex_aggregate_match_range_generic.sql'), ('2411', 'Dupe_Rows', 'databricks', 'ex_dupe_rows_generic.sql'), - ('2012', 'Stale_Table', 'redshift', 'ex_table_changed_generic.sql'), - ('2112', 'Stale_Table', 'snowflake', 'ex_table_changed_generic.sql'), - ('2212', 'Stale_Table', 'mssql', 'ex_table_changed_generic.sql'), - ('2312', 'Stale_Table', 'postgresql', 'ex_table_changed_generic.sql'), - ('2412', 'Stale_Table', 'databricks', 'ex_table_changed_generic.sql') + ('2012', 'Table_Freshness', 'redshift', 'ex_table_changed_generic.sql'), + ('2112', 'Table_Freshness', 'snowflake', 'ex_table_changed_generic.sql'), + ('2212', 'Table_Freshness', 'mssql', 'ex_table_changed_generic.sql'), + ('2312', 'Table_Freshness', 'postgresql', 'ex_table_changed_generic.sql'), + ('2412', 'Table_Freshness', 'databricks', 'ex_table_changed_generic.sql') ; TRUNCATE TABLE cat_test_conditions; diff --git a/testgen/template/flavors/databricks/gen_query_tests/gen_table_changed_test.sql b/testgen/template/flavors/databricks/gen_query_tests/gen_table_changed_test.sql index d113ab2e..6f62ac92 100644 --- a/testgen/template/flavors/databricks/gen_query_tests/gen_table_changed_test.sql +++ b/testgen/template/flavors/databricks/gen_query_tests/gen_table_changed_test.sql @@ -24,7 +24,7 @@ locked AS (SELECT schema_name, table_name FROM test_definitions WHERE table_groups_id = '{TABLE_GROUPS_ID}'::UUID AND test_suite_id = '{TEST_SUITE_ID}' - AND test_type = 'Stale_Table' + AND test_type = 'Table_Freshness' AND lock_refresh = 'Y'), -- IDs - TOP 2 id_cols @@ -126,7 +126,7 @@ newtests GROUP BY profile_run_id, schema_name, table_name) SELECT '{TABLE_GROUPS_ID}'::UUID as table_groups_id, n.profile_run_id, - 'Stale_Table' AS test_type, + 'Table_Freshness' AS test_type, '{TEST_SUITE_ID}' AS test_suite_id, n.schema_name, n.table_name, 0 as skip_errors, 'Y' as test_active, @@ -139,7 +139,7 @@ SELECT '{TABLE_GROUPS_ID}'::UUID as table_groups_id, fingerprint as custom_query FROM newtests n INNER JOIN test_types t - ON ('Stale_Table' = t.test_type + ON ('Table_Freshness' = t.test_type AND 'Y' = t.active) LEFT JOIN generation_sets s ON (t.test_type = s.test_type diff --git a/testgen/template/flavors/mssql/gen_query_tests/gen_table_changed_test.sql b/testgen/template/flavors/mssql/gen_query_tests/gen_table_changed_test.sql index 4e332853..4bd85532 100644 --- a/testgen/template/flavors/mssql/gen_query_tests/gen_table_changed_test.sql +++ b/testgen/template/flavors/mssql/gen_query_tests/gen_table_changed_test.sql @@ -24,7 +24,7 @@ locked AS (SELECT schema_name, table_name FROM test_definitions WHERE table_groups_id = '{TABLE_GROUPS_ID}'::UUID AND test_suite_id = '{TEST_SUITE_ID}' - AND test_type = 'Stale_Table' + AND test_type = 'Table_Freshness' AND lock_refresh = 'Y'), -- IDs - TOP 2 id_cols @@ -131,7 +131,7 @@ newtests AS ( ) SELECT '{TABLE_GROUPS_ID}'::UUID as table_groups_id, n.profile_run_id, - 'Stale_Table' AS test_type, + 'Table_Freshness' AS test_type, '{TEST_SUITE_ID}' AS test_suite_id, 
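        -- one Table_Freshness definition is inserted per profiled table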
n.schema_name, n.table_name, 0 as skip_errors, 'Y' as test_active, @@ -144,7 +144,7 @@ SELECT '{TABLE_GROUPS_ID}'::UUID as table_groups_id, fingerprint as custom_query FROM newtests n INNER JOIN test_types t - ON ('Stale_Table' = t.test_type + ON ('Table_Freshness' = t.test_type AND 'Y' = t.active) LEFT JOIN generation_sets s ON (t.test_type = s.test_type diff --git a/testgen/template/gen_query_tests/gen_table_changed_test.sql b/testgen/template/gen_query_tests/gen_table_changed_test.sql index 18a85dbe..8683520c 100644 --- a/testgen/template/gen_query_tests/gen_table_changed_test.sql +++ b/testgen/template/gen_query_tests/gen_table_changed_test.sql @@ -24,7 +24,7 @@ locked AS (SELECT schema_name, table_name FROM test_definitions WHERE table_groups_id = '{TABLE_GROUPS_ID}'::UUID AND test_suite_id = '{TEST_SUITE_ID}' - AND test_type = 'Stale_Table' + AND test_type = 'Table_Freshness' AND lock_refresh = 'Y'), -- IDs - TOP 2 id_cols @@ -126,7 +126,7 @@ newtests GROUP BY profile_run_id, schema_name, table_name) SELECT '{TABLE_GROUPS_ID}'::UUID as table_groups_id, n.profile_run_id, - 'Stale_Table' AS test_type, + 'Table_Freshness' AS test_type, '{TEST_SUITE_ID}' AS test_suite_id, n.schema_name, n.table_name, 0 as skip_errors, 'Y' as test_active, @@ -139,7 +139,7 @@ SELECT '{TABLE_GROUPS_ID}'::UUID as table_groups_id, fingerprint as custom_query FROM newtests n INNER JOIN test_types t - ON ('Stale_Table' = t.test_type + ON ('Table_Freshness' = t.test_type AND 'Y' = t.active) LEFT JOIN generation_sets s ON (t.test_type = s.test_type From f9e74764ebbc4b788c1bfc42caafb924e718eee4 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Mon, 25 Aug 2025 01:49:16 -0400 Subject: [PATCH 25/28] fix(test definitions): remove unnecessary validation --- testgen/ui/views/test_definitions.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/testgen/ui/views/test_definitions.py b/testgen/ui/views/test_definitions.py index 0331f1f5..9e018d51 100644 --- a/testgen/ui/views/test_definitions.py +++ b/testgen/ui/views/test_definitions.py @@ -669,7 +669,7 @@ def render_dynamic_attribute(attribute: str, container: DeltaGenerator): submit = bottom_left_column.button("Save") if submit: - if validate_form(test_scope, test_type, test_definition, column_name_label): + if validate_form(test_scope, test_definition, column_name_label): if mode == "edit": test_definition["id"] = selected_test_def["id"] TestDefinition(**test_definition).save() @@ -793,13 +793,7 @@ def copy_move_test_dialog( time.sleep(1) st.rerun() -def validate_form(test_scope, test_type, test_definition, column_name_label): - if test_type == "Condition_Flag" and not test_definition["threshold_value"]: - st.error("Threshold Error Count is a required field.") - return False - if not test_definition["test_type"]: - st.error("Test Type is a required field.") - return False +def validate_form(test_scope, test_definition, column_name_label): if test_scope in ["column", "referential", "custom"] and not test_definition["column_name"]: st.error(f"{column_name_label} is a required field.") return False From cddaf91b5d61be0fa34a1a5ee32f53f00db31f9b Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Mon, 25 Aug 2025 01:49:48 -0400 Subject: [PATCH 26/28] fix(test results): make binary bars wider --- testgen/ui/views/test_results.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py index a661b8cd..ae6080f8 100644 --- a/testgen/ui/views/test_results.py +++ 
b/testgen/ui/views/test_results.py @@ -734,7 +734,7 @@ def render_binary_chart(data: pd.DataFrame, **params: dict) -> None: legend_labels = params.get("legend", {}).get("labels") or {"0": "0", "1": "1"} history["test_start"] = history["test_date"].apply(datetime.fromisoformat) - history["test_end"] = history["test_start"].apply(lambda start: start + timedelta(seconds=30)) + history["test_end"] = history["test_start"].apply(lambda start: start + timedelta(seconds=60)) history["formatted_test_date"] = history["test_date"].apply(lambda date_str: datetime.fromisoformat(date_str).strftime("%I:%M:%S %p, %d/%m/%Y")) history["result_measure_with_status"] = history.apply(lambda row: f"{legend_labels[str(int(row['result_measure']))]} ({row['result_status']})", axis=1) From 178388683b34e1c11fa65e50b72d318ed8d855a2 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Mon, 25 Aug 2025 01:51:14 -0400 Subject: [PATCH 27/28] fix(quick-start): adjust dates to 30 days ago --- testgen/__main__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testgen/__main__.py b/testgen/__main__.py index b3973897..6362a868 100644 --- a/testgen/__main__.py +++ b/testgen/__main__.py @@ -369,7 +369,7 @@ def quick_start( click.echo("loading initial data") run_quick_start_increment(0) - minutes_offset = -30*24*60*3 + minutes_offset = -30*24*60 # 1 month ago table_group_id="0ea85e17-acbe-47fe-8394-9970725ad37d" click.echo(f"run-profile with table_group_id: {table_group_id}") @@ -387,7 +387,7 @@ def quick_start( for iteration in range(1, 4): click.echo(f"Running iteration: {iteration} / 3") - minutes_offset = -30*24*60 * (3-iteration) + minutes_offset = -10*24*60 * (3-iteration) run_quick_start_increment(iteration) run_execution_steps(settings.PROJECT_KEY, settings.DEFAULT_TEST_SUITE_KEY, minutes_offset=minutes_offset) From eaeb1f747a4764a15c0e8e3c12e762df23f7f997 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Mon, 25 Aug 2025 12:11:39 -0400 Subject: [PATCH 28/28] release: 4.20.4 -> 4.22.2 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 30c372e7..26c4cf08 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ build-backend = "setuptools.build_meta" [project] name = "dataops-testgen" -version = "4.20.4" +version = "4.22.2" description = "DataKitchen's Data Quality DataOps TestGen" authors = [ { "name" = "DataKitchen, Inc.", "email" = "info@datakitchen.io" },