diff --git a/deploy/testgen-base.dockerfile b/deploy/testgen-base.dockerfile index 1c7d7ea7..f04aa3ba 100644 --- a/deploy/testgen-base.dockerfile +++ b/deploy/testgen-base.dockerfile @@ -23,7 +23,9 @@ RUN apk update && apk upgrade && apk add --no-cache \ openblas=0.3.28-r0 \ openblas-dev=0.3.28-r0 \ unixodbc=2.3.12-r0 \ - unixodbc-dev=2.3.12-r0 + unixodbc-dev=2.3.12-r0 \ + # Pinned versions for security + xz=5.6.2-r1 RUN apk add --no-cache \ --repository https://dl-cdn.alpinelinux.org/alpine/v3.21/community \ diff --git a/deploy/testgen.dockerfile b/deploy/testgen.dockerfile index 415bc91c..f8ba88fd 100644 --- a/deploy/testgen.dockerfile +++ b/deploy/testgen.dockerfile @@ -1,4 +1,4 @@ -ARG TESTGEN_BASE_LABEL=v5 +ARG TESTGEN_BASE_LABEL=v6 FROM datakitchen/dataops-testgen-base:${TESTGEN_BASE_LABEL} AS release-image diff --git a/pyproject.toml b/pyproject.toml index dded5064..b0f36486 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ build-backend = "setuptools.build_meta" [project] name = "dataops-testgen" -version = "4.0.12" +version = "4.1.2" description = "DataKitchen's Data Quality DataOps TestGen" authors = [ { "name" = "DataKitchen, Inc.", "email" = "info@datakitchen.io" }, @@ -64,7 +64,6 @@ dependencies = [ "snowflake-connector-python==3.13.1", "matplotlib==3.9.2", "scipy==1.14.1", - "tornado==6.4.2", "jinja2==3.1.6", ] diff --git a/testgen/common/models/scores.py b/testgen/common/models/scores.py index e09a29a0..91dcb144 100644 --- a/testgen/common/models/scores.py +++ b/testgen/common/models/scores.py @@ -1,8 +1,8 @@ import enum import uuid -from collections import defaultdict from collections.abc import Iterable from datetime import UTC, datetime +from itertools import groupby from typing import Literal, Self, TypedDict import pandas as pd @@ -69,15 +69,17 @@ class ScoreDefinition(Base): cde_score: bool = Column(Boolean, default=False, nullable=False) category: ScoreCategory | None = Column(Enum(ScoreCategory), nullable=True) - results: 
Iterable["ScoreDefinitionResult"] = relationship( - "ScoreDefinitionResult", + criteria: "ScoreDefinitionCriteria" = relationship( + "ScoreDefinitionCriteria", cascade="all, delete-orphan", - order_by="ScoreDefinitionResult.category", lazy="joined", + uselist=False, + single_parent=True, ) - filters: Iterable["ScoreDefinitionFilter"] = relationship( - "ScoreDefinitionFilter", + results: Iterable["ScoreDefinitionResult"] = relationship( + "ScoreDefinitionResult", cascade="all, delete-orphan", + order_by="ScoreDefinitionResult.category", lazy="joined", ) breakdown: Iterable["ScoreDefinitionBreakdownItem"] = relationship( @@ -102,9 +104,12 @@ def from_table_group(cls, table_group: dict) -> Self: definition.total_score = True definition.cde_score = True definition.category = ScoreCategory.dq_dimension - definition.filters = [ - ScoreDefinitionFilter(field="table_groups_name", value=table_group["table_groups_name"]), - ] + definition.criteria = ScoreDefinitionCriteria( + operand="AND", + filters=[ + ScoreDefinitionFilter(field="table_groups_name", value=table_group["table_groups_name"]), + ], + ) return definition @classmethod @@ -159,7 +164,7 @@ def as_score_card(self) -> "ScoreCard": score_cards/get_category_scores_by_column.sql score_cards/get_category_scores_by_dimension.sql """ - if len(self.filters) <= 0: + if not self.criteria.has_filters(): return { "id": self.id, "project_code": self.project_code, @@ -378,15 +383,15 @@ def recalculate_scores_history(self) -> None: self.history = list(current_history.values()) def _get_raw_query_filters(self, cde_only: bool = False, prefix: str | None = None) -> list[str]: - values_by_field = defaultdict(list) - for filter_ in self.filters: - values_by_field[filter_.field].append(f"'{filter_.value}'") - values_by_field["project_code"].append(f"'{self.project_code}'") + extra_filters = [ + f"{prefix or ''}project_code = '{self.project_code}'" + ] if cde_only: - values_by_field["critical_data_element"].append("true") + 
def _escape_sql_literal(value: object) -> str:
    """Return ``value`` as text that can sit between single quotes in SQL.

    Single quotes are doubled so that a value such as ``O'Brien`` cannot
    terminate the literal early.
    NOTE(review): assumes the target database treats backslashes literally
    inside plain string literals — confirm for the engines in use.
    """
    return str(value).replace("'", "''")


class ScoreDefinitionCriteria(Base):
    """
    Hold the filter conditions applied for a given scorecard.

    Properties are as follows:

    :param operand: boolean operand used to join the final filters

        Either `AND` or `OR`. The operand is used to join the filters
        after they have been individually processed, grouped and
        formatted into valid SQL expressions.

    :param group_by_field: whether filters are grouped by field name

        When false, filters are individually converted to SQL and then
        joined with ``operand``.

        When true, filters are sorted and grouped by field name, all
        filters for a given field name are combined with an `OR` boolean
        condition into a single filter. Then, the resulting filters
        are joined with ``operand``.

    :param filters: a list of :class:`ScoreDefinitionFilter` objects
    """

    __tablename__ = "score_definition_criteria"

    id: str = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    definition_id: str = Column(UUID(as_uuid=True), ForeignKey("score_definitions.id", ondelete="CASCADE"))
    operand: Literal["AND", "OR"] = Column(String, nullable=False, default="AND")
    group_by_field: bool = Column(Boolean, nullable=False, default=True)
    filters: list["ScoreDefinitionFilter"] = relationship(
        "ScoreDefinitionFilter",
        cascade="all, delete-orphan",
        lazy="joined",
    )

    def __str__(self) -> str:
        # __str__ must return a string; get_as_sql() yields None without filters.
        return self.get_as_sql() or ""

    def get_as_sql(
        self,
        prefix: str | None = None,
    ) -> str | None:
        """Render the criteria as a single SQL boolean expression.

        :param prefix: optional text prepended to every field name
            (for example a table alias followed by a dot)
        :return: the SQL expression, or ``None`` when there are no filters
        """
        if not self.filters:
            return None

        if self.group_by_field:
            filters_sql = []
            # groupby only merges adjacent items, hence the sort on the same key.
            grouped_filters = groupby(sorted(self.filters, key=lambda f: f.field), key=lambda f: f.field)
            for _, field_filters in grouped_filters:
                field_filters_sql = [f.get_as_sql(prefix=prefix, operand="AND") for f in field_filters]
                if len(field_filters_sql) > 1:
                    # Single quotes inside the f-string keep this valid on
                    # interpreters older than Python 3.12.
                    filters_sql.append(f"({' OR '.join(field_filters_sql)})")
                else:
                    filters_sql.append(field_filters_sql[0])
        else:
            filters_sql = [f.get_as_sql(prefix=prefix, operand="AND") for f in self.filters]

        if len(filters_sql) == 1:
            return filters_sql[0]
        joined_filters = f" {self.operand} ".join(filters_sql)
        return f"({joined_filters})"

    def __iter__(self):
        """Yield each root filter as a dict, with its chained filters under ``others``."""
        for filter_ in self.filters:
            yield {
                "field": filter_.field,
                "value": filter_.value,
                "others": [
                    {"field": linked_filter.field, "value": linked_filter.value}
                    for linked_filter in filter_.next_filter
                ] if filter_.next_filter else [],
            }

    def has_filters(self) -> bool:
        """Return whether at least one filter is attached to the criteria."""
        return bool(self.filters)

    @classmethod
    def from_filters(cls, filters: list[dict], group_by_field: bool = True) -> "ScoreDefinitionCriteria":
        """Build a criteria object from plain filter dicts.

        :param filters: dicts with ``field`` and ``value`` keys and an optional
            ``others`` list of further ``field``/``value`` dicts; the entries in
            ``others`` are chained to the root filter through ``next_filter``
        :param group_by_field: stored on the criteria; also selects the
            operand (``AND`` when true, ``OR`` when false)
        """
        chained_filters: list[ScoreDefinitionFilter] = []
        for filter_ in filters:
            root_filter = current_filter = ScoreDefinitionFilter(
                field=filter_["field"],
                value=filter_["value"],
                next_filter=None,
            )
            for linked_filter in (filter_.get("others") or []):
                current_filter.next_filter = ScoreDefinitionFilter(
                    field=linked_filter["field"],
                    value=linked_filter["value"],
                    next_filter=None,
                )
                current_filter = current_filter.next_filter
            chained_filters.append(root_filter)
        return cls(operand="AND" if group_by_field else "OR", filters=chained_filters, group_by_field=group_by_field)


class ScoreDefinitionFilter(Base):
    """A ``field = value`` condition, optionally chained to further conditions.

    Iterating a filter yields the filter itself followed by every filter
    reachable through ``next_filter``.
    """

    __tablename__ = "score_definition_filters"

    id: str = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    criteria_id = Column(
        UUID(as_uuid=True),
        ForeignKey("score_definition_criteria.id", ondelete="CASCADE"),
        nullable=True,
        default=None,
    )
    field: str = Column(String, nullable=False)
    value: str = Column(String, nullable=False)
    next_filter_id = Column(
        UUID(as_uuid=True),
        ForeignKey("score_definition_filters.id", ondelete="CASCADE"),
        nullable=True,
        default=None,
    )
    # NOTE(review): no remote_side is given, so SQLAlchemy presumably treats this
    # self-referential relationship as parent -> child, i.e. the linked row stores
    # the parent's id in next_filter_id — confirm before relying on the direction.
    next_filter: "ScoreDefinitionFilter" = relationship(
        "ScoreDefinitionFilter",
        cascade="all, delete-orphan",
        lazy="joined",
        uselist=False,
        single_parent=True,
    )

    def __iter__(self):
        """Yield this filter and then each filter further down the chain."""
        current_filter = self
        while current_filter is not None:
            yield current_filter
            current_filter = current_filter.next_filter

    def get_as_sql(self, prefix: str | None = None, operand: Literal["AND", "OR"] = "AND") -> str:
        """Render the whole chain as one parenthesised SQL expression.

        :param prefix: optional text prepended to every field name
        :param operand: boolean operand placed between the chained conditions
        :return: ``(<field> = '<value>' <operand> ...)``

        Values are escaped before being quoted.
        NOTE(review): field names are interpolated as-is; presumably they are
        restricted to known column names upstream — confirm.
        """
        field_prefix = prefix or ""
        sql_filters = [f"{field_prefix}{f.field} = '{_escape_sql_literal(f.value)}'" for f in self]
        joined_filters = f" {operand} ".join(sql_filters)
        return f"({joined_filters})"
last_run l AND d.table_name = n.table_name ) WHERE data_table_chars.table_id = d.table_id + AND d.drop_date IS NULL AND n.table_name IS NULL; -- ============================================================================== @@ -221,4 +222,5 @@ FROM last_run l ) WHERE data_column_chars.table_id = d.table_id AND data_column_chars.column_name = d.column_name + AND d.drop_date IS NULL AND n.column_name IS NULL; diff --git a/testgen/template/dbsetup/030_initialize_new_schema_structure.sql b/testgen/template/dbsetup/030_initialize_new_schema_structure.sql index f7978a4e..6b46af6d 100644 --- a/testgen/template/dbsetup/030_initialize_new_schema_structure.sql +++ b/testgen/template/dbsetup/030_initialize_new_schema_structure.sql @@ -622,7 +622,6 @@ CREATE TABLE auth_users ( email VARCHAR(120), name VARCHAR(120), password VARCHAR(120), - preauthorized BOOLEAN default false, role VARCHAR(20) ); @@ -657,13 +656,23 @@ CREATE TABLE IF NOT EXISTS score_definitions ( category VARCHAR(30) DEFAULT NULL ); +CREATE TABLE IF NOT EXISTS score_definition_criteria ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + definition_id UUID NOT NULL REFERENCES score_definitions(id) ON DELETE CASCADE, + operand VARCHAR NOT NULL DEFAULT 'AND', + group_by_field BOOLEAN NOT NULL DEFAULT true +); + CREATE TABLE IF NOT EXISTS score_definition_filters ( - id UUID DEFAULT gen_random_uuid() PRIMARY KEY, - definition_id UUID CONSTRAINT score_definitions_filters_score_definitions_definition_id_fk - REFERENCES score_definitions (id) - ON DELETE CASCADE, - field TEXT DEFAULT NULL, - value TEXT DEFAULT NULL + id UUID DEFAULT gen_random_uuid() PRIMARY KEY, + criteria_id UUID DEFAULT NULL CONSTRAINT score_definitions_filters_score_definition_criteria_fk + REFERENCES score_definition_criteria (id) + ON DELETE CASCADE, + next_filter_id UUID DEFAULT NULL CONSTRAINT score_definitions_filters_score_definitions_filters_fk + REFERENCES score_definition_filters (id) + ON DELETE CASCADE, + field TEXT DEFAULT NULL, + 
-- Migration 0138: move scorecard filters from score_definitions onto the new
-- score_definition_criteria table.
--   1. create score_definition_criteria
--   2. add criteria_id / next_filter_id (with foreign keys) to score_definition_filters
--   3. create one AND / group_by_field criteria row per existing definition and
--      link that definition's filters to it
--   4. drop the now-unused score_definition_filters.definition_id column
SET SEARCH_PATH TO {SCHEMA_NAME};

CREATE TABLE score_definition_criteria (
    id             UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    definition_id  UUID NOT NULL REFERENCES score_definitions(id) ON DELETE CASCADE,
    operand        VARCHAR NOT NULL DEFAULT 'AND',
    group_by_field BOOLEAN NOT NULL DEFAULT true
);

ALTER TABLE score_definition_filters
    ADD COLUMN criteria_id UUID DEFAULT NULL,
    ADD COLUMN next_filter_id UUID DEFAULT NULL,
    ADD CONSTRAINT score_definitions_filters_score_definition_criteria_fk
        FOREIGN KEY (criteria_id) REFERENCES score_definition_criteria (id) ON DELETE CASCADE,
    ADD CONSTRAINT score_definitions_filters_score_definitions_filters_fk
        FOREIGN KEY (next_filter_id) REFERENCES score_definition_filters (id) ON DELETE CASCADE;

DO $$
DECLARE
    current_definition_id UUID;
    new_criteria_id UUID;
    definition_filter RECORD;
BEGIN
    FOR current_definition_id IN SELECT id FROM score_definitions LOOP
        new_criteria_id := gen_random_uuid();
        RAISE NOTICE 'Definition = %', current_definition_id;
        RAISE NOTICE 'Create Score Criteria (AND)';
        -- Static SQL: no identifier is dynamic here, so the PL/pgSQL variables
        -- can be used directly instead of being quoted into an EXECUTE string.
        INSERT INTO score_definition_criteria (id, definition_id, operand, group_by_field)
        VALUES (new_criteria_id, current_definition_id, 'AND', true);

        -- The per-row loop is kept so that one notice is emitted per linked filter.
        FOR definition_filter IN
            SELECT id, field, value FROM score_definition_filters WHERE definition_id = current_definition_id
        LOOP
            RAISE NOTICE 'Link filter to Score Criteria Field=% Value=%', definition_filter.field, definition_filter.value;
            UPDATE score_definition_filters
               SET criteria_id = new_criteria_id
             WHERE id = definition_filter.id;
        END LOOP;
    END LOOP;
END $$;

-- Filters are now reached through criteria_id; the direct link is obsolete.
ALTER TABLE score_definition_filters DROP COLUMN definition_id;
/**
 * @typedef Properties
 * @type {object}
 * @property {string?} icon
 * @property {'info'|'success'|'error'} type
 * @property {string?} message
 * @property {string?} class
 */
import van from '../van.min.js';
import { getValue, loadStylesheet } from '../utils.js';
import { Icon } from './icon.js';

const { div } = van.tags;
const alertTypeColors = {
    info: {backgroundColor: 'rgba(28, 131, 225, 0.1)', color: 'rgb(0, 66, 128)'},
    success: {backgroundColor: 'rgba(33, 195, 84, 0.1)', color: 'rgb(23, 114, 51)'},
    error: {backgroundColor: 'rgba(255, 43, 43, 0.09)', color: 'rgb(125, 53, 59)'},
};

/**
 * Alert box with an optional leading icon; the children are stacked in a
 * column next to the icon. Colors are picked from `props.type`.
 *
 * @param {Properties} props
 * @param {...any} children
 */
const Alert = (/** @type Properties */ props, /** @type Array */ ...children) => {
    loadStylesheet('alert', stylesheet);

    return div(
        {
            ...props,
            class: () => (getValue(props.class) ?? '') + ` tg-alert flex-row`,
            style: () => {
                // Fall back to the info palette so a missing or unknown type
                // does not throw while computing the style.
                const colors = alertTypeColors[getValue(props.type)] ?? alertTypeColors.info;
                return `color: ${colors.color}; background-color: ${colors.backgroundColor};`;
            },
            role: 'alert',
        },
        () => {
            const icon = getValue(props.icon);
            // The icon is optional: render nothing rather than an empty icon element.
            return icon ? Icon({size: 20, classes: 'mr-2'}, icon) : '';
        },
        div(
            {class: 'flex-column'},
            ...children,
        ),
    );
};

const stylesheet = new CSSStyleSheet();
stylesheet.replace(`
.tg-alert {
    padding: 16px;
    border-radius: 0.5rem;
    font-size: 16px;
    line-height: 24px;
}
.tg-alert > .tg-icon {
    color: inherit !important;
}
`);

export { Alert };
'../components/card.js'; @@ -49,13 +50,17 @@ const EMPTY_STATE_MESSAGE = { line1: 'Track data quality scores', line2: 'Create custom scorecards to assess quality of your data assets across different categories.', }, + explorer: { + line1: 'Track data quality scores', + line2: 'Filter or select columns to assess the quality of your data assets across different categories.', + }, }; const EmptyState = (/** @type Properties */ props) => { loadStylesheet('empty-state', stylesheet); return Card({ - class: 'tg-empty-state flex-column fx-align-flex-center', + class: `tg-empty-state flex-column fx-align-flex-center ${getValue(props.class ?? '')}`, content: [ span({ class: 'tg-empty-state--title mb-5' }, props.label), i({class: 'material-symbols-rounded mb-5'}, props.icon), @@ -63,11 +68,15 @@ const EmptyState = (/** @type Properties */ props) => { span({ class: 'mb-5' }, props.message.line2), ( getValue(props.button) ?? - Link({ - class: 'tg-empty-state--link', - right_icon: 'chevron_right', - ...(getValue(props.link)), - }) + ( + getValue(props.link) + ? 
/**
 * @typedef FilterValue
 * @type {object}
 * @property {string} field
 * @property {string} value
 * @property {Array?} others
 *
 * @typedef Selection
 * @type {Array}
 *
 * @typedef Column
 * @type {object}
 * @property {string} name
 * @property {string} table
 * @property {string} table_group
 * @property {boolean?} selected
 *
 * @typedef Properties
 * @type {object}
 * @property {Array} columns
 */
import van from '../van.min.js';
import { Streamlit } from '../streamlit.js';
import { emitEvent, getValue, isEqual, loadStylesheet, slugify } from '../utils.js';
import { Tree } from './tree.js';
import { Icon } from './icon.js';
import { Button } from './button.js';

const { div, span } = van.tags;
const tableGroupFieldName = 'table_groups_name';
const tableFieldName = 'table_name';
const columnFieldName = 'column_name';

// Human-readable labels for the filter field names.
const TRANSLATIONS = {
    table_groups_name: 'Table Group',
    table_name: 'Table',
    column_name: 'Column',
};

/**
 * Two-pane selector: a multi-select tree of table groups / tables / columns
 * on the left and the resulting filters on the right. "Apply" emits a
 * `ColumnFiltersUpdated` event carrying the current selection and is only
 * enabled when the selection differs from the initial one.
 */
const ColumnSelector = (/** @type Properties */ props) => {
    loadStylesheet('column-selector', stylesheet);

    window.testgen.isPage = true;
    Streamlit.setFrameHeight(400);

    const initialSelection = van.state([]);
    const selection = van.state([]);
    const valueById = van.state({});
    const treeNodes = van.state([]);
    const changed = van.derive(() => {
        const current = selection.val;
        const initial = initialSelection.val;
        return !isEqual(current, initial);
    });

    // Rebuild the tree and the selection whenever the columns property changes.
    van.derive(() => {
        const initialization = initialize(getValue(props.columns) ?? []);

        valueById.val = initialization.valueById;
        treeNodes.val = initialization.treeNodes;
        selection.val = initialization.selection;
        initialSelection.val = initialization.selection;
    });

    return div(
        {class: 'flex-column fx-gap-2 column-selector-wrapper'},
        div(
            {class: 'flex-row column-selector'},
            Tree({
                id: 'column-selector-tree',
                classes: 'column-selector--tree',
                multiSelect: true,
                onMultiSelect: (selected) => {
                    if (!selected) {
                        selection.val = [];
                        return;
                    }

                    selection.val = getSelectionFromTreeNodes(selected, getValue(valueById));
                },
                nodes: treeNodes,
            }),
            span({class: 'column-selector--divider'}),
            () => {
                const selection_ = getValue(selection);
                return div(
                    {class: 'flex-row fx-flex-wrap fx-align-flex-start fx-flex-align-content fx-gap-2 column-selector--selected'},
                    selection_.map((item) => ColumnFilter(item)),
                );
            },
        ),
        div(
            {class: 'flex-row fx-justify-content-flex-end'},
            Button({
                type: 'stroked',
                color: 'primary',
                label: 'Apply',
                width: 'auto',
                disabled: van.derive(() => !changed.val),
                onclick: () => emitEvent('ColumnFiltersUpdated', {payload: selection.val}),
            }),
        )
    );
};

/**
 * Build the tree nodes, the id -> original-name lookup and the initial
 * selection from a flat list of columns.
 *
 * Node ids are built from slugified names; `valueById` maps each id back to
 * the original name.
 * NOTE(review): two names that slugify to the same text would share an id —
 * confirm whether that can happen for real table/column names.
 *
 * @param {Array<Column>} columns
 * @returns {{treeNodes: Array, valueById: object, selection: Selection}}
 */
function initialize(/** @type Array */ columns) {
    const valueById = {};
    const treeNodesMapping = {};

    for (const columnObject of columns) {
        const tableGroup = slugify(columnObject.table_group);
        const table = slugify(columnObject.table);
        const column = slugify(columnObject.name);

        const tableGroupId = `${tableGroupFieldName}:${tableGroup}`;
        const tableId = `${tableFieldName}:${tableGroup}:${table}`;
        const columnId = `${columnFieldName}:${tableGroup}:${table}:${column}`;

        valueById[tableGroupId] = columnObject.table_group;
        valueById[tableId] = columnObject.table;
        valueById[columnId] = columnObject.name;

        // Children are keyed by id while building and turned into arrays below.
        treeNodesMapping[tableGroupId] = treeNodesMapping[tableGroupId] ?? {
            id: tableGroupId,
            label: columnObject.table_group,
            icon: 'dataset',
            selected: false,
            children: {},
        };
        treeNodesMapping[tableGroupId].children[tableId] = treeNodesMapping[tableGroupId].children[tableId] ?? {
            id: tableId,
            label: columnObject.table,
            icon: 'table',
            selected: false,
            children: {},
        };
        treeNodesMapping[tableGroupId].children[tableId].children[columnId] = {
            id: columnId,
            label: columnObject.name,
            icon: 'abc',
            selected: columnObject.selected ?? false,
        };
    }

    const treeNodes = Object.values(treeNodesMapping);
    for (const tableGroup of treeNodes) {
        tableGroup.children = Object.values(tableGroup.children);
        for (const table of tableGroup.children) {
            table.children = Object.values(table.children);
            // A parent counts as selected only when all of its children are.
            table.selected = table.children.every(child => child.selected);
        }
        tableGroup.selected = tableGroup.children.every(child => child.selected);
    }

    return { treeNodes, valueById, selection: getSelectionFromTreeNodes(treeNodes, valueById) };
}

/**
 * Convert tree nodes into the most compact list of filters: a fully selected
 * table group becomes one table-group filter, a fully selected table becomes
 * one table filter (qualified by its group), anything else becomes
 * per-column filters (qualified by table and group).
 *
 * Nodes whose first element carries an `all` property are treated as coming
 * from a user action and are checked through `all`; otherwise `selected` is
 * used.
 * NOTE(review): in the user-action case every column present under a
 * partially selected table is emitted — presumably the tree only passes the
 * selected children; confirm against the Tree component.
 */
function getSelectionFromTreeNodes(treeNodes, valueById) {
    if (!treeNodes || treeNodes.length === 0) {
        return [];
    }

    const selection = [];
    const isFromUserAction = treeNodes[0].all !== undefined;
    const propertyToCheck = isFromUserAction ? 'all' : 'selected';
    for (const tableGroup of treeNodes) {
        if (tableGroup[propertyToCheck]) {
            selection.push({field: tableGroupFieldName, value: valueById[tableGroup.id]});
            continue;
        }

        for (const table of tableGroup.children) {
            if (table[propertyToCheck]) {
                selection.push({
                    field: tableFieldName,
                    value: valueById[table.id],
                    others: [
                        {field: tableGroupFieldName, value: valueById[tableGroup.id]},
                    ],
                });
                continue;
            }

            for (const column of table.children) {
                if (isFromUserAction || column.selected) {
                    selection.push({
                        field: columnFieldName,
                        value: valueById[column.id],
                        others: [
                            {field: tableFieldName, value: valueById[table.id]},
                            {field: tableGroupFieldName, value: valueById[tableGroup.id]},
                        ],
                    });
                }
            }
        }
    }

    return selection;
}

/**
 * Chip showing one filter as `<label> = <value>`. When the filter has linked
 * conditions in `others`, an expand icon toggles their display.
 */
const ColumnFilter = (
    /** @type FilterValue */ filter,
) => {
    const expanded = van.state(false);
    const expandIcon = van.derive(() => expanded.val ? 'keyboard_arrow_up' : 'keyboard_arrow_down');
    // `others` is optional; normalize once so every use below is safe.
    const others = filter.others ?? [];

    return div(
        {
            class: 'flex-row column-selector--filter',
            'data-testid': 'column-selector-filter',
            style: 'background: var(--form-field-color); border-radius: 8px; padding: 8px 12px;',
        },
        div(
            {class: 'flex-column'},
            div(
                { class: 'flex-row', 'data-testid': 'column-selector-filter' },
                span({ class: 'text-secondary mr-1', 'data-testid': 'column-selector-filter-label' }, `${TRANSLATIONS[filter.field] ?? filter.field} =`),
                span({'data-testid': 'column-selector-filter-value'}, filter.value),
            ),
            () => {
                const expanded_ = getValue(expanded);
                if (!expanded_) {
                    return '';
                }

                return div(
                    {class: 'flex-column', 'data-testid': 'column-selector-filter-others'},
                    others.map((item) => ColumnFilterLine(item.field, item.value)),
                );
            },
        ),
        others.length > 0
            ? Icon(
                {
                    size: 16,
                    classes: 'clickable text-secondary ml-1',
                    'data-testid': 'column-selector-filter-expand',
                    onclick: () => expanded.val = !expanded.val,
                },
                expandIcon,
            )
            : '',
    );
};

/** Single `<label> = <value>` row used for the linked conditions of a filter. */
const ColumnFilterLine = (/** @type string */ field, /** @type string */ value) => {
    return div(
        { class: 'flex-row', 'data-testid': 'column-selector-filter' },
        span({ class: 'text-secondary mr-1', 'data-testid': 'column-selector-filter-label' }, `${TRANSLATIONS[field] ?? field} =`),
        span({'data-testid': 'column-selector-filter-value'}, value),
    );
};

const stylesheet = new CSSStyleSheet();
stylesheet.replace(`
.column-selector-wrapper {
    height: 100%;
    overflow-y: hidden;
}

.column-selector {
    height: calc(100% - 48px);
    align-items: stretch;
}

.column-selector--tree {
    flex: 1;
}

.column-selector--divider {
    width: 1px;
    background-color: var(--grey);
    margin: 0 10px;
}

.column-selector--selected {
    flex: 2;
    overflow-y: auto;
}
`);

export { ColumnSelector, ColumnFilter };
van.derive(() => { const colorValue = getValue(props.color) || defaultColor; return colorMap[colorValue] || colorValue; @@ -41,7 +45,16 @@ const FrequencyBars = (/** @type Properties */ props) => { { class: 'flex-row fx-gap-2' }, div( { class: 'tg-frequency-bars' }, - span({ class: 'tg-frequency-bars--empty' }), + span({ + class: 'tg-frequency-bars--fill', + style: `width: 100%; background-color: ${nullColor};`, + }), + span({ + class: 'tg-frequency-bars--fill', + style: () => `width: ${(total.val - nullCount.val) * 100 / total.val}%; + ${(total.val - nullCount.val) ? 'min-width: 1px;' : ''} + background-color: ${otherColor};`, + }), span({ class: 'tg-frequency-bars--fill', style: () => `width: ${count * 100 / total.val}%; @@ -59,6 +72,15 @@ const FrequencyBars = (/** @type Properties */ props) => { div(value), ); }), + div( + { class: 'tg-frequency-bars--legend flex-row fx-flex-wrap text-caption mt-1' }, + span({ class: 'dot', style: `color: ${color.val};` }), + 'Value', + span({ class: 'dot', style: `color: ${otherColor};` }), + 'Other', + span({ class: 'dot', style: `color: ${nullColor};` }), + 'Null', + ), ); }; @@ -71,14 +93,6 @@ stylesheet.replace(` position: relative; } -.tg-frequency-bars--empty { - position: absolute; - width: 100%; - height: 100%; - border-radius: 4px; - background-color: ${colorMap['emptyLight']} -} - .tg-frequency-bars--fill { position: absolute; border-radius: 4px; @@ -89,6 +103,19 @@ stylesheet.replace(` flex-shrink: 0; text-align: right; } + +.tg-frequency-bars--legend { + font-style: italic; +} + +.tg-frequency-bars--legend span { + margin-right: 2px; + font-size: 4px; +} + +.tg-frequency-bars--legend span:not(:first-child) { + margin-left: 8px; +} `); export { FrequencyBars }; diff --git a/testgen/ui/components/frontend/js/components/score_card.js b/testgen/ui/components/frontend/js/components/score_card.js index 7b191677..130bc470 100644 --- a/testgen/ui/components/frontend/js/components/score_card.js +++ 
b/testgen/ui/components/frontend/js/components/score_card.js @@ -165,11 +165,16 @@ stylesheet.replace(` } .tg-score-card--categories { + display: flex; + flex-direction: column; + flex-wrap: wrap; + row-gap: 8px; + column-gap: 16px; max-height: 100px; overflow-y: auto; - display: grid; - grid-gap: 8px; - grid-template-columns: 160px 160px; +} +.tg-score-card--categories > div { + min-width: 160px; } .tg-score-card--category-score { diff --git a/testgen/ui/components/frontend/js/components/sorting_selector.js b/testgen/ui/components/frontend/js/components/sorting_selector.js index 60b9afa6..824e118a 100644 --- a/testgen/ui/components/frontend/js/components/sorting_selector.js +++ b/testgen/ui/components/frontend/js/components/sorting_selector.js @@ -45,8 +45,8 @@ const SortingSelector = (/** @type {Properties} */ props) => { ); const directionIcons = { - ASC: `arrow_downward`, - DESC: `arrow_upward`, + ASC: `arrow_upward`, + DESC: `arrow_downward`, } const activeColumnItem = (colId) => { diff --git a/testgen/ui/components/frontend/js/components/summary_bar.js b/testgen/ui/components/frontend/js/components/summary_bar.js index 449efcde..2c791913 100644 --- a/testgen/ui/components/frontend/js/components/summary_bar.js +++ b/testgen/ui/components/frontend/js/components/summary_bar.js @@ -4,6 +4,7 @@ * @property {string} value * @property {string} color * @property {string} label + * @property {boolean?} showPercent * * @typedef Properties * @type {object} @@ -13,7 +14,7 @@ * @property {number?} width */ import van from '../van.min.js'; -import { getValue, loadStylesheet } from '../utils.js'; +import { friendlyPercent, getValue, loadStylesheet } from '../utils.js'; import { colorMap } from '../display_utils.js'; const { div, span } = van.tags; @@ -42,14 +43,17 @@ const SummaryBar = (/** @type Properties */ props) => { ), () => total.val ? 
div( { class: 'tg-summary-bar--caption flex-row fx-flex-wrap text-caption mt-1' }, - getValue(props.items).map(item => div( - { class: 'tg-summary-bar--legend flex-row' }, - span({ - class: 'dot', - style: `color: ${colorMap[item.color] || item.color};`, - }), - `${item.label}: ${item.value || 0}`, - )), + getValue(props.items).map(item => item.label + ? div( + { class: 'tg-summary-bar--legend flex-row' }, + span({ + class: 'dot', + style: `color: ${colorMap[item.color] || item.color};`, + }), + `${item.label}: ${item.value || 0}` + (item.showPercent ? ` (${friendlyPercent(item.value * 100 / total.val)}%)` : '') + ) + : null, + ), ) : '', ); }; diff --git a/testgen/ui/components/frontend/js/components/tooltip.js b/testgen/ui/components/frontend/js/components/tooltip.js index 38a814e2..6663afb1 100644 --- a/testgen/ui/components/frontend/js/components/tooltip.js +++ b/testgen/ui/components/frontend/js/components/tooltip.js @@ -50,7 +50,7 @@ stylesheet.replace(` border-radius: 4px; background-color: var(--tooltip-color); padding: 4px 8px; - color: white; + color: var(--tooltip-text-color); font-size: 13px; font-family: 'Roboto', 'Helvetica Neue', sans-serif; text-align: center; diff --git a/testgen/ui/components/frontend/js/components/tree.js b/testgen/ui/components/frontend/js/components/tree.js index 6d603fe3..8f8e95b0 100644 --- a/testgen/ui/components/frontend/js/components/tree.js +++ b/testgen/ui/components/frontend/js/components/tree.js @@ -6,6 +6,8 @@ * @property {string?} classes * @property {string?} icon * @property {number?} iconSize + * @property {'red'?} iconColor + * @property {string?} iconTooltip * @property {TreeNode[]?} children * @property {number?} level * @property {boolean?} expanded @@ -23,13 +25,16 @@ * @property {string} id * @property {string} classes * @property {TreeNode[]} nodes - * @property {string} selected + * @property {(string|string[])?} selected * @property {function(string)?} onSelect * @property {boolean?} multiSelect * 
@property {boolean?} multiSelectToggle + * @property {string?} multiSelectToggleLabel * @property {function(SelectedNode[] | null)?} onMultiSelect - * @property {(function(TreeNode): boolean) | null} isNodeHidden + * @property {(function(TreeNode, string): boolean) | null} isNodeHidden + * @property {function()?} onApplySearchOptions * @property {(function(): boolean) | null} hasActiveFilters + * @property {function()?} onApplyFilters * @property {function()?} onResetFilters */ import van from '../van.min.js'; @@ -40,11 +45,12 @@ import { Portal } from './portal.js'; import { Icon } from './icon.js'; import { Checkbox } from './checkbox.js'; import { Toggle } from './toggle.js'; +import { withTooltip } from './tooltip.js'; const { div, h3, span } = van.tags; const levelOffset = 14; -const Tree = (/** @type Properties */ props, /** @type any? */ filtersContent) => { +const Tree = (/** @type Properties */ props, /** @type any? */ searchOptionsContent, /** @type any? */ filtersContent) => { loadStylesheet('tree', stylesheet); // Use only initial prop value as default and maintain internal state @@ -74,7 +80,7 @@ const Tree = (/** @type Properties */ props, /** @type any? */ filtersContent) = if (!multiSelect.val) { selectTree(treeNodes.val, false); } - props.onMultiSelect(multiSelect.val ? [] : null); + props.onMultiSelect?.(multiSelect.val ? getMultiSelection(treeNodes.val) : null); }); return div( @@ -82,124 +88,171 @@ const Tree = (/** @type Properties */ props, /** @type any? */ filtersContent) = id: props.id, class: () => `flex-column ${getValue(props.classes)}`, }, - Toolbar(treeNodes, props, filtersContent), - props.multiSelectToggle - ? 
div( - { class: 'mt-1 mb-2 ml-1 text-secondary' }, - Toggle({ - label: 'Select multiple', - checked: multiSelect, - onChange: (/** @type boolean */ checked) => multiSelect.val = checked, - }), - ) - : null, + Toolbar(treeNodes, multiSelect, props, searchOptionsContent, filtersContent), div( { class: 'tg-tree' }, () => div( { class: 'tg-tree--nodes', - onclick: van.derive(() => multiSelect.val ? () => props.onMultiSelect(getMultiSelection(treeNodes.val)) : null), + onclick: van.derive(() => multiSelect.val ? () => props.onMultiSelect?.(getMultiSelection(treeNodes.val)) : null), }, treeNodes.val.map(node => TreeNode(node, selected, multiSelect.val)), ), ), () => noMatches.val - ? span({ class: 'tg-tree--empty mt-7 mb-7 text-secondary' }, 'No matching itens found') + ? span({ class: 'tg-tree--empty mt-7 mb-7 text-secondary' }, 'No matching items found') : '', ); }; const Toolbar = ( /** @type { val: TreeNode[] } */ nodes, + /** @type object */ multiSelect, /** @type Properties */ props, + /** @type any? */ searchOptionsContent, /** @type any? */ filtersContent, ) => { const search = van.state(''); + const searchOptionsDomId = `tree-search-options-${getRandomId()}`; + const searchOptionsOpened = van.state(false); + const filterDomId = `tree-filters-${getRandomId()}`; const filtersOpened = van.state(false); const filtersActive = van.state(false); - const isNodeHidden = (/** @type TreeNode */ node) => !node.label.includes(search.val) || props.isNodeHidden?.(node); + const isNodeHidden = (/** @type TreeNode */ node) => props.isNodeHidden + ? props.isNodeHidden?.(node, search.val) + : !node.label.toLowerCase().includes(search.val.toLowerCase()); return div( - { class: 'flex-row fx-gap-1 tg-tree--actions' }, - Input({ - icon: 'search', - clearable: true, - onChange: (/** @type string */ value) => { - search.val = value; - filterTree(nodes.val, isNodeHidden); - if (value) { - expandOrCollapseTree(nodes.val, true); - } - }, - }), - filtersContent ? 
[ - div( - { class: () => `tg-tree--filter-button ${filtersActive.val ? 'active' : ''}` }, - Button({ - id: filterDomId, - type: 'icon', - icon: 'filter_list', - style: 'width: 24px; height: 24px; padding: 4px;', - tooltip: () => filtersActive.val ? 'Filters active' : 'Filters', - tooltipPosition: 'bottom', - onclick: () => filtersOpened.val = !filtersOpened.val, - }), - ), - Portal( - { target: filterDomId, opened: filtersOpened }, - () => div( - { class: 'tg-tree--filters' }, - h3( - { class: 'flex-row fx-justify-space-between'}, - 'Filters', - Button({ - type: 'icon', - icon: 'close', - iconSize: 22, - onclick: () => filtersOpened.val = false, - }), - ), - filtersContent, - div( - { class: 'flex-row fx-justify-space-between mt-4' }, - Button({ - label: 'Reset filters', - width: '110px', - disabled: () => !props.hasActiveFilters(), - onclick: props.onResetFilters, - }), + { class: 'tg-tree--actions' }, + div( + { class: 'flex-row fx-gap-1 mb-1' }, + Input({ + icon: 'search', + clearable: true, + onChange: (/** @type string */ value) => { + search.val = value; + filterTree(nodes.val, isNodeHidden); + if (value) { + expandOrCollapseTree(nodes.val, true); + } + }, + }), + searchOptionsContent ? 
[ + div( + { class: 'tg-tree--search-options' }, + Button({ + id: searchOptionsDomId, + type: 'icon', + icon: 'settings', + style: 'width: 24px; height: 24px; padding: 4px;', + tooltip: 'Search options', + tooltipPosition: 'bottom', + onclick: () => searchOptionsOpened.val = !searchOptionsOpened.val, + }), + ), + Portal( + { target: searchOptionsDomId, opened: searchOptionsOpened }, + () => div( + { class: 'tg-tree--portal' }, + searchOptionsContent, Button({ type: 'stroked', color: 'primary', label: 'Apply', - width: '80px', + style: 'width: 80px; margin-top: 12px; margin-left: auto;', onclick: () => { + props.onApplySearchOptions?.(); filterTree(nodes.val, isNodeHidden); - filtersActive.val = props.hasActiveFilters(); - filtersOpened.val = false; + searchOptionsOpened.val = false; }, }), ), + ) + ] : null, + Button({ + type: 'icon', + icon: 'expand_all', + style: 'width: 24px; height: 24px; padding: 4px;', + tooltip: 'Expand All', + tooltipPosition: 'bottom', + onclick: () => expandOrCollapseTree(nodes.val, true), + }), + Button({ + type: 'icon', + icon: 'collapse_all', + style: 'width: 24px; height: 24px; padding: 4px;', + tooltip: 'Collapse All', + tooltipPosition: 'bottom', + onclick: () => expandOrCollapseTree(nodes.val, false), + }), + ), + div( + { class: 'flex-row fx-justify-space-between mb-1' }, + div( + { class: 'text-secondary' }, + props.multiSelectToggle + ? Toggle({ + label: props.multiSelectToggleLabel ?? 'Select multiple', + checked: multiSelect, + onChange: (/** @type boolean */ checked) => multiSelect.val = checked, + }) + : null, + ), + filtersContent ? [ + div( + { class: () => `tg-tree--filter-button ${filtersActive.val ? 'active' : ''}` }, + Button({ + id: filterDomId, + type: 'basic', + label: 'Filters', + icon: 'filter_list', + style: 'height: 24px; padding: 4px;', + tooltip: () => filtersActive.val ? 
'Filters active' : null, + tooltipPosition: 'bottom', + onclick: () => filtersOpened.val = !filtersOpened.val, + }), ), - ) - ] : null, - Button({ - type: 'icon', - icon: 'expand_all', - style: 'width: 24px; height: 24px; padding: 4px;', - tooltip: 'Expand All', - tooltipPosition: 'bottom', - onclick: () => expandOrCollapseTree(nodes.val, true), - }), - Button({ - type: 'icon', - icon: 'collapse_all', - style: 'width: 24px; height: 24px; padding: 4px;', - tooltip: 'Collapse All', - tooltipPosition: 'bottom', - onclick: () => expandOrCollapseTree(nodes.val, false), - }), + Portal( + { target: filterDomId, opened: filtersOpened }, + () => div( + { class: 'tg-tree--portal' }, + h3( + { class: 'flex-row fx-justify-space-between'}, + 'Filters', + Button({ + type: 'icon', + icon: 'close', + iconSize: 22, + onclick: () => filtersOpened.val = false, + }), + ), + filtersContent, + div( + { class: 'flex-row fx-justify-space-between mt-4' }, + Button({ + label: 'Reset filters', + width: '110px', + disabled: () => !props.hasActiveFilters(), + onclick: props.onResetFilters, + }), + Button({ + type: 'stroked', + color: 'primary', + label: 'Apply', + width: '80px', + onclick: () => { + props.onApplyFilters?.(); + filterTree(nodes.val, isNodeHidden); + filtersActive.val = props.hasActiveFilters(); + filtersOpened.val = false; + }, + }), + ), + ), + ) + ] : null, + ) ); }; @@ -225,8 +278,8 @@ const TreeNode = ( node.selected.val = node.children.every(child => child.selected.val); } else { node.selected.val = !node.selected.val; - event.fromChild = true; } + event.fromChild = true; } : null, }, @@ -252,12 +305,18 @@ const TreeNode = ( ? [ Checkbox({ checked: () => node.selected.val, - indeterminate: hasChildren ? () => !node.selected.val && node.children.some(({ selected }) => selected.val) : false, + indeterminate: hasChildren ? () => isIndeterminate(node) : false, }), span({ class: 'mr-1' }), ] : null, - node.icon ? 
Icon({ size: 24, classes: 'tg-tree--row-icon' }, node.icon) : null, + () => { + if (node.icon) { + const icon = Icon({ size: node.iconSize, classes: `tg-tree--row-icon ${node.iconColor}` }, node.icon); + return node.iconTooltip ? withTooltip(icon, { text: node.iconTooltip, position: 'right' }) : icon; + } + return null; + }, node.label, ), hasChildren ? div( @@ -283,7 +342,7 @@ const initTreeState = ( } node.expanded = van.state(expanded); node.hidden = van.state(false); - node.selected = van.state(false); + node.selected = van.state(node.selected ?? false); treeExpanded = treeExpanded || expanded; }); return treeExpanded; @@ -341,7 +400,8 @@ const getMultiSelection = (nodes) => { if (selectedChildren.length) { selected.push({ id: node.id, - all: selectedChildren.length === node.children.length, + all: selectedChildren.length === node.children.length + && (selectedChildren[0]?.children === undefined || selectedChildren.every(child => child.all)), children: selectedChildren, }); } @@ -352,6 +412,35 @@ const getMultiSelection = (nodes) => { return selected; }; +/** + * + * @param {TreeNode} node + * @returns {boolean} + */ +const isIndeterminate = (node) => { + return !node.selected.val && isAnyDescendantSelected(node); +}; + + +/** + * + * @param {TreeNode} node + * @returns {boolean} + */ +const isAnyDescendantSelected = (node) => { + if ((node.children ?? 
[]).length <= 0) { + return false; + } + + for (const child of node.children) { + if (getValue(child.selected) || isAnyDescendantSelected(child)) { + return true; + } + } + + return false; +} + const stylesheet = new CSSStyleSheet(); stylesheet.replace(` .tg-tree { @@ -364,9 +453,10 @@ stylesheet.replace(` .tg-tree--actions { margin: 4px; + border-bottom: 1px solid var(--border-color); } -.tg-tree--actions > label { +.tg-tree--actions > div > label { flex: auto; } @@ -381,7 +471,7 @@ stylesheet.replace(` border-color: var(--primary-color); } -.tg-tree--filters { +.tg-tree--portal { border-radius: 8px; background: var(--dk-card-background); box-shadow: var(--portal-box-shadow); @@ -390,7 +480,7 @@ stylesheet.replace(` z-index: 99; } -.tg-tree--filters > h3 { +.tg-tree--portal > h3 { margin: 0 0 12px; font-size: 18px; font-weight: 500; @@ -426,6 +516,10 @@ stylesheet.replace(` color: #B0BEC5; text-align: center; } + +.tg-tree--row-icon.red { + color: var(--red); +} `); export { Tree }; diff --git a/testgen/ui/components/frontend/js/data_profiling/column_distribution.js b/testgen/ui/components/frontend/js/data_profiling/column_distribution.js index e40ca9f7..a0810553 100644 --- a/testgen/ui/components/frontend/js/data_profiling/column_distribution.js +++ b/testgen/ui/components/frontend/js/data_profiling/column_distribution.js @@ -5,6 +5,7 @@ * @type {object} * @property {boolean?} border * @property {boolean?} dataPreview + * @property {boolean?} history */ import van from '../van.min.js'; import { Card } from '../components/card.js'; @@ -14,7 +15,7 @@ import { SummaryBar } from '../components/summary_bar.js'; import { PercentBar } from '../components/percent_bar.js'; import { FrequencyBars } from '../components/frequency_bars.js'; import { BoxPlot } from '../components/box_plot.js'; -import { loadStylesheet, emitEvent, getValue } from '../utils.js'; +import { loadStylesheet, emitEvent, friendlyPercent, getValue } from '../utils.js'; import { formatTimestamp, 
roundDigits } from '../display_utils.js'; const { div, span } = van.tags; @@ -37,21 +38,39 @@ const ColumnDistributionCard = (/** @type Properties */ props, /** @type Column return Card({ border: props.border, title: `Value Distribution ${item.is_latest_profile ? '*' : ''}`, - content: item.profile_run_id && columnFunction ? columnFunction(item) : null, - actionContent: item.profile_run_id - ? (getValue(props.dataPreview) - ? Button({ - type: 'stroked', - label: 'Data Preview', - icon: 'pageview', - width: 'auto', - onclick: () => emitEvent('DataPreviewClicked', { payload: item }), - }) - : null) - : span( - { class: 'text-secondary' }, - 'No profiling data available', - ), + content: item.profile_run_id + ? (item.record_ct === 0 + ? BaseCounts(item) + : columnFunction?.(item)) + : null, + actionContent: div( + { class: 'flex-row fx-gap-3' }, + item.profile_run_id + ? ([ + getValue(props.dataPreview) + ? Button({ + type: 'stroked', + label: 'Data Preview', + icon: 'pageview', + width: 'auto', + onclick: () => emitEvent('DataPreviewClicked', { payload: item }), + }) + : null, + getValue(props.history) + ? 
Button({ + type: 'stroked', + label: 'History', + icon: 'history', + width: 'auto', + onclick: () => emitEvent('HistoryClicked', { payload: item }), + }) + : null, + ]) + : span( + { class: 'text-secondary' }, + 'No profiling data available', + ), + ), }) }; @@ -75,6 +94,9 @@ function AlphaColumn(/** @type ColumnProfile */ item) { } const total = item.record_ct; + const missing = item.null_value_ct + item.zero_length_ct + item.filled_value_ct; + const duplicates = item.value_ct - item.distinct_value_ct; + const duplicatesStandardized = item.value_ct - item.distinct_std_value_ct; return div( { class: 'flex-column fx-gap-5' }, @@ -84,14 +106,36 @@ function AlphaColumn(/** @type ColumnProfile */ item) { SummaryBar({ height: summaryHeight, width: summaryWidth, - label: `Missing Values: ${item.null_value_ct + item.filled_value_ct + item.filled_value_ct}`, + label: `Missing Values: ${missing} (${friendlyPercent(missing * 100 / total)}%)`, items: [ - { label: 'Actual Values', value: item.value_ct - item.filled_value_ct, color: 'green' }, - { label: 'Null', value: item.null_value_ct, color: 'brownLight' }, + { label: 'Actual Values', value: item.value_ct - item.zero_length_ct - item.filled_value_ct, color: 'green' }, + { label: 'Null', value: item.null_value_ct, color: 'brownLight', showPercent: true }, { label: 'Zero Length', value: item.zero_length_ct, color: 'yellow' }, { label: 'Dummy Values', value: item.filled_value_ct, color: 'orange' }, ], }), + SummaryBar({ + height: summaryHeight, + width: summaryWidth, + label: `Duplicate Values: ${duplicates} (${friendlyPercent(duplicates * 100 / item.value_ct)}%)`, + items: [ + { label: 'Distinct', value: item.distinct_value_ct, color: 'indigo' }, + { label: 'Duplicates', value: duplicates, color: 'orange' }, + { value: item.null_value_ct, color: 'empty' }, + ], + }), + item.distinct_std_value_ct != item.distinct_value_ct + ? 
SummaryBar({ + height: summaryHeight, + width: summaryWidth, + label: `Duplicate Values, Standardized: ${duplicatesStandardized} (${friendlyPercent(duplicatesStandardized * 100 / item.value_ct)}%)`, + items: [ + { label: 'Distinct', value: item.distinct_std_value_ct, color: 'indigo' }, + { label: 'Duplicates', value: duplicatesStandardized, color: 'orange' }, + { value: item.null_value_ct, color: 'empty' }, + ], + }) + : null, SummaryBar({ height: summaryHeight, width: summaryWidth, @@ -101,7 +145,7 @@ function AlphaColumn(/** @type ColumnProfile */ item) { { label: 'Lower Case', value: item.lower_case_ct, color: 'blueLight' }, { label: 'Upper Case', value: item.upper_case_ct, color: 'blue' }, { label: 'Non-Alpha', value: item.non_alpha_ct, color: 'brown' }, - { label: 'Null', value: item.null_value_ct, color: 'brownLight' }, + { value: item.null_value_ct, color: 'empty' }, ], }), ), @@ -110,6 +154,7 @@ function AlphaColumn(/** @type ColumnProfile */ item) { item.top_freq_values ? FrequencyBars({ title: 'Frequent Values', total: item.record_ct, + nullCount: item.null_value_ct, items: item.top_freq_values.substring(2).split('\n| ').map(parts => { const [value, count] = parts.split(' | '); return { value, count: Number(count) }; @@ -118,6 +163,7 @@ function AlphaColumn(/** @type ColumnProfile */ item) { item.top_patterns ? 
FrequencyBars({ title: 'Frequent Patterns', total: item.record_ct, + nullCount: item.null_value_ct, items: item.top_patterns.split(' | ').reduce((array, item, index) => { if (index % 2) { array[(index - 1) / 2].value = item; @@ -146,15 +192,19 @@ function AlphaColumn(/** @type ColumnProfile */ item) { ), ), div( - { class: 'flex-row fx-flex-wrap fx-align-flex-start fx-gap-4 tg-profile--attribute-block' }, + { class: 'flex-row fx-flex-wrap fx-align-flex-start fx-gap-4' }, Attribute({ label: 'Minimum Length', value: item.min_length, width: attributeWidth }), Attribute({ label: 'Maximum Length', value: item.max_length, width: attributeWidth }), Attribute({ label: 'Average Length', value: roundDigits(item.avg_length), width: attributeWidth }), + ), + div( + { class: 'flex-row fx-flex-wrap fx-align-flex-start fx-gap-4' }, Attribute({ label: 'Minimum Text', value: item.min_text, width: attributeWidth }), Attribute({ label: 'Maximum Text', value: item.max_text, width: attributeWidth }), + ), + div( + { class: 'flex-row fx-flex-wrap fx-align-flex-start fx-gap-4' }, Attribute({ label: 'Standard Pattern Match', value: standardPattern, width: attributeWidth }), - Attribute({ label: 'Distinct Values', value: item.distinct_value_ct, width: attributeWidth }), - Attribute({ label: 'Distinct Standard Values', value: item.distinct_std_value_ct, width: attributeWidth }), Attribute({ label: 'Distinct Patterns', value: item.distinct_pattern_ct, width: attributeWidth }), ), ); @@ -196,8 +246,8 @@ function DatetimeColumn(/** @type ColumnProfile */ item) { div( { class: 'flex-column fx-gap-3 tg-profile--percent-column' }, PercentBar({ label: 'Before 1 Year', value: item.before_1yr_date_ct, total, width: percentWidth }), - PercentBar({ label: 'Before 5 Year', value: item.before_5yr_date_ct, total, width: percentWidth }), - PercentBar({ label: 'Before 20 Year', value: item.before_20yr_date_ct, total, width: percentWidth }), + PercentBar({ label: 'Before 5 Years', value: 
item.before_5yr_date_ct, total, width: percentWidth }), + PercentBar({ label: 'Before 20 Years', value: item.before_20yr_date_ct, total, width: percentWidth }), ), div( { class: 'flex-column fx-gap-3 tg-profile--percent-column' }, @@ -260,10 +310,17 @@ function NumericColumn(/** @type ColumnProfile */ item) { } const BaseCounts = (/** @type ColumnProfile */ item) => { + const attributes = [ + { key: 'record_ct', label: 'Record Count' }, + { key: 'value_ct', label: 'Value Count' }, + ]; return div( { class: 'flex-row fx-gap-4' }, - Attribute({ label: 'Record Count', value: item.record_ct, width: attributeWidth }), - Attribute({ label: 'Value Count', value: item.value_ct, width: attributeWidth }), + attributes.map(({ key, label }) => Attribute({ + label: item[key] === 0 ? span({ class: 'text-error' }, label) : label, + value: item[key], + width: attributeWidth, + })), ); }; diff --git a/testgen/ui/components/frontend/js/data_profiling/column_profiling_history.js b/testgen/ui/components/frontend/js/data_profiling/column_profiling_history.js new file mode 100644 index 00000000..06d3f426 --- /dev/null +++ b/testgen/ui/components/frontend/js/data_profiling/column_profiling_history.js @@ -0,0 +1,85 @@ +/** + * @import { Column } from './data_profiling_utils.js'; + * + * @typedef ProfilingRun + * @type {object} + * @property {string} run_id + * @property {number} run_date + * + * @typedef Properties + * @type {object} + * @property {ProfilingRun} profiling_runs + * @property {Column} selected_item + */ +import van from '../van.min.js'; +import { Streamlit } from '../streamlit.js'; +import { emitEvent, getValue, loadStylesheet } from '../utils.js'; +import { formatTimestamp } from '../display_utils.js'; +import { ColumnDistributionCard } from './column_distribution.js'; + +const { div, span } = van.tags; + +const ColumnProfilingHistory = (/** @type Properties */ props) => { + loadStylesheet('column-profiling-history', stylesheet); + Streamlit.setFrameHeight(600); + 
window.testgen.isPage = true; + + return div( + { class: 'column-history flex-row fx-align-stretch' }, + () => div( + { class: 'column-history--list' }, + getValue(props.profiling_runs).map(({ run_id, run_date }, index) => div( + { + class: () => `column-history--item clickable ${getValue(props.selected_item).profile_run_id === run_id ? 'selected' : ''}`, + onclick: () => emitEvent('RunSelected', { payload: run_id }), + }, + div(formatTimestamp(run_date)), + index === 0 ? span({ class: 'text-caption' }, 'Latest run') : null, + )), + ), + span({class: 'column-history--divider'}), + () => div( + { class: 'column-history--details' }, + ColumnDistributionCard({}, getValue(props.selected_item)), + ), + ); +} + +const stylesheet = new CSSStyleSheet(); +stylesheet.replace(` +.column-history { + height: 100%; +} + +.column-history--list { + flex: 150px 1 1; +} + +.column-history--item { + padding: 8px; +} + +.column-history--item:hover { + background-color: var(--sidebar-item-hover-color); +} + +.column-history--item.selected { + background-color: #06a04a17; +} + +.column-history--item.selected > div { + font-weight: 500; +} + +.column-history--details { + overflow: auto; +} + +.column-history--divider { + width: 1px; + background-color: var(--grey); + margin: 0 10px; +} +`); + +export { ColumnProfilingHistory }; diff --git a/testgen/ui/components/frontend/js/data_profiling/column_profiling_results.js b/testgen/ui/components/frontend/js/data_profiling/column_profiling_results.js index 8f40ec28..98f4a6e1 100644 --- a/testgen/ui/components/frontend/js/data_profiling/column_profiling_results.js +++ b/testgen/ui/components/frontend/js/data_profiling/column_profiling_results.js @@ -1,4 +1,6 @@ /** + * @import { Column } from './data_profiling_utils.js'; + * * @typedef Properties * @type {object} * @property {Column} column diff --git a/testgen/ui/components/frontend/js/data_profiling/data_characteristics.js 
b/testgen/ui/components/frontend/js/data_profiling/data_characteristics.js index 6aeb3aa2..7a0f5f20 100644 --- a/testgen/ui/components/frontend/js/data_profiling/data_characteristics.js +++ b/testgen/ui/components/frontend/js/data_profiling/data_characteristics.js @@ -5,13 +5,15 @@ * @type {object} * @property {boolean?} scores * @property {boolean?} border + * @property {boolean?} allowRemove */ import van from '../van.min.js'; import { Card } from '../components/card.js'; import { Attribute } from '../components/attribute.js'; +import { Button } from '../components/button.js'; import { ScoreMetric } from '../components/score_metric.js'; import { formatTimestamp } from '../display_utils.js'; -import { loadStylesheet } from '../utils.js'; +import { emitEvent, loadStylesheet } from '../utils.js'; import { getColumnIcon } from './data_profiling_utils.js'; const { div, span, i } = van.tags; @@ -23,9 +25,13 @@ const DataCharacteristicsCard = (/** @type Properties */ props, /** @type Column if (item.type === 'column') { attributes.push( { key: 'column_type', label: 'Data Type' }, - { key: 'datatype_suggestion', label: `Suggested Data Type ${item.is_latest_profile ? '*' : ''}` }, { key: 'functional_data_type', label: `Semantic Data Type ${item.is_latest_profile ? '*' : ''}` }, ); + if (item.datatype_suggestion && item.datatype_suggestion.toLowerCase() !== item.column_type.toLowerCase()) { + attributes.push( + { key: 'datatype_suggestion', label: `Suggested Data Type ${item.is_latest_profile ? '*' : ''}` }, + ); + } } else { attributes.push( { key: 'functional_table_type', label: `Semantic Table Type ${item.is_latest_profile ? 
'*' : ''}` }, @@ -34,7 +40,7 @@ const DataCharacteristicsCard = (/** @type Properties */ props, /** @type Column if (item.add_date) { attributes.push({ key: 'add_date', label: 'First Detected' }); } - if (item.last_mod_date !== item.add_date) { + if (item.last_mod_date && item.last_mod_date !== item.add_date) { attributes.push({ key: 'last_mod_date', label: 'Modification Detected' }); } if (item.drop_date) { @@ -47,37 +53,53 @@ const DataCharacteristicsCard = (/** @type Properties */ props, /** @type Column content: div( { class: 'flex-row fx-gap-4 fx-justify-space-between' }, div( - { class: 'flex-row fx-flex-wrap fx-gap-4' }, - attributes.map(({ key, label }) => { - let value = item[key]; - if (key === 'column_type') { - const { icon, iconSize } = getColumnIcon(item); - value = div( - { class: 'flex-row' }, - i( - { - class: 'material-symbols-rounded tg-data-chars--column-icon', - style: `font-size: ${iconSize || 24}px;`, - }, - icon, - ), - (value || 'unknown').toLowerCase(), - ); - } else if (key === 'datatype_suggestion') { - value = (value || '').toLowerCase(); - } else if (key === 'functional_table_type') { - value = (value || '').split('-') - .map(word => word ? 
(word[0].toUpperCase() + word.substring(1)) : '') - .join(' '); - } else if (['add_date', 'last_mod_date', 'drop_date'].includes(key)) { - value = formatTimestamp(value, true); - if (key === 'drop_date') { - label = span({ class: 'text-error' }, label); + { class: 'flex-column fx-align-flex-start fx-gap-3' }, + div( + { class: 'flex-row fx-flex-wrap fx-gap-4' }, + attributes.map(({ key, label }) => { + let value = item[key]; + if (key === 'column_type') { + const { icon, iconSize } = getColumnIcon(item); + value = div( + { class: 'flex-row' }, + i( + { + class: 'material-symbols-rounded tg-data-chars--column-icon', + style: `font-size: ${iconSize || 24}px;`, + }, + icon, + ), + (value || 'unknown').toLowerCase(), + ); + } else if (key === 'datatype_suggestion') { + value = (value || '').toLowerCase(); + } else if (key === 'functional_table_type') { + value = (value || '').split('-') + .map(word => word ? (word[0].toUpperCase() + word.substring(1)) : '') + .join(' '); + } else if (['add_date', 'last_mod_date', 'drop_date'].includes(key)) { + value = formatTimestamp(value, true); + if (key === 'drop_date') { + label = span({ class: 'text-error' }, label); + } } - } - return Attribute({ label, value, width: 250 }); - }), + return Attribute({ label, value, width: 250 }); + }), + ), + props.allowRemove && item.drop_date && item.type === 'table' + ? Button({ + type: 'stroked', + color: 'warn', + label: 'Remove from Catalog', + icon: 'delete', + width: 'auto', + disabled: item.test_suites.length, + tooltip: item.test_suites.length ? 'The table has associated test definitions and cannot be removed from Data Catalog. Delete the test definitions first.' : 'Remove the table and its columns from Data Catalog', + tooltipPosition: 'right', + onclick: () => emitEvent('RemoveTableClicked', { payload: item }), + }) + : null, ), props.scores ? 
div( { style: 'margin-top: -40px;' }, diff --git a/testgen/ui/components/frontend/js/data_profiling/data_profiling_utils.js b/testgen/ui/components/frontend/js/data_profiling/data_profiling_utils.js index 0aafa3d2..df3cbf17 100644 --- a/testgen/ui/components/frontend/js/data_profiling/data_profiling_utils.js +++ b/testgen/ui/components/frontend/js/data_profiling/data_profiling_utils.js @@ -17,6 +17,12 @@ * @property {string} test_suite * @property {string} test_run_id * @property {number} test_run_date + * + * @typedef TestSuite + * @type {object} + * @property {string} id + * @property {string} name + * @property {string} test_count * * @typedef Column * @type {object} @@ -127,6 +133,8 @@ * * Issues * @property {HygieneIssue[]?} hygiene_issues * @property {TestIssue[]?} test_issues + * * Test Suites + * @property {TestSuite[]?} test_suites * * @typedef Table * @type {object} @@ -143,6 +151,7 @@ * @property {number} column_ct * @property {number} data_point_ct * @property {number} add_date + * @property {number} last_refresh_date * @property {number} drop_date * * Table Tags * @property {string} description @@ -175,6 +184,8 @@ * * Issues * @property {HygieneIssue[]?} hygiene_issues * @property {TestIssue[]?} test_issues + * * Test Suites + * @property {TestSuite[]?} test_suites */ import van from '../van.min.js'; import { Link } from '../components/link.js'; @@ -182,13 +193,13 @@ import { formatTimestamp } from '../display_utils.js'; const { span, b } = van.tags; -const TABLE_ICON = { icon: 'table', iconSize: 20 }; +const TABLE_ICON = { icon: 'table' }; const COLUMN_ICONS = { - A: { icon: 'abc' }, - B: { icon: 'toggle_off', iconSize: 20 }, - D: { icon: 'calendar_clock', iconSize: 20 }, - N: { icon: '123' }, - T: { icon: 'calendar_clock', iconSize: 20 }, + A: { icon: 'abc', iconSize: 24 }, + B: { icon: 'toggle_off' }, + D: { icon: 'calendar_clock' }, + N: { icon: '123', iconSize: 24 }, + T: { icon: 'calendar_clock' }, X: { icon: 'question_mark', iconSize: 18 }, }; 
const BOOLEAN_TYPE = 'Boolean'; diff --git a/testgen/ui/components/frontend/js/data_profiling/table_size.js b/testgen/ui/components/frontend/js/data_profiling/table_size.js index 3f7af986..9c5055b1 100644 --- a/testgen/ui/components/frontend/js/data_profiling/table_size.js +++ b/testgen/ui/components/frontend/js/data_profiling/table_size.js @@ -9,21 +9,29 @@ import { Card } from '../components/card.js'; import { Attribute } from '../components/attribute.js'; import { Button } from '../components/button.js'; import { emitEvent } from '../utils.js'; +import { formatTimestamp } from '../display_utils.js'; -const { div } = van.tags; +const { div, span } = van.tags; const TableSizeCard = (/** @type Properties */ _props, /** @type Table */ item) => { const attributes = [ { key: 'column_ct', label: 'Column Count' }, { key: 'record_ct', label: 'Row Count' }, { key: 'data_point_ct', label: 'Data Point Count' }, - ] + ]; return Card({ - title: 'Table Size', + title: 'Table Size **', content: div( - { class: 'flex-row fx-flex-wrap fx-gap-4' }, - attributes.map(({ key, label }) => Attribute({ label, value: item[key], width: 250 })), + div( + { class: 'flex-row fx-flex-wrap fx-gap-4' }, + attributes.map(({ key, label }) => Attribute({ + label: item[key] === 0 ? 
span({ class: 'text-error' }, label) : label, + value: item[key], + width: 250, + })), + ), + span({ class: 'text-caption flex-row fx-justify-content-flex-end mt-2' }, `** as of ${formatTimestamp(item.last_refresh_date)}`), ), actionContent: Button({ type: 'stroked', diff --git a/testgen/ui/components/frontend/js/display_utils.js b/testgen/ui/components/frontend/js/display_utils.js index e5fb11a2..652d3822 100644 --- a/testgen/ui/components/frontend/js/display_utils.js +++ b/testgen/ui/components/frontend/js/display_utils.js @@ -63,6 +63,7 @@ const colorMap = { grey: '#BDBDBD', // Gray 400 empty: 'var(--empty)', // Light: Gray 200, Dark: Gray 800 emptyLight: 'var(--empty-light)', // Light: Gray 50, Dark: Gray 900 + emptyTeal: 'var(--empty-teal)', } const DISABLED_ACTION_TEXT = 'You do not have permissions to perform this action. Contact your administrator.'; diff --git a/testgen/ui/components/frontend/js/main.js b/testgen/ui/components/frontend/js/main.js index e01a2cda..0061db66 100644 --- a/testgen/ui/components/frontend/js/main.js +++ b/testgen/ui/components/frontend/js/main.js @@ -14,6 +14,7 @@ import { ExpanderToggle } from './components/expander_toggle.js'; import { Link } from './components/link.js'; import { Paginator } from './components/paginator.js'; import { SortingSelector } from './components/sorting_selector.js'; +import { ColumnSelector } from './components/explorer_column_selector.js'; import { TestRuns } from './pages/test_runs.js'; import { ProfilingRuns } from './pages/profiling_runs.js'; import { DatabaseFlavorSelector } from './components/flavor_selector.js'; @@ -24,6 +25,7 @@ import { QualityDashboard } from './pages/quality_dashboard.js'; import { ScoreDetails } from './pages/score_details.js'; import { ScoreExplorer } from './pages/score_explorer.js'; import { ColumnProfilingResults } from './data_profiling/column_profiling_results.js'; +import { ColumnProfilingHistory } from './data_profiling/column_profiling_history.js'; import { 
ScheduleList } from './pages/schedule_list.js'; let currentWindowVan = van; @@ -43,12 +45,14 @@ const TestGenComponent = (/** @type {string} */ id, /** @type {object} */ props) database_flavor_selector: DatabaseFlavorSelector, data_catalog: DataCatalog, column_profiling_results: ColumnProfilingResults, + column_profiling_history: ColumnProfilingHistory, project_dashboard: ProjectDashboard, test_suites: TestSuites, quality_dashboard: QualityDashboard, score_details: ScoreDetails, score_explorer: ScoreExplorer, schedule_list: ScheduleList, + column_selector: ColumnSelector, }; if (Object.keys(componentById).includes(id)) { diff --git a/testgen/ui/components/frontend/js/pages/data_catalog.js b/testgen/ui/components/frontend/js/pages/data_catalog.js index 62f4f8de..1641ca5e 100644 --- a/testgen/ui/components/frontend/js/pages/data_catalog.js +++ b/testgen/ui/components/frontend/js/pages/data_catalog.js @@ -2,6 +2,13 @@ * @import { Column, Table } from '../data_profiling/data_profiling_utils.js'; * @import { TreeNode } from '../components/tree.js'; * + * @typedef ProjectSummary + * @type {object} + * @property {string} project_code + * @property {number} connections_ct + * @property {number} table_groups_ct + * @property {string} default_connection_id + * * @typedef ColumnPath * @type {object} * @property {string} column_id @@ -10,6 +17,8 @@ * @property {string} table_name * @property {'A' | 'B' | 'D' | 'N' | 'T' | 'X'} general_type * @property {string} functional_data_type + * @property {number} record_ct + * @property {number} value_ct * @property {number} drop_date * @property {number} table_drop_date * @property {boolean} critical_data_element @@ -22,8 +31,9 @@ * * @typedef Properties * @type {object} + * @property {ProjectSummary} project_summary * @property {ColumnPath[]} columns - * @property {Table | Column} selected + * @property {Table | Column} selected_item * @property {Object.} tag_values * @property {string} last_saved_timestamp * @property {Permissions} 
permissions @@ -48,6 +58,8 @@ import { capitalize } from '../display_utils.js'; import { TableSizeCard } from '../data_profiling/table_size.js'; import { Card } from '../components/card.js'; import { Button } from '../components/button.js'; +import { Link } from '../components/link.js'; +import { EMPTY_STATE_MESSAGE, EmptyState } from '../components/empty_state.js'; const { div, h2, span, i } = van.tags; @@ -80,25 +92,27 @@ const TAG_HELP = { const DataCatalog = (/** @type Properties */ props) => { loadStylesheet('data-catalog', stylesheet); Streamlit.setFrameHeight(1); // Non-zero value is needed to render - window.frameElement.style.setProperty('height', 'calc(100vh - 175px)'); + window.frameElement.style.setProperty('height', 'calc(100vh - 85px)'); window.testgen.isPage = true; /** @type TreeNode[] */ const treeNodes = van.derive(() => { let columns = []; try { - columns = JSON.parse(getValue(props.columns)); + columns = JSON.parse(getValue(props.columns) ?? '[]'); } catch { } const tables = {}; columns.forEach((item) => { - const { column_id, table_id, column_name, table_name, drop_date, table_drop_date } = item; + const { column_id, table_id, column_name, table_name, record_ct, value_ct, drop_date, table_drop_date } = item; if (!tables[table_id]) { tables[table_id] = { id: table_id, label: table_name, classes: table_drop_date ? 'text-disabled' : '', ...TABLE_ICON, + iconColor: record_ct === 0 ? 'red' : null, + iconTooltip: record_ct === 0 ? 'No records detected' : null, criticalDataElement: !!item.table_critical_data_element, children: [], }; @@ -109,6 +123,8 @@ const DataCatalog = (/** @type Properties */ props) => { label: column_name, classes: drop_date ? 'text-disabled' : '', ...getColumnIcon(item), + iconColor: value_ct === 0 ? 'red' : null, + iconTooltip: value_ct === 0 ? 'No non-null values detected' : null, criticalDataElement: !!(item.critical_data_element ?? item.table_critical_data_element), }; TAG_KEYS.forEach(key => columnNode[key] = item[key] ?? 
item[`table_${key}`]); @@ -119,7 +135,7 @@ const DataCatalog = (/** @type Properties */ props) => { const selectedItem = van.derive(() => { try { - return JSON.parse(getValue(props.selected)); + return JSON.parse(getValue(props.selected_item)); } catch (e) { console.error(e) return null; @@ -142,85 +158,175 @@ const DataCatalog = (/** @type Properties */ props) => { } }; + const searchOptions = { + tableName: van.state(true), + columnName: van.state(true), + }; const filters = { criticalDataElement: van.state(false) }; TAG_KEYS.forEach(key => filters[key] = van.state(null)); + // To hold temporary state within the portals, which might be discarded by clicking outside + const tempSearchOptions = {}; + const tempFilters = {}; + + const copyState = (fromObject, toObject) => { + Object.entries(fromObject).forEach(([ key, state ]) => { + toObject[key] = toObject[key] ?? van.state(); + toObject[key].val = state.val; + }); + }; + const userCanEdit = getValue(props.permissions)?.can_edit ?? false; + const userCanNavigate = getValue(props.permissions)?.can_navigate ?? false; + const projectSummary = getValue(props.project_summary); - return div( - { - class: 'flex-row tg-dh', - ondragover: (event) => event.preventDefault(), - }, - Tree( - { - id: treeDomId, - classes: 'tg-dh--tree', - nodes: treeNodes, - // Use .rawVal, so only initial value from query params is passed to tree - selected: selectedItem.rawVal ? 
`${selectedItem.rawVal.type}_${selectedItem.rawVal.id}` : null, - onSelect: (/** @type string */ selected) => emitEvent('ItemSelected', { payload: selected }), - multiSelect: multiEditMode, - multiSelectToggle: userCanEdit, - onMultiSelect: (/** @type string[] | null */ selected) => multiSelectedItems.val = selected, - isNodeHidden: (/** @type TreeNode */ node) => { - let hidden = ![ node.criticalDataElement, false ].includes(filters.criticalDataElement.val); - hidden = hidden || TAG_KEYS.some(key => ![ node[key], null ].includes(filters[key].val)); - return hidden; - }, - hasActiveFilters: () => filters.criticalDataElement.val || TAG_KEYS.some(key => !!filters[key].val), - onResetFilters: () => { - filters.criticalDataElement.val = false; - TAG_KEYS.forEach(key => filters[key].val = null); - }, - }, - // Pass as a function that will be called when the filter portal is opened - // Otherwise state bindings get garbage collected and Select dropdowns won't open - // https://vanjs.org/advanced#gc + return projectSummary.table_groups_ct > 0 + ? div( + { class: 'flex-column tg-dh' }, () => div( - Checkbox({ - label: 'Only critical data elements (CDEs)', - checked: filters.criticalDataElement, - onChange: (checked) => filters.criticalDataElement.val = checked, + { class: 'flex-row fx-align-flex-end fx-justify-space-between mb-2' }, + Select({ + label: 'Table Group', + value: getValue(props.table_group_filter_options)?.find((op) => op.selected)?.value ?? null, + options: getValue(props.table_group_filter_options) ?? 
[], + height: 38, + style: 'font-size: 14px;', + testId: 'table-group-filter', + onChange: (value) => emitEvent('TableGroupSelected', {payload: value}), }), - div( - { - class: 'flex-row fx-flex-wrap fx-gap-4 fx-justify-space-between mt-4', - style: 'max-width: 420px;', + Button({ + icon: 'download', + type: 'stroked', + label: 'Export', + tooltip: 'Download filtered columns to Excel', + tooltipPosition: 'left', + width: 'fit-content', + style: 'background: var(--dk-card-background);', + onclick: () => { + const columnIds = treeNodes.val.reduce((ids, table) => { + if (!table.hidden.val) { + table.children.forEach(column => { + if (!column.hidden.val) { + ids.push(column.id); + } + }); + } + return ids; + }, []); + emitEvent('ExportClicked', { payload: columnIds }); }, - TAG_KEYS.map(key => Select({ - id: `data-catalog-${key}`, - label: capitalize(key.replaceAll('_', ' ')), - height: 32, - value: filters[key], - options: getValue(props.tag_values)?.[key]?.map(key => ({ label: key, value: key })), - allowNull: true, - disabled: !getValue(props.tag_values)?.[key]?.length, - onChange: v => filters[key].val = v, - })), - ), + }), ), - ), - div( - { - class: 'tg-dh--dragger', - draggable: true, - ondragstart: (event) => { - event.dataTransfer.effectAllowed = 'move'; - event.dataTransfer.setDragImage(EMPTY_IMAGE, 0, 0); - dragState.val = { startX: event.screenX, startWidth: document.getElementById(treeDomId).offsetWidth }; - }, - ondragend: (event) => { - dragResize(event); - dragState.val = null; - }, - ondrag: van.derive(() => dragState.val ? dragResize : null), - }, - ), - () => multiEditMode.val - ? MultiEdit(props, multiSelectedItems, multiEditMode) - : SelectedDetails(props, selectedItem.val), - ); + () => treeNodes.val.length + ? 
div( + { + class: 'flex-row tg-dh--content', + ondragover: (event) => event.preventDefault(), + }, + Tree( + { + id: treeDomId, + classes: 'tg-dh--tree', + nodes: treeNodes, + // Use .rawVal, so only initial value from query params is passed to tree + selected: selectedItem.rawVal ? `${selectedItem.rawVal.type}_${selectedItem.rawVal.id}` : null, + onSelect: (/** @type string */ selected) => emitEvent('ItemSelected', { payload: selected }), + multiSelect: multiEditMode, + multiSelectToggle: userCanEdit, + multiSelectToggleLabel: 'Edit multiple', + onMultiSelect: (/** @type string[] | null */ selected) => multiSelectedItems.val = selected, + isNodeHidden: (/** @type TreeNode */ node, /** string */ search) => + !node.label.toLowerCase().includes(search.toLowerCase()) + || (!!node.children && !searchOptions.tableName.val) + || (!node.children && !searchOptions.columnName.val) + || ![ node.criticalDataElement, false ].includes(filters.criticalDataElement.val) + || TAG_KEYS.some(key => ![ node[key], null ].includes(filters[key].val)), + onApplySearchOptions: () => { + copyState(tempSearchOptions, searchOptions); + // If both were unselected, reset their values + // Otherwise, nothing will be matched and the user might not realize why + if (!searchOptions.tableName.val && !searchOptions.columnName.val) { + searchOptions.tableName.val = true; + searchOptions.columnName.val = true + } + }, + hasActiveFilters: () => filters.criticalDataElement.val || TAG_KEYS.some(key => !!filters[key].val), + onApplyFilters: () => copyState(tempFilters, filters), + onResetFilters: () => { + tempFilters.criticalDataElement.val = false; + TAG_KEYS.forEach(key => tempFilters[key].val = null); + }, + }, + () => { + copyState(searchOptions, tempSearchOptions); + return div( + { class: 'flex-column fx-gap-2' }, + span({ class: 'text-caption' }, 'Search by'), + Checkbox({ + label: 'Table name', + checked: tempSearchOptions.tableName, + onChange: (checked) => tempSearchOptions.tableName.val = 
checked, + }), + Checkbox({ + label: 'Column name', + checked: tempSearchOptions.columnName, + onChange: (checked) => tempSearchOptions.columnName.val = checked, + }), + ); + }, + // Pass as a function that will be called when the filter portal is opened + // Otherwise state bindings get garbage collected and Select dropdowns won't open + // https://vanjs.org/advanced#gc + () => { + copyState(filters, tempFilters); + return div( + Checkbox({ + label: 'Only critical data elements (CDEs)', + checked: tempFilters.criticalDataElement, + onChange: (checked) => tempFilters.criticalDataElement.val = checked, + }), + div( + { + class: 'flex-row fx-flex-wrap fx-gap-4 fx-justify-space-between mt-4', + style: 'max-width: 420px;', + }, + TAG_KEYS.map(key => Select({ + id: `data-catalog-${key}`, + label: capitalize(key.replaceAll('_', ' ')), + height: 32, + value: tempFilters[key], + options: getValue(props.tag_values)?.[key]?.map(key => ({ label: key, value: key })), + allowNull: true, + disabled: !getValue(props.tag_values)?.[key]?.length, + onChange: (value) => tempFilters[key].val = value, + })), + ), + ); + }, + ), + div( + { + class: 'tg-dh--dragger', + draggable: true, + ondragstart: (event) => { + event.dataTransfer.effectAllowed = 'move'; + event.dataTransfer.setDragImage(EMPTY_IMAGE, 0, 0); + dragState.val = { startX: event.screenX, startWidth: document.getElementById(treeDomId).offsetWidth }; + }, + ondragend: (event) => { + dragResize(event); + dragState.val = null; + }, + ondrag: van.derive(() => dragState.val ? dragResize : null), + }, + ), + () => multiEditMode.val + ? 
MultiEdit(props, multiSelectedItems, multiEditMode) + : SelectedDetails(props, selectedItem.val), + ) + : ConditionalEmptyState(projectSummary, userCanEdit, userCanNavigate), + ) + : ConditionalEmptyState(projectSummary, userCanEdit, userCanNavigate); }; const SelectedDetails = (/** @type Properties */ props, /** @type Table | Column */ item) => { @@ -244,16 +350,17 @@ const SelectedDetails = (/** @type Properties */ props, /** @type Table | Column ), LatestProfilingTime({ noLinks: !userCanNavigate }, item), ), - DataCharacteristicsCard({ scores: true }, item), + DataCharacteristicsCard({ scores: true, allowRemove: true }, item), item.type === 'column' - ? ColumnDistributionCard({ dataPreview: true }, item) + ? ColumnDistributionCard({ dataPreview: true, history: true }, item) : TableSizeCard({}, item), TagsCard({ tagOptions: getValue(props.tag_values), editable: userCanEdit }, item), PotentialPIICard({ noLinks: !userCanNavigate }, item), HygieneIssuesCard({ noLinks: !userCanNavigate }, item), TestIssuesCard({ noLinks: !userCanNavigate }, item), + TestSuitesCard(item), ) - : EmptyState( + : ItemEmptyState( 'Select a table or column on the left to view its details.', 'quick_reference_all', ); @@ -300,8 +407,10 @@ const TagsCard = (/** @type TagProperties */ props, /** @type Table | Column */ value ? 'check_circle' : 'cancel', ), span( - { class: value ? 'text-capitalize' : 'text-secondary' }, - value ? label : `Not a ${label}`, + { class: value ? '' : 'text-secondary' }, + item.type === 'column' + ? (value ? 'Critical data element' : 'Not a critical data element') + : (value ? 'All critical data elements' : 'Not all critical data elements'), ), (item.type === 'column' && state.rawVal === null) ? 
InheritedIcon('table') : null, ); @@ -372,12 +481,52 @@ const TagsCard = (/** @type TagProperties */ props, /** @type Table | Column */ }); }; +const TestSuitesCard = (/** @type Table | Column */ item) => { + return Card({ + title: 'Related Test Suites', + content: div( + { class: 'flex-column fx-gap-2' }, + (item.test_suites ?? []).map(({ id, name, test_count }) => div( + { class: 'flex-row fx-gap-1' }, + Link({ + href: 'test-suites:definitions', + params: { + test_suite_id: id, + table_name: item.table_name, + column_name: item.column_name, + }, + open_new: true, + label: name, + }), + span({ class: 'text-caption' }, `(${test_count} test definitions)`), + )) + ), + actionContent: item.test_suites?.length + ? null + : item.drop_date + ? span({ class: 'text-secondary' }, `No test definitions for ${item.type}`) + : span( + { class: 'text-secondary flex-row fx-gap-1 fx-justify-content-flex-end' }, + `No test definitions yet for ${item.type}.`, + Link({ + href: 'test-suites', + params: { + project_code: item.project_code, + table_group_id: item.table_group_id, + }, + open_new: true, + label: 'Go to Test Suites', + right_icon: 'chevron_right', + }), + ), + }); +}; + const MultiEdit = (/** @type Properties */ props, /** @type Object */ selectedItems, /** @type Object */ multiEditMode) => { const hasSelection = van.derive(() => selectedItems.val?.length); const columnCount = van.derive(() => selectedItems.val?.reduce((count, { children }) => count + children.length, 0)); const attributes = [ - 'description', 'critical_data_element', ...TAG_KEYS, ].map(key => ({ @@ -395,7 +544,6 @@ const MultiEdit = (/** @type Properties */ props, /** @type Object */ selectedIt ]; const tagOptions = getValue(props.tag_values) ?? 
{}; const width = 400; - const descriptionWidth = 800; return div( { class: 'tg-dh--details flex-column' }, @@ -428,8 +576,7 @@ const MultiEdit = (/** @type Properties */ props, /** @type Object */ selectedIt onChange: (value) => valueState.val = value, }) : Input({ - label, help, - width: key === 'description' ? descriptionWidth : width, + label, help, width, placeholder: () => checkedState.val ? null : '(keep current values)', autocompleteOptions: tagOptions[key], onChange: (value) => valueState.val = value || null, @@ -481,14 +628,14 @@ const MultiEdit = (/** @type Properties */ props, /** @type Object */ selectedIt ), ), }) - : EmptyState( + : ItemEmptyState( 'Select tables or columns on the left to edit their tags.', 'edit_document', ), ); }; -const EmptyState = (/** @type string */ message, /** @type string */ icon) => { +const ItemEmptyState = (/** @type string */ message, /** @type string */ icon) => { return div( { class: 'flex-column fx-align-flex-center fx-justify-center tg-dh--no-selection' }, Icon({ size: 80, classes: 'text-disabled mb-5' }, icon), @@ -496,10 +643,66 @@ const EmptyState = (/** @type string */ message, /** @type string */ icon) => { ); }; +const ConditionalEmptyState = ( + /** @type ProjectSummary */ projectSummary, + /** @type boolean */ userCanEdit, + /** @type boolean */ userCanNavigate, +) => { + let args = { + label: 'No profiling data yet', + message: EMPTY_STATE_MESSAGE.profiling, + button: Button({ + icon: 'play_arrow', + type: 'stroked', + color: 'primary', + label: 'Run Profiling', + width: 'fit-content', + style: 'margin: auto; background: var(--dk-card-background);', + disabled: !userCanEdit, + tooltip: userCanEdit ? 
null : DISABLED_ACTION_TEXT, + tooltipPosition: 'bottom', + onclick: () => emitEvent('RunProfilingClicked', {}), + }), + } + if (projectSummary.connections_ct <= 0) { + args = { + label: 'Your project is empty', + message: EMPTY_STATE_MESSAGE.connection, + link: { + label: 'Go to Connections', + href: 'connections', + params: { project_code: projectSummary.project_code }, + disabled: !userCanNavigate, + }, + }; + } else if (projectSummary.table_groups_ct <= 0) { + args = { + label: 'Your project is empty', + message: EMPTY_STATE_MESSAGE.tableGroup, + link: { + label: 'Go to Table Groups', + href: 'connections:table-groups', + params: { connection_id: projectSummary.default_connection_id }, + disabled: !userCanNavigate, + }, + }; + } + + return EmptyState({ + icon: 'dataset', + ...args, + }); +}; + const stylesheet = new CSSStyleSheet(); stylesheet.replace(` .tg-dh { height: 100%; +} + +.tg-dh--content { + min-height: 0; + flex: auto; align-items: stretch; } diff --git a/testgen/ui/components/frontend/js/pages/score_explorer.js b/testgen/ui/components/frontend/js/pages/score_explorer.js index d887e033..27deb404 100644 --- a/testgen/ui/components/frontend/js/pages/score_explorer.js +++ b/testgen/ui/components/frontend/js/pages/score_explorer.js @@ -3,6 +3,7 @@ * @type {object} * @property {string} field * @property {string} value + * @property {Array?} others * * @typedef ScoreDefinition * @type {object} @@ -12,6 +13,7 @@ * @property {boolean} cde_score * @property {string} category * @property {ScoreDefinitionFilter[]} filters + * @property {boolean} filter_by_columns * * @typedef ScoreCardCategory * @type {object} @@ -59,6 +61,8 @@ import { Checkbox } from '../components/checkbox.js'; import { Portal } from '../components/portal.js'; import { ScoreBreakdown } from '../components/score_breakdown.js'; import { IssuesTable } from '../components/score_issues.js'; +import { EmptyState, EMPTY_STATE_MESSAGE } from '../components/empty_state.js'; +import { ColumnFilter } 
from '../components/explorer_column_selector.js'; const { div, i, span } = van.tags; @@ -84,38 +88,66 @@ const ScoreExplorer = (/** @type {Properties} */ props) => { const domId = 'score-explorer-page'; const userCanEdit = getValue(props.permissions)?.can_edit ?? false; + const updateToolbarFilters = van.derive(() => { + const oldFilters = props.definition.oldVal.filters; + const newFilters = props.definition.val.filters; + const oldFilterByColumns = props.definition.oldVal.filter_by_columns; + const newFilterByColumns = props.definition.val.filter_by_columns; + + if (!isEqual(oldFilters, newFilters) || oldFilterByColumns !== newFilterByColumns) { + return {filters: newFilters, filter_by_columns: newFilterByColumns}; + } + + return null; + }); resizeFrameHeightToElement(domId); resizeFrameHeightOnDOMChange(domId); return div( { id: domId, class: 'score-explorer' }, - Toolbar(props.filter_values, getValue(props.definition), props.is_new, userCanEdit), - span({ class: 'mb-4', style: 'display: block;' }), - ScoreCard(props.score_card), + Toolbar(props.filter_values, getValue(props.definition), props.is_new, userCanEdit, updateToolbarFilters), span({ class: 'mb-4', style: 'display: block;' }), () => { - const drilldown = getValue(props.drilldown); - const issuesValue = getValue(props.issues); - - return ( - (issuesValue && getValue(props.drilldown)) - ? 
IssuesTable( - issuesValue?.items, - issuesValue?.columns, - getValue(props.score_card), - getValue(props.breakdown_score_type), - getValue(props.breakdown_category), - drilldown, - () => emitEvent('DrilldownChanged', { payload: null }), - ) - : ScoreBreakdown( - props.score_card, - props.breakdown, - props.breakdown_category, - props.breakdown_score_type, - (project_code, name, score_type, category, drilldown) => emitEvent('DrilldownChanged', { payload: drilldown }), - ) + const isEmpty = getValue(props.is_new) && getValue(props.definition)?.filters?.length <= 0; + + if (isEmpty) { + return EmptyState({ + class: 'explorer-empty-state', + label: 'No filters or columns selected yet', + icon: 'readiness_score', + message: EMPTY_STATE_MESSAGE.explorer, + }); + } + + return div( + {class: 'flex-column'}, + ScoreCard(props.score_card), + span({ class: 'mb-4', style: 'display: block;' }), + () => { + const drilldown = getValue(props.drilldown); + const issuesValue = getValue(props.issues); + + return ( + (issuesValue && getValue(props.drilldown)) + ? IssuesTable( + issuesValue?.items, + issuesValue?.columns, + getValue(props.score_card), + getValue(props.breakdown_score_type), + getValue(props.breakdown_category), + drilldown, + () => emitEvent('DrilldownChanged', { payload: null }), + ) + : ScoreBreakdown( + props.score_card, + props.breakdown, + props.breakdown_category, + props.breakdown_score_type, + (project_code, name, score_type, category, drilldown) => emitEvent('DrilldownChanged', { payload: drilldown }), + ) + ); + }, ); }, ); @@ -126,6 +158,7 @@ const Toolbar = ( /** @type ScoreDefinition */ definition, /** @type boolean */ isNew, /** @type boolean */ userCanEdit, + /** @type ... 
*/ updates, ) => { const addFilterButtonId = 'score-explorer--add-filter-btn'; const categories = [ @@ -141,7 +174,8 @@ const Toolbar = ( 'data_product', ]; const filterableFields = categories.filter((c) => c !== 'dq_dimension'); - const filters = van.state(definition.filters.map((f, idx) => ({key: `${f.field}-${idx}-${getRandomId()}`, field: f.field, value: van.state(f.value) }))); + const filters = van.state(definition.filters.map((f, idx) => ({key: `${f.field}-${idx}-${getRandomId()}`, field: f.field, value: van.state(f.value), others: f.others ?? [] }))); + const filterByColumns = van.state(definition.filter_by_columns); const filterSelectorOpened = van.state(false); const displayTotalScore = van.state(definition.total_score ?? true); const displayCDEScore = van.state(definition.cde_score ?? true); @@ -172,6 +206,7 @@ const Toolbar = ( filters.val = [ ...filters.val.slice(0, position), ...filters.val.slice(position + 1) ]; }; const setFilterValue = (/** @type number*/ position, /** @type string */ value) => { + filterByColumns.val = false; filters.val[position].value.val = value filters.val = [ ...filters.val ]; }; @@ -186,6 +221,7 @@ const Toolbar = ( category: displayCategory.oldVal ? selectedCategory.oldVal : null, total_score: displayTotalScore.oldVal, cde_score: displayCDEScore.oldVal, + filter_by_columns: filterByColumns.oldVal, }; const current = { name: getValue(scoreName), @@ -195,10 +231,17 @@ const Toolbar = ( category: getValue(displayCategory) ? 
getValue(selectedCategory) : null, total_score: getValue(displayTotalScore), cde_score: getValue(displayCDEScore), + filter_by_columns: getValue(filterByColumns), }; if (!isEqual(current, previous)) { - refresh(current); + if (current.filter_by_columns && !previous.filter_by_columns) { + emitEvent('ColumnSelectorOpened', {}); + } else if (!current.filter_by_columns && previous.filter_by_columns) { + filterSelectorOpened.val = true; + } else { + refresh(current); + } } }); @@ -212,42 +255,120 @@ const Toolbar = ( } }); + van.derive(() => { + const updatesValue = getValue(updates); + if (updatesValue != null) { + const simplifiedFilters = (filters.rawVal ?? []).map(f => ({ field: f.field, value: f.value.rawVal, others: f.others ?? []})) + if (!isEqual(updatesValue.filters, simplifiedFilters)) { + filters.val = updatesValue.filters.map((f, idx) => ({key: `${f.field}-${idx}-${getRandomId()}`, field: f.field, value: van.state(f.value), others: f.others ?? [] })); + } + + if (updatesValue.filter_by_columns !== filterByColumns.rawVal) { + filterByColumns.val = updatesValue.filter_by_columns; + } + } + }); + return div( { class: 'flex-column score-explorer--toolbar' }, div( { class: 'flex-column' }, - span({ class: 'text-caption mb-1' }, 'Filter by'), div( - { class: 'flex-row fx-flex-wrap fx-gap-4' }, + { class: 'flex-column' }, + span({ class: 'text-caption mb-1' }, 'Filter by'), + div( + { class: 'flex-row fx-flex-wrap fx-gap-4' }, + () => { + const filters_ = getValue(filters); + const filterValues_ = getValue(filterValues); + if (filters_?.length <= 0) { + return ''; + } + + return div( + { class: 'flex-row fx-flex-wrap fx-gap-4' }, + filters_.map(({ key, field, value, others }, idx) => { + renderedFilters[key] = renderedFilters[key] ?? ( + filterByColumns.val + ? 
ColumnFilter({field, value, others}) + : Filter(idx, field, value, filterValues_[field], setFilterValue, removeFilter, !isInitialized && !value.val) + ); + return renderedFilters[key]; + }), + ); + }, + () => { + const filters_ = getValue(filters); + const filterByColumns_ = getValue(filterByColumns); + + const fieldFilterTrigger = Button({ + id: addFilterButtonId, + icon: 'add', + label: 'Add Filter', + type: 'basic', + color: 'primary', + style: 'width: auto;', + onclick: () => filterSelectorOpened.val = true, + }); + const columnsSelectorTrigger = Button({ + id: addFilterButtonId, + label: 'Select Columns', + type: 'basic', + color: 'primary', + style: 'width: auto;', + onclick: () => emitEvent('ColumnSelectorOpened', {}), + }); + const combinedTrigger = div( + {class: 'flex-row fx-gap-3'}, + fieldFilterTrigger, + span({class: 'text-caption'}, 'Or'), + columnsSelectorTrigger, + ); + + if (filters_?.length <= 0 && filterByColumns_ == undefined) { + return combinedTrigger; + } + + if (filterByColumns_) { + return columnsSelectorTrigger; + } + + return fieldFilterTrigger; + }, + Portal( + { target: addFilterButtonId, style: '', opened: filterSelectorOpened}, + FilterFieldSelector(filterableFields, undefined, addEmptyFilter), + ), + ) + ), + div( + { class: 'flex-row fx-justify-content-flex-end', style: 'width: 100%;' }, () => { - const filters_ = getValue(filters); - const filterValues_ = getValue(filterValues); - if (filters_?.length <= 0) { + if (filterByColumns.val == undefined) { return ''; } - return div( - { class: 'flex-row fx-flex-wrap fx-gap-4' }, - getValue(filters).map(({ key, field, value }, idx) => { - renderedFilters[key] = renderedFilters[key] ?? 
Filter(idx, field, value, filterValues_[field], setFilterValue, removeFilter, !isInitialized); - return renderedFilters[key]; - }), - ); - }, - Button({ - id: addFilterButtonId, - icon: 'add', - label: 'Add Filter', - type: 'basic', - color: 'primary', - style: 'width: auto;', - onclick: () => filterSelectorOpened.val = true, - }), - Portal( - { target: addFilterButtonId, style: '', opened: filterSelectorOpened}, - FilterFieldSelector(filterableFields, undefined, addEmptyFilter), - ), - ) + const switchToColumnSelectorTrigger = Button({ + label: 'Switch to Column Selector', + type: 'basic', + color: 'primary', + style: 'width: auto;', + onclick: () => emitEvent('FilterModeChanged', {payload: true}), + }); + const switchToCategoryFilterTrigger = Button({ + label: 'Switch to Category Filters', + type: 'basic', + color: 'primary', + style: 'width: auto;', + onclick: () => emitEvent('FilterModeChanged', {payload: false}), + }); + + if (filterByColumns.val) { + return switchToCategoryFilterTrigger; + } + return switchToColumnSelectorTrigger; + } + ), ), div( { class: 'flex-row fx-align-flex-end fx-flex-wrap fx-gap-5' }, @@ -392,6 +513,10 @@ stylesheet.replace(` min-height: 1100px; } +.explorer-empty-state { + margin-top: unset !important; +} + .score-explorer--toolbar { border: 1px solid var(--border-color); border-radius: 8px; @@ -399,6 +524,7 @@ stylesheet.replace(` padding: 16px; } + .score-explorer--filter { background: var(--form-field-color); border-radius: 8px; diff --git a/testgen/ui/components/frontend/js/pages/test_suites.js b/testgen/ui/components/frontend/js/pages/test_suites.js index 8ebf10e0..923e9de3 100644 --- a/testgen/ui/components/frontend/js/pages/test_suites.js +++ b/testgen/ui/components/frontend/js/pages/test_suites.js @@ -6,6 +6,7 @@ * @property {number} connections_ct * @property {number} table_groups_ct * @property {string} default_connection_id + * @property {boolean} can_export_to_observability * * @typedef TableGroupOption * @type {object} 
@@ -70,8 +71,9 @@ const TestSuites = (/** @type Properties */ props) => { return div( { id: wrapperId, style: 'overflow-y: auto;' }, - () => - getValue(props.project_summary).test_suites_ct > 0 + () => { + const projectSummary = getValue(props.project_summary); + return projectSummary.test_suites_ct > 0 ? div( { class: 'tg-test-suites'}, () => div( @@ -111,9 +113,29 @@ const TestSuites = (/** @type Properties */ props) => { { class: 'flex-row' }, userCanEdit ? [ - Button({ type: 'icon', icon: 'output', tooltip: 'Export results to Observability', onclick: () => emitEvent('ExportActionClicked', {payload: testSuite.id}) }), - Button({ type: 'icon', icon: 'edit', tooltip: 'Edit test suite', onclick: () => emitEvent('EditActionClicked', {payload: testSuite.id}) }), - Button({ type: 'icon', icon: 'delete', tooltip: 'Delete test suite', tooltipPosition: 'left', onclick: () => emitEvent('DeleteActionClicked', {payload: testSuite.id}) }), + Button({ + type: 'icon', + icon: 'output', + tooltip: projectSummary.can_export_to_observability + ? 'Export results to Observability' + : 'Observability export not configured in Project Settings', + tooltipPosition: 'left', + disabled: !projectSummary.can_export_to_observability, + onclick: () => emitEvent('ExportActionClicked', {payload: testSuite.id}), + }), + Button({ + type: 'icon', + icon: 'edit', + tooltip: 'Edit test suite', + onclick: () => emitEvent('EditActionClicked', {payload: testSuite.id}), + }), + Button({ + type: 'icon', + icon: 'delete', + tooltip: 'Delete test suite', + tooltipPosition: 'left', + onclick: () => emitEvent('DeleteActionClicked', {payload: testSuite.id}), + }), ] : '' ), @@ -124,7 +146,7 @@ const TestSuites = (/** @type Properties */ props) => { Link({ href: 'test-suites:definitions', params: { test_suite_id: testSuite.id }, - label: `${testSuite.test_ct ?? 0} test definitions`, + label: `View ${testSuite.test_ct ?? 
0} test definitions`, right_icon: 'chevron_right', right_icon_size: 20, class: 'mb-4', @@ -170,7 +192,7 @@ const TestSuites = (/** @type Properties */ props) => { onclick: () => emitEvent('RunTestsClicked', {payload: testSuite.id}), }), Button({ - label: 'Generate Tests', + label: parseInt(testSuite.test_ct) ? 'Regenerate Tests' : 'Generate Tests', color: 'primary', type: 'stroked', style: 'margin-top: 16px; min-width: 180px;', @@ -184,7 +206,8 @@ const TestSuites = (/** @type Properties */ props) => { })), ), ) - : ConditionalEmptyState(getValue(props.project_summary), userCanEdit), + : ConditionalEmptyState(projectSummary, userCanEdit); + }, ); }; @@ -200,7 +223,7 @@ const ConditionalEmptyState = ( color: 'primary', label: 'Add Test Suite', width: 'fit-content', - style: 'margin: auto; background: white;', + style: 'margin: auto; background: var(--dk-card-background);', disabled: !userCanEdit, tooltip: userCanEdit ? null : DISABLED_ACTION_TEXT, tooltipPosition: 'bottom', diff --git a/testgen/ui/components/frontend/js/utils.js b/testgen/ui/components/frontend/js/utils.js index 5a4b8acb..caab512e 100644 --- a/testgen/ui/components/frontend/js/utils.js +++ b/testgen/ui/components/frontend/js/utils.js @@ -105,6 +105,9 @@ function getParents(/** @type HTMLElement*/ element) { } function friendlyPercent(/** @type number */ value) { + if (Number.isNaN(value)) { + return 0; + } const rounded = Math.round(value); if (rounded === 0 && value > 0) { return '< 0'; @@ -177,4 +180,11 @@ function afterMount(/** @ype Function */ callback) { trigger.val = true; } -export { afterMount, debounce, emitEvent, enforceElementWidth, getRandomId, getValue, getParents, isEqual, isState, loadStylesheet, resizeFrameHeightToElement, resizeFrameHeightOnDOMChange, friendlyPercent }; +function slugify(/** @type string */ str) { + return str + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-|-$/g, ''); +} + +export { afterMount, debounce, emitEvent, enforceElementWidth, getRandomId, 
getValue, getParents, isEqual, isState, loadStylesheet, resizeFrameHeightToElement, resizeFrameHeightOnDOMChange, friendlyPercent, slugify }; diff --git a/testgen/ui/components/widgets/download_dialog.py b/testgen/ui/components/widgets/download_dialog.py index a908043d..0a43a748 100644 --- a/testgen/ui/components/widgets/download_dialog.py +++ b/testgen/ui/components/widgets/download_dialog.py @@ -1,12 +1,92 @@ import tempfile from collections.abc import Callable, Iterable +from io import BytesIO +from typing import TypedDict from zipfile import ZipFile +import pandas as pd import streamlit as st +from testgen.common import date_service + PROGRESS_UPDATE_TYPE = Callable[[float], None] -FILE_DATA_TYPE = tuple[str, str, str|bytes] +FILE_DATA_TYPE = tuple[str, str, str | bytes] + + +class ExcelColumnOptions(TypedDict): + header: str + wrap: bool + + +def get_excel_file_data( + data: pd.DataFrame, + title: str, + details: dict[str, str] | None = None, + columns: dict[str, ExcelColumnOptions] | None = None, + update_progress: PROGRESS_UPDATE_TYPE | None = None, +) -> FILE_DATA_TYPE: + if not columns: + columns = { col: {} for col in data.columns } + + filtered_data = data[columns.keys()] + start_row = 4 + len(details or {}) + + with BytesIO() as buffer: + with pd.ExcelWriter(buffer, engine="xlsxwriter") as writer: + # Data + filtered_data.to_excel(writer, index=False, sheet_name="Sheet1", startrow=start_row) + + workbook = writer.book + worksheet = writer.sheets["Sheet1"] + worksheet.set_column(0, 1000, None, workbook.add_format({"valign": "top"})) + worksheet.autofit() + + # Title + worksheet.write( + "A2", + title, + workbook.add_format({"bold": True, "size": 14}), + ) + + details_key_format = workbook.add_format({"size": 9}) + details_value_format = workbook.add_format({"italic": True, "size": 9}) + + # Timestamp + worksheet.write("A3", "Exported on", details_key_format) + worksheet.write("B3", date_service.get_timezoned_now(st.session_state), details_value_format) + 
+ # Details + if details: + for index, (key, value) in enumerate(details.items()): + worksheet.write(f"A{4 + index}", key, details_key_format) + worksheet.write(f"B{4 + index}", value, details_value_format) + + # Headers + table style + (max_row, max_col) = filtered_data.shape + headers = [ + {"header": options.get("header", key.replace("_", " ").capitalize())} + for key, options in columns.items() + ] + worksheet.add_table( + start_row, + 0, + max_row + start_row, + max_col - 1, + {"columns": headers, "style": "Table Style Medium 16"}, + ) + + # Wrap columns + wrap_format = workbook.add_format({"text_wrap": True, "valign": "top"}) + for index, options in enumerate(columns.values()): + if options.get("wrap"): + worksheet.set_column(index, index, 60, wrap_format) + + if update_progress: + update_progress(1.0) + buffer.seek(0) + return f"{title}.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", buffer.getvalue() + def zip_multi_file_data( zip_file_name: str, diff --git a/testgen/ui/components/widgets/testgen_component.py b/testgen/ui/components/widgets/testgen_component.py index ae692c14..f4866bdd 100644 --- a/testgen/ui/components/widgets/testgen_component.py +++ b/testgen/ui/components/widgets/testgen_component.py @@ -18,6 +18,7 @@ "quality_dashboard", "score_details", "schedule_list", + "column_selector", ] diff --git a/testgen/ui/pdf/hygiene_issue_report.py b/testgen/ui/pdf/hygiene_issue_report.py index 7a0462ab..1e3ddda3 100644 --- a/testgen/ui/pdf/hygiene_issue_report.py +++ b/testgen/ui/pdf/hygiene_issue_report.py @@ -44,8 +44,8 @@ def build_summary_table(document, hi_data): *[ (cmd[0], *coords, *cmd[1:]) for coords in ( - ((2, 2), (2, 4)), - ((0, 0), (0, -1)) + ((2, 2), (2, -3)), + ((0, 0), (0, -2)) ) for cmd in ( ("FONT", "Helvetica-Bold"), @@ -63,10 +63,11 @@ def build_summary_table(document, hi_data): ("SPAN", (3, 3), (4, 3)), ("SPAN", (3, 4), (4, 4)), ("SPAN", (3, 5), (4, 5)), - ("SPAN", (2, 5), (4, 5)), + ("SPAN", (1, 6), (4, 
6)), + ("SPAN", (0, 7), (4, 7)), # Link cell - ("BACKGROUND", (2, 5), (4, 5), colors.white), + ("BACKGROUND", (0, 7), (4, 7), colors.white), # Status cell *[ @@ -105,12 +106,37 @@ def build_summary_table(document, hi_data): ), ), - ("Database/Schema", hi_data["schema_name"], "Profiling Date", profiling_timestamp), - ("Table", hi_data["table_name"], "Table Group", hi_data["table_groups_name"]), - ("Column", hi_data["column_name"], "Disposition", hi_data["disposition"] or "No Decision"), + ("Profiling Date", profiling_timestamp, "Table Group", hi_data["table_groups_name"]), + ("Database/Schema", hi_data["schema_name"], "Disposition", hi_data["disposition"] or "No Decision"), + ("Table", hi_data["table_name"], "Column Type", hi_data["column_type"]), + ("Column", hi_data["column_name"], "Semantic Data Type", hi_data["functional_data_type"]), + ( + "Column Tags", + ( + Paragraph( + "Critical data element: Yes" if hi_data["critical_data_element"] else "Critical data element: No", + style=PARA_STYLE_CELL, + ), + Paragraph(f"Description: {hi_data['column_description']}", style=PARA_STYLE_CELL) + if hi_data["column_description"] + else [], + [ + Paragraph(f"{tag.replace('_', ' ').capitalize()}: {hi_data[tag]}", style=PARA_STYLE_CELL) + for tag in [ + "data_source", + "source_system", + "source_process", + "business_domain", + "stakeholder_group", + "transform_level", + "aggregation_level", + "data_product", + ] + if hi_data[tag] + ], + ), + ), ( - "Column Type", - hi_data["column_type"], Paragraph( f""" View on TestGen > diff --git a/testgen/ui/pdf/test_result_report.py b/testgen/ui/pdf/test_result_report.py index c60cfc3e..883b0346 100644 --- a/testgen/ui/pdf/test_result_report.py +++ b/testgen/ui/pdf/test_result_report.py @@ -55,7 +55,7 @@ def build_summary_table(document, tr_data): *[ (cmd[0], *coords, *cmd[1:]) for coords in ( - ((3, 3), (3, -2)), + ((3, 3), (3, -3)), ((0, 0), (0, -2)) ) for cmd in ( @@ -78,10 +78,11 @@ def build_summary_table(document, tr_data): 
("SPAN", (4, 5), (5, 5)), ("SPAN", (1, 6), (2, 6)), ("SPAN", (4, 6), (5, 6)), - ("SPAN", (0, 7), (5, 7)), + ("SPAN", (1, 7), (5, 7)), + ("SPAN", (0, 8), (5, 8)), # Link cell - ("BACKGROUND", (0, 7), (5, 7), colors.white), + ("BACKGROUND", (0, 8), (5, 8), colors.white), # Measure cell ("FONT", (1, 1), (1, 1), "Helvetica-Bold"), @@ -118,10 +119,36 @@ def build_summary_table(document, tr_data): ("Measured Value", tr_data["result_measure"], tr_data["measure_uom_description"]), ("Threshold Value", tr_data["threshold_value"], tr_data["threshold_description"]), - ("Test Run Date", test_timestamp, None, "Table Group", tr_data["table_groups_name"]), - ("Database/Schema", tr_data["schema_name"], None, "Test Suite", tr_data["test_suite"]), + ("Test Run Date", test_timestamp, None, "Test Suite", tr_data["test_suite"]), + ("Database/Schema", tr_data["schema_name"], None, "Table Group", tr_data["table_groups_name"]), ("Table", tr_data["table_name"], None, "Data Quality Dimension", tr_data["dq_dimension"]), ("Column", tr_data["column_names"], None, "Disposition", tr_data["disposition"] or "No Decision"), + ( + "Column Tags", + ( + Paragraph( + "Critical data element: Yes" if tr_data["critical_data_element"] else "Critical data element: No", + style=PARA_STYLE_CELL, + ), + Paragraph(f"Description: {tr_data['column_description']}", style=PARA_STYLE_CELL) + if tr_data["column_description"] + else [], + [ + Paragraph(f"{tag.replace('_', ' ').capitalize()}: {tr_data[tag]}", style=PARA_STYLE_CELL) + for tag in [ + "data_source", + "source_system", + "source_process", + "business_domain", + "stakeholder_group", + "transform_level", + "aggregation_level", + "data_product", + ] + if tr_data[tag] + ], + ), + ), ( Paragraph( f""" @@ -203,10 +230,11 @@ def get_report_content(document, tr_data): yield Paragraph("TestGen Test Issue Report", PARA_STYLE_TITLE) yield build_summary_table(document, tr_data) - yield KeepTogether([ - Paragraph("Usage Notes", PARA_STYLE_H1), - 
Paragraph(f"{tr_data['usage_notes']}", PARA_STYLE_TEXT), - ]) + if tr_data["usage_notes"]: + yield KeepTogether([ + Paragraph("Usage Notes", PARA_STYLE_H1), + Paragraph(f"{tr_data['usage_notes']}", PARA_STYLE_TEXT), + ]) yield CondPageBreak(SECTION_MIN_AVAILABLE_HEIGHT) yield Paragraph("Result History", PARA_STYLE_H1) diff --git a/testgen/ui/queries/profiling_queries.py b/testgen/ui/queries/profiling_queries.py index ab18f736..4893e0ec 100644 --- a/testgen/ui/queries/profiling_queries.py +++ b/testgen/ui/queries/profiling_queries.py @@ -93,7 +93,7 @@ def get_run_by_id(profile_run_id: str) -> pd.Series: return pd.Series() -@st.cache_data(show_spinner="Loading data ...") +@st.cache_data(show_spinner=False) def get_profiling_results(profiling_run_id: str, table_name: str, column_name: str, sorting_columns = None): order_by = "" if sorting_columns is None: @@ -129,12 +129,7 @@ def get_profiling_results(profiling_run_id: str, table_name: str, column_name: s WHERE profile_run_id = profile_results.profile_run_id AND table_name = profile_results.table_name AND column_name = profile_results.column_name - ) THEN 'Yes' END AS hygiene_issues, - distinct_value_hash, - fractional_sum, - date_days_present, - date_weeks_present, - date_months_present + ) THEN 'Yes' END AS hygiene_issues FROM {schema}.profile_results WHERE profile_run_id = '{profiling_run_id}' AND table_name ILIKE '{table_name}' @@ -144,8 +139,8 @@ def get_profiling_results(profiling_run_id: str, table_name: str, column_name: s return db.retrieve_data(query) -@st.cache_data(show_spinner="Loading data ...") -def get_table_by_id(table_id: str, table_group_id: str) -> dict | None: +@st.cache_data(show_spinner=False) +def get_table_by_id(table_id: str) -> dict | None: if not is_uuid4(table_id): return None @@ -163,6 +158,7 @@ def get_table_by_id(table_id: str, table_group_id: str) -> dict | None: table_chars.column_ct, data_point_ct, add_date, + last_refresh_date, drop_date, -- Table Tags table_chars.description, @@ 
-190,8 +186,7 @@ def get_table_by_id(table_id: str, table_group_id: str) -> dict | None: LEFT JOIN {schema}.table_groups ON ( table_chars.table_groups_id = table_groups.id ) - WHERE table_id = '{table_id}' - AND table_chars.table_groups_id = '{table_group_id}'; + WHERE table_id = '{table_id}'; """ results = db.retrieve_data(query) @@ -200,23 +195,18 @@ def get_table_by_id(table_id: str, table_group_id: str) -> dict | None: return json.loads(results.to_json(orient="records"))[0] -@st.cache_data(show_spinner="Loading data ...") +@st.cache_data(show_spinner=False) def get_column_by_id( column_id: str, - table_group_id: str, include_tags: bool = False, include_has_test_runs: bool = False, include_scores: bool = False, ) -> dict | None: - if not is_uuid4(column_id): return None - condition = f""" - column_chars.column_id = '{column_id}' - AND column_chars.table_groups_id = '{table_group_id}' - """ - return get_column_by_condition(condition, include_tags, include_has_test_runs, include_scores) + condition = f"column_chars.column_id = '{column_id}'" + return get_columns_by_condition(condition, include_tags, include_has_test_runs, include_scores)[0] @st.cache_data(show_spinner="Loading data ...") @@ -234,10 +224,20 @@ def get_column_by_name( AND column_chars.table_name = '{table_name}' AND column_chars.table_groups_id = '{table_group_id}' """ - return get_column_by_condition(condition, include_tags, include_has_test_runs, include_scores) + return get_columns_by_condition(condition, include_tags, include_has_test_runs, include_scores)[0] -def get_column_by_condition( +def get_columns_by_id( + column_ids: list[str], + include_tags: bool = False, + include_has_test_runs: bool = False, + include_scores: bool = False, +) -> dict | None: + condition = f"column_chars.column_id IN ('{"', '".join([ col for col in column_ids if is_uuid4(col) ])}')" + return get_columns_by_condition(condition, include_tags, include_has_test_runs, include_scores) + + +def get_columns_by_condition( 
filter_condition: str, include_tags: bool = False, include_has_test_runs: bool = False, @@ -253,6 +253,7 @@ def get_column_by_condition( column_chars.table_name, column_chars.schema_name, column_chars.table_groups_id::VARCHAR AS table_group_id, + column_chars.ordinal_position, -- Characteristics column_chars.general_type, column_chars.column_type, @@ -311,7 +312,7 @@ def get_column_by_condition( results = db.retrieve_data(query) if not results.empty: # to_json converts datetimes, NaN, etc, to JSON-safe values (Note: to_dict does not) - return json.loads(results.to_json(orient="records"))[0] + return json.loads(results.to_json(orient="records")) @st.cache_data(show_spinner=False) diff --git a/testgen/ui/queries/project_queries.py b/testgen/ui/queries/project_queries.py index 5c087064..342702e9 100644 --- a/testgen/ui/queries/project_queries.py +++ b/testgen/ui/queries/project_queries.py @@ -53,6 +53,12 @@ def get_summary_by_code(project_code: str) -> pd.Series: FROM {schema}.test_runs LEFT JOIN {schema}.test_suites ON test_runs.test_suite_id = test_suites.id WHERE test_suites.project_code = '{project_code}' - ) AS test_runs_ct; + ) AS test_runs_ct, + ( + SELECT COALESCE(observability_api_key, '') <> '' + AND COALESCE(observability_api_url, '') <> '' + FROM {schema}.projects + WHERE project_code = '{project_code}' + ) AS can_export_to_observability; """ return db.retrieve_data(sql).iloc[0] diff --git a/testgen/ui/queries/scoring_queries.py b/testgen/ui/queries/scoring_queries.py index ab7cec73..f6a72741 100644 --- a/testgen/ui/queries/scoring_queries.py +++ b/testgen/ui/queries/scoring_queries.py @@ -7,7 +7,7 @@ from testgen.common.models.scores import ScoreCard, ScoreCategory, ScoreDefinition, SelectedIssue -@st.cache_data(show_spinner="Loading data ...") +@st.cache_data(show_spinner="Loading data :gray[:small[(This might take a few minutes)]] ...") def get_all_score_cards(project_code: str) -> list["ScoreCard"]: return [ definition.as_cached_score_card() @@ -43,7 
+43,18 @@ def get_score_card_issue_reports(selected_issues: list["SelectedIssue"]): results.profile_run_id::VARCHAR, types.suggested_action, results.table_groups_id::VARCHAR, - results.anomaly_id::VARCHAR + results.anomaly_id::VARCHAR, + column_chars.functional_data_type, + column_chars.description as column_description, + COALESCE(column_chars.critical_data_element, table_chars.critical_data_element) as critical_data_element, + COALESCE(column_chars.data_source, table_chars.data_source, groups.data_source) as data_source, + COALESCE(column_chars.source_system, table_chars.source_system, groups.source_system) as source_system, + COALESCE(column_chars.source_process, table_chars.source_process, groups.source_process) as source_process, + COALESCE(column_chars.business_domain, table_chars.business_domain, groups.business_domain) as business_domain, + COALESCE(column_chars.stakeholder_group, table_chars.stakeholder_group, groups.stakeholder_group) as stakeholder_group, + COALESCE(column_chars.transform_level, table_chars.transform_level, groups.transform_level) as transform_level, + COALESCE(column_chars.aggregation_level, table_chars.aggregation_level) as aggregation_level, + COALESCE(column_chars.data_product, table_chars.data_product, groups.data_product) as data_product FROM {schema}.profile_anomaly_results results INNER JOIN {schema}.profile_anomaly_types types ON results.anomaly_id = types.id @@ -51,6 +62,13 @@ def get_score_card_issue_reports(selected_issues: list["SelectedIssue"]): ON results.profile_run_id = runs.id INNER JOIN {schema}.table_groups groups ON results.table_groups_id = groups.id + LEFT JOIN {schema}.data_column_chars column_chars + ON (groups.id = column_chars.table_groups_id + AND results.schema_name = column_chars.schema_name + AND results.table_name = column_chars.table_name + AND results.column_name = column_chars.column_name) + LEFT JOIN {schema}.data_table_chars table_chars + ON column_chars.table_id = table_chars.table_id WHERE 
results.id IN ({",".join([f"'{issue_id}'" for issue_id in profile_ids])}); """ profile_results = pd.read_sql_query(profile_query, engine) @@ -87,7 +105,17 @@ def get_score_card_issue_reports(selected_issues: list["SelectedIssue"]): results.test_suite_id, results.test_definition_id::VARCHAR as test_definition_id_runtime, results.table_groups_id::VARCHAR, - types.id::VARCHAR AS test_type_id + types.id::VARCHAR AS test_type_id, + column_chars.description as column_description, + COALESCE(column_chars.critical_data_element, table_chars.critical_data_element) as critical_data_element, + COALESCE(column_chars.data_source, table_chars.data_source, groups.data_source) as data_source, + COALESCE(column_chars.source_system, table_chars.source_system, groups.source_system) as source_system, + COALESCE(column_chars.source_process, table_chars.source_process, groups.source_process) as source_process, + COALESCE(column_chars.business_domain, table_chars.business_domain, groups.business_domain) as business_domain, + COALESCE(column_chars.stakeholder_group, table_chars.stakeholder_group, groups.stakeholder_group) as stakeholder_group, + COALESCE(column_chars.transform_level, table_chars.transform_level, groups.transform_level) as transform_level, + COALESCE(column_chars.aggregation_level, table_chars.aggregation_level) as aggregation_level, + COALESCE(column_chars.data_product, table_chars.data_product, groups.data_product) as data_product FROM {schema}.test_results results INNER JOIN {schema}.test_types types ON (results.test_type = types.test_type) @@ -95,6 +123,13 @@ def get_score_card_issue_reports(selected_issues: list["SelectedIssue"]): ON (results.test_suite_id = suites.id) INNER JOIN {schema}.table_groups groups ON (results.table_groups_id = groups.id) + LEFT JOIN {schema}.data_column_chars column_chars + ON (groups.id = column_chars.table_groups_id + AND results.schema_name = column_chars.schema_name + AND results.table_name = column_chars.table_name + AND 
results.column_names = column_chars.column_name) + LEFT JOIN {schema}.data_table_chars table_chars + ON column_chars.table_id = table_chars.table_id WHERE results.id IN ({",".join([f"'{issue_id}'" for issue_id in test_ids])}); """ test_results = pd.read_sql_query(test_query, engine) @@ -146,3 +181,21 @@ def get_score_category_values(project_code: str) -> dict[ScoreCategory, list[str if row["category"] and row["value"]: values[row["category"]].append(row["value"]) return values + + +@st.cache_data(show_spinner="Loading data :gray[:small[(This might take a few minutes)]] ...") +def get_column_filters(project_code: str) -> list[dict]: + query = f""" + SELECT + data_column_chars.column_id::text AS column_id, + data_column_chars.column_name AS name, + data_column_chars.table_id::text AS table_id, + data_column_chars.table_name AS table, + data_column_chars.table_groups_id::text AS table_group_id, + table_groups.table_groups_name AS table_group + FROM data_column_chars + INNER JOIN table_groups ON (table_groups.id = data_column_chars.table_groups_id) + WHERE table_groups.project_code = '{project_code}' + ORDER BY table_name, ordinal_position; + """ + return [row.to_dict() for _, row in pd.read_sql_query(query, engine).iterrows()] diff --git a/testgen/ui/queries/test_definition_queries.py b/testgen/ui/queries/test_definition_queries.py index 67a36f87..ae16d4c6 100644 --- a/testgen/ui/queries/test_definition_queries.py +++ b/testgen/ui/queries/test_definition_queries.py @@ -4,14 +4,17 @@ def update_attribute(schema, test_definition_ids, attribute, value): - sql = f"""UPDATE {schema}.test_definitions - SET - {attribute}='{value}' - where - id in ({"'" + "','".join(test_definition_ids) + "'"}) - ; - """ - db.execute_sql(sql) + sql = f""" + WITH selected as ( + SELECT UNNEST(ARRAY [{", ".join([ f"'{item}'" for item in test_definition_ids ])}]) AS id + ) + UPDATE {schema}.test_definitions + SET {attribute}='{value}' + FROM {schema}.test_definitions td + INNER JOIN selected ON 
(td.id = selected.id::UUID) + WHERE td.id = test_definitions.id; + """ + db.execute_sql_raw(sql) st.cache_data.clear() @@ -237,14 +240,6 @@ def add(schema, test_definition): st.cache_data.clear() -def get_test_definition_usage(schema, test_definition_ids): - ids_str = ",".join([f"'{item}'" for item in test_definition_ids]) - sql = f""" - select distinct test_definition_id from {schema}.test_results where test_definition_id in ({ids_str}); - """ - return db.retrieve_data(sql) - - def delete(schema, test_definition_ids): if test_definition_ids is None or len(test_definition_ids) == 0: raise ValueError("No Test Definition is specified.") @@ -268,17 +263,19 @@ def cascade_delete(schema, test_suite_ids): def move(schema, test_definitions, target_table_group, target_test_suite): - test_definition_ids = [f"'{td['id']}'" for td in test_definitions] sql = f""" - UPDATE {schema}.test_definitions - SET - table_groups_id = '{target_table_group}'::UUID, - test_suite_id = '{target_test_suite}'::UUID - WHERE - id in ({",".join(test_definition_ids)}) - ; + WITH selected as ( + SELECT UNNEST(ARRAY [{", ".join([ f"'{td['id']}'" for td in test_definitions ])}]) AS id + ) + UPDATE {schema}.test_definitions + SET + table_groups_id = '{target_table_group}'::UUID, + test_suite_id = '{target_test_suite}'::UUID + FROM {schema}.test_definitions td + INNER JOIN selected ON (td.id = selected.id::UUID) + WHERE td.id = test_definitions.id; """ - db.execute_sql(sql) + db.execute_sql_raw(sql) st.cache_data.clear() diff --git a/testgen/ui/queries/user_queries.py b/testgen/ui/queries/user_queries.py index c9953b4c..d245dbeb 100644 --- a/testgen/ui/queries/user_queries.py +++ b/testgen/ui/queries/user_queries.py @@ -5,12 +5,16 @@ @st.cache_data(show_spinner=False) -def get_users(): +def get_users(include_password: bool=False): schema: str = st.session_state["dbschema"] - sql = f"""SELECT - id::VARCHAR(50), - username, email, "name", "password", preauthorized, role - FROM {schema}.auth_users""" + sql 
= f""" + SELECT + id::VARCHAR(50), + username, email, "name", + {"password," if include_password else ""} + role + FROM {schema}.auth_users + """ return db.retrieve_data(sql) @@ -42,12 +46,11 @@ def add_user(user): def edit_user(user): schema: str = st.session_state["dbschema"] - encrypted_password = encrypt_ui_password(user["password"]) sql = f"""UPDATE {schema}.auth_users SET username = '{user["username"]}', email = '{user["email"]}', name = '{user["name"]}', - password = '{encrypted_password}', + {f"password = '{encrypt_ui_password(user["password"])}'," if user["password"] else ""} role = '{user["role"]}' WHERE id = '{user["user_id"]}';""" db.execute_sql(sql) diff --git a/testgen/ui/services/form_service.py b/testgen/ui/services/form_service.py index 09981f80..ee4bf4a1 100644 --- a/testgen/ui/services/form_service.py +++ b/testgen/ui/services/form_service.py @@ -1,7 +1,6 @@ import typing from builtins import float from enum import Enum -from io import BytesIO from pathlib import Path from time import sleep @@ -11,7 +10,6 @@ from st_aggrid import AgGrid, ColumnsAutoSizeMode, DataReturnMode, GridOptionsBuilder, GridUpdateMode, JsCode from streamlit_extras.no_default_selectbox import selectbox -import testgen.common.date_service as date_service import testgen.ui.services.database_service as db from testgen.ui.navigation.router import Router @@ -170,109 +168,6 @@ def render_widget(self, boo_form_display_only=False): raise ValueError(f"Widget {self.widget} is not supported.") -@st.cache_data(show_spinner=False) -def _generate_excel_export( - df_data, lst_export_columns, str_title=None, str_caption=None, lst_wrap_columns=None, lst_column_headers=None -): - if lst_export_columns: - # Filter the DataFrame to keep only the columns in lst_export_columns - df_to_export = df_data[lst_export_columns] - else: - lst_export_columns = list(df_data.columns) - df_to_export = df_data - - dct_col_to_header = dict(zip(lst_export_columns, lst_column_headers, strict=True)) if 
lst_column_headers else None - - if not str_title: - str_title = "TestGen Data Export" - start_row = 4 if str_caption else 3 - - # Create a BytesIO buffer to hold the Excel file - output = BytesIO() - - # Create a Pandas Excel writer using XlsxWriter as the engine - with pd.ExcelWriter(output, engine="xlsxwriter") as writer: - # Write the DataFrame to an Excel file, starting from the fourth row - df_to_export.to_excel(writer, index=False, sheet_name="Sheet1", startrow=start_row) - - # Access the XlsxWriter workbook and worksheet objects from the dataframe - workbook = writer.book - worksheet = writer.sheets["Sheet1"] - - # Add table formatting - (max_row, max_col) = df_to_export.shape - if dct_col_to_header: - column_settings = [{"header": dct_col_to_header[column]} for column in df_to_export.columns] - else: - column_settings = [{"header": column} for column in df_to_export.columns] - worksheet.add_table( - start_row, - 0, - max_row + start_row, - max_col - 1, - {"columns": column_settings, "style": "Table Style Medium 16"}, - ) - - # Define the format for wrapped text - wrap_format = workbook.add_format( - { - "text_wrap": True, - "valign": "top", # Align to the top to better display wrapped text - } - ) - valign_format = workbook.add_format({"valign": "top"}) - - # Autofit the worksheet (before adding title or settingwrapped column width) - worksheet.set_column(0, 1000, None, valign_format) - worksheet.autofit() - - # Set a fixed column width for wrapped columns and apply wrap format - approx_width = 60 - for col_idx, column in enumerate(df_to_export[lst_export_columns].columns): - if column in lst_wrap_columns: - # Set column width and format for wrapping - worksheet.set_column(col_idx, col_idx, approx_width, wrap_format) - - # Add a cell format for the title - title_format = workbook.add_format({"bold": True, "size": 14}) - # Write the title in cell A2 with formatting - worksheet.write("A2", str_title, title_format) - - if str_caption: - str_caption = 
str_caption.replace("{TIMESTAMP}", date_service.get_timezoned_now(st.session_state)) - caption_format = workbook.add_format({"italic": True, "size": 9, "valign": "top"}) - worksheet.write("A3", str_caption, caption_format) - - # Rewind the buffer - output.seek(0) - - # Return the Excel file - return output.getvalue() - - -def render_excel_export( - df, lst_export_columns, str_export_title=None, str_caption=None, lst_wrap_columns=None, lst_column_headers=None -): - - if st.button(label=":material/download: Export", help="Download to Excel"): - download_excel(df, lst_export_columns, str_export_title, str_caption, lst_wrap_columns, lst_column_headers) - - -@st.dialog(title="Download to Excel") -def download_excel( - df, lst_export_columns, str_export_title=None, str_caption=None, lst_wrap_columns=None, lst_column_headers=None -): - st.write(f'**Are you sure you want to download "{str_export_title}.xlsx"?**') - - st.download_button( - label="Download", - data=_generate_excel_export( - df, lst_export_columns, str_export_title, str_caption, lst_wrap_columns, lst_column_headers - ), - file_name=f"{str_export_title}.xlsx", - mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - ) - def render_refresh_button(button_container): with button_container: do_refresh = st.button(":material/refresh:", help="Refresh page data", use_container_width=False) diff --git a/testgen/ui/services/test_definition_service.py b/testgen/ui/services/test_definition_service.py index 7f224f71..452e7cda 100644 --- a/testgen/ui/services/test_definition_service.py +++ b/testgen/ui/services/test_definition_service.py @@ -43,13 +43,9 @@ def get_test_definition(db_schema, test_def_id): return database_service.retrieve_data(str_sql) -def delete(test_definition_ids, dry_run=False): +def delete(test_definition_ids): schema = st.session_state["dbschema"] - usage_result = test_definition_queries.get_test_definition_usage(schema, test_definition_ids) - can_be_deleted = usage_result.empty - 
if not dry_run and can_be_deleted: - test_definition_queries.delete(schema, test_definition_ids) - return can_be_deleted + test_definition_queries.delete(schema, test_definition_ids) def cascade_delete(test_suite_ids: list[str]): diff --git a/testgen/ui/services/test_results_service.py b/testgen/ui/services/test_results_service.py index 0860fe87..7f2d886b 100644 --- a/testgen/ui/services/test_results_service.py +++ b/testgen/ui/services/test_results_service.py @@ -72,7 +72,17 @@ def get_test_results( r.auto_gen, -- These are used in the PDF report - tt.threshold_description, tt.usage_notes, r.test_time + tt.threshold_description, tt.usage_notes, r.test_time, + dcc.description as column_description, + COALESCE(dcc.critical_data_element, dtc.critical_data_element) as critical_data_element, + COALESCE(dcc.data_source, dtc.data_source, tg.data_source) as data_source, + COALESCE(dcc.source_system, dtc.source_system, tg.source_system) as source_system, + COALESCE(dcc.source_process, dtc.source_process, tg.source_process) as source_process, + COALESCE(dcc.business_domain, dtc.business_domain, tg.business_domain) as business_domain, + COALESCE(dcc.stakeholder_group, dtc.stakeholder_group, tg.stakeholder_group) as stakeholder_group, + COALESCE(dcc.transform_level, dtc.transform_level, tg.transform_level) as transform_level, + COALESCE(dcc.aggregation_level, dtc.aggregation_level) as aggregation_level, + COALESCE(dcc.data_product, dtc.data_product, tg.data_product) as data_product FROM run_results r INNER JOIN {schema}.test_types tt @@ -97,6 +107,13 @@ def get_test_results( LEFT JOIN {schema}.cat_test_conditions c ON (cn.sql_flavor = c.sql_flavor AND r.test_type = c.test_type) + LEFT JOIN {schema}.data_column_chars dcc + ON (tg.id = dcc.table_groups_id + AND r.schema_name = dcc.schema_name + AND r.table_name = dcc.table_name + AND r.column_names = dcc.column_name) + LEFT JOIN {schema}.data_table_chars dtc + ON dcc.table_id = dtc.table_id {order_by} ; """ df = 
db.retrieve_data(sql) diff --git a/testgen/ui/services/user_session_service.py b/testgen/ui/services/user_session_service.py index 4d2dc840..463f454a 100644 --- a/testgen/ui/services/user_session_service.py +++ b/testgen/ui/services/user_session_service.py @@ -72,10 +72,9 @@ def end_user_session() -> None: def get_auth_data(): - auth_data = user_queries.get_users() + auth_data = user_queries.get_users(include_password=True) usernames = {} - preauthorized_list = [] for item in auth_data.itertuples(): usernames[item.username.lower()] = { @@ -84,8 +83,6 @@ def get_auth_data(): "password": item.password, "role": item.role, } - if item.preauthorized: - preauthorized_list.append(item.email) return { "credentials": {"usernames": usernames}, @@ -94,7 +91,6 @@ def get_auth_data(): "key": _get_jwt_hashing_key(), "name": AUTH_TOKEN_COOKIE_NAME, }, - "preauthorized": {"emails": preauthorized_list}, } diff --git a/testgen/ui/views/data_catalog.py b/testgen/ui/views/data_catalog.py index 553720e6..8eff9142 100644 --- a/testgen/ui/views/data_catalog.py +++ b/testgen/ui/views/data_catalog.py @@ -1,4 +1,5 @@ import json +import time import typing from collections import defaultdict from datetime import datetime @@ -12,15 +13,29 @@ import testgen.ui.services.query_service as dq from testgen.ui.components import widgets as testgen from testgen.ui.components.widgets import testgen_component +from testgen.ui.components.widgets.download_dialog import ( + FILE_DATA_TYPE, + PROGRESS_UPDATE_TYPE, + download_dialog, + get_excel_file_data, +) from testgen.ui.navigation.menu import MenuItem from testgen.ui.navigation.page import Page +from testgen.ui.navigation.router import Router from testgen.ui.queries import project_queries -from testgen.ui.queries.profiling_queries import TAG_FIELDS, get_column_by_id, get_hygiene_issues, get_table_by_id +from testgen.ui.queries.profiling_queries import ( + TAG_FIELDS, + get_column_by_id, + get_columns_by_id, + get_hygiene_issues, + get_table_by_id, +) 
from testgen.ui.services import user_session_service -from testgen.ui.session import session +from testgen.ui.session import session, temp_value +from testgen.ui.views.dialogs.column_history_dialog import column_history_dialog from testgen.ui.views.dialogs.data_preview_dialog import data_preview_dialog from testgen.ui.views.dialogs.run_profiling_dialog import run_profiling_dialog -from testgen.utils import friendly_score, score +from testgen.utils import format_field, friendly_score, is_uuid4, score PAGE_ICON = "dataset" PAGE_TITLE = "Data Catalog" @@ -39,76 +54,255 @@ def render(self, project_code: str, table_group_id: str | None = None, selected: PAGE_TITLE, ) - user_can_navigate = not user_session_service.user_has_catalog_role() - - if render_empty_state(project_code, user_can_navigate): - return - - group_filter_column, _, loading_column = st.columns([.3, .5, .2], vertical_alignment="center") - - with group_filter_column: - table_groups_df = get_table_group_options(project_code) - table_group_id = testgen.select( - options=table_groups_df, - value_column="id", - display_column="table_groups_name", - default_value=table_group_id, - required=True, - label="Table Group", - bind_to_query="table_group_id", - ) - - with loading_column: - columns_df = get_table_group_columns(table_group_id) - selected_item = get_selected_item(selected, table_group_id) - if selected_item: - selected_item["project_code"] = project_code - selected_item["connection_id"] = str( - table_groups_df.loc[table_groups_df["id"] == table_group_id].iloc[0]["connection_id"]) - else: - self.router.set_query_params({ "selected": None }) - - if columns_df.empty: - table_group = table_groups_df.loc[table_groups_df["id"] == table_group_id].iloc[0] - testgen.empty_state( - label="No profiling data yet", - icon=PAGE_ICON, - message=testgen.EmptyStateMessage.Profiling, - action_label="Run Profiling", - action_disabled=not user_session_service.user_can_edit(), - button_onclick=partial(run_profiling_dialog, 
project_code, table_group), - button_icon="play_arrow", - ) + _, loading_column = st.columns([.4, .6]) + spinner_container = loading_column.container(key="data_catalog:spinner") + + with spinner_container: + with st.spinner(text="Loading data ..."): + # Make sure none of the loading logic use @st.cache_data(show_spinner=True) + # Otherwise, the testgen_component randomly remounts for no reason when selecting items + # (something to do with displaying the extra cache spinner next to the custom component) + # Enclosing the loading logic in a Streamlit container also fixes it + + project_summary = project_queries.get_summary_by_code(project_code) + user_can_navigate = not user_session_service.user_has_catalog_role() + table_groups = get_table_group_options(project_code) + + if not table_group_id or table_group_id not in table_groups["id"].values: + table_group_id = table_groups.iloc[0]["id"] if not table_groups.empty else None + on_table_group_selected(table_group_id) + + columns, selected_item, selected_table_group = pd.DataFrame(), None, None + if table_group_id: + selected_table_group = table_groups.loc[table_groups["id"] == table_group_id].iloc[0] + columns = get_table_group_columns(table_group_id) + selected_item = get_selected_item(selected, table_group_id) + + if selected_item: + selected_item["project_code"] = project_code + selected_item["connection_id"] = format_field(selected_table_group["connection_id"]) else: - def on_item_selected(item_id): - self.router.set_query_params({ "selected": item_id }) - - testgen_component( - "data_catalog", - props={ - "columns": columns_df.to_json(orient="records"), - "selected": json.dumps(selected_item), - "tag_values": get_tag_values(), - "last_saved_timestamp": st.session_state.get("data_catalog:last_saved_timestamp"), - "permissions": { - "can_edit": user_session_service.user_can_disposition(), - "can_navigate": user_can_navigate, - }, + on_item_selected(None) + + testgen_component( + "data_catalog", + props={ + 
"project_summary": { + "project_code": project_code, + "connections_ct": format_field(project_summary["connections_ct"]), + "table_groups_ct": format_field(project_summary["table_groups_ct"]), + "default_connection_id": format_field(project_summary["default_connection_id"]), }, - on_change_handlers={ - "ItemSelected": on_item_selected, - "DataPreviewClicked": lambda item: data_preview_dialog( - item["table_group_id"], - item["schema_name"], - item["table_name"], - item.get("column_name"), - ), + "table_group_filter_options": [ + { + "value": format_field(table_group["id"]), + "label": format_field(table_group["table_groups_name"]), + "selected": str(table_group_id) == str(table_group["id"]), + } for _, table_group in table_groups.iterrows() + ], + "columns": columns.to_json(orient="records") if not columns.empty else None, + "selected_item": json.dumps(selected_item), + "tag_values": get_tag_values(), + "last_saved_timestamp": st.session_state.get("data_catalog:last_saved_timestamp"), + "permissions": { + "can_edit": user_session_service.user_can_disposition(), + "can_navigate": user_can_navigate, }, - event_handlers={ "TagsChanged": partial(on_tags_changed, loading_column) }, - ) + }, + on_change_handlers={ + "RunProfilingClicked": partial( + run_profiling_dialog, + project_code, + selected_table_group, + ), + "TableGroupSelected": on_table_group_selected, + "ItemSelected": on_item_selected, + "ExportClicked": lambda columns: download_dialog( + dialog_title="Download Excel Report", + file_content_func=get_excel_report_data, + args=(selected_table_group["table_groups_name"], columns), + ), + "RemoveTableClicked": remove_table_dialog, + "DataPreviewClicked": lambda item: data_preview_dialog( + item["table_group_id"], + item["schema_name"], + item["table_name"], + item.get("column_name"), + ), + "HistoryClicked": lambda item: column_history_dialog( + item["table_group_id"], + item["schema_name"], + item["table_name"], + item["column_name"], + item["add_date"], + ), + 
}, + event_handlers={ "TagsChanged": partial(on_tags_changed, spinner_container) }, + ) + + +def on_table_group_selected(table_group_id: str | None) -> None: + Router().set_query_params({ "table_group_id": table_group_id }) + + +def on_item_selected(item_id: str | None) -> None: + Router().set_query_params({ "selected": item_id }) + + +def get_excel_report_data(update_progress: PROGRESS_UPDATE_TYPE, table_group: str, columns: list[str]) -> None: + data = get_columns_by_id( + [ col.split("_")[1] for col in columns ], + include_tags=True, + ) + data = pd.DataFrame(data) + + for key in ["column_type", "datatype_suggestion"]: + data[key] = data[key].apply(lambda val: val.lower() if not pd.isna(val) else None) + + for key in ["avg_embedded_spaces", "avg_length", "avg_value", "stdev_value"]: + data[key] = data[key].apply(lambda val: round(val, 2) if not pd.isna(val) else None) + + for key in ["min_date", "max_date", "add_date", "last_mod_date", "drop_date"]: + data[key] = data[key].apply( + lambda val: datetime.fromtimestamp(val / 1000).strftime("%b %-d %Y, %-I:%M %p") if not pd.isna(val) else None + ) + + for key in ["data_source", "source_system", "source_process", "business_domain", "stakeholder_group", "transform_level", "aggregation_level", "data_product"]: + data[key] = data.apply( + lambda col: col[key] or col[f"table_{key}"] or col.get(f"table_group_{key}"), + axis=1, + ) + type_map = {"A": "Alpha", "B": "Boolean", "D": "Datetime", "N": "Numeric"} + data["general_type"] = data["general_type"].apply(lambda val: type_map.get(val)) + + data["critical_data_element"] = data.apply( + lambda col: "Yes" if col["critical_data_element"] or col["table_critical_data_element"] else None, + axis=1, + ) + data["top_freq_values"] = data["top_freq_values"].apply( + lambda val: "\n".join([ f"{part.split(" | ")[1]} | {part.split(" | ")[0]}" for part in val[2:].split("\n| ") ]) + if val + else None + ) + data["top_patterns"] = data["top_patterns"].apply( + lambda val: "".join([ 
f"{part}{'\n' if index % 2 else ' | '}" for index, part in enumerate(val.split(" | ")) ]) + if val + else None + ) + + file_columns = { + "schema_name": {"header": "Schema"}, + "table_name": {"header": "Table"}, + "column_name": {"header": "Column"}, + "critical_data_element": {}, + "ordinal_position": {"header": "Position"}, + "general_type": {}, + "column_type": {"header": "Data type"}, + "datatype_suggestion": {"header": "Suggested data type"}, + "functional_data_type": {"header": "Semantic data type"}, + "add_date": {"header": "First detected"}, + "last_mod_date": {"header": "Modification detected"}, + "drop_date": {"header": "Drop detected"}, + "record_ct": {"header": "Record count"}, + "value_ct": {"header": "Value count"}, + "distinct_value_ct": {"header": "Distinct values"}, + "null_value_ct": {"header": "Null values"}, + "zero_value_ct": {"header": "Zero values"}, + "zero_length_ct": {"header": "Zero length"}, + "filled_value_ct": {"header": "Dummy values"}, + "mixed_case_ct": {"header": "Mixed case"}, + "lower_case_ct": {"header": "Lower case"}, + "non_alpha_ct": {"header": "Non-alpha"}, + "includes_digit_ct": {"header": "Includes digits"}, + "numeric_ct": {"header": "Numeric values"}, + "date_ct": {"header": "Date values"}, + "quoted_value_ct": {"header": "Quoted values"}, + "lead_space_ct": {"header": "Leading spaces"}, + "embedded_space_ct": {"header": "Embedded spaces"}, + "avg_embedded_spaces": {"header": "Average embedded spaces"}, + "min_length": {"header": "Minimum length"}, + "max_length": {"header": "Maximum length"}, + "avg_length": {"header": "Average length"}, + "min_text": {"header": "Minimum text", "wrap": True}, + "max_text": {"header": "Maximum text", "wrap": True}, + "distinct_std_value_ct": {"header": "Distinct standard values"}, + "distinct_pattern_ct": {"header": "Distinct patterns"}, + "std_pattern_match": {"header": "Standard pattern match"}, + "top_freq_values": {"header": "Frequent values", "wrap": True}, + "top_patterns": 
{"header": "Frequent patterns", "wrap": True}, + "min_value": {"header": "Minimum value"}, + "min_value_over_0": {"header": "Minimum value > 0"}, + "max_value": {"header": "Maximum value"}, + "avg_value": {"header": "Average value"}, + "stdev_value": {"header": "Standard deviation"}, + "percentile_25": {"header": "25th percentile"}, + "percentile_50": {"header": "Median value"}, + "percentile_75": {"header": "75th percentile"}, + "min_date": {"header": "Minimum date (UTC)"}, + "max_date": {"header": "Maximum date (UTC)"}, + "before_1yr_date_ct": {"header": "Before 1 year"}, + "before_5yr_date_ct": {"header": "Before 5 years"}, + "before_20yr_date_ct": {"header": "Before 20 years"}, + "within_1yr_date_ct": {"header": "Within 1 year"}, + "within_1mo_date_ct": {"header": "Within 1 month"}, + "future_date_ct": {"header": "Future dates"}, + "boolean_true_ct": {"header": "Boolean true values"}, + "description": {"wrap": True}, + "data_source": {}, + "source_system": {}, + "source_process": {}, + "business_domain": {}, + "stakeholder_group": {}, + "transform_level": {}, + "aggregation_level": {}, + "data_product": {}, + } + return get_excel_file_data( + data, + "Data Catalog Columns", + details={"Table group": table_group}, + columns=file_columns, + update_progress=update_progress, + ) + + +@st.dialog(title="Remove Table from Catalog") +def remove_table_dialog(item: dict) -> None: + remove_clicked, set_remove_clicked = temp_value("data-catalog:confirm-remove-table-val") + st.html(f"Are you sure you want to remove the table {item['table_name']} from the data catalog?") + st.warning("This action cannot be undone.") + + _, button_column = st.columns([.85, .15]) + with button_column: + testgen.button( + label="Remove", + type_="flat", + color="warn", + key="data-catalog:confirm-remove-table-btn", + on_click=lambda: set_remove_clicked(True), + ) -def on_tags_changed(spinner_container: DeltaGenerator, payload: dict) -> None: + if remove_clicked(): + schema = 
st.session_state["dbschema"] + db.execute_sql(f""" + DELETE FROM {schema}.data_column_chars + WHERE table_id = '{item["id"]}'; + """) + db.execute_sql(f""" + DELETE FROM {schema}.data_table_chars + WHERE table_id = '{item["id"]}'; + """) + + st.success("Table has been removed.") + time.sleep(1) + for func in [ get_table_group_columns, get_tag_values ]: + func.clear() + st.session_state["data_catalog:last_saved_timestamp"] = datetime.now().timestamp() + st.rerun() + + +def on_tags_changed(spinner_container: DeltaGenerator, payload: dict) -> FILE_DATA_TYPE: attributes = ["description"] attributes.extend(TAG_FIELDS) cde_value_map = { @@ -133,18 +327,29 @@ def on_tags_changed(spinner_container: DeltaGenerator, payload: dict) -> None: with spinner_container: with st.spinner("Saving tags"): if tables: - db.execute_sql(f""" + db.execute_sql_raw(f""" + WITH selected as ( + SELECT UNNEST(ARRAY [{", ".join([ f"'{item}'" for item in tables ])}]) AS table_id + ) UPDATE {schema}.data_table_chars SET {', '.join(set_attributes)} - WHERE table_id IN ({", ".join([ f"'{item}'" for item in tables ])}); + FROM {schema}.data_table_chars dtc + INNER JOIN selected ON (dtc.table_id = selected.table_id::UUID) + WHERE dtc.table_id = data_table_chars.table_id; """) + if columns: - db.execute_sql(f""" + db.execute_sql_raw(f""" + WITH selected as ( + SELECT UNNEST(ARRAY [{", ".join([ f"'{item}'" for item in columns ])}]) AS column_id + ) UPDATE {schema}.data_column_chars SET {', '.join(set_attributes)} - WHERE column_id IN ({", ".join([ f"'{item}'" for item in columns ])}); - """) + FROM {schema}.data_column_chars dcc + INNER JOIN selected ON (dcc.column_id = selected.column_id::UUID) + WHERE dcc.column_id = data_column_chars.column_id; + """) for func in [ get_table_group_columns, get_table_by_id, get_column_by_id, get_tag_values ]: func.clear() @@ -152,44 +357,17 @@ def on_tags_changed(spinner_container: DeltaGenerator, payload: dict) -> None: st.rerun() -def render_empty_state(project_code: 
str, user_can_navigate: bool) -> bool: - project_summary_df = project_queries.get_summary_by_code(project_code) - if project_summary_df["profiling_runs_ct"]: # Without profiling, we don't have any table and column information in db - return False - - label="Your project is empty" - testgen.whitespace(5) - if not project_summary_df["connections_ct"]: - testgen.empty_state( - label=label, - icon=PAGE_ICON, - message=testgen.EmptyStateMessage.Connection, - action_label="Go to Connections", - action_disabled=not user_can_navigate, - link_href="connections", - link_params={ "project_code": project_code }, - ) - else: - testgen.empty_state( - label=label, - icon=PAGE_ICON, - message=testgen.EmptyStateMessage.Profiling if project_summary_df["table_groups_ct"] else testgen.EmptyStateMessage.TableGroup, - action_label="Go to Table Groups", - action_disabled=not user_can_navigate, - link_href="connections:table-groups", - link_params={ "connection_id": str(project_summary_df["default_connection_id"]) } - ) - return True - - @st.cache_data(show_spinner=False) def get_table_group_options(project_code): schema = st.session_state["dbschema"] return dq.run_table_groups_lookup_query(schema, project_code) -@st.cache_data(show_spinner="Loading data ...") +@st.cache_data(show_spinner=False) def get_table_group_columns(table_group_id: str) -> pd.DataFrame: + if not is_uuid4(table_group_id): + return pd.DataFrame() + schema = st.session_state["dbschema"] sql = f""" SELECT CONCAT('column_', column_chars.column_id) AS column_id, @@ -198,6 +376,8 @@ def get_table_group_columns(table_group_id: str) -> pd.DataFrame: table_chars.table_name, column_chars.general_type, column_chars.functional_data_type, + table_chars.record_ct, + profile_results.value_ct, column_chars.drop_date, table_chars.drop_date AS table_drop_date, column_chars.critical_data_element, @@ -208,6 +388,11 @@ def get_table_group_columns(table_group_id: str) -> pd.DataFrame: LEFT JOIN {schema}.data_table_chars table_chars ON ( 
column_chars.table_id = table_chars.table_id ) + LEFT JOIN {schema}.profile_results ON ( + column_chars.last_complete_profile_run_id = profile_results.profile_run_id + AND column_chars.table_name = profile_results.table_name + AND column_chars.column_name = profile_results.column_name + ) WHERE column_chars.table_groups_id = '{table_group_id}' ORDER BY table_name, ordinal_position; """ @@ -215,15 +400,15 @@ def get_table_group_columns(table_group_id: str) -> pd.DataFrame: def get_selected_item(selected: str, table_group_id: str) -> dict | None: - if not selected: + if not selected or not is_uuid4(table_group_id): return None item_type, item_id = selected.split("_", 2) if item_type == "table": - item = get_table_by_id(item_id, table_group_id) + item = get_table_by_id(item_id) elif item_type == "column": - item = get_column_by_id(item_id, table_group_id, include_tags=True, include_has_test_runs=True, include_scores=True) + item = get_column_by_id(item_id, include_tags=True, include_has_test_runs=True, include_scores=True) else: return None @@ -233,6 +418,7 @@ def get_selected_item(selected: str, table_group_id: str) -> dict | None: item["dq_score_testing"] = friendly_score(item["dq_score_testing"]) item["hygiene_issues"] = get_hygiene_issues(item["profile_run_id"], item["table_name"], item.get("column_name")) item["test_issues"] = get_latest_test_issues(item["table_group_id"], item["table_name"], item.get("column_name")) + item["test_suites"] = get_related_test_suites(item["table_group_id"], item["table_name"], item.get("column_name")) return item @@ -281,6 +467,34 @@ def get_latest_test_issues(table_group_id: str, table_name: str, column_name: st return [row.to_dict() for _, row in df.iterrows()] +@st.cache_data(show_spinner=False) +def get_related_test_suites(table_group_id: str, table_name: str, column_name: str | None = None) -> dict | None: + schema = st.session_state["dbschema"] + + column_condition = "" + if column_name: + column_condition = f"AND column_name 
= '{column_name}'" + + sql = f""" + SELECT + test_suites.id::VARCHAR, + test_suite AS name, + COUNT(*) AS test_count + FROM {schema}.test_definitions + LEFT JOIN {schema}.test_suites ON ( + test_definitions.test_suite_id = test_suites.id + ) + WHERE test_suites.table_groups_id = '{table_group_id}' + AND table_name = '{table_name}' + {column_condition} + GROUP BY test_suites.id + ORDER BY test_suite; + """ + + df = db.retrieve_data(sql) + return [row.to_dict() for _, row in df.iterrows()] + + @st.cache_data(show_spinner=False) def get_tag_values() -> dict[str, list[str]]: schema = st.session_state["dbschema"] diff --git a/testgen/ui/views/dialogs/column_history_dialog.py b/testgen/ui/views/dialogs/column_history_dialog.py new file mode 100644 index 00000000..6a224004 --- /dev/null +++ b/testgen/ui/views/dialogs/column_history_dialog.py @@ -0,0 +1,94 @@ +import json + +import pandas as pd +import streamlit as st + +import testgen.ui.services.database_service as db +from testgen.ui.components import widgets as testgen +from testgen.ui.components.widgets import testgen_component +from testgen.ui.queries.profiling_queries import COLUMN_PROFILING_FIELDS +from testgen.utils import format_field + + +def column_history_dialog(*args) -> None: + st.session_state["column_history_dialog:run_id"] = None + _column_history_dialog(*args) + + +@st.dialog(title="Column History") +def _column_history_dialog( + table_group_id: str, + schema_name: str, + table_name: str, + column_name: str, + add_date: int, +) -> None: + testgen.css_class("l-dialog") + caption_column, loading_column = st.columns([ 0.8, 0.2 ], vertical_alignment="bottom") + + with caption_column: + testgen.caption(f"Table > Column: {table_name} > {column_name}") + + with loading_column: + with st.spinner("Loading data ..."): + profiling_runs = get_profiling_runs(table_group_id, add_date) + run_id = st.session_state.get("column_history_dialog:run_id") or profiling_runs.iloc[0]["id"] + selected_item = 
get_run_column(run_id, schema_name, table_name, column_name) + + testgen_component( + "column_profiling_history", + props={ + "profiling_runs": [ + { + "run_id": format_field(run["id"]), + "run_date": format_field(run["profiling_starttime"]), + } for _, run in profiling_runs.iterrows() + ], + "selected_item": selected_item, + }, + on_change_handlers={ + "RunSelected": on_run_selected, + } + ) + + +def on_run_selected(run_id: str) -> None: + st.session_state["column_history_dialog:run_id"] = run_id + + +@st.cache_data(show_spinner=False) +def get_profiling_runs( + table_group_id: str, + after_date: int, +) -> pd.DataFrame: + schema: str = st.session_state["dbschema"] + query = f""" + SELECT + id::VARCHAR, + profiling_starttime + FROM {schema}.profiling_runs + WHERE table_groups_id = '{table_group_id}' + AND profiling_starttime >= TO_TIMESTAMP({after_date / 1000}) + ORDER BY profiling_starttime DESC; + """ + return db.retrieve_data(query) + + +@st.cache_data(show_spinner=False) +def get_run_column(run_id: str, schema_name: str, table_name: str, column_name: str) -> dict: + schema: str = st.session_state["dbschema"] + query = f""" + SELECT + profile_run_id::VARCHAR, + general_type, + {COLUMN_PROFILING_FIELDS} + FROM {schema}.profile_results + WHERE profile_run_id = '{run_id}' + AND schema_name = '{schema_name}' + AND table_name = '{table_name}' + AND column_name = '{column_name}'; + """ + results = db.retrieve_data(query) + if not results.empty: + # to_json converts datetimes, NaN, etc, to JSON-safe values (Note: to_dict does not) + return json.loads(results.to_json(orient="records"))[0] diff --git a/testgen/ui/views/dialogs/data_preview_dialog.py b/testgen/ui/views/dialogs/data_preview_dialog.py index dd8f6195..6911c3d6 100644 --- a/testgen/ui/views/dialogs/data_preview_dialog.py +++ b/testgen/ui/views/dialogs/data_preview_dialog.py @@ -65,7 +65,7 @@ def get_preview_data( if not connection_df.empty: use_top = connection_df["sql_flavor"] == "mssql" query = f""" - 
SELECT + SELECT DISTINCT {"TOP 100" if use_top else ""} {column_name or "*"} FROM {schema_name}.{table_name} @@ -92,6 +92,7 @@ def get_preview_data( return pd.DataFrame() else: df.index = df.index + 1 + df.fillna("", inplace=True) return df else: return pd.DataFrame() diff --git a/testgen/ui/views/dialogs/generate_tests_dialog.py b/testgen/ui/views/dialogs/generate_tests_dialog.py index 76476450..89013108 100644 --- a/testgen/ui/views/dialogs/generate_tests_dialog.py +++ b/testgen/ui/views/dialogs/generate_tests_dialog.py @@ -68,7 +68,7 @@ def generate_tests_dialog(test_suite: pd.Series) -> None: if test_generation_button: button_container.empty() - status_container.info("Starting test generation ...") + status_container.info("Generating tests ...") try: run_test_gen_queries(table_group_id, test_suite_name, selected_set) diff --git a/testgen/ui/views/dialogs/run_tests_dialog.py b/testgen/ui/views/dialogs/run_tests_dialog.py index d0cb0ada..212c1361 100644 --- a/testgen/ui/views/dialogs/run_tests_dialog.py +++ b/testgen/ui/views/dialogs/run_tests_dialog.py @@ -36,7 +36,7 @@ def run_tests_dialog(project_code: str, test_suite: pd.Series | None = None, def if testgen.expander_toggle(expand_label="Show CLI command", key="run_tests_dialog:keys:show-cli"): st.code( - f"testgen run-tests --project-key {project_code} --test-suite-key {test_suite['test_suite']}", + f"testgen run-tests --project-key {project_code} --test-suite-key {test_suite_name}", language="shellSession" ) diff --git a/testgen/ui/views/hygiene_issues.py b/testgen/ui/views/hygiene_issues.py index fb2b5ba9..3cf0fe3d 100644 --- a/testgen/ui/views/hygiene_issues.py +++ b/testgen/ui/views/hygiene_issues.py @@ -13,7 +13,13 @@ from testgen.common import date_service from testgen.common.mixpanel_service import MixpanelService from testgen.ui.components import widgets as testgen -from testgen.ui.components.widgets.download_dialog import FILE_DATA_TYPE, download_dialog, zip_multi_file_data +from 
testgen.ui.components.widgets.download_dialog import ( + FILE_DATA_TYPE, + PROGRESS_UPDATE_TYPE, + download_dialog, + get_excel_file_data, + zip_multi_file_data, +) from testgen.ui.navigation.page import Page from testgen.ui.pdf.hygiene_issue_report import create_report from testgen.ui.services import project_service, user_session_service @@ -124,15 +130,17 @@ def render( str_help = "Toggle on to perform actions on multiple Hygiene Issues" do_multi_select = st.toggle("Multi-Select", help=str_help) + with st.container(): + with st.spinner("Loading data ..."): + # Get hygiene issue list + df_pa = get_profiling_anomalies(run_id, issue_class, issue_type_id, table_name, column_name, sorting_columns) - # Get hygiene issue list - df_pa = get_profiling_anomalies(run_id, issue_class, issue_type_id, table_name, column_name, sorting_columns) + # Retrieve disposition action (cache refreshed) + df_action = get_anomaly_disposition(run_id) - # Retrieve disposition action (cache refreshed) - df_action = get_anomaly_disposition(run_id) - # Update action from disposition df - action_map = df_action.set_index("id")["action"].to_dict() - df_pa["action"] = df_pa["id"].map(action_map).fillna(df_pa["action"]) + # Update action from disposition df + action_map = df_action.set_index("id")["action"].to_dict() + df_pa["action"] = df_pa["id"].map(action_map).fillna(df_pa["action"]) if not df_pa.empty: summaries = get_profiling_anomaly_summary(run_id) @@ -178,21 +186,12 @@ def render( ) with export_button_column: - lst_export_columns = [ - "schema_name", - "table_name", - "column_name", - "anomaly_name", - "issue_likelihood", - "anomaly_description", - "action", - "detail", - "suggested_action", - ] - lst_wrap_columns = ["anomaly_description", "suggested_action"] - fm.render_excel_export( - df_pa, lst_export_columns, "Hygiene Screen", "{TIMESTAMP}", lst_wrap_columns - ) + if st.button(label=":material/download: Export", help="Download filtered hygiene issues to Excel"): + download_dialog( + 
dialog_title="Download Excel Report", + file_content_func=get_excel_report_data, + args=(df_pa, run_df["table_groups_name"], run_date), + ) if selected: # Always show details for last selected row @@ -327,7 +326,7 @@ def refresh_score(project_code: str, run_id: str, table_group_id: str | None) -> st.cache_data.clear() -@st.cache_data(show_spinner="False") +@st.cache_data(show_spinner=False) def get_profiling_run_columns(profiling_run_id: str) -> pd.DataFrame: schema: str = st.session_state["dbschema"] sql = f""" @@ -339,7 +338,7 @@ def get_profiling_run_columns(profiling_run_id: str) -> pd.DataFrame: return db.retrieve_data(sql) -@st.cache_data(show_spinner="Retrieving Data") +@st.cache_data(show_spinner=False) def get_profiling_anomalies( profile_run_id: str, likelihood: str | None, @@ -384,7 +383,21 @@ def get_profiling_anomalies( END AS likelihood_order, t.anomaly_description, r.detail, t.suggested_action, r.anomaly_id, r.table_groups_id::VARCHAR, r.id::VARCHAR, p.profiling_starttime, r.profile_run_id::VARCHAR, - tg.table_groups_name + tg.table_groups_name, + + -- These are used in the PDF report + dcc.functional_data_type, + dcc.description as column_description, + COALESCE(dcc.critical_data_element, dtc.critical_data_element) as critical_data_element, + COALESCE(dcc.data_source, dtc.data_source, tg.data_source) as data_source, + COALESCE(dcc.source_system, dtc.source_system, tg.source_system) as source_system, + COALESCE(dcc.source_process, dtc.source_process, tg.source_process) as source_process, + COALESCE(dcc.business_domain, dtc.business_domain, tg.business_domain) as business_domain, + COALESCE(dcc.stakeholder_group, dtc.stakeholder_group, tg.stakeholder_group) as stakeholder_group, + COALESCE(dcc.transform_level, dtc.transform_level, tg.transform_level) as transform_level, + COALESCE(dcc.aggregation_level, dtc.aggregation_level) as aggregation_level, + COALESCE(dcc.data_product, dtc.data_product, tg.data_product) as data_product + FROM 
{schema}.profile_anomaly_results r INNER JOIN {schema}.profile_anomaly_types t ON r.anomaly_id = t.id @@ -392,6 +405,13 @@ def get_profiling_anomalies( ON r.profile_run_id = p.id INNER JOIN {schema}.table_groups tg ON r.table_groups_id = tg.id + LEFT JOIN {schema}.data_column_chars dcc + ON (tg.id = dcc.table_groups_id + AND r.schema_name = dcc.schema_name + AND r.table_name = dcc.table_name + AND r.column_name = dcc.column_name) + LEFT JOIN {schema}.data_table_chars dtc + ON dcc.table_id = dtc.table_id WHERE r.profile_run_id = '{profile_run_id}' {criteria} {order_by} @@ -405,7 +425,7 @@ def get_profiling_anomalies( return df -@st.cache_data(show_spinner="Retrieving Status") +@st.cache_data(show_spinner=False) def get_anomaly_disposition(str_profile_run_id): str_schema = st.session_state["dbschema"] str_sql = f""" @@ -468,6 +488,32 @@ def get_profiling_anomaly_summary(str_profile_run_id): ] +def get_excel_report_data( + update_progress: PROGRESS_UPDATE_TYPE, + data: pd.DataFrame, + table_group: str, + run_date: str, +) -> FILE_DATA_TYPE: + columns = { + "schema_name": {"header": "Schema"}, + "table_name": {"header": "Table"}, + "column_name": {"header": "Column"}, + "anomaly_name": {"header": "Issue name"}, + "issue_likelihood": {"header": "Likelihood"}, + "anomaly_description": {"header": "Description", "wrap": True}, + "action": {}, + "detail": {}, + "suggested_action": {"wrap": True}, + } + return get_excel_file_data( + data, + "Hygiene Issues", + details={"Table group": table_group, "Profiling run date": run_date}, + columns=columns, + update_progress=update_progress, + ) + + @st.cache_data(show_spinner=False) def get_source_data(hi_data): return get_source_data_uncached(hi_data) @@ -495,7 +541,9 @@ def source_data_dialog(selected_row): st.info(bad_data_msg) # Pretify the dataframe df_bad.columns = [col.replace("_", " ").title() for col in df_bad.columns] - df_bad.fillna("[NULL]", inplace=True) + df_bad.fillna("", inplace=True) + if len(df_bad) == 500: + 
testgen.caption("* Top 500 records displayed", "text-align: right;") # Display the dataframe st.dataframe(df_bad, height=500, width=1050, hide_index=True) diff --git a/testgen/ui/views/login.py b/testgen/ui/views/login.py index 3f08d190..14bda7c0 100644 --- a/testgen/ui/views/login.py +++ b/testgen/ui/views/login.py @@ -27,7 +27,6 @@ def render(self, **_kwargs) -> None: auth_data["cookie"]["name"], auth_data["cookie"]["key"], auth_data["cookie"]["expiry_days"], - auth_data["preauthorized"], ) _, login_column, links_column = st.columns([0.25, 0.5, 0.25]) diff --git a/testgen/ui/views/profiling_results.py b/testgen/ui/views/profiling_results.py index c09dd7ce..cc656ce0 100644 --- a/testgen/ui/views/profiling_results.py +++ b/testgen/ui/views/profiling_results.py @@ -1,6 +1,8 @@ import json import typing +from datetime import datetime +import pandas as pd import streamlit as st import testgen.ui.queries.profiling_queries as profiling_queries @@ -8,6 +10,12 @@ import testgen.ui.services.form_service as fm from testgen.common import date_service from testgen.ui.components import widgets as testgen +from testgen.ui.components.widgets.download_dialog import ( + FILE_DATA_TYPE, + PROGRESS_UPDATE_TYPE, + download_dialog, + get_excel_file_data, +) from testgen.ui.components.widgets.testgen_component import testgen_component from testgen.ui.navigation.page import Page from testgen.ui.services import project_service, user_session_service @@ -92,7 +100,10 @@ def render(self, run_id: str, table_name: str | None = None, column_name: str | column_name = "%%" # Display main results grid - df = profiling_queries.get_profiling_results(run_id, table_name, column_name, sorting_columns) + with st.container(): + with st.spinner("Loading data ..."): + df = profiling_queries.get_profiling_results(run_id, table_name, column_name, sorting_columns) + show_columns = [ "schema_name", "table_name", @@ -116,7 +127,12 @@ def render(self, run_id: str, table_name: str | None = None, column_name: str 
def _format_profiling_date(val):
    """Reformat a ``YYYY-MM-DDTHH:MM:SS`` string for display; missing values become None."""
    # "NaT" is the string form of a missing timestamp. None/NaN are treated the
    # same way so that an empty cell cannot crash the whole export.
    if val is None or val == "NaT" or pd.isna(val):
        return None
    return datetime.strptime(val, "%Y-%m-%dT%H:%M:%S").strftime("%b %-d %Y, %-I:%M %p")


def _format_top_freq_values(val):
    """Turn ``"| value | count"`` lines into ``"count | value"`` lines."""
    if not isinstance(val, str) or not val:
        return None

    lines = []
    # val[2:] drops the leading "| " of the first entry; later entries are
    # separated by "\n| ".
    for part in val[2:].split("\n| "):
        # Split on the LAST separator only, so that a value which itself
        # contains " | " stays intact and the trailing segment is the count.
        value, separator, count = part.rpartition(" | ")
        lines.append(f"{count} | {value}" if separator else part)
    return "\n".join(lines)


def _format_top_patterns(val):
    """Put each pair of ``" | "``-separated segments of ``val`` on its own line."""
    if not isinstance(val, str) or not val:
        return None

    # Even-indexed segments are followed by " | ", odd-indexed ones by a line
    # break (a trailing separator after the last segment is kept as before).
    return "".join(
        part + ("\n" if index % 2 else " | ")
        for index, part in enumerate(val.split(" | "))
    )


def get_excel_report_data(
    update_progress: PROGRESS_UPDATE_TYPE,
    data: pd.DataFrame,
    table_group: str,
    run_date: str,
) -> FILE_DATA_TYPE:
    """Build the Excel export of the profiling results.

    Args:
        update_progress: progress callback, passed through to ``get_excel_file_data``.
        data: profiling results; a copy is formatted, the argument is not modified.
        table_group: shown as "Table group" in the report details.
        run_date: shown as "Profiling run date" in the report details.

    Returns:
        Whatever ``get_excel_file_data`` returns for the formatted data.
    """
    # Work on a copy: the caller keeps using the dataframe it passed in.
    data = data.copy()

    for key in ["column_type", "datatype_suggestion"]:
        data[key] = data[key].apply(lambda val: val.lower() if not pd.isna(val) else None)

    for key in ["avg_embedded_spaces", "avg_length", "avg_value", "stdev_value"]:
        data[key] = data[key].apply(lambda val: round(val, 2) if not pd.isna(val) else None)

    for key in ["min_date", "max_date"]:
        data[key] = data[key].apply(_format_profiling_date)

    data["hygiene_issues"] = data["hygiene_issues"].apply(lambda val: "Yes" if val else None)

    type_map = {"A": "Alpha", "B": "Boolean", "D": "Datetime", "N": "Numeric"}
    data["general_type"] = data["general_type"].apply(lambda val: type_map.get(val))

    data["top_freq_values"] = data["top_freq_values"].apply(_format_top_freq_values)
    data["top_patterns"] = data["top_patterns"].apply(_format_top_patterns)

    # Exported columns, in order, with their per-column export options.
    columns = {
        "schema_name": {"header": "Schema"},
        "table_name": {"header": "Table"},
        "column_name": {"header": "Column"},
        "position": {},
        "general_type": {},
        "column_type": {"header": "Data type"},
        "datatype_suggestion": {"header": "Suggested data type"},
        "semantic_data_type": {},
        "record_ct": {"header": "Record count"},
        "value_ct": {"header": "Value count"},
        "distinct_value_ct": {"header": "Distinct values"},
        "null_value_ct": {"header": "Null values"},
        "zero_value_ct": {"header": "Zero values"},
        "zero_length_ct": {"header": "Zero length"},
        "filled_value_ct": {"header": "Dummy values"},
        "mixed_case_ct": {"header": "Mixed case"},
        "lower_case_ct": {"header": "Lower case"},
        "non_alpha_ct": {"header": "Non-alpha"},
        "includes_digit_ct": {"header": "Includes digits"},
        "numeric_ct": {"header": "Numeric values"},
        "date_ct": {"header": "Date values"},
        "quoted_value_ct": {"header": "Quoted values"},
        "lead_space_ct": {"header": "Leading spaces"},
        "embedded_space_ct": {"header": "Embedded spaces"},
        "avg_embedded_spaces": {"header": "Average embedded spaces"},
        "min_length": {"header": "Minimum length"},
        "max_length": {"header": "Maximum length"},
        "avg_length": {"header": "Average length"},
        "min_text": {"header": "Minimum text", "wrap": True},
        "max_text": {"header": "Maximum text", "wrap": True},
        "distinct_std_value_ct": {"header": "Distinct standard values"},
        "distinct_pattern_ct": {"header": "Distinct patterns"},
        "std_pattern_match": {"header": "Standard pattern match"},
        "top_freq_values": {"header": "Frequent values", "wrap": True},
        "top_patterns": {"header": "Frequent patterns", "wrap": True},
        "min_value": {"header": "Minimum value"},
        "min_value_over_0": {"header": "Minimum value > 0"},
        "max_value": {"header": "Maximum value"},
        "avg_value": {"header": "Average value"},
        "stdev_value": {"header": "Standard deviation"},
        "percentile_25": {"header": "25th percentile"},
        "percentile_50": {"header": "Median value"},
        "percentile_75": {"header": "75th percentile"},
        "min_date": {"header": "Minimum date (UTC)"},
        "max_date": {"header": "Maximum date (UTC)"},
        "before_1yr_date_ct": {"header": "Before 1 year"},
        "before_5yr_date_ct": {"header": "Before 5 years"},
        "before_20yr_date_ct": {"header": "Before 20 years"},
        "within_1yr_date_ct": {"header": "Within 1 year"},
        "within_1mo_date_ct": {"header": "Within 1 month"},
        "future_date_ct": {"header": "Future dates"},
        "boolean_true_ct": {"header": "Boolean true values"},
    }
    return get_excel_file_data(
        data,
        "Profiling Results",
        details={"Table group": table_group, "Profiling run date": run_date},
        columns=columns,
        update_progress=update_progress,
    )
generate_create_script(df): diff --git a/testgen/ui/views/profiling_runs.py b/testgen/ui/views/profiling_runs.py index e67b58aa..cd3c8fe0 100644 --- a/testgen/ui/views/profiling_runs.py +++ b/testgen/ui/views/profiling_runs.py @@ -181,7 +181,7 @@ def get_db_table_group_choices(project_code: str) -> pd.DataFrame: return dq.run_table_groups_lookup_query(schema, project_code) -@st.cache_data(show_spinner="Retrieving Data") +@st.cache_data(show_spinner="Loading data ...") def get_db_profiling_runs(project_code: str, table_group_id: str | None = None) -> pd.DataFrame: schema = st.session_state["dbschema"] table_group_condition = f" AND v_profiling_runs.table_groups_id = '{table_group_id}' " if table_group_id else "" diff --git a/testgen/ui/views/quality_dashboard.py b/testgen/ui/views/quality_dashboard.py index 107ba49b..665366cd 100644 --- a/testgen/ui/views/quality_dashboard.py +++ b/testgen/ui/views/quality_dashboard.py @@ -42,7 +42,11 @@ def render(self, *, project_code: str, **_kwargs) -> None: "table_groups_count": int(project_summary["table_groups_ct"]), "profiling_runs_count": int(project_summary["profiling_runs_ct"]), }, - "scores": [format_score_card(score) for score in get_all_score_cards(project_code) if score.get("score") or score.get("cde_score") or score.get("categories")], + "scores": [ + format_score_card(score) + for score in get_all_score_cards(project_code) + if score.get("score") or score.get("cde_score") or score.get("categories") + ], }, on_change_handlers={ "RefreshData": refresh_data, diff --git a/testgen/ui/views/score_details.py b/testgen/ui/views/score_details.py index 37f78cd3..e490217b 100644 --- a/testgen/ui/views/score_details.py +++ b/testgen/ui/views/score_details.py @@ -8,7 +8,7 @@ from testgen.commands.run_refresh_score_cards_results import run_recalculate_score_card from testgen.common.mixpanel_service import MixpanelService from testgen.common.models import with_database_session -from testgen.common.models.scores import 
ScoreDefinition, ScoreDefinitionBreakdownItem, SelectedIssue +from testgen.common.models.scores import ScoreCategory, ScoreDefinition, ScoreDefinitionBreakdownItem, SelectedIssue from testgen.ui.components import widgets as testgen from testgen.ui.components.widgets.download_dialog import FILE_DATA_TYPE, download_dialog, zip_multi_file_data from testgen.ui.navigation.page import Page @@ -36,7 +36,7 @@ def render( self, *, definition_id: str, - category: str = "table_name", + category: str | None = None, score_type: str | None = None, drilldown: str | None = None, **_kwargs @@ -49,7 +49,7 @@ def render( "quality-dashboard", ) return - + project_service.set_sidebar_project(score_definition.project_code) testgen.page_header( @@ -60,10 +60,16 @@ def render( ], ) + if not category and score_definition.category: + category = score_definition.category.value + + if not category: + category = ScoreCategory.dq_dimension.value + score_card = None score_breakdown = None issues = None - with st.spinner(text="Loading data ..."): + with st.spinner(text="Loading data :gray[:small[(This might take a few minutes)]] ..."): user_can_edit = user_session_service.user_can_edit() score_card = format_score_card(score_definition.as_cached_score_card()) if not score_type: diff --git a/testgen/ui/views/score_explorer.py b/testgen/ui/views/score_explorer.py index bbc0400b..80d9ab82 100644 --- a/testgen/ui/views/score_explorer.py +++ b/testgen/ui/views/score_explorer.py @@ -1,4 +1,6 @@ +import json from datetime import datetime +from functools import partial from io import BytesIO from typing import ClassVar @@ -10,7 +12,7 @@ run_refresh_score_cards_results, ) from testgen.common.mixpanel_service import MixpanelService -from testgen.common.models.scores import ScoreCategory, ScoreDefinition, ScoreDefinitionFilter, SelectedIssue +from testgen.common.models.scores import ScoreCategory, ScoreDefinition, ScoreDefinitionCriteria, SelectedIssue from testgen.ui.components import widgets as testgen 
from testgen.ui.components.widgets.download_dialog import FILE_DATA_TYPE, download_dialog, zip_multi_file_data from testgen.ui.navigation.page import Page @@ -19,12 +21,13 @@ from testgen.ui.queries import profiling_queries, test_run_queries from testgen.ui.queries.scoring_queries import ( get_all_score_cards, + get_column_filters, get_score_card_issue_reports, get_score_category_values, ) from testgen.ui.services import user_session_service -from testgen.ui.session import session -from testgen.utils import format_score_card, format_score_card_breakdown, format_score_card_issues +from testgen.ui.session import session, temp_value +from testgen.utils import format_score_card, format_score_card_breakdown, format_score_card_issues, try_json PAGE_PATH = "quality-dashboard:explorer" @@ -42,12 +45,13 @@ def render( total_score: str | None = None, cde_score: str | None = None, category: str | None = None, - filters: list[str] | None = None, - breakdown_category: str | None = "table_name", + filters: str | None = None, + breakdown_category: str | None = None, breakdown_score_type: str | None = "score", drilldown: str | None = None, definition_id: str | None = None, project_code: str | None = None, + filter_by_columns: str | None = None, **_kwargs ): page_title: str = "Score Explorer" @@ -61,7 +65,10 @@ def render( "quality-dashboard", ) return - + + if not breakdown_category and original_score_definition.category: + breakdown_category = original_score_definition.category.value + project_code = original_score_definition.project_code page_title = "Edit Scorecard" last_breadcrumb = original_score_definition.name @@ -70,10 +77,13 @@ def render( {"label": last_breadcrumb}, ]) + if not breakdown_category: + breakdown_category = ScoreCategory.dq_dimension.value + score_breakdown = None issues = None filter_values = {} - with st.spinner(text="Loading data ..."): + with st.spinner(text="Loading data :gray[:small[(This might take a few minutes)]] ..."): user_can_edit = 
user_session_service.user_can_edit() filter_values = get_score_category_values(project_code) @@ -82,6 +92,9 @@ def render( project_code=project_code, total_score=True, cde_score=True, + criteria=ScoreDefinitionCriteria( + group_by_field=filter_by_columns != "true" if filter_by_columns else None, + ), ) if definition_id and not (name or total_score or category or filters): score_definition = ScoreDefinition.get(definition_id) @@ -94,20 +107,22 @@ def render( score_definition.category = ScoreCategory(category) if category else None if filters: - applied_filters = filters - if not isinstance(applied_filters, list): - applied_filters = [filters] - - score_definition.filters = [ - ScoreDefinitionFilter(field=field_value[0], value=field_value[1]) - for f in applied_filters if (field_value := f.split("=")) + applied_filters: list[dict] = try_json(filters, default=[]) + applied_filters = [ + {"field": f["field"], "value": f["value"], "others": f.get("others", [])} + for f in applied_filters + if f.get("field") and f.get("value") ] + score_definition.criteria = ScoreDefinitionCriteria.from_filters( + applied_filters, + group_by_field=filter_by_columns != "true", + ) score_card = None if score_definition: score_card = score_definition.as_score_card() - if len(score_definition.filters) > 0 and not drilldown: + if score_definition.criteria.has_filters() and not drilldown: score_breakdown = format_score_card_breakdown( score_definition.get_score_card_breakdown( score_type=breakdown_score_type, @@ -145,6 +160,8 @@ def render( "DrilldownChanged": set_breakdown_drilldown, "IssueReportsExported": export_issue_reports, "ScoreDefinitionSaved": save_score_definition, + "ColumnSelectorOpened": partial(column_selector_dialog, project_code, score_definition_dict), + "FilterModeChanged": change_score_definition_filter_mode, }, ) @@ -157,12 +174,9 @@ def set_score_definition(definition: dict | None) -> None: "total_score": definition["total_score"], "cde_score": definition["cde_score"], 
"category": definition["category"], - "filters": [ - f"{f["field"]}={filter_value}" - for f in definition["filters"] - if (filter_value := f.get("value")) - ], + "filters": json.dumps(definition["filters"], separators=(",", ":")), "definition_id": str(definition_id) if definition_id else None, + "filter_by_columns": str(definition.get("filter_by_columns", False)).lower(), }) @@ -220,6 +234,76 @@ def get_report_file_data(update_progress, issue) -> FILE_DATA_TYPE: return file_name, "application/pdf", buffer.read() +def column_selector_dialog(project_code: str, score_definition_dict: dict, _) -> None: + is_column_selector_opened, set_column_selector_opened = temp_value("explorer-column-selector", default=False) + + def dialog_content() -> None: + if not is_column_selector_opened(): + st.rerun() + + selected_filters = set() + if score_definition_dict.get("filter_by_columns"): + selected_filters = _get_selected_filters(score_definition_dict.get("filters", [])) + + column_filters = get_column_filters(project_code) + for column in column_filters: + table_group_selected = (f"table_groups_name={column["table_group"]}",) in selected_filters + table_selected = ( + f"table_groups_name={column["table_group"]}", + f"table_name={column["table"]}", + ) in selected_filters + column_selected = ( + f"table_groups_name={column["table_group"]}", + f"table_name={column["table"]}", + f"column_name={column["name"]}", + ) in selected_filters + column["selected"] = table_group_selected or table_selected or column_selected + + testgen.testgen_component( + "column_selector", + props={"columns": column_filters}, + on_change_handlers={ + "ColumnFiltersUpdated": set_score_definition_column_filters, + } + ) + + def set_score_definition_column_filters(filters: list[dict]) -> None: + set_score_definition({ + **score_definition_dict, + "filters": filters, + "filter_by_columns": bool(filters), + }) + set_column_selector_opened(False) + + set_column_selector_opened(True) + return 
st.dialog(title="Select Columns for the Scorecard", width="small")(dialog_content)() + + +def _get_selected_filters(filters: list[dict]) -> set[tuple[str]]: + selected_filters = set() + for filter_ in filters: + filter_values = { + filter_["field"]: filter_["value"], + } + for linked_filter in filter_.get("others", []): + filter_values[linked_filter["field"]] = linked_filter["value"] + + parts = [] + for key in ["table_groups_name", "table_name", "column_name"]: + if key in filter_values: + parts.append(f"{key}={filter_values[key]}") + + selected_filters.add(tuple(parts)) + return selected_filters + + +def change_score_definition_filter_mode(filter_by_columns: bool) -> None: + Router().set_query_params({ + "filters": None, + "filter_by_columns": str(filter_by_columns).lower(), + }) + + def save_score_definition(_) -> None: project_code = st.query_params.get("project_code") definition_id = st.query_params.get("definition_id") @@ -227,7 +311,8 @@ def save_score_definition(_) -> None: total_score = st.query_params.get("total_score") cde_score = st.query_params.get("cde_score") category = st.query_params.get("category") - filters = st.query_params.get_all("filters") + filters: list[dict] = try_json(st.query_params.get("filters"), default=[]) + filter_by_columns: bool = (st.query_params.get("filter_by_columns") or "false") == "true" if not name: raise ValueError("A name is required to save the scorecard") @@ -260,10 +345,13 @@ def save_score_definition(_) -> None: score_definition.total_score = total_score and total_score.lower() == "true" score_definition.cde_score = cde_score and cde_score.lower() == "true" score_definition.category = ScoreCategory(category) if category else None - score_definition.filters = [ - ScoreDefinitionFilter(field=field_value[0], value=field_value[1]) - for f in filters if (field_value := f.split("=")) - ] + score_definition.criteria = ScoreDefinitionCriteria.from_filters( + [ + {"field": f["field"], "value": f["value"], "others": 
f.get("others", [])} for f in filters + if f.get("field") and f.get("value") + ], + group_by_field=not filter_by_columns, + ) score_definition.save() run_refresh_score_cards_results(definition_id=score_definition.id, **refresh_kwargs) get_all_score_cards.clear() @@ -277,6 +365,7 @@ def save_score_definition(_) -> None: "cde_score": None, "category": None, "filters": None, + "filter_by_columns": None, "definition_id": str(score_definition.id) if score_definition.id else None, }) diff --git a/testgen/ui/views/test_definitions.py b/testgen/ui/views/test_definitions.py index 8c657609..8f4e6ad8 100644 --- a/testgen/ui/views/test_definitions.py +++ b/testgen/ui/views/test_definitions.py @@ -1,6 +1,7 @@ import logging import time import typing +from datetime import datetime import pandas as pd import streamlit as st @@ -14,10 +15,16 @@ import testgen.ui.services.test_suite_service as test_suite_service from testgen.common import date_service from testgen.ui.components import widgets as testgen +from testgen.ui.components.widgets.download_dialog import ( + FILE_DATA_TYPE, + PROGRESS_UPDATE_TYPE, + download_dialog, + get_excel_file_data, +) from testgen.ui.navigation.page import Page from testgen.ui.services import project_service, user_session_service from testgen.ui.services.string_service import empty_if_null, snake_case_to_title_case -from testgen.ui.session import session +from testgen.ui.session import session, temp_value from testgen.ui.views.dialogs.profiling_results_dialog import view_profiling_button LOG = logging.getLogger("testgen") @@ -127,71 +134,48 @@ def render(self, test_suite_id: str, table_name: str | None = None, column_name: if user_can_edit: if actions_column.button( ":material/edit: Edit", - help="Edit the Test Definition", disabled=not selected, ): edit_test_dialog(project_code, table_group, test_suite, table_name, column_name, selected_test_def) if actions_column.button( ":material/file_copy: Copy/Move", - help="Copy or Move the Test Definition", 
disabled=not selected, ): copy_move_test_dialog(project_code, table_group, test_suite, selected) if actions_column.button( ":material/delete: Delete", - help="Delete the selected Test Definition", disabled=not selected, ): - delete_test_dialog(selected_test_def) - - -@st.dialog("Delete Test") -def delete_test_dialog(selected_test_definition): - test_definition_id = selected_test_definition["id"] - test_name_short = selected_test_definition["test_name_short"] - - can_be_deleted = test_definition_service.delete([test_definition_id], dry_run=True) - - fm.render_html_list( - selected_test_definition, - [ - "id", - "project_code", - "schema_name", - "table_name", - "column_name", - "test_name_short", - "table_groups_id", - "test_suite", - "test_active_display", - "test_description", - "last_manual_update", - ], - "Test Definition Information", - int_data_width=700, - ) - - with st.form("Delete Test Definition", clear_on_submit=True, border=False): - _, button_column = st.columns([.85, .15]) - with button_column: - delete = st.form_submit_button( - "Delete", - disabled=not can_be_deleted, - type="primary", - use_container_width=True, - ) - - if delete: - test_definition_service.delete([test_definition_id]) - success_message = f"Test Definition {test_name_short} has been deleted. " - st.success(success_message) - time.sleep(1) - st.rerun() + delete_test_dialog(selected) + + +@st.dialog("Delete Tests") +def delete_test_dialog(test_definitions: list[dict]): + delete_clicked, set_delete_clicked = temp_value("test-definitions:confirm-delete-tests-val") + st.html(f""" + Are you sure you want to delete + {f"{len(test_definitions)} selected test definitions?" 
+ if len(test_definitions) > 1 + else "the selected test definition?"} + """) + + _, button_column = st.columns([.85, .15]) + with button_column: + testgen.button( + label="Delete", + type_="flat", + color="warn", + key="test-definitions:confirm-delete-tests-btn", + on_click=lambda: set_delete_clicked(True), + ) - if not can_be_deleted: - st.markdown(":orange[This Test Definition cannot be deleted because it is being used in existing tests.]") + if delete_clicked(): + test_definition_service.delete([ item["id"] for item in test_definitions ]) + st.success("Test definitions have been deleted.") + time.sleep(1) + st.rerun() def show_test_form_by_id(test_definition_id): @@ -768,13 +752,15 @@ def show_test_defs_grid( str_project_code, str_test_suite, str_table_name, str_column_name, do_multi_select, export_container, str_table_groups_id ): - df = test_definition_service.get_test_definitions( - str_project_code, str_test_suite, str_table_name, str_column_name - ) - date_service.accommodate_dataframe_to_timezone(df, st.session_state) + with st.container(): + with st.spinner("Loading data ..."): + df = test_definition_service.get_test_definitions( + str_project_code, str_test_suite, str_table_name, str_column_name + ) + date_service.accommodate_dataframe_to_timezone(df, st.session_state) - for col in df.select_dtypes(include=["datetime"]).columns: - df[col] = df[col].astype(str).replace("NaT", "") + for col in df.select_dtypes(include=["datetime"]).columns: + df[col] = df[col].astype(str).replace("NaT", "") lst_show_columns = [ "schema_name", @@ -814,43 +800,12 @@ def show_test_defs_grid( ) with export_container: - lst_export_columns = [ - "schema_name", - "table_name", - "column_name", - "test_name_short", - "final_test_description", - "threshold_value", - "export_uom", - "test_active_display", - "lock_refresh_display", - "urgency", - "profiling_as_of_date", - "last_manual_update", - ] - lst_wrap_columns = ["final_test_description"] - lst_export_headers = [ - "Schema", - 
"Table Name", - "Column/Test Focus", - "Test Type", - "Description", - "Test Threshold", - "Unit of Measure", - "Active", - "Locked", - "Urgency", - "From Profiling As-Of", - "Last Manual Update", - ] - fm.render_excel_export( - df, - lst_export_columns, - f"Test Definitions for Test Suite {str_test_suite}", - "{TIMESTAMP}", - lst_wrap_columns, - lst_export_headers, - ) + if st.button(label=":material/download: Export", help="Download filtered test definitions to Excel"): + download_dialog( + dialog_title="Download Excel Report", + file_content_func=get_excel_report_data, + args=(df, str_test_suite), + ) if dct_selected_row: st.html("

 
def get_excel_report_data(update_progress: PROGRESS_UPDATE_TYPE, data: pd.DataFrame, test_suite: str) -> FILE_DATA_TYPE:
    """Build the Excel export of the test definitions grid.

    Args:
        update_progress: progress callback, passed through to ``get_excel_file_data``.
        data: test definitions; a copy is formatted, the argument is not modified.
        test_suite: shown as "Test suite" in the report details.

    Returns:
        Whatever ``get_excel_file_data`` returns for the formatted data.
    """
    def format_date(val):
        # show_test_defs_grid converts datetime columns to strings and replaces
        # "NaT" with "", so a missing date reaches this point as an empty
        # string. pd.isna("") is False and strptime("") raises ValueError,
        # hence the explicit emptiness check. isna goes first because
        # truth-testing pd.NA raises.
        if pd.isna(val) or not val:
            return None
        return datetime.strptime(val, "%Y-%m-%d %H:%M:%S").strftime("%b %-d %Y, %-I:%M %p")

    # Work on a copy: the grid keeps using the dataframe it passed in.
    data = data.copy()

    # Only "Yes" is kept for the flag columns; every other value is blanked.
    for key in ["test_active_display", "lock_refresh_display"]:
        data[key] = data[key].apply(lambda val: val if val == "Yes" else None)

    for key in ["profiling_as_of_date", "last_manual_update"]:
        data[key] = data[key].apply(format_date)

    # Exported columns, in order, with their per-column export options.
    columns = {
        "schema_name": {"header": "Schema"},
        "table_name": {"header": "Table"},
        "column_name": {"header": "Column/Focus"},
        "test_name_short": {"header": "Test type"},
        "final_test_description": {"header": "Description", "wrap": True},
        "threshold_value": {},
        "export_uom": {"header": "Unit of measure"},
        "test_active_display": {"header": "Active"},
        "lock_refresh_display": {"header": "Locked"},
        "urgency": {"header": "Severity"},
        "profiling_as_of_date": {"header": "From profiling as-of (UTC)"},
        "last_manual_update": {"header": "Last manual update (UTC)"},
    }
    return get_excel_file_data(
        data,
        "Test Definitions",
        details={"Test suite": test_suite},
        columns=columns,
        update_progress=update_progress,
    )
PROGRESS_UPDATE_TYPE, + download_dialog, + get_excel_file_data, + zip_multi_file_data, +) from testgen.ui.navigation.page import Page from testgen.ui.pdf.test_result_report import create_report from testgen.ui.services import project_service, test_definition_service, test_results_service, user_session_service @@ -158,7 +164,16 @@ def render( # Display main grid and retrieve selection selected = show_result_detail( - run_id, export_button_column, status, test_type, table_name, column_name, sorting_columns, do_multi_select + run_id, + run_date, + run_df["test_suite"], + export_button_column, + status, + test_type, + table_name, + column_name, + sorting_columns, + do_multi_select, ) # Need to render toolbar buttons after grid, so selection status is maintained @@ -259,7 +274,7 @@ def get_test_types(): return df -@st.cache_data(show_spinner="False") +@st.cache_data(show_spinner=False) def get_test_run_columns(test_run_id: str) -> pd.DataFrame: schema: str = st.session_state["dbschema"] sql = f""" @@ -271,7 +286,7 @@ def get_test_run_columns(test_run_id: str) -> pd.DataFrame: return db.retrieve_data(sql) -@st.cache_data(show_spinner="Retrieving Results") +@st.cache_data(show_spinner=False) def get_test_results( run_id: str, test_status: str | None = None, @@ -284,7 +299,7 @@ def get_test_results( return test_results_service.get_test_results(schema, run_id, test_status, test_type_id, table_name, column_name, sorting_columns) -@st.cache_data(show_spinner="Retrieving Status") +@st.cache_data(show_spinner=False) def get_test_disposition(str_run_id): str_schema = st.session_state["dbschema"] str_sql = f""" @@ -458,6 +473,8 @@ def show_test_def_detail(str_test_def_id): def show_result_detail( run_id: str, + run_date: str, + test_suite: str, export_container: DeltaGenerator, test_status: str | None = None, test_type_id: str | None = None, @@ -466,13 +483,16 @@ def show_result_detail( sorting_columns: list[str] | None = None, do_multi_select: bool = False, ): - # Retrieve test 
results (always cached, action as null) - df = get_test_results(run_id, test_status, test_type_id, table_name, column_name, sorting_columns) - # Retrieve disposition action (cache refreshed) - df_action = get_test_disposition(run_id) - # Update action from disposition df - action_map = df_action.set_index("id")["action"].to_dict() - df["action"] = df["test_result_id"].map(action_map).fillna(df["action"]) + with st.container(): + with st.spinner("Loading data ..."): + # Retrieve test results (always cached, action as null) + df = get_test_results(run_id, test_status, test_type_id, table_name, column_name, sorting_columns) + # Retrieve disposition action (cache refreshed) + df_action = get_test_disposition(run_id) + + # Update action from disposition df + action_map = df_action.set_index("id")["action"].to_dict() + df["action"] = df["test_result_id"].map(action_map).fillna(df["action"]) lst_show_columns = [ "table_name", @@ -504,42 +524,12 @@ def show_result_detail( ) with export_container: - lst_export_columns = [ - "schema_name", - "table_name", - "column_names", - "test_name_short", - "test_description", - "dq_dimension", - "measure_uom", - "measure_uom_description", - "threshold_value", - "severity", - "result_measure", - "result_status", - "result_message", - "action", - ] - lst_wrap_colunns = ["test_description"] - lst_export_headers = [ - "Schema Name", - "Table Name", - "Columns/Focus", - "Test Type", - "Test Description", - "DQ Dimension", - "UOM", - "UOM Description", - "Threshold Value", - "Severity", - "Result Measure", - "Status", - "Message", - "Action", - ] - fm.render_excel_export( - df, lst_export_columns, "Test Results", "{TIMESTAMP}", lst_wrap_colunns, lst_export_headers - ) + if st.button(label=":material/download: Export", help="Download filtered test results to Excel"): + download_dialog( + dialog_title="Download Excel Report", + file_content_func=get_excel_report_data, + args=(df, test_suite, run_date), + ) # Display history and detail for 
def get_excel_report_data(
    update_progress: PROGRESS_UPDATE_TYPE,
    data: pd.DataFrame,
    test_suite: str,
    run_date: str,
) -> FILE_DATA_TYPE:
    """Build the Excel export of the test results grid.

    Args:
        update_progress: progress callback, passed through to ``get_excel_file_data``.
        data: test results to export; passed on as-is.
        test_suite: shown as "Test suite" in the report details.
        run_date: shown as "Test run date" in the report details.

    Returns:
        Whatever ``get_excel_file_data`` returns.
    """
    # Exported columns, in order. None means "no explicit header": such a
    # column gets an empty config (presumably get_excel_file_data then picks
    # a default header - confirm against that helper).
    headers = {
        "schema_name": "Schema",
        "table_name": "Table",
        "column_names": "Columns/Focus",
        "test_name_short": "Test type",
        "test_description": "Description",
        "dq_dimension": "Quality dimension",
        "measure_uom": "Unit of measure (UOM)",
        "measure_uom_description": "UOM description",
        "threshold_value": None,
        "severity": None,
        "result_measure": None,
        "result_status": "Status",
        "result_message": "Message",
        "action": None,
    }
    column_config = {
        key: {"header": header} if header else {}
        for key, header in headers.items()
    }
    column_config["test_description"]["wrap"] = True

    report_details = {"Test suite": test_suite, "Test run date": run_date}
    return get_excel_file_data(
        data,
        "Test Results",
        details=report_details,
        columns=column_config,
        update_progress=update_progress,
    )
@st.cache_data(show_spinner="Retrieving Data") +@st.cache_data(show_spinner="Loading data ...") def get_db_test_runs(project_code: str, table_groups_id: str | None = None, test_suite_id: str | None = None) -> pd.DataFrame: schema = st.session_state["dbschema"] table_group_condition = f" AND test_suites.table_groups_id = '{table_groups_id}' " if table_groups_id else "" diff --git a/testgen/ui/views/test_suites.py b/testgen/ui/views/test_suites.py index fc40ae5a..524c74f5 100644 --- a/testgen/ui/views/test_suites.py +++ b/testgen/ui/views/test_suites.py @@ -76,6 +76,7 @@ def render(self, project_code: str, table_group_id: str | None = None, **_kwargs "connections_ct": format_field(project_summary["connections_ct"]), "table_groups_ct": format_field(project_summary["table_groups_ct"]), "default_connection_id": format_field(project_summary["default_connection_id"]), + "can_export_to_observability": format_field(project_summary["can_export_to_observability"]), }, "test_suites": [ { diff --git a/testgen/utils/__init__.py b/testgen/utils/__init__.py index ff7d878d..a73b2770 100644 --- a/testgen/utils/__init__.py +++ b/testgen/utils/__init__.py @@ -5,6 +5,7 @@ if TYPE_CHECKING: from testgen.common.models.scores import ScoreCard +import json import urllib.parse from typing import Any, TypeVar from uuid import UUID @@ -31,6 +32,13 @@ def is_uuid4(value: str) -> bool: return str(uuid) == value +def try_json(value: str | None, default: T | None) -> T: + try: + return json.loads(value) + except: + return default + + # https://github.com/streamlit/streamlit/issues/798#issuecomment-1647759949 def get_base_url() -> str: session = st.runtime.get_instance()._session_mgr.list_active_sessions()[0] @@ -52,6 +60,8 @@ def format_field(field: Any) -> Any: return int(field) elif isinstance(field, np.floating): return float(field) + elif isinstance(field, np.bool_): + return bool(field) return field @@ -106,6 +116,7 @@ def format_score_card(score_card: ScoreCard | None) -> ScoreCard: 
"transform_level": "Transform Level", "aggregation_level": "Aggregation Level", "dq_dimension": "Quality Dimension", + "data_product": "Data Product", } if not score_card: return {