From f0a97c0603be5a7a1a09f62a7b7d9ff6fc34d964 Mon Sep 17 00:00:00 2001
From: Luis
Date: Tue, 6 May 2025 12:48:19 -0400
Subject: [PATCH 01/33] fix(scoring): add missing label for data product
category
---
testgen/utils/__init__.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/testgen/utils/__init__.py b/testgen/utils/__init__.py
index ff7d878d..469d727f 100644
--- a/testgen/utils/__init__.py
+++ b/testgen/utils/__init__.py
@@ -106,6 +106,7 @@ def format_score_card(score_card: ScoreCard | None) -> ScoreCard:
"transform_level": "Transform Level",
"aggregation_level": "Aggregation Level",
"dq_dimension": "Quality Dimension",
+ "data_product": "Data Product",
}
if not score_card:
return {
From 684f1c95731d5d34cdfb28a535e602c3d2612178 Mon Sep 17 00:00:00 2001
From: Luis
Date: Wed, 7 May 2025 16:58:39 -0400
Subject: [PATCH 02/33] refactor(scoring): display loading message for long
wait periods
---
testgen/ui/queries/scoring_queries.py | 2 +-
testgen/ui/views/quality_dashboard.py | 6 +++++-
testgen/ui/views/score_details.py | 2 +-
testgen/ui/views/score_explorer.py | 2 +-
4 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/testgen/ui/queries/scoring_queries.py b/testgen/ui/queries/scoring_queries.py
index ab7cec73..4c7feccb 100644
--- a/testgen/ui/queries/scoring_queries.py
+++ b/testgen/ui/queries/scoring_queries.py
@@ -7,7 +7,7 @@
from testgen.common.models.scores import ScoreCard, ScoreCategory, ScoreDefinition, SelectedIssue
-@st.cache_data(show_spinner="Loading data ...")
+@st.cache_data(show_spinner="Loading data :gray[:small[(This might take a few minutes)]] ...")
def get_all_score_cards(project_code: str) -> list["ScoreCard"]:
return [
definition.as_cached_score_card()
diff --git a/testgen/ui/views/quality_dashboard.py b/testgen/ui/views/quality_dashboard.py
index 107ba49b..665366cd 100644
--- a/testgen/ui/views/quality_dashboard.py
+++ b/testgen/ui/views/quality_dashboard.py
@@ -42,7 +42,11 @@ def render(self, *, project_code: str, **_kwargs) -> None:
"table_groups_count": int(project_summary["table_groups_ct"]),
"profiling_runs_count": int(project_summary["profiling_runs_ct"]),
},
- "scores": [format_score_card(score) for score in get_all_score_cards(project_code) if score.get("score") or score.get("cde_score") or score.get("categories")],
+ "scores": [
+ format_score_card(score)
+ for score in get_all_score_cards(project_code)
+ if score.get("score") or score.get("cde_score") or score.get("categories")
+ ],
},
on_change_handlers={
"RefreshData": refresh_data,
diff --git a/testgen/ui/views/score_details.py b/testgen/ui/views/score_details.py
index 37f78cd3..bb35b5a4 100644
--- a/testgen/ui/views/score_details.py
+++ b/testgen/ui/views/score_details.py
@@ -63,7 +63,7 @@ def render(
score_card = None
score_breakdown = None
issues = None
- with st.spinner(text="Loading data ..."):
+ with st.spinner(text="Loading data :gray[:small[(This might take a few minutes)]] ..."):
user_can_edit = user_session_service.user_can_edit()
score_card = format_score_card(score_definition.as_cached_score_card())
if not score_type:
diff --git a/testgen/ui/views/score_explorer.py b/testgen/ui/views/score_explorer.py
index bbc0400b..0097b00c 100644
--- a/testgen/ui/views/score_explorer.py
+++ b/testgen/ui/views/score_explorer.py
@@ -73,7 +73,7 @@ def render(
score_breakdown = None
issues = None
filter_values = {}
- with st.spinner(text="Loading data ..."):
+ with st.spinner(text="Loading data :gray[:small[(This might take a few minutes)]] ..."):
user_can_edit = user_session_service.user_can_edit()
filter_values = get_score_category_values(project_code)
From 1157d074fe01f0b4f35a29d24d2fd63f0658acd9 Mon Sep 17 00:00:00 2001
From: Luis
Date: Wed, 7 May 2025 18:38:42 -0400
Subject: [PATCH 03/33] misc(scoring): display info alert when no filters are
applied
---
.../frontend/js/components/alert.js | 62 +++++++++++++++++++
.../frontend/js/pages/score_explorer.js | 8 +++
2 files changed, 70 insertions(+)
create mode 100644 testgen/ui/components/frontend/js/components/alert.js
diff --git a/testgen/ui/components/frontend/js/components/alert.js b/testgen/ui/components/frontend/js/components/alert.js
new file mode 100644
index 00000000..8b4f6e34
--- /dev/null
+++ b/testgen/ui/components/frontend/js/components/alert.js
@@ -0,0 +1,62 @@
+/**
+ * @typedef Alert
+ * @type {object}
+ * @property {string} value
+ * @property {string} color
+ * @property {string} label
+ *
+ * @typedef Properties
+ * @type {object}
+ * @property {string?} icon
+ * @property {'info'|'success'|'error'} type
+ * @property {string?} message
+ */
+import van from '../van.min.js';
+import { getValue, loadStylesheet } from '../utils.js';
+import { Icon } from './icon.js';
+
+const { div } = van.tags;
+// Foreground/background color pairs, keyed by the alert `type` property.
+const alertTypeColors = {
+    info: {backgroundColor: 'rgba(28, 131, 225, 0.1)', color: 'rgb(0, 66, 128)'},
+    success: {backgroundColor: 'rgba(33, 195, 84, 0.1)', color: 'rgb(23, 114, 51)'},
+    error: {backgroundColor: 'rgba(255, 43, 43, 0.09)', color: 'rgb(125, 53, 59)'},
+};
+
+/**
+ * Render an alert banner: an optional leading icon followed by the given
+ * children stacked in a column.
+ *
+ * @param {Properties} props - may be plain values or reactive states (read through getValue)
+ * @param {...any} children - content placed inside the alert body
+ */
+const Alert = (/** @type Properties */ props, /** @type Array */ ...children) => {
+    loadStylesheet('alert', stylesheet);
+
+    return div(
+        {
+            ...props,
+            class: () => (getValue(props.class) ?? '') + ` tg-alert flex-row`,
+            style: () => {
+                // Unknown or missing types fall back to the 'info' palette instead of throwing.
+                const colors = alertTypeColors[getValue(props.type)] ?? alertTypeColors.info;
+                return `color: ${colors.color}; background-color: ${colors.backgroundColor};`;
+            },
+            role: 'alert',
+        },
+        () => {
+            const icon = getValue(props.icon);
+            // `icon` is optional in the Properties typedef; skip the Icon element when it is absent.
+            return icon ? Icon({size: 20, classes: 'mr-2'}, icon) : '';
+        },
+        div(
+            {class: 'flex-column'},
+            ...children,
+        ),
+    );
+};
+
+// Styles for the Alert component; Alert passes this sheet to loadStylesheet('alert', ...).
+// The icon rule makes the icon take the text color set inline on the alert container.
+const stylesheet = new CSSStyleSheet();
+stylesheet.replace(`
+.tg-alert {
+ padding: 16px;
+ border-radius: 0.5rem;
+ font-size: 16px;
+ line-height: 24px;
+}
+.tg-alert > .tg-icon {
+ color: inherit !important;
+}
+`);
+
+export { Alert };
diff --git a/testgen/ui/components/frontend/js/pages/score_explorer.js b/testgen/ui/components/frontend/js/pages/score_explorer.js
index d887e033..f3ead425 100644
--- a/testgen/ui/components/frontend/js/pages/score_explorer.js
+++ b/testgen/ui/components/frontend/js/pages/score_explorer.js
@@ -59,6 +59,7 @@ import { Checkbox } from '../components/checkbox.js';
import { Portal } from '../components/portal.js';
import { ScoreBreakdown } from '../components/score_breakdown.js';
import { IssuesTable } from '../components/score_issues.js';
+import { Alert } from '../components/alert.js';
const { div, i, span } = van.tags;
@@ -92,6 +93,13 @@ const ScoreExplorer = (/** @type {Properties} */ props) => {
{ id: domId, class: 'score-explorer' },
Toolbar(props.filter_values, getValue(props.definition), props.is_new, userCanEdit),
span({ class: 'mb-4', style: 'display: block;' }),
+ () =>
+ getValue(props.is_new) && getValue(props.definition)?.filters?.length <= 0
+ ? Alert(
+ { icon: 'info', type: 'info', class: 'mb-4' },
+ span({}, 'Add filters to the scorecard to get started.'),
+ )
+ : '',
ScoreCard(props.score_card),
span({ class: 'mb-4', style: 'display: block;' }),
() => {
From 60ede5721f2422dd568708a2c71c7b8beabf1097 Mon Sep 17 00:00:00 2001
From: Luis
Date: Thu, 8 May 2025 10:32:37 -0400
Subject: [PATCH 04/33] refactor(scoring): use scorecard category as default
breakdown grouping
---
testgen/ui/views/score_details.py | 12 +++++++++---
testgen/ui/views/score_explorer.py | 10 ++++++++--
2 files changed, 17 insertions(+), 5 deletions(-)
diff --git a/testgen/ui/views/score_details.py b/testgen/ui/views/score_details.py
index bb35b5a4..e490217b 100644
--- a/testgen/ui/views/score_details.py
+++ b/testgen/ui/views/score_details.py
@@ -8,7 +8,7 @@
from testgen.commands.run_refresh_score_cards_results import run_recalculate_score_card
from testgen.common.mixpanel_service import MixpanelService
from testgen.common.models import with_database_session
-from testgen.common.models.scores import ScoreDefinition, ScoreDefinitionBreakdownItem, SelectedIssue
+from testgen.common.models.scores import ScoreCategory, ScoreDefinition, ScoreDefinitionBreakdownItem, SelectedIssue
from testgen.ui.components import widgets as testgen
from testgen.ui.components.widgets.download_dialog import FILE_DATA_TYPE, download_dialog, zip_multi_file_data
from testgen.ui.navigation.page import Page
@@ -36,7 +36,7 @@ def render(
self,
*,
definition_id: str,
- category: str = "table_name",
+ category: str | None = None,
score_type: str | None = None,
drilldown: str | None = None,
**_kwargs
@@ -49,7 +49,7 @@ def render(
"quality-dashboard",
)
return
-
+
project_service.set_sidebar_project(score_definition.project_code)
testgen.page_header(
@@ -60,6 +60,12 @@ def render(
],
)
+ if not category and score_definition.category:
+ category = score_definition.category.value
+
+ if not category:
+ category = ScoreCategory.dq_dimension.value
+
score_card = None
score_breakdown = None
issues = None
diff --git a/testgen/ui/views/score_explorer.py b/testgen/ui/views/score_explorer.py
index 0097b00c..0495e095 100644
--- a/testgen/ui/views/score_explorer.py
+++ b/testgen/ui/views/score_explorer.py
@@ -43,7 +43,7 @@ def render(
cde_score: str | None = None,
category: str | None = None,
filters: list[str] | None = None,
- breakdown_category: str | None = "table_name",
+ breakdown_category: str | None = None,
breakdown_score_type: str | None = "score",
drilldown: str | None = None,
definition_id: str | None = None,
@@ -61,7 +61,10 @@ def render(
"quality-dashboard",
)
return
-
+
+ if original_score_definition.category:
+ breakdown_category = original_score_definition.category.value
+
project_code = original_score_definition.project_code
page_title = "Edit Scorecard"
last_breadcrumb = original_score_definition.name
@@ -70,6 +73,9 @@ def render(
{"label": last_breadcrumb},
])
+ if not breakdown_category:
+ breakdown_category = ScoreCategory.dq_dimension.value
+
score_breakdown = None
issues = None
filter_values = {}
From 7d10838e6e8eae138005ab737a7c2d9bf8c05c3c Mon Sep 17 00:00:00 2001
From: Luis
Date: Thu, 8 May 2025 14:19:54 -0400
Subject: [PATCH 05/33] fix(scoring): replace grid with wrap-enabled columns in
scorecard categories
---
.../components/frontend/js/components/score_card.js | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/testgen/ui/components/frontend/js/components/score_card.js b/testgen/ui/components/frontend/js/components/score_card.js
index 7b191677..130bc470 100644
--- a/testgen/ui/components/frontend/js/components/score_card.js
+++ b/testgen/ui/components/frontend/js/components/score_card.js
@@ -165,11 +165,16 @@ stylesheet.replace(`
}
.tg-score-card--categories {
+ display: flex;
+ flex-direction: column;
+ flex-wrap: wrap;
+ row-gap: 8px;
+ column-gap: 16px;
max-height: 100px;
overflow-y: auto;
- display: grid;
- grid-gap: 8px;
- grid-template-columns: 160px 160px;
+}
+.tg-score-card--categories > div {
+ min-width: 160px;
}
.tg-score-card--category-score {
From 24f06689d4126ea0cc9cd60a8c823227e6b6f1d2 Mon Sep 17 00:00:00 2001
From: Luis
Date: Wed, 14 May 2025 19:06:22 -0400
Subject: [PATCH 06/33] feat(scoring): enable per-column filtering for score
cards
Scorecards can now be defined by selecting independent columns, tables
or table groups.
---
testgen/common/models/scores.py | 168 +++++++++--
.../030_initialize_new_schema_structure.sql | 22 +-
.../dbsetup/075_grant_role_rights.sql | 1 +
.../dbupgrade/0137_incremental_upgrade.sql | 2 +-
.../dbupgrade/0138_incremental_upgrade.sql | 38 +++
testgen/ui/components/frontend/css/shared.css | 2 +-
.../frontend/js/components/empty_state.js | 21 +-
.../js/components/explorer_column_selector.js | 283 ++++++++++++++++++
.../components/frontend/js/components/tree.js | 46 ++-
testgen/ui/components/frontend/js/main.js | 2 +
.../frontend/js/pages/score_explorer.js | 240 +++++++++++----
testgen/ui/components/frontend/js/utils.js | 9 +-
.../components/widgets/testgen_component.py | 1 +
testgen/ui/queries/scoring_queries.py | 18 ++
testgen/ui/views/score_explorer.py | 129 ++++++--
testgen/utils/__init__.py | 8 +
16 files changed, 865 insertions(+), 125 deletions(-)
create mode 100644 testgen/template/dbupgrade/0138_incremental_upgrade.sql
create mode 100644 testgen/ui/components/frontend/js/components/explorer_column_selector.js
diff --git a/testgen/common/models/scores.py b/testgen/common/models/scores.py
index e09a29a0..91dcb144 100644
--- a/testgen/common/models/scores.py
+++ b/testgen/common/models/scores.py
@@ -1,8 +1,8 @@
import enum
import uuid
-from collections import defaultdict
from collections.abc import Iterable
from datetime import UTC, datetime
+from itertools import groupby
from typing import Literal, Self, TypedDict
import pandas as pd
@@ -69,15 +69,17 @@ class ScoreDefinition(Base):
cde_score: bool = Column(Boolean, default=False, nullable=False)
category: ScoreCategory | None = Column(Enum(ScoreCategory), nullable=True)
- results: Iterable["ScoreDefinitionResult"] = relationship(
- "ScoreDefinitionResult",
+ criteria: "ScoreDefinitionCriteria" = relationship(
+ "ScoreDefinitionCriteria",
cascade="all, delete-orphan",
- order_by="ScoreDefinitionResult.category",
lazy="joined",
+ uselist=False,
+ single_parent=True,
)
- filters: Iterable["ScoreDefinitionFilter"] = relationship(
- "ScoreDefinitionFilter",
+ results: Iterable["ScoreDefinitionResult"] = relationship(
+ "ScoreDefinitionResult",
cascade="all, delete-orphan",
+ order_by="ScoreDefinitionResult.category",
lazy="joined",
)
breakdown: Iterable["ScoreDefinitionBreakdownItem"] = relationship(
@@ -102,9 +104,12 @@ def from_table_group(cls, table_group: dict) -> Self:
definition.total_score = True
definition.cde_score = True
definition.category = ScoreCategory.dq_dimension
- definition.filters = [
- ScoreDefinitionFilter(field="table_groups_name", value=table_group["table_groups_name"]),
- ]
+ definition.criteria = ScoreDefinitionCriteria(
+ operand="AND",
+ filters=[
+ ScoreDefinitionFilter(field="table_groups_name", value=table_group["table_groups_name"]),
+ ],
+ )
return definition
@classmethod
@@ -159,7 +164,7 @@ def as_score_card(self) -> "ScoreCard":
score_cards/get_category_scores_by_column.sql
score_cards/get_category_scores_by_dimension.sql
"""
- if len(self.filters) <= 0:
+ if not self.criteria.has_filters():
return {
"id": self.id,
"project_code": self.project_code,
@@ -378,15 +383,15 @@ def recalculate_scores_history(self) -> None:
self.history = list(current_history.values())
def _get_raw_query_filters(self, cde_only: bool = False, prefix: str | None = None) -> list[str]:
- values_by_field = defaultdict(list)
- for filter_ in self.filters:
- values_by_field[filter_.field].append(f"'{filter_.value}'")
- values_by_field["project_code"].append(f"'{self.project_code}'")
+ extra_filters = [
+ f"{prefix or ''}project_code = '{self.project_code}'"
+ ]
if cde_only:
- values_by_field["critical_data_element"].append("true")
+ extra_filters.append(f"{prefix or ''}critical_data_element = true")
return [
- f"{prefix or ''}{field} IN ({', '.join(values)})" for field, values in values_by_field.items()
+ *extra_filters,
+ self.criteria.get_as_sql(prefix=prefix),
]
def to_dict(self) -> dict:
@@ -397,17 +402,144 @@ def to_dict(self) -> dict:
"total_score": self.total_score,
"cde_score": self.cde_score,
"category": self.category.value if self.category else None,
- "filters": [{"field": f.field, "value": f.value} for f in self.filters],
+ "filters": list(self.criteria),
+ "filter_by_columns": (not self.criteria.group_by_field)
+ if self.criteria.group_by_field is not None else None,
}
+class ScoreDefinitionCriteria(Base):
+    """
+    Hold the filter conditions applied for a given scorecard.
+
+    Properties are as follows:
+
+    :param operand: boolean operand to join the final filters
+
+        Either `AND` or `OR`. The operand is used to join the filters
+        after they have been individually processed, grouped and
+        formatted into valid SQL expressions.
+
+    :param group_by_field: boolean to group filters by field name
+
+        Boolean indicating that filters on the same field must be combined
+        to produce the intermediary filters that will later be joined
+        with :property:`operand`.
+
+        When false, filters are individually converted to valid SQL and
+        then joined with :property:`operand`.
+
+        When true, filters are sorted and grouped by field name, all
+        filters for a given field name are combined with an `OR` boolean
+        condition into a single filter. Then, the resulting filters
+        are joined with :property:`operand`.
+
+    :param filters: a list of :class:`ScoreDefinitionFilter` objects
+    """
+
+    __tablename__ = "score_definition_criteria"
+
+    id: str = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
+    definition_id: str = Column(UUID(as_uuid=True), ForeignKey("score_definitions.id", ondelete="CASCADE"))
+    operand: Literal["AND", "OR"] = Column(String, nullable=False, default="AND")
+    group_by_field: bool = Column(Boolean, nullable=False, default=True)
+    filters: list["ScoreDefinitionFilter"] = relationship(
+        "ScoreDefinitionFilter",
+        cascade="all, delete-orphan",
+        lazy="joined",
+    )
+
+    def __str__(self) -> str:
+        # __str__ must return a str; get_as_sql() returns None when there are no filters.
+        return self.get_as_sql() or ""
+
+    def get_as_sql(
+        self,
+        prefix: str | None = None,
+    ) -> str | None:
+        """
+        Render the criteria as one SQL boolean expression.
+
+        :param prefix: text prepended to every field name (forwarded to each filter)
+        :return: the SQL expression, or `None` when the criteria has no filters
+        """
+        if not self.filters:
+            return None
+
+        if self.group_by_field:
+            filters_sql = []
+            # groupby only merges adjacent items, so sort by the same key first.
+            grouped_filters = groupby(sorted(self.filters, key=lambda f: f.field), key=lambda f: f.field)
+            for _, field_filters in grouped_filters:
+                field_filters_sql = [f.get_as_sql(prefix=prefix, operand="AND") for f in field_filters]
+                # Filters on the same field are alternatives; a single one needs no wrapping.
+                filters_sql.append(
+                    "(" + " OR ".join(field_filters_sql) + ")"
+                    if len(field_filters_sql) > 1
+                    else field_filters_sql[0]
+                )
+        else:
+            filters_sql = [f.get_as_sql(prefix=prefix, operand="AND") for f in self.filters]
+
+        if len(filters_sql) == 1:
+            return filters_sql[0]
+        joiner = f" {self.operand} "
+        return f"({joiner.join(filters_sql)})"
+
+    def __iter__(self):
+        """Yield each root filter as a dict; filters chained to it are listed under `others`."""
+        for filter_ in self.filters:
+            yield {
+                "field": filter_.field,
+                "value": filter_.value,
+                "others": [
+                    {"field": linked_filter.field, "value": linked_filter.value}
+                    for linked_filter in filter_.next_filter
+                ] if filter_.next_filter else [],
+            }
+
+    def has_filters(self) -> bool:
+        """Return whether at least one filter is attached to the criteria."""
+        return bool(self.filters)
+
+    @classmethod
+    def from_filters(cls, filters: list[dict], group_by_field: bool = True) -> "ScoreDefinitionCriteria":
+        """
+        Build a criteria from filter dicts shaped like the items yielded by `__iter__`.
+
+        Each dict becomes a root filter; its optional `others` entries are chained
+        one after another through `next_filter`. The operand is `AND` when grouping
+        by field and `OR` otherwise.
+        """
+        chained_filters: list[ScoreDefinitionFilter] = []
+        for filter_ in filters:
+            root_filter = current_filter = ScoreDefinitionFilter(
+                field=filter_["field"],
+                value=filter_["value"],
+                next_filter=None,
+            )
+            for linked_filter in (filter_.get("others") or []):
+                current_filter.next_filter = ScoreDefinitionFilter(
+                    field=linked_filter["field"],
+                    value=linked_filter["value"],
+                    next_filter=None,
+                )
+                current_filter = current_filter.next_filter
+            chained_filters.append(root_filter)
+        return cls(operand="AND" if group_by_field else "OR", filters=chained_filters, group_by_field=group_by_field)
+
+
class ScoreDefinitionFilter(Base):
__tablename__ = "score_definition_filters"
id: str = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
- definition_id: str = Column(UUID(as_uuid=True), ForeignKey("score_definitions.id", ondelete="CASCADE"))
+ criteria_id = Column(
+ UUID(as_uuid=True),
+ ForeignKey("score_definition_criteria.id", ondelete="CASCADE"),
+ nullable=True,
+ default=None,
+ )
field: str = Column(String, nullable=False)
value: str = Column(String, nullable=False)
+ next_filter_id = Column(
+ UUID(as_uuid=True),
+ ForeignKey("score_definition_filters.id", ondelete="CASCADE"),
+ nullable=True,
+ default=None,
+ )
+ next_filter: "ScoreDefinitionFilter" = relationship(
+ "ScoreDefinitionFilter",
+ cascade="all, delete-orphan",
+ lazy="joined",
+ uselist=False,
+ single_parent=True,
+ )
+
+    def __iter__(self):
+        """Yield this filter, then every filter reachable through `next_filter`."""
+        node = self
+        yield node
+        # Walk the chain until a filter has no (truthy) successor.
+        while node := node.next_filter:
+            yield node
+
+    def get_as_sql(self, prefix: str | None = None, operand: Literal["AND", "OR"] = "AND") -> str:
+        """
+        Render this filter and every filter chained to it as one parenthesized SQL condition.
+
+        :param prefix: text prepended to each field name
+        :param operand: boolean operator placed between the chained equality checks
+        :return: SQL such as `(table_name = 'a' AND table_groups_name = 'b')`
+        """
+        field_prefix = prefix or ""
+        sql_filters = []
+        for linked_filter in self:
+            # Double embedded single quotes so the value cannot terminate its SQL string literal.
+            escaped_value = str(linked_filter.value).replace("'", "''")
+            sql_filters.append(f"{field_prefix}{linked_filter.field} = '{escaped_value}'")
+        joiner = f" {operand} "
+        return f"({joiner.join(sql_filters)})"
class ScoreDefinitionResult(Base):
diff --git a/testgen/template/dbsetup/030_initialize_new_schema_structure.sql b/testgen/template/dbsetup/030_initialize_new_schema_structure.sql
index f7978a4e..f1ff5b9d 100644
--- a/testgen/template/dbsetup/030_initialize_new_schema_structure.sql
+++ b/testgen/template/dbsetup/030_initialize_new_schema_structure.sql
@@ -657,13 +657,23 @@ CREATE TABLE IF NOT EXISTS score_definitions (
category VARCHAR(30) DEFAULT NULL
);
+-- Filter criteria of a score definition: the boolean operand used to join its filters
+-- and the group_by_field flag. Rows are deleted together with their score definition.
+CREATE TABLE IF NOT EXISTS score_definition_criteria (
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+ definition_id UUID NOT NULL REFERENCES score_definitions(id) ON DELETE CASCADE,
+ operand VARCHAR NOT NULL DEFAULT 'AND',
+ group_by_field BOOLEAN NOT NULL DEFAULT true
+);
+
CREATE TABLE IF NOT EXISTS score_definition_filters (
- id UUID DEFAULT gen_random_uuid() PRIMARY KEY,
- definition_id UUID CONSTRAINT score_definitions_filters_score_definitions_definition_id_fk
- REFERENCES score_definitions (id)
- ON DELETE CASCADE,
- field TEXT DEFAULT NULL,
- value TEXT DEFAULT NULL
+ id UUID DEFAULT gen_random_uuid() PRIMARY KEY,
+ criteria_id UUID DEFAULT NULL CONSTRAINT score_definitions_filters_score_definition_criteria_fk
+ REFERENCES score_definition_criteria (id)
+ ON DELETE CASCADE,
+ next_filter_id UUID DEFAULT NULL CONSTRAINT score_definitions_filters_score_definitions_filters_fk
+ REFERENCES score_definition_filters (id)
+ ON DELETE CASCADE,
+ field TEXT DEFAULT NULL,
+ value TEXT DEFAULT NULL
);
CREATE TABLE IF NOT EXISTS score_definition_results (
diff --git a/testgen/template/dbsetup/075_grant_role_rights.sql b/testgen/template/dbsetup/075_grant_role_rights.sql
index f5bd1013..1b4f11b5 100644
--- a/testgen/template/dbsetup/075_grant_role_rights.sql
+++ b/testgen/template/dbsetup/075_grant_role_rights.sql
@@ -33,6 +33,7 @@ GRANT SELECT, INSERT, DELETE, UPDATE ON
{SCHEMA_NAME}.data_column_chars,
{SCHEMA_NAME}.auth_users,
{SCHEMA_NAME}.score_definitions,
+ {SCHEMA_NAME}.score_definition_criteria,
{SCHEMA_NAME}.score_definition_filters,
{SCHEMA_NAME}.score_definition_results,
{SCHEMA_NAME}.score_definition_results_breakdown,
diff --git a/testgen/template/dbupgrade/0137_incremental_upgrade.sql b/testgen/template/dbupgrade/0137_incremental_upgrade.sql
index efa3d78a..26d159cf 100644
--- a/testgen/template/dbupgrade/0137_incremental_upgrade.sql
+++ b/testgen/template/dbupgrade/0137_incremental_upgrade.sql
@@ -2,4 +2,4 @@ SET SEARCH_PATH TO {SCHEMA_NAME};
UPDATE job_schedules
SET kwargs = kwargs - 'project_code' || jsonb_build_object('project_key', kwargs->'project_code')
-WHERE key = 'run-tests';
\ No newline at end of file
+WHERE key = 'run-tests';
diff --git a/testgen/template/dbupgrade/0138_incremental_upgrade.sql b/testgen/template/dbupgrade/0138_incremental_upgrade.sql
new file mode 100644
index 00000000..7c26925c
--- /dev/null
+++ b/testgen/template/dbupgrade/0138_incremental_upgrade.sql
@@ -0,0 +1,38 @@
+-- Upgrade 0138: move score definition filters under a new score_definition_criteria table.
+SET SEARCH_PATH TO {SCHEMA_NAME};
+
+-- New parent table for filters; one row is created per score definition further below.
+CREATE TABLE score_definition_criteria (
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+ definition_id UUID NOT NULL REFERENCES score_definitions(id) ON DELETE CASCADE,
+ operand VARCHAR NOT NULL DEFAULT 'AND',
+ group_by_field BOOLEAN NOT NULL DEFAULT true
+);
+
+-- Filters now reference their criteria and, optionally, the next filter in a chain.
+ALTER TABLE score_definition_filters
+ ADD COLUMN criteria_id UUID DEFAULT NULL,
+ ADD COLUMN next_filter_id UUID DEFAULT NULL,
+ ADD CONSTRAINT score_definitions_filters_score_definition_criteria_fk FOREIGN KEY (criteria_id) REFERENCES score_definition_criteria (id) ON DELETE CASCADE,
+ ADD CONSTRAINT score_definitions_filters_score_definitions_filters_fk FOREIGN KEY (next_filter_id) REFERENCES score_definition_filters (id) ON DELETE CASCADE;
+
+-- Data migration: create an 'AND' criteria (group_by_field = true) for every existing
+-- score definition and point each of that definition's filters at it.
+DO $$
+DECLARE
+ current_definition_id UUID;
+ new_criteria_id UUID;
+ definition_filter RECORD;
+BEGIN
+ FOR current_definition_id IN SELECT id FROM score_definitions LOOP
+ new_criteria_id := gen_random_uuid();
+ RAISE NOTICE 'Definition = %', current_definition_id;
+ RAISE NOTICE 'Create Score Criteria (AND)';
+ EXECUTE format(
+ 'INSERT INTO score_definition_criteria (id, definition_id, operand, group_by_field) VALUES (%L, %L, %L, %L)',
+ new_criteria_id, current_definition_id, 'AND', true
+ );
+
+ FOR definition_filter IN SELECT id, field, value FROM score_definition_filters WHERE definition_id = current_definition_id LOOP
+ RAISE NOTICE 'Link filter to Score Criteria Field=% Value=%', definition_filter.field, definition_filter.value;
+ EXECUTE format('UPDATE score_definition_filters SET criteria_id = %L WHERE id = %L', new_criteria_id, definition_filter.id);
+ END LOOP;
+ END LOOP;
+END $$;
+
+-- Must run last: the loop above still reads definition_id to find each definition's filters.
+ALTER TABLE score_definition_filters DROP COLUMN definition_id;
diff --git a/testgen/ui/components/frontend/css/shared.css b/testgen/ui/components/frontend/css/shared.css
index 8100174a..a918e619 100644
--- a/testgen/ui/components/frontend/css/shared.css
+++ b/testgen/ui/components/frontend/css/shared.css
@@ -118,7 +118,7 @@ body {
}
.clickable {
- cursor: pointer;
+ cursor: pointer !important;
}
.hidden {
diff --git a/testgen/ui/components/frontend/js/components/empty_state.js b/testgen/ui/components/frontend/js/components/empty_state.js
index 7c243b50..67d7b677 100644
--- a/testgen/ui/components/frontend/js/components/empty_state.js
+++ b/testgen/ui/components/frontend/js/components/empty_state.js
@@ -16,6 +16,7 @@
* @property {Message} message
* @property {Link?} link
* @property {any?} button
+* @property {string?} class
*/
import van from '../van.min.js';
import { Card } from '../components/card.js';
@@ -49,13 +50,17 @@ const EMPTY_STATE_MESSAGE = {
line1: 'Track data quality scores',
line2: 'Create custom scorecards to assess quality of your data assets across different categories.',
},
+ explorer: {
+ line1: 'Track data quality scores',
+ line2: 'Filter or select columns to assess the quality of your data assets across different categories.',
+ },
};
const EmptyState = (/** @type Properties */ props) => {
loadStylesheet('empty-state', stylesheet);
return Card({
- class: 'tg-empty-state flex-column fx-align-flex-center',
+ class: `tg-empty-state flex-column fx-align-flex-center ${getValue(props.class ?? '')}`,
content: [
span({ class: 'tg-empty-state--title mb-5' }, props.label),
i({class: 'material-symbols-rounded mb-5'}, props.icon),
@@ -63,11 +68,15 @@ const EmptyState = (/** @type Properties */ props) => {
span({ class: 'mb-5' }, props.message.line2),
(
getValue(props.button) ??
- Link({
- class: 'tg-empty-state--link',
- right_icon: 'chevron_right',
- ...(getValue(props.link)),
- })
+ (
+ getValue(props.link)
+ ? Link({
+ class: 'tg-empty-state--link',
+ right_icon: 'chevron_right',
+ ...(getValue(props.link)),
+ })
+ : ''
+ )
),
],
});
diff --git a/testgen/ui/components/frontend/js/components/explorer_column_selector.js b/testgen/ui/components/frontend/js/components/explorer_column_selector.js
new file mode 100644
index 00000000..1d86c542
--- /dev/null
+++ b/testgen/ui/components/frontend/js/components/explorer_column_selector.js
@@ -0,0 +1,283 @@
+/**
+ * @typedef FilterValue
+ * @type {object}
+ * @property {string} field
+ * @property {string} value
+ * @property {Array?} others
+ *
+ * @typedef Selection
+ * @type {Array}
+ *
+ * @typedef Column
+ * @type {object}
+ * @property {string} name
+ * @property {string} table
+ * @property {string} table_group
+ * @property {boolean?} selected
+ *
+ * @typedef Properties
+ * @type {object}
+ * @property {Array} columns
+ */
+import van from '../van.min.js';
+import { Streamlit } from '../streamlit.js';
+import { emitEvent, getValue, isEqual, loadStylesheet, slugify } from '../utils.js';
+import { Tree } from './tree.js';
+import { Icon } from './icon.js';
+import { Button } from './button.js';
+
+const { div, i, span } = van.tags;
+const tableGroupFieldName = 'table_groups_name';
+const tableFieldName = 'table_name';
+const columnFieldName = 'column_name';
+
+const TRANSLATIONS = {
+ table_groups_name: 'Table Group',
+ table_name: 'Table',
+ column_name: 'Column',
+};
+
+/**
+ * Column selector: a multi-select tree on the left, the resulting filters on the
+ * right, and an "Apply" button that emits the current selection.
+ *
+ * @param {Properties} props - `columns` is read through getValue
+ */
+const ColumnSelector = (/** @type Properties */ props) => {
+ loadStylesheet('column-selector', stylesheet);
+
+ window.testgen.isPage = true;
+ Streamlit.setFrameHeight(400);
+
+ const initialSelection = van.state([]);
+ const selection = van.state([]);
+ const valueById = van.state({});
+ const treeNodes = van.state([]);
+ // True once the current selection differs from the one computed from props.columns.
+ const changed = van.derive(() => {
+ const current = selection.val;
+ const initial = initialSelection.val;
+ return !isEqual(current, initial);
+ });
+
+ // Rebuild tree nodes, id lookup and both selections from props.columns.
+ van.derive(() => {
+ const initialization = initlialize(getValue(props.columns) ?? []);
+
+ valueById.val = initialization.valueById;
+ treeNodes.val = initialization.treeNodes;
+ selection.val = initialization.selection;
+ initialSelection.val = initialization.selection;
+ });
+
+ return div(
+ {class: 'flex-column fx-gap-2 column-selector-wrapper'},
+ div(
+ {class: 'flex-row column-selector'},
+ Tree({
+ id: 'column-selector-tree',
+ classes: 'column-selector--tree',
+ multiSelect: true,
+ onMultiSelect: (selected) => {
+ // A falsy payload from the tree clears the selection.
+ if (!selected) {
+ selection.val = [];
+ return;
+ }
+
+ selection.val = getSelectionFromTreeNodes(selected, getValue(valueById));
+ },
+ nodes: treeNodes,
+ }),
+ span({class: 'column-selector--divider'}),
+ () => {
+ const selection_ = getValue(selection);
+ return div(
+ {class: 'flex-row fx-flex-wrap fx-align-flex-start fx-flex-align-content fx-gap-2 column-selector--selected'},
+ selection_.map((item) => ColumnFilter(item)),
+ );
+ },
+ ),
+ div(
+ {class: 'flex-row fx-justify-content-flex-end'},
+ Button({
+ type: 'stroked',
+ color: 'primary',
+ label: 'Apply',
+ width: 'auto',
+ // Enabled only while there are changes to apply.
+ disabled: van.derive(() => !changed.val),
+ onclick: () => emitEvent('ColumnFiltersUpdated', {payload: selection.val}),
+ }),
+ )
+ );
+};
+
+/**
+ * Build the selector state from a flat list of columns: the table group > table > column
+ * tree, a lookup from node id to original (un-slugified) value, and the initial selection.
+ */
+function initlialize(/** @type Array */ columns) {
+    const valueById = {};
+    const groupsById = {};
+
+    columns.forEach((entry) => {
+        const groupSlug = slugify(entry.table_group);
+        const tableSlug = slugify(entry.table);
+        const columnSlug = slugify(entry.name);
+
+        // Ids embed the parent slugs, so equal names under different parents get different ids.
+        const groupId = `${tableGroupFieldName}:${groupSlug}`;
+        const tableId = `${tableFieldName}:${groupSlug}:${tableSlug}`;
+        const columnId = `${columnFieldName}:${groupSlug}:${tableSlug}:${columnSlug}`;
+
+        Object.assign(valueById, {
+            [groupId]: entry.table_group,
+            [tableId]: entry.table,
+            [columnId]: entry.name,
+        });
+
+        // Reuse the node created for an earlier column of the same group/table, if any.
+        const groupNode = groupsById[groupId] ?? {
+            id: groupId,
+            label: entry.table_group,
+            icon: 'dataset',
+            selected: false,
+            children: {},
+        };
+        groupsById[groupId] = groupNode;
+
+        const tableNode = groupNode.children[tableId] ?? {
+            id: tableId,
+            label: entry.table,
+            icon: 'table',
+            selected: false,
+            children: {},
+        };
+        groupNode.children[tableId] = tableNode;
+
+        tableNode.children[columnId] = {
+            id: columnId,
+            label: entry.name,
+            icon: 'abc',
+            selected: entry.selected ?? false,
+        };
+    });
+
+    // Turn the id-keyed child maps into arrays; a parent is selected when all its children are.
+    const treeNodes = Object.values(groupsById).map((groupNode) => {
+        groupNode.children = Object.values(groupNode.children).map((tableNode) => {
+            tableNode.children = Object.values(tableNode.children);
+            tableNode.selected = tableNode.children.every((columnNode) => columnNode.selected);
+            return tableNode;
+        });
+        groupNode.selected = groupNode.children.every((tableNode) => tableNode.selected);
+        return groupNode;
+    });
+
+    return { treeNodes, valueById, selection: getSelectionFromTreeNodes(treeNodes, valueById) };
+}
+
+/**
+ * Convert tree nodes into the most compact list of filters: a fully selected table group
+ * yields one group filter, a fully selected table yields one table filter (qualified by
+ * its group), anything else yields one filter per column (qualified by table and group).
+ */
+function getSelectionFromTreeNodes(treeNodes, valueById) {
+    if (!treeNodes || treeNodes.length === 0) {
+        return [];
+    }
+
+    // Nodes carrying an `all` property are treated as coming from a user action;
+    // otherwise the `selected` flag of each node is used.
+    const isFromUserAction = treeNodes[0].all !== undefined;
+    const flag = isFromUserAction ? 'all' : 'selected';
+
+    const groupFilter = (group) => ({field: tableGroupFieldName, value: valueById[group.id]});
+    const tableFilter = (table) => ({field: tableFieldName, value: valueById[table.id]});
+
+    return treeNodes.flatMap((group) => {
+        if (group[flag]) {
+            return [groupFilter(group)];
+        }
+
+        return group.children.flatMap((table) => {
+            if (table[flag]) {
+                return [{...tableFilter(table), others: [groupFilter(group)]}];
+            }
+
+            return table.children
+                .filter((column) => isFromUserAction || column.selected)
+                .map((column) => ({
+                    field: columnFieldName,
+                    value: valueById[column.id],
+                    others: [tableFilter(table), groupFilter(group)],
+                }));
+        });
+    });
+}
+
+/**
+ * Chip showing one selected filter as "<label> = <value>". When the filter carries
+ * qualifying `others`, an expand icon toggles a list with one line per qualifier.
+ */
+const ColumnFilter = (
+    /** @type FilterValue */ filter,
+) => {
+    const expanded = van.state(false);
+    const expandIcon = van.derive(() => expanded.val ? 'keyboard_arrow_up' : 'keyboard_arrow_down');
+    // `others` is optional in the FilterValue typedef; treat a missing list as empty.
+    const others = filter.others ?? [];
+
+    return div(
+        {
+            class: 'flex-row column-selector--filter',
+            'data-testid': 'column-selector-filter',
+            style: 'background: var(--form-field-color); border-radius: 8px; padding: 8px 12px;',
+        },
+        div(
+            {class: 'flex-column'},
+            // Same markup as the qualifier lines, so reuse the shared helper.
+            ColumnFilterLine(filter.field, filter.value),
+            () => {
+                if (!getValue(expanded)) {
+                    return '';
+                }
+
+                return div(
+                    {class: 'flex-column', 'data-testid': 'column-selector-filter-others'},
+                    others.map((item) => ColumnFilterLine(item.field, item.value)),
+                );
+            },
+        ),
+        others.length > 0
+            ? Icon(
+                {
+                    size: 16,
+                    classes: 'clickable text-secondary ml-1',
+                    'data-testid': 'column-selector-filter-expand',
+                    onclick: () => expanded.val = !expanded.val,
+                },
+                expandIcon,
+            )
+            : '',
+    );
+};
+
+/** One "<label> =" / value row; the label falls back to the raw field name when untranslated. */
+const ColumnFilterLine = (/** @type string */ field, /** @type string */ value) => {
+    const label = span(
+        { class: 'text-secondary mr-1', 'data-testid': 'column-selector-filter-label' },
+        `${TRANSLATIONS[field] ?? field} =`,
+    );
+    const content = span({'data-testid': 'column-selector-filter-value'}, value);
+
+    return div({ class: 'flex-row', 'data-testid': 'column-selector-filter' }, label, content);
+};
+
+// Styles for the column selector; ColumnSelector passes this sheet to
+// loadStylesheet('column-selector', ...). The tree and the selected-filters pane share
+// the row in a 1:2 flex ratio, separated by a 1px divider.
+const stylesheet = new CSSStyleSheet();
+stylesheet.replace(`
+.column-selector-wrapper {
+ height: 100%;
+ overflow-y: hidden;
+}
+
+.column-selector {
+ height: calc(100% - 48px);
+ align-items: stretch;
+}
+
+.column-selector--tree {
+ flex: 1;
+}
+
+.column-selector--divider {
+ width: 1px;
+ background-color: var(--grey);
+ margin: 0 10px;
+}
+
+.column-selector--selected {
+ flex: 2;
+ overflow-y: auto;
+}
+`);
+
+export { ColumnSelector, ColumnFilter };
diff --git a/testgen/ui/components/frontend/js/components/tree.js b/testgen/ui/components/frontend/js/components/tree.js
index 6d603fe3..7a486ed8 100644
--- a/testgen/ui/components/frontend/js/components/tree.js
+++ b/testgen/ui/components/frontend/js/components/tree.js
@@ -23,7 +23,7 @@
* @property {string} id
* @property {string} classes
* @property {TreeNode[]} nodes
- * @property {string} selected
+ * @property {(string|string[])?} selected
* @property {function(string)?} onSelect
* @property {boolean?} multiSelect
* @property {boolean?} multiSelectToggle
@@ -74,7 +74,7 @@ const Tree = (/** @type Properties */ props, /** @type any? */ filtersContent) =
if (!multiSelect.val) {
selectTree(treeNodes.val, false);
}
- props.onMultiSelect(multiSelect.val ? [] : null);
+ props.onMultiSelect?.(multiSelect.val ? getMultiSelection(treeNodes.val) : null);
});
return div(
@@ -98,13 +98,13 @@ const Tree = (/** @type Properties */ props, /** @type any? */ filtersContent) =
() => div(
{
class: 'tg-tree--nodes',
- onclick: van.derive(() => multiSelect.val ? () => props.onMultiSelect(getMultiSelection(treeNodes.val)) : null),
+ onclick: van.derive(() => multiSelect.val ? () => props.onMultiSelect?.(getMultiSelection(treeNodes.val)) : null),
},
treeNodes.val.map(node => TreeNode(node, selected, multiSelect.val)),
),
),
() => noMatches.val
- ? span({ class: 'tg-tree--empty mt-7 mb-7 text-secondary' }, 'No matching itens found')
+ ? span({ class: 'tg-tree--empty mt-7 mb-7 text-secondary' }, 'No matching items found')
: '',
);
};
@@ -225,8 +225,8 @@ const TreeNode = (
node.selected.val = node.children.every(child => child.selected.val);
} else {
node.selected.val = !node.selected.val;
- event.fromChild = true;
}
+ event.fromChild = true;
}
: null,
},
@@ -252,7 +252,7 @@ const TreeNode = (
? [
Checkbox({
checked: () => node.selected.val,
- indeterminate: hasChildren ? () => !node.selected.val && node.children.some(({ selected }) => selected.val) : false,
+ indeterminate: hasChildren ? () => isIndeterminate(node) : false,
}),
span({ class: 'mr-1' }),
]
@@ -283,7 +283,7 @@ const initTreeState = (
}
node.expanded = van.state(expanded);
node.hidden = van.state(false);
- node.selected = van.state(false);
+ node.selected = van.state(node.selected ?? false);
treeExpanded = treeExpanded || expanded;
});
return treeExpanded;
@@ -341,7 +341,8 @@ const getMultiSelection = (nodes) => {
if (selectedChildren.length) {
selected.push({
id: node.id,
- all: selectedChildren.length === node.children.length,
+ all: selectedChildren.length === node.children.length
+ && (selectedChildren[0]?.children === undefined || selectedChildren.every(child => child.all)),
children: selectedChildren,
});
}
@@ -352,6 +353,35 @@ const getMultiSelection = (nodes) => {
return selected;
};
+/**
+ * Tell whether a node is in the indeterminate (partially selected) state:
+ * the node itself is not selected, but at least one of its descendants is.
+ *
+ * @param {TreeNode} node
+ * @returns {boolean}
+ */
+const isIndeterminate = (node) => {
+ return !node.selected.val && isAnyDescendantSelected(node);
+};
+
+
+/**
+ * Recursively check whether any child, grandchild, etc. of the node is selected.
+ * A node with no children has no selected descendants.
+ *
+ * @param {TreeNode} node
+ * @returns {boolean}
+ */
+const isAnyDescendantSelected = (node) => {
+    const children = node.children ?? [];
+    // `some` stops at the first match, like the early return of a manual loop.
+    return children.some((child) => getValue(child.selected) || isAnyDescendantSelected(child));
+};
+
const stylesheet = new CSSStyleSheet();
stylesheet.replace(`
.tg-tree {
diff --git a/testgen/ui/components/frontend/js/main.js b/testgen/ui/components/frontend/js/main.js
index e01a2cda..3d265a2d 100644
--- a/testgen/ui/components/frontend/js/main.js
+++ b/testgen/ui/components/frontend/js/main.js
@@ -14,6 +14,7 @@ import { ExpanderToggle } from './components/expander_toggle.js';
import { Link } from './components/link.js';
import { Paginator } from './components/paginator.js';
import { SortingSelector } from './components/sorting_selector.js';
+import { ColumnSelector } from './components/explorer_column_selector.js';
import { TestRuns } from './pages/test_runs.js';
import { ProfilingRuns } from './pages/profiling_runs.js';
import { DatabaseFlavorSelector } from './components/flavor_selector.js';
@@ -49,6 +50,7 @@ const TestGenComponent = (/** @type {string} */ id, /** @type {object} */ props)
score_details: ScoreDetails,
score_explorer: ScoreExplorer,
schedule_list: ScheduleList,
+ column_selector: ColumnSelector,
};
if (Object.keys(componentById).includes(id)) {
diff --git a/testgen/ui/components/frontend/js/pages/score_explorer.js b/testgen/ui/components/frontend/js/pages/score_explorer.js
index f3ead425..27deb404 100644
--- a/testgen/ui/components/frontend/js/pages/score_explorer.js
+++ b/testgen/ui/components/frontend/js/pages/score_explorer.js
@@ -3,6 +3,7 @@
* @type {object}
* @property {string} field
* @property {string} value
+ * @property {Array?} others
*
* @typedef ScoreDefinition
* @type {object}
@@ -12,6 +13,7 @@
* @property {boolean} cde_score
* @property {string} category
* @property {ScoreDefinitionFilter[]} filters
+ * @property {boolean} filter_by_columns
*
* @typedef ScoreCardCategory
* @type {object}
@@ -59,7 +61,8 @@ import { Checkbox } from '../components/checkbox.js';
import { Portal } from '../components/portal.js';
import { ScoreBreakdown } from '../components/score_breakdown.js';
import { IssuesTable } from '../components/score_issues.js';
-import { Alert } from '../components/alert.js';
+import { EmptyState, EMPTY_STATE_MESSAGE } from '../components/empty_state.js';
+import { ColumnFilter } from '../components/explorer_column_selector.js';
const { div, i, span } = van.tags;
@@ -85,45 +88,66 @@ const ScoreExplorer = (/** @type {Properties} */ props) => {
const domId = 'score-explorer-page';
const userCanEdit = getValue(props.permissions)?.can_edit ?? false;
+ const updateToolbarFilters = van.derive(() => {
+ const oldFilters = props.definition.oldVal.filters;
+ const newFilters = props.definition.val.filters;
+ const oldFilterByColumns = props.definition.oldVal.filter_by_columns;
+ const newFilterByColumns = props.definition.val.filter_by_columns;
+
+ if (!isEqual(oldFilters, newFilters) || oldFilterByColumns !== newFilterByColumns) {
+ return {filters: newFilters, filter_by_columns: newFilterByColumns};
+ }
+
+ return null;
+ });
resizeFrameHeightToElement(domId);
resizeFrameHeightOnDOMChange(domId);
return div(
{ id: domId, class: 'score-explorer' },
- Toolbar(props.filter_values, getValue(props.definition), props.is_new, userCanEdit),
- span({ class: 'mb-4', style: 'display: block;' }),
- () =>
- getValue(props.is_new) && getValue(props.definition)?.filters?.length <= 0
- ? Alert(
- { icon: 'info', type: 'info', class: 'mb-4' },
- span({}, 'Add filters to the scorecard to get started.'),
- )
- : '',
- ScoreCard(props.score_card),
+ Toolbar(props.filter_values, getValue(props.definition), props.is_new, userCanEdit, updateToolbarFilters),
span({ class: 'mb-4', style: 'display: block;' }),
() => {
- const drilldown = getValue(props.drilldown);
- const issuesValue = getValue(props.issues);
-
- return (
- (issuesValue && getValue(props.drilldown))
- ? IssuesTable(
- issuesValue?.items,
- issuesValue?.columns,
- getValue(props.score_card),
- getValue(props.breakdown_score_type),
- getValue(props.breakdown_category),
- drilldown,
- () => emitEvent('DrilldownChanged', { payload: null }),
- )
- : ScoreBreakdown(
- props.score_card,
- props.breakdown,
- props.breakdown_category,
- props.breakdown_score_type,
- (project_code, name, score_type, category, drilldown) => emitEvent('DrilldownChanged', { payload: drilldown }),
- )
+ const isEmpty = getValue(props.is_new) && getValue(props.definition)?.filters?.length <= 0;
+
+ if (isEmpty) {
+ return EmptyState({
+ class: 'explorer-empty-state',
+ label: 'No filters or columns selected yet',
+ icon: 'readiness_score',
+ message: EMPTY_STATE_MESSAGE.explorer,
+ });
+ }
+
+ return div(
+ {class: 'flex-column'},
+ ScoreCard(props.score_card),
+ span({ class: 'mb-4', style: 'display: block;' }),
+ () => {
+ const drilldown = getValue(props.drilldown);
+ const issuesValue = getValue(props.issues);
+
+ return (
+ (issuesValue && getValue(props.drilldown))
+ ? IssuesTable(
+ issuesValue?.items,
+ issuesValue?.columns,
+ getValue(props.score_card),
+ getValue(props.breakdown_score_type),
+ getValue(props.breakdown_category),
+ drilldown,
+ () => emitEvent('DrilldownChanged', { payload: null }),
+ )
+ : ScoreBreakdown(
+ props.score_card,
+ props.breakdown,
+ props.breakdown_category,
+ props.breakdown_score_type,
+ (project_code, name, score_type, category, drilldown) => emitEvent('DrilldownChanged', { payload: drilldown }),
+ )
+ );
+ },
);
},
);
@@ -134,6 +158,7 @@ const Toolbar = (
/** @type ScoreDefinition */ definition,
/** @type boolean */ isNew,
/** @type boolean */ userCanEdit,
+ /** @type ... */ updates,
) => {
const addFilterButtonId = 'score-explorer--add-filter-btn';
const categories = [
@@ -149,7 +174,8 @@ const Toolbar = (
'data_product',
];
const filterableFields = categories.filter((c) => c !== 'dq_dimension');
- const filters = van.state(definition.filters.map((f, idx) => ({key: `${f.field}-${idx}-${getRandomId()}`, field: f.field, value: van.state(f.value) })));
+ const filters = van.state(definition.filters.map((f, idx) => ({key: `${f.field}-${idx}-${getRandomId()}`, field: f.field, value: van.state(f.value), others: f.others ?? [] })));
+ const filterByColumns = van.state(definition.filter_by_columns);
const filterSelectorOpened = van.state(false);
const displayTotalScore = van.state(definition.total_score ?? true);
const displayCDEScore = van.state(definition.cde_score ?? true);
@@ -180,6 +206,7 @@ const Toolbar = (
filters.val = [ ...filters.val.slice(0, position), ...filters.val.slice(position + 1) ];
};
const setFilterValue = (/** @type number*/ position, /** @type string */ value) => {
+ filterByColumns.val = false;
filters.val[position].value.val = value
filters.val = [ ...filters.val ];
};
@@ -194,6 +221,7 @@ const Toolbar = (
category: displayCategory.oldVal ? selectedCategory.oldVal : null,
total_score: displayTotalScore.oldVal,
cde_score: displayCDEScore.oldVal,
+ filter_by_columns: filterByColumns.oldVal,
};
const current = {
name: getValue(scoreName),
@@ -203,10 +231,17 @@ const Toolbar = (
category: getValue(displayCategory) ? getValue(selectedCategory) : null,
total_score: getValue(displayTotalScore),
cde_score: getValue(displayCDEScore),
+ filter_by_columns: getValue(filterByColumns),
};
if (!isEqual(current, previous)) {
- refresh(current);
+ if (current.filter_by_columns && !previous.filter_by_columns) {
+ emitEvent('ColumnSelectorOpened', {});
+ } else if (!current.filter_by_columns && previous.filter_by_columns) {
+ filterSelectorOpened.val = true;
+ } else {
+ refresh(current);
+ }
}
});
@@ -220,42 +255,120 @@ const Toolbar = (
}
});
+ van.derive(() => {
+ const updatesValue = getValue(updates);
+ if (updatesValue != null) {
+ const simplifiedFilters = (filters.rawVal ?? []).map(f => ({ field: f.field, value: f.value.rawVal, others: f.others ?? []}))
+ if (!isEqual(updatesValue.filters, simplifiedFilters)) {
+ filters.val = updatesValue.filters.map((f, idx) => ({key: `${f.field}-${idx}-${getRandomId()}`, field: f.field, value: van.state(f.value), others: f.others ?? [] }));
+ }
+
+ if (updatesValue.filter_by_columns !== filterByColumns.rawVal) {
+ filterByColumns.val = updatesValue.filter_by_columns;
+ }
+ }
+ });
+
return div(
{ class: 'flex-column score-explorer--toolbar' },
div(
{ class: 'flex-column' },
- span({ class: 'text-caption mb-1' }, 'Filter by'),
div(
- { class: 'flex-row fx-flex-wrap fx-gap-4' },
+ { class: 'flex-column' },
+ span({ class: 'text-caption mb-1' }, 'Filter by'),
+ div(
+ { class: 'flex-row fx-flex-wrap fx-gap-4' },
+ () => {
+ const filters_ = getValue(filters);
+ const filterValues_ = getValue(filterValues);
+ if (filters_?.length <= 0) {
+ return '';
+ }
+
+ return div(
+ { class: 'flex-row fx-flex-wrap fx-gap-4' },
+ filters_.map(({ key, field, value, others }, idx) => {
+ renderedFilters[key] = renderedFilters[key] ?? (
+ filterByColumns.val
+ ? ColumnFilter({field, value, others})
+ : Filter(idx, field, value, filterValues_[field], setFilterValue, removeFilter, !isInitialized && !value.val)
+ );
+ return renderedFilters[key];
+ }),
+ );
+ },
+ () => {
+ const filters_ = getValue(filters);
+ const filterByColumns_ = getValue(filterByColumns);
+
+ const fieldFilterTrigger = Button({
+ id: addFilterButtonId,
+ icon: 'add',
+ label: 'Add Filter',
+ type: 'basic',
+ color: 'primary',
+ style: 'width: auto;',
+ onclick: () => filterSelectorOpened.val = true,
+ });
+ const columnsSelectorTrigger = Button({
+ id: addFilterButtonId,
+ label: 'Select Columns',
+ type: 'basic',
+ color: 'primary',
+ style: 'width: auto;',
+ onclick: () => emitEvent('ColumnSelectorOpened', {}),
+ });
+ const combinedTrigger = div(
+ {class: 'flex-row fx-gap-3'},
+ fieldFilterTrigger,
+ span({class: 'text-caption'}, 'Or'),
+ columnsSelectorTrigger,
+ );
+
+ if (filters_?.length <= 0 && filterByColumns_ == undefined) {
+ return combinedTrigger;
+ }
+
+ if (filterByColumns_) {
+ return columnsSelectorTrigger;
+ }
+
+ return fieldFilterTrigger;
+ },
+ Portal(
+ { target: addFilterButtonId, style: '', opened: filterSelectorOpened},
+ FilterFieldSelector(filterableFields, undefined, addEmptyFilter),
+ ),
+ )
+ ),
+ div(
+ { class: 'flex-row fx-justify-content-flex-end', style: 'width: 100%;' },
() => {
- const filters_ = getValue(filters);
- const filterValues_ = getValue(filterValues);
- if (filters_?.length <= 0) {
+ if (filterByColumns.val == undefined) {
return '';
}
- return div(
- { class: 'flex-row fx-flex-wrap fx-gap-4' },
- getValue(filters).map(({ key, field, value }, idx) => {
- renderedFilters[key] = renderedFilters[key] ?? Filter(idx, field, value, filterValues_[field], setFilterValue, removeFilter, !isInitialized);
- return renderedFilters[key];
- }),
- );
- },
- Button({
- id: addFilterButtonId,
- icon: 'add',
- label: 'Add Filter',
- type: 'basic',
- color: 'primary',
- style: 'width: auto;',
- onclick: () => filterSelectorOpened.val = true,
- }),
- Portal(
- { target: addFilterButtonId, style: '', opened: filterSelectorOpened},
- FilterFieldSelector(filterableFields, undefined, addEmptyFilter),
- ),
- )
+ const switchToColumnSelectorTrigger = Button({
+ label: 'Switch to Column Selector',
+ type: 'basic',
+ color: 'primary',
+ style: 'width: auto;',
+ onclick: () => emitEvent('FilterModeChanged', {payload: true}),
+ });
+ const switchToCategoryFilterTrigger = Button({
+ label: 'Switch to Category Filters',
+ type: 'basic',
+ color: 'primary',
+ style: 'width: auto;',
+ onclick: () => emitEvent('FilterModeChanged', {payload: false}),
+ });
+
+ if (filterByColumns.val) {
+ return switchToCategoryFilterTrigger;
+ }
+ return switchToColumnSelectorTrigger;
+ }
+ ),
),
div(
{ class: 'flex-row fx-align-flex-end fx-flex-wrap fx-gap-5' },
@@ -400,6 +513,10 @@ stylesheet.replace(`
min-height: 1100px;
}
+.explorer-empty-state {
+ margin-top: unset !important;
+}
+
.score-explorer--toolbar {
border: 1px solid var(--border-color);
border-radius: 8px;
@@ -407,6 +524,7 @@ stylesheet.replace(`
padding: 16px;
}
+
.score-explorer--filter {
background: var(--form-field-color);
border-radius: 8px;
diff --git a/testgen/ui/components/frontend/js/utils.js b/testgen/ui/components/frontend/js/utils.js
index 5a4b8acb..655cc437 100644
--- a/testgen/ui/components/frontend/js/utils.js
+++ b/testgen/ui/components/frontend/js/utils.js
@@ -177,4 +177,11 @@ function afterMount(/** @ype Function */ callback) {
trigger.val = true;
}
-export { afterMount, debounce, emitEvent, enforceElementWidth, getRandomId, getValue, getParents, isEqual, isState, loadStylesheet, resizeFrameHeightToElement, resizeFrameHeightOnDOMChange, friendlyPercent };
+/**
+ * Convert text into a lowercase, dash-separated slug.
+ * Each run of characters other than a-z and 0-9 becomes a single dash,
+ * and a leading or trailing dash is removed.
+ *
+ * @param {string} str
+ * @returns {string}
+ */
+function slugify(/** @type string */ str) {
+    const lowered = str.toLowerCase();
+    const dashed = lowered.replace(/[^a-z0-9]+/g, '-');
+    return dashed.replace(/^-|-$/g, '');
+}
+
+export { afterMount, debounce, emitEvent, enforceElementWidth, getRandomId, getValue, getParents, isEqual, isState, loadStylesheet, resizeFrameHeightToElement, resizeFrameHeightOnDOMChange, friendlyPercent, slugify };
diff --git a/testgen/ui/components/widgets/testgen_component.py b/testgen/ui/components/widgets/testgen_component.py
index ae692c14..f4866bdd 100644
--- a/testgen/ui/components/widgets/testgen_component.py
+++ b/testgen/ui/components/widgets/testgen_component.py
@@ -18,6 +18,7 @@
"quality_dashboard",
"score_details",
"schedule_list",
+ "column_selector",
]
diff --git a/testgen/ui/queries/scoring_queries.py b/testgen/ui/queries/scoring_queries.py
index 4c7feccb..99fca14b 100644
--- a/testgen/ui/queries/scoring_queries.py
+++ b/testgen/ui/queries/scoring_queries.py
@@ -146,3 +146,21 @@ def get_score_category_values(project_code: str) -> dict[ScoreCategory, list[str
if row["category"] and row["value"]:
values[row["category"]].append(row["value"])
return values
+
+
+@st.cache_data(show_spinner="Loading data :gray[:small[(This might take a few minutes)]] ...")
+def get_column_filters(project_code: str) -> list[dict]:
+    """Return one record per column registered under the project's table groups.
+
+    Each record carries the keys ``column_id``, ``name``, ``table_id``, ``table``,
+    ``table_group_id`` and ``table_group``; the id columns are cast to text by the query.
+    Rows are ordered by table name and then by ordinal position.
+
+    Args:
+        project_code: Project whose table groups are searched.
+
+    Returns:
+        A list of plain dicts, one per row of the query result.
+    """
+    # The project code is bound as a driver-level parameter rather than being
+    # interpolated into the SQL text, so quoting in the value cannot alter the query.
+    # NOTE(review): ``%(name)s`` is the pyformat style used by psycopg; confirm that
+    # matches the driver behind ``engine``.
+    query = """
+    SELECT
+        data_column_chars.column_id::text AS column_id,
+        data_column_chars.column_name AS name,
+        data_column_chars.table_id::text AS table_id,
+        data_column_chars.table_name AS table,
+        data_column_chars.table_groups_id::text AS table_group_id,
+        table_groups.table_groups_name AS table_group
+    FROM data_column_chars
+    INNER JOIN table_groups ON (table_groups.id = data_column_chars.table_groups_id)
+    WHERE table_groups.project_code = %(project_code)s
+    ORDER BY table_name, ordinal_position;
+    """
+    results = pd.read_sql_query(query, engine, params={"project_code": project_code})
+    return results.to_dict(orient="records")
diff --git a/testgen/ui/views/score_explorer.py b/testgen/ui/views/score_explorer.py
index 0495e095..80d9ab82 100644
--- a/testgen/ui/views/score_explorer.py
+++ b/testgen/ui/views/score_explorer.py
@@ -1,4 +1,6 @@
+import json
from datetime import datetime
+from functools import partial
from io import BytesIO
from typing import ClassVar
@@ -10,7 +12,7 @@
run_refresh_score_cards_results,
)
from testgen.common.mixpanel_service import MixpanelService
-from testgen.common.models.scores import ScoreCategory, ScoreDefinition, ScoreDefinitionFilter, SelectedIssue
+from testgen.common.models.scores import ScoreCategory, ScoreDefinition, ScoreDefinitionCriteria, SelectedIssue
from testgen.ui.components import widgets as testgen
from testgen.ui.components.widgets.download_dialog import FILE_DATA_TYPE, download_dialog, zip_multi_file_data
from testgen.ui.navigation.page import Page
@@ -19,12 +21,13 @@
from testgen.ui.queries import profiling_queries, test_run_queries
from testgen.ui.queries.scoring_queries import (
get_all_score_cards,
+ get_column_filters,
get_score_card_issue_reports,
get_score_category_values,
)
from testgen.ui.services import user_session_service
-from testgen.ui.session import session
-from testgen.utils import format_score_card, format_score_card_breakdown, format_score_card_issues
+from testgen.ui.session import session, temp_value
+from testgen.utils import format_score_card, format_score_card_breakdown, format_score_card_issues, try_json
PAGE_PATH = "quality-dashboard:explorer"
@@ -42,12 +45,13 @@ def render(
total_score: str | None = None,
cde_score: str | None = None,
category: str | None = None,
- filters: list[str] | None = None,
+ filters: str | None = None,
breakdown_category: str | None = None,
breakdown_score_type: str | None = "score",
drilldown: str | None = None,
definition_id: str | None = None,
project_code: str | None = None,
+ filter_by_columns: str | None = None,
**_kwargs
):
page_title: str = "Score Explorer"
@@ -62,7 +66,7 @@ def render(
)
return
- if original_score_definition.category:
+ if not breakdown_category and original_score_definition.category:
breakdown_category = original_score_definition.category.value
project_code = original_score_definition.project_code
@@ -88,6 +92,9 @@ def render(
project_code=project_code,
total_score=True,
cde_score=True,
+ criteria=ScoreDefinitionCriteria(
+ group_by_field=filter_by_columns != "true" if filter_by_columns else None,
+ ),
)
if definition_id and not (name or total_score or category or filters):
score_definition = ScoreDefinition.get(definition_id)
@@ -100,20 +107,22 @@ def render(
score_definition.category = ScoreCategory(category) if category else None
if filters:
- applied_filters = filters
- if not isinstance(applied_filters, list):
- applied_filters = [filters]
-
- score_definition.filters = [
- ScoreDefinitionFilter(field=field_value[0], value=field_value[1])
- for f in applied_filters if (field_value := f.split("="))
+ applied_filters: list[dict] = try_json(filters, default=[])
+ applied_filters = [
+ {"field": f["field"], "value": f["value"], "others": f.get("others", [])}
+ for f in applied_filters
+ if f.get("field") and f.get("value")
]
+ score_definition.criteria = ScoreDefinitionCriteria.from_filters(
+ applied_filters,
+ group_by_field=filter_by_columns != "true",
+ )
score_card = None
if score_definition:
score_card = score_definition.as_score_card()
- if len(score_definition.filters) > 0 and not drilldown:
+ if score_definition.criteria.has_filters() and not drilldown:
score_breakdown = format_score_card_breakdown(
score_definition.get_score_card_breakdown(
score_type=breakdown_score_type,
@@ -151,6 +160,8 @@ def render(
"DrilldownChanged": set_breakdown_drilldown,
"IssueReportsExported": export_issue_reports,
"ScoreDefinitionSaved": save_score_definition,
+ "ColumnSelectorOpened": partial(column_selector_dialog, project_code, score_definition_dict),
+ "FilterModeChanged": change_score_definition_filter_mode,
},
)
@@ -163,12 +174,9 @@ def set_score_definition(definition: dict | None) -> None:
"total_score": definition["total_score"],
"cde_score": definition["cde_score"],
"category": definition["category"],
- "filters": [
- f"{f["field"]}={filter_value}"
- for f in definition["filters"]
- if (filter_value := f.get("value"))
- ],
+ "filters": json.dumps(definition["filters"], separators=(",", ":")),
"definition_id": str(definition_id) if definition_id else None,
+ "filter_by_columns": str(definition.get("filter_by_columns", False)).lower(),
})
@@ -226,6 +234,76 @@ def get_report_file_data(update_progress, issue) -> FILE_DATA_TYPE:
return file_name, "application/pdf", buffer.read()
+def column_selector_dialog(project_code: str, score_definition_dict: dict, _) -> None:
+ """Open the "Select Columns for the Scorecard" dialog.
+
+ The dialog renders the ``column_selector`` component with every column returned by
+ ``get_column_filters(project_code)``, each flagged with a ``selected`` boolean.
+ When the component emits ``ColumnFiltersUpdated``, the received filters are merged
+ into ``score_definition_dict`` and passed to ``set_score_definition``.
+
+ The third positional argument is ignored.
+ """
+ # Getter/setter pair for the "dialog is open" flag kept under a temp-value key.
+ is_column_selector_opened, set_column_selector_opened = temp_value("explorer-column-selector", default=False)
+
+ def dialog_content() -> None:
+ # The flag is cleared once filters are submitted; presumably this rerun is what
+ # dismisses the dialog afterwards -- TODO confirm against Streamlit dialog behavior.
+ if not is_column_selector_opened():
+ st.rerun()
+
+ # Existing filters only pre-select columns when the definition is in column mode.
+ selected_filters = set()
+ if score_definition_dict.get("filter_by_columns"):
+ selected_filters = _get_selected_filters(score_definition_dict.get("filters", []))
+
+ column_filters = get_column_filters(project_code)
+ for column in column_filters:
+ # A column counts as selected when its table group, its table, or the column
+ # itself appears among the selected filter tuples.
+ table_group_selected = (f"table_groups_name={column["table_group"]}",) in selected_filters
+ table_selected = (
+ f"table_groups_name={column["table_group"]}",
+ f"table_name={column["table"]}",
+ ) in selected_filters
+ column_selected = (
+ f"table_groups_name={column["table_group"]}",
+ f"table_name={column["table"]}",
+ f"column_name={column["name"]}",
+ ) in selected_filters
+ column["selected"] = table_group_selected or table_selected or column_selected
+
+ testgen.testgen_component(
+ "column_selector",
+ props={"columns": column_filters},
+ on_change_handlers={
+ "ColumnFiltersUpdated": set_score_definition_column_filters,
+ }
+ )
+
+ def set_score_definition_column_filters(filters: list[dict]) -> None:
+ # An empty selection turns column mode off (``bool([])`` is False).
+ set_score_definition({
+ **score_definition_dict,
+ "filters": filters,
+ "filter_by_columns": bool(filters),
+ })
+ set_column_selector_opened(False)
+
+ # Mark the dialog as open before rendering it so dialog_content does not rerun immediately.
+ set_column_selector_opened(True)
+ return st.dialog(title="Select Columns for the Scorecard", width="small")(dialog_content)()
+
+
+def _get_selected_filters(filters: list[dict]) -> set[tuple[str, ...]]:
+    """Convert filter dicts into a set of hierarchical selection tuples.
+
+    Each filter contributes one tuple of ``"<field>=<value>"`` strings built from its own
+    ``field``/``value`` pair plus the pairs listed under ``others``. Only the fields
+    ``table_groups_name``, ``table_name`` and ``column_name`` are kept, always in that
+    order, so a tuple holds between zero and three items.
+
+    Args:
+        filters: Filter dicts with ``field`` and ``value`` keys and an optional
+            ``others`` list of linked ``field``/``value`` dicts.
+
+    Returns:
+        The set of selection tuples, one per distinct filter.
+    """
+    hierarchy = ("table_groups_name", "table_name", "column_name")
+    selected_filters: set[tuple[str, ...]] = set()
+    for filter_ in filters:
+        filter_values = {filter_["field"]: filter_["value"]}
+        # "others" may be missing or explicitly null in JSON coming from the client.
+        for linked_filter in filter_.get("others") or []:
+            filter_values[linked_filter["field"]] = linked_filter["value"]
+
+        selected_filters.add(
+            tuple(f"{key}={filter_values[key]}" for key in hierarchy if key in filter_values)
+        )
+    return selected_filters
+
+
+def change_score_definition_filter_mode(filter_by_columns: bool) -> None:
+    """Switch the explorer's filter mode through the query parameters.
+
+    Clears the ``filters`` parameter and sets ``filter_by_columns`` to the
+    lowercased string form of the given flag.
+    """
+    mode_flag = str(filter_by_columns).lower()
+    Router().set_query_params({"filters": None, "filter_by_columns": mode_flag})
+
+
def save_score_definition(_) -> None:
project_code = st.query_params.get("project_code")
definition_id = st.query_params.get("definition_id")
@@ -233,7 +311,8 @@ def save_score_definition(_) -> None:
total_score = st.query_params.get("total_score")
cde_score = st.query_params.get("cde_score")
category = st.query_params.get("category")
- filters = st.query_params.get_all("filters")
+ filters: list[dict] = try_json(st.query_params.get("filters"), default=[])
+ filter_by_columns: bool = (st.query_params.get("filter_by_columns") or "false") == "true"
if not name:
raise ValueError("A name is required to save the scorecard")
@@ -266,10 +345,13 @@ def save_score_definition(_) -> None:
score_definition.total_score = total_score and total_score.lower() == "true"
score_definition.cde_score = cde_score and cde_score.lower() == "true"
score_definition.category = ScoreCategory(category) if category else None
- score_definition.filters = [
- ScoreDefinitionFilter(field=field_value[0], value=field_value[1])
- for f in filters if (field_value := f.split("="))
- ]
+ score_definition.criteria = ScoreDefinitionCriteria.from_filters(
+ [
+ {"field": f["field"], "value": f["value"], "others": f.get("others", [])} for f in filters
+ if f.get("field") and f.get("value")
+ ],
+ group_by_field=not filter_by_columns,
+ )
score_definition.save()
run_refresh_score_cards_results(definition_id=score_definition.id, **refresh_kwargs)
get_all_score_cards.clear()
@@ -283,6 +365,7 @@ def save_score_definition(_) -> None:
"cde_score": None,
"category": None,
"filters": None,
+ "filter_by_columns": None,
"definition_id": str(score_definition.id) if score_definition.id else None,
})
diff --git a/testgen/utils/__init__.py b/testgen/utils/__init__.py
index 469d727f..e31bb0a2 100644
--- a/testgen/utils/__init__.py
+++ b/testgen/utils/__init__.py
@@ -5,6 +5,7 @@
if TYPE_CHECKING:
from testgen.common.models.scores import ScoreCard
+import json
import urllib.parse
from typing import Any, TypeVar
from uuid import UUID
@@ -31,6 +32,13 @@ def is_uuid4(value: str) -> bool:
return str(uuid) == value
+def try_json(value: str | None, default: T | None) -> T:
+    """Parse ``value`` as JSON, falling back to ``default`` when it cannot be parsed.
+
+    Args:
+        value: JSON text to decode; ``None`` is accepted and yields ``default``.
+        default: Value returned when decoding fails.
+
+    Returns:
+        The decoded JSON value, or ``default`` on failure.
+    """
+    try:
+        return json.loads(value)
+    except (TypeError, ValueError, RecursionError):
+        # TypeError: value is None or not str/bytes. ValueError: malformed JSON
+        # (json.JSONDecodeError is a subclass). RecursionError: excessively nested input.
+        # Unlike a bare ``except``, this lets KeyboardInterrupt/SystemExit propagate.
+        return default
+
+
# https://github.com/streamlit/streamlit/issues/798#issuecomment-1647759949
def get_base_url() -> str:
session = st.runtime.get_instance()._session_mgr.list_active_sessions()[0]
From c9826f282b9b7f165c2bc72f3177ae72615948b5 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Fri, 9 May 2025 01:42:48 -0400
Subject: [PATCH 07/33] fix(data-catalog): update drop date only once
---
testgen/template/data_chars/data_chars_update.sql | 2 ++
1 file changed, 2 insertions(+)
diff --git a/testgen/template/data_chars/data_chars_update.sql b/testgen/template/data_chars/data_chars_update.sql
index c8981079..c35dc933 100644
--- a/testgen/template/data_chars/data_chars_update.sql
+++ b/testgen/template/data_chars/data_chars_update.sql
@@ -104,6 +104,7 @@ FROM last_run l
AND d.table_name = n.table_name
)
WHERE data_table_chars.table_id = d.table_id
+ AND d.drop_date IS NULL
AND n.table_name IS NULL;
-- ==============================================================================
@@ -221,4 +222,5 @@ FROM last_run l
)
WHERE data_column_chars.table_id = d.table_id
AND data_column_chars.column_name = d.column_name
+ AND d.drop_date IS NULL
AND n.column_name IS NULL;
From 2a5fe7fad28d62304c0d67e2b57007bfc237498f Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Fri, 9 May 2025 01:44:21 -0400
Subject: [PATCH 08/33] fix: improve text and tooltip in data catalog and test
suites
---
.../frontend/js/pages/data_catalog.js | 6 ++-
.../frontend/js/pages/test_suites.js | 39 +++++++++++++++----
testgen/ui/queries/project_queries.py | 8 +++-
testgen/ui/views/test_suites.py | 1 +
testgen/utils/__init__.py | 2 +
5 files changed, 45 insertions(+), 11 deletions(-)
diff --git a/testgen/ui/components/frontend/js/pages/data_catalog.js b/testgen/ui/components/frontend/js/pages/data_catalog.js
index 62f4f8de..fd0adb8f 100644
--- a/testgen/ui/components/frontend/js/pages/data_catalog.js
+++ b/testgen/ui/components/frontend/js/pages/data_catalog.js
@@ -300,8 +300,10 @@ const TagsCard = (/** @type TagProperties */ props, /** @type Table | Column */
value ? 'check_circle' : 'cancel',
),
span(
- { class: value ? 'text-capitalize' : 'text-secondary' },
- value ? label : `Not a ${label}`,
+ { class: value ? '' : 'text-secondary' },
+ item.type === 'column'
+ ? (value ? 'Critical data element' : 'Not a critical data element')
+ : (value ? 'All critical data elements' : 'Not all critical data elements'),
),
(item.type === 'column' && state.rawVal === null) ? InheritedIcon('table') : null,
);
diff --git a/testgen/ui/components/frontend/js/pages/test_suites.js b/testgen/ui/components/frontend/js/pages/test_suites.js
index 8ebf10e0..91ce0929 100644
--- a/testgen/ui/components/frontend/js/pages/test_suites.js
+++ b/testgen/ui/components/frontend/js/pages/test_suites.js
@@ -6,6 +6,7 @@
* @property {number} connections_ct
* @property {number} table_groups_ct
* @property {string} default_connection_id
+ * @property {boolean} can_export_to_observability
*
* @typedef TableGroupOption
* @type {object}
@@ -70,8 +71,9 @@ const TestSuites = (/** @type Properties */ props) => {
return div(
{ id: wrapperId, style: 'overflow-y: auto;' },
- () =>
- getValue(props.project_summary).test_suites_ct > 0
+ () => {
+ const projectSummary = getValue(props.project_summary);
+ return projectSummary.test_suites_ct > 0
? div(
{ class: 'tg-test-suites'},
() => div(
@@ -111,9 +113,29 @@ const TestSuites = (/** @type Properties */ props) => {
{ class: 'flex-row' },
userCanEdit
? [
- Button({ type: 'icon', icon: 'output', tooltip: 'Export results to Observability', onclick: () => emitEvent('ExportActionClicked', {payload: testSuite.id}) }),
- Button({ type: 'icon', icon: 'edit', tooltip: 'Edit test suite', onclick: () => emitEvent('EditActionClicked', {payload: testSuite.id}) }),
- Button({ type: 'icon', icon: 'delete', tooltip: 'Delete test suite', tooltipPosition: 'left', onclick: () => emitEvent('DeleteActionClicked', {payload: testSuite.id}) }),
+ Button({
+ type: 'icon',
+ icon: 'output',
+ tooltip: projectSummary.can_export_to_observability
+ ? 'Export results to Observability'
+ : 'Observability export not configured in Project Settings',
+ tooltipPosition: 'left',
+ disabled: !projectSummary.can_export_to_observability,
+ onclick: () => emitEvent('ExportActionClicked', {payload: testSuite.id}),
+ }),
+ Button({
+ type: 'icon',
+ icon: 'edit',
+ tooltip: 'Edit test suite',
+ onclick: () => emitEvent('EditActionClicked', {payload: testSuite.id}),
+ }),
+ Button({
+ type: 'icon',
+ icon: 'delete',
+ tooltip: 'Delete test suite',
+ tooltipPosition: 'left',
+ onclick: () => emitEvent('DeleteActionClicked', {payload: testSuite.id}),
+ }),
]
: ''
),
@@ -170,7 +192,7 @@ const TestSuites = (/** @type Properties */ props) => {
onclick: () => emitEvent('RunTestsClicked', {payload: testSuite.id}),
}),
Button({
- label: 'Generate Tests',
+ label: parseInt(testSuite.test_ct) ? 'Regenerate Tests' : 'Generate Tests',
color: 'primary',
type: 'stroked',
style: 'margin-top: 16px; min-width: 180px;',
@@ -184,7 +206,8 @@ const TestSuites = (/** @type Properties */ props) => {
})),
),
)
- : ConditionalEmptyState(getValue(props.project_summary), userCanEdit),
+ : ConditionalEmptyState(projectSummary, userCanEdit);
+ },
);
};
@@ -200,7 +223,7 @@ const ConditionalEmptyState = (
color: 'primary',
label: 'Add Test Suite',
width: 'fit-content',
- style: 'margin: auto; background: white;',
+ style: 'margin: auto; background: var(--dk-card-background);',
disabled: !userCanEdit,
tooltip: userCanEdit ? null : DISABLED_ACTION_TEXT,
tooltipPosition: 'bottom',
diff --git a/testgen/ui/queries/project_queries.py b/testgen/ui/queries/project_queries.py
index 5c087064..342702e9 100644
--- a/testgen/ui/queries/project_queries.py
+++ b/testgen/ui/queries/project_queries.py
@@ -53,6 +53,12 @@ def get_summary_by_code(project_code: str) -> pd.Series:
FROM {schema}.test_runs
LEFT JOIN {schema}.test_suites ON test_runs.test_suite_id = test_suites.id
WHERE test_suites.project_code = '{project_code}'
- ) AS test_runs_ct;
+ ) AS test_runs_ct,
+ (
+ SELECT COALESCE(observability_api_key, '') <> ''
+ AND COALESCE(observability_api_url, '') <> ''
+ FROM {schema}.projects
+ WHERE project_code = '{project_code}'
+ ) AS can_export_to_observability;
"""
return db.retrieve_data(sql).iloc[0]
diff --git a/testgen/ui/views/test_suites.py b/testgen/ui/views/test_suites.py
index fc40ae5a..524c74f5 100644
--- a/testgen/ui/views/test_suites.py
+++ b/testgen/ui/views/test_suites.py
@@ -76,6 +76,7 @@ def render(self, project_code: str, table_group_id: str | None = None, **_kwargs
"connections_ct": format_field(project_summary["connections_ct"]),
"table_groups_ct": format_field(project_summary["table_groups_ct"]),
"default_connection_id": format_field(project_summary["default_connection_id"]),
+ "can_export_to_observability": format_field(project_summary["can_export_to_observability"]),
},
"test_suites": [
{
diff --git a/testgen/utils/__init__.py b/testgen/utils/__init__.py
index e31bb0a2..a73b2770 100644
--- a/testgen/utils/__init__.py
+++ b/testgen/utils/__init__.py
@@ -60,6 +60,8 @@ def format_field(field: Any) -> Any:
return int(field)
elif isinstance(field, np.floating):
return float(field)
+ elif isinstance(field, np.bool_):
+ return bool(field)
return field
From 6b6437f03bba744d88b0a3cba1ea89ac23277a07 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Fri, 9 May 2025 01:44:49 -0400
Subject: [PATCH 09/33] feat(issue-report): add column tags to pdf reports
---
testgen/ui/pdf/hygiene_issue_report.py | 44 ++++++++++++++++----
testgen/ui/pdf/test_result_report.py | 46 +++++++++++++++++----
testgen/ui/queries/scoring_queries.py | 39 ++++++++++++++++-
testgen/ui/services/test_results_service.py | 19 ++++++++-
testgen/ui/views/hygiene_issues.py | 23 ++++++++++-
5 files changed, 149 insertions(+), 22 deletions(-)
diff --git a/testgen/ui/pdf/hygiene_issue_report.py b/testgen/ui/pdf/hygiene_issue_report.py
index 7a0462ab..1e3ddda3 100644
--- a/testgen/ui/pdf/hygiene_issue_report.py
+++ b/testgen/ui/pdf/hygiene_issue_report.py
@@ -44,8 +44,8 @@ def build_summary_table(document, hi_data):
*[
(cmd[0], *coords, *cmd[1:])
for coords in (
- ((2, 2), (2, 4)),
- ((0, 0), (0, -1))
+ ((2, 2), (2, -3)),
+ ((0, 0), (0, -2))
)
for cmd in (
("FONT", "Helvetica-Bold"),
@@ -63,10 +63,11 @@ def build_summary_table(document, hi_data):
("SPAN", (3, 3), (4, 3)),
("SPAN", (3, 4), (4, 4)),
("SPAN", (3, 5), (4, 5)),
- ("SPAN", (2, 5), (4, 5)),
+ ("SPAN", (1, 6), (4, 6)),
+ ("SPAN", (0, 7), (4, 7)),
# Link cell
- ("BACKGROUND", (2, 5), (4, 5), colors.white),
+ ("BACKGROUND", (0, 7), (4, 7), colors.white),
# Status cell
*[
@@ -105,12 +106,37 @@ def build_summary_table(document, hi_data):
),
),
- ("Database/Schema", hi_data["schema_name"], "Profiling Date", profiling_timestamp),
- ("Table", hi_data["table_name"], "Table Group", hi_data["table_groups_name"]),
- ("Column", hi_data["column_name"], "Disposition", hi_data["disposition"] or "No Decision"),
+ ("Profiling Date", profiling_timestamp, "Table Group", hi_data["table_groups_name"]),
+ ("Database/Schema", hi_data["schema_name"], "Disposition", hi_data["disposition"] or "No Decision"),
+ ("Table", hi_data["table_name"], "Column Type", hi_data["column_type"]),
+ ("Column", hi_data["column_name"], "Semantic Data Type", hi_data["functional_data_type"]),
+ (
+ "Column Tags",
+ (
+ Paragraph(
+ "Critical data element: Yes" if hi_data["critical_data_element"] else "Critical data element: No",
+ style=PARA_STYLE_CELL,
+ ),
+ Paragraph(f"Description: {hi_data['column_description']}", style=PARA_STYLE_CELL)
+ if hi_data["column_description"]
+ else [],
+ [
+ Paragraph(f"{tag.replace('_', ' ').capitalize()}: {hi_data[tag]}", style=PARA_STYLE_CELL)
+ for tag in [
+ "data_source",
+ "source_system",
+ "source_process",
+ "business_domain",
+ "stakeholder_group",
+ "transform_level",
+ "aggregation_level",
+ "data_product",
+ ]
+ if hi_data[tag]
+ ],
+ ),
+ ),
(
- "Column Type",
- hi_data["column_type"],
Paragraph(
f"""
View on TestGen >
diff --git a/testgen/ui/pdf/test_result_report.py b/testgen/ui/pdf/test_result_report.py
index c60cfc3e..883b0346 100644
--- a/testgen/ui/pdf/test_result_report.py
+++ b/testgen/ui/pdf/test_result_report.py
@@ -55,7 +55,7 @@ def build_summary_table(document, tr_data):
*[
(cmd[0], *coords, *cmd[1:])
for coords in (
- ((3, 3), (3, -2)),
+ ((3, 3), (3, -3)),
((0, 0), (0, -2))
)
for cmd in (
@@ -78,10 +78,11 @@ def build_summary_table(document, tr_data):
("SPAN", (4, 5), (5, 5)),
("SPAN", (1, 6), (2, 6)),
("SPAN", (4, 6), (5, 6)),
- ("SPAN", (0, 7), (5, 7)),
+ ("SPAN", (1, 7), (5, 7)),
+ ("SPAN", (0, 8), (5, 8)),
# Link cell
- ("BACKGROUND", (0, 7), (5, 7), colors.white),
+ ("BACKGROUND", (0, 8), (5, 8), colors.white),
# Measure cell
("FONT", (1, 1), (1, 1), "Helvetica-Bold"),
@@ -118,10 +119,36 @@ def build_summary_table(document, tr_data):
("Measured Value", tr_data["result_measure"], tr_data["measure_uom_description"]),
("Threshold Value", tr_data["threshold_value"], tr_data["threshold_description"]),
- ("Test Run Date", test_timestamp, None, "Table Group", tr_data["table_groups_name"]),
- ("Database/Schema", tr_data["schema_name"], None, "Test Suite", tr_data["test_suite"]),
+ ("Test Run Date", test_timestamp, None, "Test Suite", tr_data["test_suite"]),
+ ("Database/Schema", tr_data["schema_name"], None, "Table Group", tr_data["table_groups_name"]),
("Table", tr_data["table_name"], None, "Data Quality Dimension", tr_data["dq_dimension"]),
("Column", tr_data["column_names"], None, "Disposition", tr_data["disposition"] or "No Decision"),
+ (
+ "Column Tags",
+ (
+ Paragraph(
+ "Critical data element: Yes" if tr_data["critical_data_element"] else "Critical data element: No",
+ style=PARA_STYLE_CELL,
+ ),
+ Paragraph(f"Description: {tr_data['column_description']}", style=PARA_STYLE_CELL)
+ if tr_data["column_description"]
+ else [],
+ [
+ Paragraph(f"{tag.replace('_', ' ').capitalize()}: {tr_data[tag]}", style=PARA_STYLE_CELL)
+ for tag in [
+ "data_source",
+ "source_system",
+ "source_process",
+ "business_domain",
+ "stakeholder_group",
+ "transform_level",
+ "aggregation_level",
+ "data_product",
+ ]
+ if tr_data[tag]
+ ],
+ ),
+ ),
(
Paragraph(
f"""
@@ -203,10 +230,11 @@ def get_report_content(document, tr_data):
yield Paragraph("TestGen Test Issue Report", PARA_STYLE_TITLE)
yield build_summary_table(document, tr_data)
- yield KeepTogether([
- Paragraph("Usage Notes", PARA_STYLE_H1),
- Paragraph(f"{tr_data['usage_notes']}", PARA_STYLE_TEXT),
- ])
+ if tr_data["usage_notes"]:
+ yield KeepTogether([
+ Paragraph("Usage Notes", PARA_STYLE_H1),
+ Paragraph(f"{tr_data['usage_notes']}", PARA_STYLE_TEXT),
+ ])
yield CondPageBreak(SECTION_MIN_AVAILABLE_HEIGHT)
yield Paragraph("Result History", PARA_STYLE_H1)
diff --git a/testgen/ui/queries/scoring_queries.py b/testgen/ui/queries/scoring_queries.py
index 99fca14b..f6a72741 100644
--- a/testgen/ui/queries/scoring_queries.py
+++ b/testgen/ui/queries/scoring_queries.py
@@ -43,7 +43,18 @@ def get_score_card_issue_reports(selected_issues: list["SelectedIssue"]):
results.profile_run_id::VARCHAR,
types.suggested_action,
results.table_groups_id::VARCHAR,
- results.anomaly_id::VARCHAR
+ results.anomaly_id::VARCHAR,
+ column_chars.functional_data_type,
+ column_chars.description as column_description,
+ COALESCE(column_chars.critical_data_element, table_chars.critical_data_element) as critical_data_element,
+ COALESCE(column_chars.data_source, table_chars.data_source, groups.data_source) as data_source,
+ COALESCE(column_chars.source_system, table_chars.source_system, groups.source_system) as source_system,
+ COALESCE(column_chars.source_process, table_chars.source_process, groups.source_process) as source_process,
+ COALESCE(column_chars.business_domain, table_chars.business_domain, groups.business_domain) as business_domain,
+ COALESCE(column_chars.stakeholder_group, table_chars.stakeholder_group, groups.stakeholder_group) as stakeholder_group,
+ COALESCE(column_chars.transform_level, table_chars.transform_level, groups.transform_level) as transform_level,
+ COALESCE(column_chars.aggregation_level, table_chars.aggregation_level) as aggregation_level,
+ COALESCE(column_chars.data_product, table_chars.data_product, groups.data_product) as data_product
FROM {schema}.profile_anomaly_results results
INNER JOIN {schema}.profile_anomaly_types types
ON results.anomaly_id = types.id
@@ -51,6 +62,13 @@ def get_score_card_issue_reports(selected_issues: list["SelectedIssue"]):
ON results.profile_run_id = runs.id
INNER JOIN {schema}.table_groups groups
ON results.table_groups_id = groups.id
+ LEFT JOIN {schema}.data_column_chars column_chars
+ ON (groups.id = column_chars.table_groups_id
+ AND results.schema_name = column_chars.schema_name
+ AND results.table_name = column_chars.table_name
+ AND results.column_name = column_chars.column_name)
+ LEFT JOIN {schema}.data_table_chars table_chars
+ ON column_chars.table_id = table_chars.table_id
WHERE results.id IN ({",".join([f"'{issue_id}'" for issue_id in profile_ids])});
"""
profile_results = pd.read_sql_query(profile_query, engine)
@@ -87,7 +105,17 @@ def get_score_card_issue_reports(selected_issues: list["SelectedIssue"]):
results.test_suite_id,
results.test_definition_id::VARCHAR as test_definition_id_runtime,
results.table_groups_id::VARCHAR,
- types.id::VARCHAR AS test_type_id
+ types.id::VARCHAR AS test_type_id,
+ column_chars.description as column_description,
+ COALESCE(column_chars.critical_data_element, table_chars.critical_data_element) as critical_data_element,
+ COALESCE(column_chars.data_source, table_chars.data_source, groups.data_source) as data_source,
+ COALESCE(column_chars.source_system, table_chars.source_system, groups.source_system) as source_system,
+ COALESCE(column_chars.source_process, table_chars.source_process, groups.source_process) as source_process,
+ COALESCE(column_chars.business_domain, table_chars.business_domain, groups.business_domain) as business_domain,
+ COALESCE(column_chars.stakeholder_group, table_chars.stakeholder_group, groups.stakeholder_group) as stakeholder_group,
+ COALESCE(column_chars.transform_level, table_chars.transform_level, groups.transform_level) as transform_level,
+ COALESCE(column_chars.aggregation_level, table_chars.aggregation_level) as aggregation_level,
+ COALESCE(column_chars.data_product, table_chars.data_product, groups.data_product) as data_product
FROM {schema}.test_results results
INNER JOIN {schema}.test_types types
ON (results.test_type = types.test_type)
@@ -95,6 +123,13 @@ def get_score_card_issue_reports(selected_issues: list["SelectedIssue"]):
ON (results.test_suite_id = suites.id)
INNER JOIN {schema}.table_groups groups
ON (results.table_groups_id = groups.id)
+ LEFT JOIN {schema}.data_column_chars column_chars
+ ON (groups.id = column_chars.table_groups_id
+ AND results.schema_name = column_chars.schema_name
+ AND results.table_name = column_chars.table_name
+ AND results.column_names = column_chars.column_name)
+ LEFT JOIN {schema}.data_table_chars table_chars
+ ON column_chars.table_id = table_chars.table_id
WHERE results.id IN ({",".join([f"'{issue_id}'" for issue_id in test_ids])});
"""
test_results = pd.read_sql_query(test_query, engine)
diff --git a/testgen/ui/services/test_results_service.py b/testgen/ui/services/test_results_service.py
index 0860fe87..7f2d886b 100644
--- a/testgen/ui/services/test_results_service.py
+++ b/testgen/ui/services/test_results_service.py
@@ -72,7 +72,17 @@ def get_test_results(
r.auto_gen,
-- These are used in the PDF report
- tt.threshold_description, tt.usage_notes, r.test_time
+ tt.threshold_description, tt.usage_notes, r.test_time,
+ dcc.description as column_description,
+ COALESCE(dcc.critical_data_element, dtc.critical_data_element) as critical_data_element,
+ COALESCE(dcc.data_source, dtc.data_source, tg.data_source) as data_source,
+ COALESCE(dcc.source_system, dtc.source_system, tg.source_system) as source_system,
+ COALESCE(dcc.source_process, dtc.source_process, tg.source_process) as source_process,
+ COALESCE(dcc.business_domain, dtc.business_domain, tg.business_domain) as business_domain,
+ COALESCE(dcc.stakeholder_group, dtc.stakeholder_group, tg.stakeholder_group) as stakeholder_group,
+ COALESCE(dcc.transform_level, dtc.transform_level, tg.transform_level) as transform_level,
+ COALESCE(dcc.aggregation_level, dtc.aggregation_level) as aggregation_level,
+ COALESCE(dcc.data_product, dtc.data_product, tg.data_product) as data_product
FROM run_results r
INNER JOIN {schema}.test_types tt
@@ -97,6 +107,13 @@ def get_test_results(
LEFT JOIN {schema}.cat_test_conditions c
ON (cn.sql_flavor = c.sql_flavor
AND r.test_type = c.test_type)
+ LEFT JOIN {schema}.data_column_chars dcc
+ ON (tg.id = dcc.table_groups_id
+ AND r.schema_name = dcc.schema_name
+ AND r.table_name = dcc.table_name
+ AND r.column_names = dcc.column_name)
+ LEFT JOIN {schema}.data_table_chars dtc
+ ON dcc.table_id = dtc.table_id
{order_by} ;
"""
df = db.retrieve_data(sql)
diff --git a/testgen/ui/views/hygiene_issues.py b/testgen/ui/views/hygiene_issues.py
index fb2b5ba9..5efd81c3 100644
--- a/testgen/ui/views/hygiene_issues.py
+++ b/testgen/ui/views/hygiene_issues.py
@@ -384,7 +384,21 @@ def get_profiling_anomalies(
END AS likelihood_order,
t.anomaly_description, r.detail, t.suggested_action,
r.anomaly_id, r.table_groups_id::VARCHAR, r.id::VARCHAR, p.profiling_starttime, r.profile_run_id::VARCHAR,
- tg.table_groups_name
+ tg.table_groups_name,
+
+ -- These are used in the PDF report
+ dcc.functional_data_type,
+ dcc.description as column_description,
+ COALESCE(dcc.critical_data_element, dtc.critical_data_element) as critical_data_element,
+ COALESCE(dcc.data_source, dtc.data_source, tg.data_source) as data_source,
+ COALESCE(dcc.source_system, dtc.source_system, tg.source_system) as source_system,
+ COALESCE(dcc.source_process, dtc.source_process, tg.source_process) as source_process,
+ COALESCE(dcc.business_domain, dtc.business_domain, tg.business_domain) as business_domain,
+ COALESCE(dcc.stakeholder_group, dtc.stakeholder_group, tg.stakeholder_group) as stakeholder_group,
+ COALESCE(dcc.transform_level, dtc.transform_level, tg.transform_level) as transform_level,
+ COALESCE(dcc.aggregation_level, dtc.aggregation_level) as aggregation_level,
+ COALESCE(dcc.data_product, dtc.data_product, tg.data_product) as data_product
+
FROM {schema}.profile_anomaly_results r
INNER JOIN {schema}.profile_anomaly_types t
ON r.anomaly_id = t.id
@@ -392,6 +406,13 @@ def get_profiling_anomalies(
ON r.profile_run_id = p.id
INNER JOIN {schema}.table_groups tg
ON r.table_groups_id = tg.id
+ LEFT JOIN {schema}.data_column_chars dcc
+ ON (tg.id = dcc.table_groups_id
+ AND r.schema_name = dcc.schema_name
+ AND r.table_name = dcc.table_name
+ AND r.column_name = dcc.column_name)
+ LEFT JOIN {schema}.data_table_chars dtc
+ ON dcc.table_id = dtc.table_id
WHERE r.profile_run_id = '{profile_run_id}'
{criteria}
{order_by}
From b3ae97673614e971fd604ecba45c243c6ead4afd Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Fri, 16 May 2025 01:55:24 -0400
Subject: [PATCH 10/33] refactor(data-catalog): move filter and empty states to
vanjs
---
testgen/ui/assets/style.css | 4 +
.../frontend/js/pages/data_catalog.js | 240 ++++++++++++------
testgen/ui/queries/profiling_queries.py | 4 +-
testgen/ui/views/data_catalog.py | 181 ++++++-------
4 files changed, 253 insertions(+), 176 deletions(-)
diff --git a/testgen/ui/assets/style.css b/testgen/ui/assets/style.css
index 49a2f37f..4744e722 100644
--- a/testgen/ui/assets/style.css
+++ b/testgen/ui/assets/style.css
@@ -112,6 +112,10 @@ div[data-testid="stSpinner"] > div > i {
border-color: var(--primary-color) rgba(49, 51, 63, 0.2) rgba(49, 51, 63, 0.2);
}
+div.st-key-data_catalog-spinner {
+ position: absolute;
+}
+
/* Theming for buttons, tabs and form inputs */
button[data-testid="stBaseButton-secondary"]:hover,
button[data-testid="stBaseButton-secondary"]:focus:not(:active),
diff --git a/testgen/ui/components/frontend/js/pages/data_catalog.js b/testgen/ui/components/frontend/js/pages/data_catalog.js
index fd0adb8f..56cb882f 100644
--- a/testgen/ui/components/frontend/js/pages/data_catalog.js
+++ b/testgen/ui/components/frontend/js/pages/data_catalog.js
@@ -2,6 +2,13 @@
* @import { Column, Table } from '../data_profiling/data_profiling_utils.js';
* @import { TreeNode } from '../components/tree.js';
*
+ * @typedef ProjectSummary
+ * @type {object}
+ * @property {string} project_code
+ * @property {number} connections_ct
+ * @property {number} table_groups_ct
+ * @property {string} default_connection_id
+ *
* @typedef ColumnPath
* @type {object}
* @property {string} column_id
@@ -22,8 +29,9 @@
*
* @typedef Properties
* @type {object}
+ * @property {ProjectSummary} project_summary
* @property {ColumnPath[]} columns
- * @property {Table | Column} selected
+ * @property {Table | Column} selected_item
* @property {Object.} tag_values
* @property {string} last_saved_timestamp
* @property {Permissions} permissions
@@ -48,6 +56,7 @@ import { capitalize } from '../display_utils.js';
import { TableSizeCard } from '../data_profiling/table_size.js';
import { Card } from '../components/card.js';
import { Button } from '../components/button.js';
+import { EMPTY_STATE_MESSAGE, EmptyState } from '../components/empty_state.js';
const { div, h2, span, i } = van.tags;
@@ -80,14 +89,14 @@ const TAG_HELP = {
const DataCatalog = (/** @type Properties */ props) => {
loadStylesheet('data-catalog', stylesheet);
Streamlit.setFrameHeight(1); // Non-zero value is needed to render
- window.frameElement.style.setProperty('height', 'calc(100vh - 175px)');
+ window.frameElement.style.setProperty('height', 'calc(100vh - 85px)');
window.testgen.isPage = true;
/** @type TreeNode[] */
const treeNodes = van.derive(() => {
let columns = [];
try {
- columns = JSON.parse(getValue(props.columns));
+ columns = JSON.parse(getValue(props.columns) ?? []);
} catch { }
const tables = {};
@@ -119,7 +128,7 @@ const DataCatalog = (/** @type Properties */ props) => {
const selectedItem = van.derive(() => {
try {
- return JSON.parse(getValue(props.selected));
+ return JSON.parse(getValue(props.selected_item));
} catch (e) {
console.error(e)
return null;
@@ -146,81 +155,102 @@ const DataCatalog = (/** @type Properties */ props) => {
TAG_KEYS.forEach(key => filters[key] = van.state(null));
const userCanEdit = getValue(props.permissions)?.can_edit ?? false;
+ const userCanNavigate = getValue(props.permissions)?.can_navigate ?? false;
+ const projectSummary = getValue(props.project_summary);
- return div(
- {
- class: 'flex-row tg-dh',
- ondragover: (event) => event.preventDefault(),
- },
- Tree(
- {
- id: treeDomId,
- classes: 'tg-dh--tree',
- nodes: treeNodes,
- // Use .rawVal, so only initial value from query params is passed to tree
- selected: selectedItem.rawVal ? `${selectedItem.rawVal.type}_${selectedItem.rawVal.id}` : null,
- onSelect: (/** @type string */ selected) => emitEvent('ItemSelected', { payload: selected }),
- multiSelect: multiEditMode,
- multiSelectToggle: userCanEdit,
- onMultiSelect: (/** @type string[] | null */ selected) => multiSelectedItems.val = selected,
- isNodeHidden: (/** @type TreeNode */ node) => {
- let hidden = ![ node.criticalDataElement, false ].includes(filters.criticalDataElement.val);
- hidden = hidden || TAG_KEYS.some(key => ![ node[key], null ].includes(filters[key].val));
- return hidden;
- },
- hasActiveFilters: () => filters.criticalDataElement.val || TAG_KEYS.some(key => !!filters[key].val),
- onResetFilters: () => {
- filters.criticalDataElement.val = false;
- TAG_KEYS.forEach(key => filters[key].val = null);
- },
- },
- // Pass as a function that will be called when the filter portal is opened
- // Otherwise state bindings get garbage collected and Select dropdowns won't open
- // https://vanjs.org/advanced#gc
+ return projectSummary.table_groups_ct > 0
+ ? div(
+ { class: 'flex-column tg-dh' },
() => div(
- Checkbox({
- label: 'Only critical data elements (CDEs)',
- checked: filters.criticalDataElement,
- onChange: (checked) => filters.criticalDataElement.val = checked,
+ { class: 'flex-row fx-align-flex-end fx-justify-space-between mb-2' },
+ Select({
+ label: 'Table Group',
+ value: getValue(props.table_group_filter_options)?.find((op) => op.selected)?.value ?? null,
+ options: getValue(props.table_group_filter_options) ?? [],
+ height: 38,
+ style: 'font-size: 14px;',
+ testId: 'table-group-filter',
+ onChange: (value) => emitEvent('TableGroupSelected', {payload: value}),
}),
- div(
+ ),
+ () => treeNodes.val.length
+ ? div(
{
- class: 'flex-row fx-flex-wrap fx-gap-4 fx-justify-space-between mt-4',
- style: 'max-width: 420px;',
+ class: 'flex-row tg-dh--content',
+ ondragover: (event) => event.preventDefault(),
},
- TAG_KEYS.map(key => Select({
- id: `data-catalog-${key}`,
- label: capitalize(key.replaceAll('_', ' ')),
- height: 32,
- value: filters[key],
- options: getValue(props.tag_values)?.[key]?.map(key => ({ label: key, value: key })),
- allowNull: true,
- disabled: !getValue(props.tag_values)?.[key]?.length,
- onChange: v => filters[key].val = v,
- })),
- ),
- ),
- ),
- div(
- {
- class: 'tg-dh--dragger',
- draggable: true,
- ondragstart: (event) => {
- event.dataTransfer.effectAllowed = 'move';
- event.dataTransfer.setDragImage(EMPTY_IMAGE, 0, 0);
- dragState.val = { startX: event.screenX, startWidth: document.getElementById(treeDomId).offsetWidth };
- },
- ondragend: (event) => {
- dragResize(event);
- dragState.val = null;
- },
- ondrag: van.derive(() => dragState.val ? dragResize : null),
- },
- ),
- () => multiEditMode.val
- ? MultiEdit(props, multiSelectedItems, multiEditMode)
- : SelectedDetails(props, selectedItem.val),
- );
+ Tree(
+ {
+ id: treeDomId,
+ classes: 'tg-dh--tree',
+ nodes: treeNodes,
+ // Use .rawVal, so only initial value from query params is passed to tree
+ selected: selectedItem.rawVal ? `${selectedItem.rawVal.type}_${selectedItem.rawVal.id}` : null,
+ onSelect: (/** @type string */ selected) => emitEvent('ItemSelected', { payload: selected }),
+ multiSelect: multiEditMode,
+ multiSelectToggle: userCanEdit,
+ onMultiSelect: (/** @type string[] | null */ selected) => multiSelectedItems.val = selected,
+ isNodeHidden: (/** @type TreeNode */ node) => {
+ let hidden = ![ node.criticalDataElement, false ].includes(filters.criticalDataElement.val);
+ hidden = hidden || TAG_KEYS.some(key => ![ node[key], null ].includes(filters[key].val));
+ return hidden;
+ },
+ hasActiveFilters: () => filters.criticalDataElement.val || TAG_KEYS.some(key => !!filters[key].val),
+ onResetFilters: () => {
+ filters.criticalDataElement.val = false;
+ TAG_KEYS.forEach(key => filters[key].val = null);
+ },
+ },
+ // Pass as a function that will be called when the filter portal is opened
+ // Otherwise state bindings get garbage collected and Select dropdowns won't open
+ // https://vanjs.org/advanced#gc
+ () => div(
+ Checkbox({
+ label: 'Only critical data elements (CDEs)',
+ checked: filters.criticalDataElement,
+ onChange: (checked) => filters.criticalDataElement.val = checked,
+ }),
+ div(
+ {
+ class: 'flex-row fx-flex-wrap fx-gap-4 fx-justify-space-between mt-4',
+ style: 'max-width: 420px;',
+ },
+ TAG_KEYS.map(key => Select({
+ id: `data-catalog-${key}`,
+ label: capitalize(key.replaceAll('_', ' ')),
+ height: 32,
+ value: filters[key],
+ options: getValue(props.tag_values)?.[key]?.map(key => ({ label: key, value: key })),
+ allowNull: true,
+ disabled: !getValue(props.tag_values)?.[key]?.length,
+ onChange: v => filters[key].val = v,
+ })),
+ ),
+ ),
+ ),
+ div(
+ {
+ class: 'tg-dh--dragger',
+ draggable: true,
+ ondragstart: (event) => {
+ event.dataTransfer.effectAllowed = 'move';
+ event.dataTransfer.setDragImage(EMPTY_IMAGE, 0, 0);
+ dragState.val = { startX: event.screenX, startWidth: document.getElementById(treeDomId).offsetWidth };
+ },
+ ondragend: (event) => {
+ dragResize(event);
+ dragState.val = null;
+ },
+ ondrag: van.derive(() => dragState.val ? dragResize : null),
+ },
+ ),
+ () => multiEditMode.val
+ ? MultiEdit(props, multiSelectedItems, multiEditMode)
+ : SelectedDetails(props, selectedItem.val),
+ )
+ : ConditionalEmptyState(projectSummary, userCanEdit, userCanNavigate),
+ )
+ : ConditionalEmptyState(projectSummary, userCanEdit, userCanNavigate);
};
const SelectedDetails = (/** @type Properties */ props, /** @type Table | Column */ item) => {
@@ -253,7 +283,7 @@ const SelectedDetails = (/** @type Properties */ props, /** @type Table | Column
HygieneIssuesCard({ noLinks: !userCanNavigate }, item),
TestIssuesCard({ noLinks: !userCanNavigate }, item),
)
- : EmptyState(
+ : ItemEmptyState(
'Select a table or column on the left to view its details.',
'quick_reference_all',
);
@@ -483,14 +513,14 @@ const MultiEdit = (/** @type Properties */ props, /** @type Object */ selectedIt
),
),
})
- : EmptyState(
+ : ItemEmptyState(
'Select tables or columns on the left to edit their tags.',
'edit_document',
),
);
};
-const EmptyState = (/** @type string */ message, /** @type string */ icon) => {
+const ItemEmptyState = (/** @type string */ message, /** @type string */ icon) => {
return div(
{ class: 'flex-column fx-align-flex-center fx-justify-center tg-dh--no-selection' },
Icon({ size: 80, classes: 'text-disabled mb-5' }, icon),
@@ -498,10 +528,66 @@ const EmptyState = (/** @type string */ message, /** @type string */ icon) => {
);
};
+const ConditionalEmptyState = ( // Builds the page-level empty state matching the project's setup progress
+    /** @type ProjectSummary */ projectSummary,
+    /** @type boolean */ userCanEdit,
+    /** @type boolean */ userCanNavigate,
+) => {
+    let args = { // Default: connections and table groups exist, so offer to run profiling
+        label: 'No profiling data yet',
+        message: EMPTY_STATE_MESSAGE.profiling,
+        button: Button({
+            icon: 'play_arrow',
+            type: 'stroked',
+            color: 'primary',
+            label: 'Run Profiling',
+            width: 'fit-content',
+            style: 'margin: auto; background: var(--dk-card-background);',
+            disabled: !userCanEdit,
+            tooltip: userCanEdit ? null : DISABLED_ACTION_TEXT,
+            tooltipPosition: 'bottom',
+            onclick: () => emitEvent('RunProfilingClicked', {}),
+        }),
+    };
+    if (projectSummary.connections_ct <= 0) { // No connections: link to the Connections page instead
+        args = {
+            label: 'Your project is empty',
+            message: EMPTY_STATE_MESSAGE.connection,
+            link: {
+                label: 'Go to Connections',
+                href: 'connections',
+                params: { project_code: projectSummary.project_code },
+                disabled: !userCanNavigate,
+            },
+        };
+    } else if (projectSummary.table_groups_ct <= 0) { // Connections but no table groups: link to Table Groups
+        args = {
+            label: 'Your project is empty',
+            message: EMPTY_STATE_MESSAGE.tableGroup,
+            link: {
+                label: 'Go to Table Groups',
+                href: 'connections:table-groups',
+                params: { connection_id: projectSummary.default_connection_id },
+                disabled: !userCanNavigate,
+            },
+        };
+    }
+
+    return EmptyState({
+        icon: 'dataset',
+        ...args,
+    });
+};
+
const stylesheet = new CSSStyleSheet();
stylesheet.replace(`
.tg-dh {
height: 100%;
+}
+
+.tg-dh--content {
+ min-height: 0;
+ flex: auto;
align-items: stretch;
}
diff --git a/testgen/ui/queries/profiling_queries.py b/testgen/ui/queries/profiling_queries.py
index ab18f736..34db8ce5 100644
--- a/testgen/ui/queries/profiling_queries.py
+++ b/testgen/ui/queries/profiling_queries.py
@@ -144,7 +144,7 @@ def get_profiling_results(profiling_run_id: str, table_name: str, column_name: s
return db.retrieve_data(query)
-@st.cache_data(show_spinner="Loading data ...")
+@st.cache_data(show_spinner=False)
def get_table_by_id(table_id: str, table_group_id: str) -> dict | None:
if not is_uuid4(table_id):
return None
@@ -200,7 +200,7 @@ def get_table_by_id(table_id: str, table_group_id: str) -> dict | None:
return json.loads(results.to_json(orient="records"))[0]
-@st.cache_data(show_spinner="Loading data ...")
+@st.cache_data(show_spinner=False)
def get_column_by_id(
column_id: str,
table_group_id: str,
diff --git a/testgen/ui/views/data_catalog.py b/testgen/ui/views/data_catalog.py
index 553720e6..bf37dd72 100644
--- a/testgen/ui/views/data_catalog.py
+++ b/testgen/ui/views/data_catalog.py
@@ -14,13 +14,14 @@
from testgen.ui.components.widgets import testgen_component
from testgen.ui.navigation.menu import MenuItem
from testgen.ui.navigation.page import Page
+from testgen.ui.navigation.router import Router
from testgen.ui.queries import project_queries
from testgen.ui.queries.profiling_queries import TAG_FIELDS, get_column_by_id, get_hygiene_issues, get_table_by_id
from testgen.ui.services import user_session_service
from testgen.ui.session import session
from testgen.ui.views.dialogs.data_preview_dialog import data_preview_dialog
from testgen.ui.views.dialogs.run_profiling_dialog import run_profiling_dialog
-from testgen.utils import friendly_score, score
+from testgen.utils import format_field, friendly_score, is_uuid4, score
PAGE_ICON = "dataset"
PAGE_TITLE = "Data Catalog"
@@ -39,75 +40,88 @@ def render(self, project_code: str, table_group_id: str | None = None, selected:
PAGE_TITLE,
)
- user_can_navigate = not user_session_service.user_has_catalog_role()
-
- if render_empty_state(project_code, user_can_navigate):
- return
-
- group_filter_column, _, loading_column = st.columns([.3, .5, .2], vertical_alignment="center")
-
- with group_filter_column:
- table_groups_df = get_table_group_options(project_code)
- table_group_id = testgen.select(
- options=table_groups_df,
- value_column="id",
- display_column="table_groups_name",
- default_value=table_group_id,
- required=True,
- label="Table Group",
- bind_to_query="table_group_id",
- )
-
- with loading_column:
- columns_df = get_table_group_columns(table_group_id)
- selected_item = get_selected_item(selected, table_group_id)
- if selected_item:
- selected_item["project_code"] = project_code
- selected_item["connection_id"] = str(
- table_groups_df.loc[table_groups_df["id"] == table_group_id].iloc[0]["connection_id"])
- else:
- self.router.set_query_params({ "selected": None })
-
- if columns_df.empty:
- table_group = table_groups_df.loc[table_groups_df["id"] == table_group_id].iloc[0]
- testgen.empty_state(
- label="No profiling data yet",
- icon=PAGE_ICON,
- message=testgen.EmptyStateMessage.Profiling,
- action_label="Run Profiling",
- action_disabled=not user_session_service.user_can_edit(),
- button_onclick=partial(run_profiling_dialog, project_code, table_group),
- button_icon="play_arrow",
- )
+ _, loading_column = st.columns([.4, .6])
+ spinner_container = loading_column.container(key="data_catalog:spinner")
+
+ with spinner_container:
+ with st.spinner(text="Loading data ..."):
+ # Make sure none of the loading logic uses @st.cache_data(show_spinner=True)
+ # Otherwise, the testgen_component randomly remounts for no reason when selecting items
+ # (something to do with displaying the extra cache spinner next to the custom component)
+ # Enclosing the loading logic in a Streamlit container also fixes it
+
+ project_summary = project_queries.get_summary_by_code(project_code)
+ user_can_navigate = not user_session_service.user_has_catalog_role()
+ table_groups = get_table_group_options(project_code)
+
+ if not table_group_id or table_group_id not in table_groups["id"].values:
+ table_group_id = table_groups.iloc[0]["id"] if not table_groups.empty else None
+ on_table_group_selected(table_group_id)
+
+ columns, selected_item, selected_table_group = pd.DataFrame(), None, None
+ if table_group_id:
+ selected_table_group = table_groups.loc[table_groups["id"] == table_group_id].iloc[0]
+ columns = get_table_group_columns(table_group_id)
+ selected_item = get_selected_item(selected, table_group_id)
+
+ if selected_item:
+ selected_item["project_code"] = project_code
+ selected_item["connection_id"] = format_field(selected_table_group["connection_id"])
else:
- def on_item_selected(item_id):
- self.router.set_query_params({ "selected": item_id })
-
- testgen_component(
- "data_catalog",
- props={
- "columns": columns_df.to_json(orient="records"),
- "selected": json.dumps(selected_item),
- "tag_values": get_tag_values(),
- "last_saved_timestamp": st.session_state.get("data_catalog:last_saved_timestamp"),
- "permissions": {
- "can_edit": user_session_service.user_can_disposition(),
- "can_navigate": user_can_navigate,
- },
+ on_item_selected(None)
+
+ testgen_component(
+ "data_catalog",
+ props={
+ "project_summary": {
+ "project_code": project_code,
+ "connections_ct": format_field(project_summary["connections_ct"]),
+ "table_groups_ct": format_field(project_summary["table_groups_ct"]),
+ "default_connection_id": format_field(project_summary["default_connection_id"]),
},
- on_change_handlers={
- "ItemSelected": on_item_selected,
- "DataPreviewClicked": lambda item: data_preview_dialog(
- item["table_group_id"],
- item["schema_name"],
- item["table_name"],
- item.get("column_name"),
- ),
+ "table_group_filter_options": [
+ {
+ "value": format_field(table_group["id"]),
+ "label": format_field(table_group["table_groups_name"]),
+ "selected": str(table_group_id) == str(table_group["id"]),
+ } for _, table_group in table_groups.iterrows()
+ ],
+ "columns": columns.to_json(orient="records") if not columns.empty else None,
+ "selected_item": json.dumps(selected_item),
+ "tag_values": get_tag_values(),
+ "last_saved_timestamp": st.session_state.get("data_catalog:last_saved_timestamp"),
+ "permissions": {
+ "can_edit": user_session_service.user_can_disposition(),
+ "can_navigate": user_can_navigate,
},
- event_handlers={ "TagsChanged": partial(on_tags_changed, loading_column) },
- )
+ },
+ on_change_handlers={
+ "RunProfilingClicked": partial(
+ run_profiling_dialog,
+ project_code,
+ selected_table_group,
+ ),
+ "TableGroupSelected": on_table_group_selected,
+ "ItemSelected": on_item_selected,
+ "DataPreviewClicked": lambda item: data_preview_dialog(
+ item["table_group_id"],
+ item["schema_name"],
+ item["table_name"],
+ item.get("column_name"),
+ ),
+ },
+ event_handlers={ "TagsChanged": partial(on_tags_changed, spinner_container) },
+ )
+
+
+def on_table_group_selected(table_group_id: str | None) -> None:  # Writes the given id to the "table_group_id" query param
+ Router().set_query_params({ "table_group_id": table_group_id })  # NOTE(review): None presumably clears the param - confirm in Router
+def on_item_selected(item_id: str | None) -> None:  # Writes the given item id to the "selected" query param
+ Router().set_query_params({ "selected": item_id })  # NOTE(review): None presumably clears the param - confirm in Router
+
+
def on_tags_changed(spinner_container: DeltaGenerator, payload: dict) -> None:
attributes = ["description"]
attributes.extend(TAG_FIELDS)
@@ -152,44 +166,17 @@ def on_tags_changed(spinner_container: DeltaGenerator, payload: dict) -> None:
st.rerun()
-def render_empty_state(project_code: str, user_can_navigate: bool) -> bool:
- project_summary_df = project_queries.get_summary_by_code(project_code)
- if project_summary_df["profiling_runs_ct"]: # Without profiling, we don't have any table and column information in db
- return False
-
- label="Your project is empty"
- testgen.whitespace(5)
- if not project_summary_df["connections_ct"]:
- testgen.empty_state(
- label=label,
- icon=PAGE_ICON,
- message=testgen.EmptyStateMessage.Connection,
- action_label="Go to Connections",
- action_disabled=not user_can_navigate,
- link_href="connections",
- link_params={ "project_code": project_code },
- )
- else:
- testgen.empty_state(
- label=label,
- icon=PAGE_ICON,
- message=testgen.EmptyStateMessage.Profiling if project_summary_df["table_groups_ct"] else testgen.EmptyStateMessage.TableGroup,
- action_label="Go to Table Groups",
- action_disabled=not user_can_navigate,
- link_href="connections:table-groups",
- link_params={ "connection_id": str(project_summary_df["default_connection_id"]) }
- )
- return True
-
-
@st.cache_data(show_spinner=False)
def get_table_group_options(project_code):
schema = st.session_state["dbschema"]
return dq.run_table_groups_lookup_query(schema, project_code)
-@st.cache_data(show_spinner="Loading data ...")
+@st.cache_data(show_spinner=False)
def get_table_group_columns(table_group_id: str) -> pd.DataFrame:
+ if not is_uuid4(table_group_id):
+ return pd.DataFrame()
+
schema = st.session_state["dbschema"]
sql = f"""
SELECT CONCAT('column_', column_chars.column_id) AS column_id,
@@ -215,7 +202,7 @@ def get_table_group_columns(table_group_id: str) -> pd.DataFrame:
def get_selected_item(selected: str, table_group_id: str) -> dict | None:
- if not selected:
+ if not selected or not is_uuid4(table_group_id):
return None
item_type, item_id = selected.split("_", 2)
From 0f206f3ee3737c47462ea59189addc06b1e96fad Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Tue, 20 May 2025 01:10:40 -0400
Subject: [PATCH 11/33] refactor: download excel dialogs
---
.../ui/components/widgets/download_dialog.py | 82 +++++++-
testgen/ui/queries/profiling_queries.py | 7 +-
testgen/ui/services/form_service.py | 105 ----------
.../ui/views/dialogs/generate_tests_dialog.py | 2 +-
testgen/ui/views/hygiene_issues.py | 55 ++++--
testgen/ui/views/profiling_results.py | 180 +++++++++++-------
testgen/ui/views/test_definitions.py | 84 ++++----
testgen/ui/views/test_results.py | 94 +++++----
8 files changed, 335 insertions(+), 274 deletions(-)
diff --git a/testgen/ui/components/widgets/download_dialog.py b/testgen/ui/components/widgets/download_dialog.py
index a908043d..0a43a748 100644
--- a/testgen/ui/components/widgets/download_dialog.py
+++ b/testgen/ui/components/widgets/download_dialog.py
@@ -1,12 +1,92 @@
import tempfile
from collections.abc import Callable, Iterable
+from io import BytesIO
+from typing import TypedDict
from zipfile import ZipFile
+import pandas as pd
import streamlit as st
+from testgen.common import date_service
+
PROGRESS_UPDATE_TYPE = Callable[[float], None]
-FILE_DATA_TYPE = tuple[str, str, str|bytes]
+FILE_DATA_TYPE = tuple[str, str, str | bytes]
+
+
+class ExcelColumnOptions(TypedDict, total=False):
+    """Per-column settings for an Excel export.
+
+    Both keys are optional (``total=False``): callers pass ``{}`` or only one
+    of the keys, and the options are read with ``.get()``.
+    """
+
+    # Header text shown for the column in the exported table.
+    header: str
+    # When truthy, the column is given a fixed width with wrapped text.
+    wrap: bool
+
+
+def get_excel_file_data(
+ data: pd.DataFrame,
+ title: str,
+ details: dict[str, str] | None = None,
+ columns: dict[str, ExcelColumnOptions] | None = None,
+ update_progress: PROGRESS_UPDATE_TYPE | None = None,
+) -> FILE_DATA_TYPE:
+ """Render a DataFrame as a styled single-sheet Excel workbook held in memory.
+
+ Args:
+     data: Source rows; only the columns named in ``columns`` are exported.
+     title: Written to cell A2 and used as the file name (``<title>.xlsx``).
+     details: Optional key/value pairs written below the export timestamp,
+         one pair per row starting at row 4 (key in column A, value in B).
+     columns: Maps DataFrame column keys to their export options, in export
+         order. When empty or None, every column of ``data`` is exported
+         with default options.
+     update_progress: Optional callback; called once with ``1.0`` after the
+         workbook content has been written.
+
+ Returns:
+     A ``(file name, MIME type, file bytes)`` tuple.
+ """
+ if not columns:
+ columns = { col: {} for col in data.columns }
+
+ filtered_data = data[columns.keys()]
+ # 0-based row of the table header: leaves room above it for the title (row 2),
+ # the timestamp (row 3) and one row per detail entry.
+ start_row = 4 + len(details or {})
+
+ with BytesIO() as buffer:
+ with pd.ExcelWriter(buffer, engine="xlsxwriter") as writer:
+ # Data
+ filtered_data.to_excel(writer, index=False, sheet_name="Sheet1", startrow=start_row)
+
+ workbook = writer.book
+ worksheet = writer.sheets["Sheet1"]
+ # Top-align columns 0-1000, then autofit. This happens before the title, details and
+ # wrap widths are applied - presumably so those do not influence the fitted widths.
+ worksheet.set_column(0, 1000, None, workbook.add_format({"valign": "top"}))
+ worksheet.autofit()
+
+ # Title
+ worksheet.write(
+ "A2",
+ title,
+ workbook.add_format({"bold": True, "size": 14}),
+ )
+
+ details_key_format = workbook.add_format({"size": 9})
+ details_value_format = workbook.add_format({"italic": True, "size": 9})
+
+ # Timestamp
+ worksheet.write("A3", "Exported on", details_key_format)
+ worksheet.write("B3", date_service.get_timezoned_now(st.session_state), details_value_format)
+
+ # Details
+ if details:
+ for index, (key, value) in enumerate(details.items()):
+ worksheet.write(f"A{4 + index}", key, details_key_format)
+ worksheet.write(f"B{4 + index}", value, details_value_format)
+
+ # Headers + table style
+ # A column without a "header" option gets its key with underscores turned into
+ # spaces and capitalized, e.g. "threshold_value" -> "Threshold value".
+ (max_row, max_col) = filtered_data.shape
+ headers = [
+ {"header": options.get("header", key.replace("_", " ").capitalize())}
+ for key, options in columns.items()
+ ]
+ # The table range spans the header row plus max_row data rows.
+ worksheet.add_table(
+ start_row,
+ 0,
+ max_row + start_row,
+ max_col - 1,
+ {"columns": headers, "style": "Table Style Medium 16"},
+ )
+
+ # Wrap columns
+ # Columns flagged with "wrap" get a fixed width of 60 and wrapped, top-aligned text.
+ wrap_format = workbook.add_format({"text_wrap": True, "valign": "top"})
+ for index, options in enumerate(columns.values()):
+ if options.get("wrap"):
+ worksheet.set_column(index, index, 60, wrap_format)
+
+ if update_progress:
+ update_progress(1.0)
+ buffer.seek(0)
+ return f"{title}.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", buffer.getvalue()
+
def zip_multi_file_data(
zip_file_name: str,
diff --git a/testgen/ui/queries/profiling_queries.py b/testgen/ui/queries/profiling_queries.py
index 34db8ce5..21b3dc1f 100644
--- a/testgen/ui/queries/profiling_queries.py
+++ b/testgen/ui/queries/profiling_queries.py
@@ -129,12 +129,7 @@ def get_profiling_results(profiling_run_id: str, table_name: str, column_name: s
WHERE profile_run_id = profile_results.profile_run_id
AND table_name = profile_results.table_name
AND column_name = profile_results.column_name
- ) THEN 'Yes' END AS hygiene_issues,
- distinct_value_hash,
- fractional_sum,
- date_days_present,
- date_weeks_present,
- date_months_present
+ ) THEN 'Yes' END AS hygiene_issues
FROM {schema}.profile_results
WHERE profile_run_id = '{profiling_run_id}'
AND table_name ILIKE '{table_name}'
diff --git a/testgen/ui/services/form_service.py b/testgen/ui/services/form_service.py
index 09981f80..ee4bf4a1 100644
--- a/testgen/ui/services/form_service.py
+++ b/testgen/ui/services/form_service.py
@@ -1,7 +1,6 @@
import typing
from builtins import float
from enum import Enum
-from io import BytesIO
from pathlib import Path
from time import sleep
@@ -11,7 +10,6 @@
from st_aggrid import AgGrid, ColumnsAutoSizeMode, DataReturnMode, GridOptionsBuilder, GridUpdateMode, JsCode
from streamlit_extras.no_default_selectbox import selectbox
-import testgen.common.date_service as date_service
import testgen.ui.services.database_service as db
from testgen.ui.navigation.router import Router
@@ -170,109 +168,6 @@ def render_widget(self, boo_form_display_only=False):
raise ValueError(f"Widget {self.widget} is not supported.")
-@st.cache_data(show_spinner=False)
-def _generate_excel_export(
- df_data, lst_export_columns, str_title=None, str_caption=None, lst_wrap_columns=None, lst_column_headers=None
-):
- if lst_export_columns:
- # Filter the DataFrame to keep only the columns in lst_export_columns
- df_to_export = df_data[lst_export_columns]
- else:
- lst_export_columns = list(df_data.columns)
- df_to_export = df_data
-
- dct_col_to_header = dict(zip(lst_export_columns, lst_column_headers, strict=True)) if lst_column_headers else None
-
- if not str_title:
- str_title = "TestGen Data Export"
- start_row = 4 if str_caption else 3
-
- # Create a BytesIO buffer to hold the Excel file
- output = BytesIO()
-
- # Create a Pandas Excel writer using XlsxWriter as the engine
- with pd.ExcelWriter(output, engine="xlsxwriter") as writer:
- # Write the DataFrame to an Excel file, starting from the fourth row
- df_to_export.to_excel(writer, index=False, sheet_name="Sheet1", startrow=start_row)
-
- # Access the XlsxWriter workbook and worksheet objects from the dataframe
- workbook = writer.book
- worksheet = writer.sheets["Sheet1"]
-
- # Add table formatting
- (max_row, max_col) = df_to_export.shape
- if dct_col_to_header:
- column_settings = [{"header": dct_col_to_header[column]} for column in df_to_export.columns]
- else:
- column_settings = [{"header": column} for column in df_to_export.columns]
- worksheet.add_table(
- start_row,
- 0,
- max_row + start_row,
- max_col - 1,
- {"columns": column_settings, "style": "Table Style Medium 16"},
- )
-
- # Define the format for wrapped text
- wrap_format = workbook.add_format(
- {
- "text_wrap": True,
- "valign": "top", # Align to the top to better display wrapped text
- }
- )
- valign_format = workbook.add_format({"valign": "top"})
-
- # Autofit the worksheet (before adding title or settingwrapped column width)
- worksheet.set_column(0, 1000, None, valign_format)
- worksheet.autofit()
-
- # Set a fixed column width for wrapped columns and apply wrap format
- approx_width = 60
- for col_idx, column in enumerate(df_to_export[lst_export_columns].columns):
- if column in lst_wrap_columns:
- # Set column width and format for wrapping
- worksheet.set_column(col_idx, col_idx, approx_width, wrap_format)
-
- # Add a cell format for the title
- title_format = workbook.add_format({"bold": True, "size": 14})
- # Write the title in cell A2 with formatting
- worksheet.write("A2", str_title, title_format)
-
- if str_caption:
- str_caption = str_caption.replace("{TIMESTAMP}", date_service.get_timezoned_now(st.session_state))
- caption_format = workbook.add_format({"italic": True, "size": 9, "valign": "top"})
- worksheet.write("A3", str_caption, caption_format)
-
- # Rewind the buffer
- output.seek(0)
-
- # Return the Excel file
- return output.getvalue()
-
-
-def render_excel_export(
- df, lst_export_columns, str_export_title=None, str_caption=None, lst_wrap_columns=None, lst_column_headers=None
-):
-
- if st.button(label=":material/download: Export", help="Download to Excel"):
- download_excel(df, lst_export_columns, str_export_title, str_caption, lst_wrap_columns, lst_column_headers)
-
-
-@st.dialog(title="Download to Excel")
-def download_excel(
- df, lst_export_columns, str_export_title=None, str_caption=None, lst_wrap_columns=None, lst_column_headers=None
-):
- st.write(f'**Are you sure you want to download "{str_export_title}.xlsx"?**')
-
- st.download_button(
- label="Download",
- data=_generate_excel_export(
- df, lst_export_columns, str_export_title, str_caption, lst_wrap_columns, lst_column_headers
- ),
- file_name=f"{str_export_title}.xlsx",
- mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
- )
-
def render_refresh_button(button_container):
with button_container:
do_refresh = st.button(":material/refresh:", help="Refresh page data", use_container_width=False)
diff --git a/testgen/ui/views/dialogs/generate_tests_dialog.py b/testgen/ui/views/dialogs/generate_tests_dialog.py
index 76476450..89013108 100644
--- a/testgen/ui/views/dialogs/generate_tests_dialog.py
+++ b/testgen/ui/views/dialogs/generate_tests_dialog.py
@@ -68,7 +68,7 @@ def generate_tests_dialog(test_suite: pd.Series) -> None:
if test_generation_button:
button_container.empty()
- status_container.info("Starting test generation ...")
+ status_container.info("Generating tests ...")
try:
run_test_gen_queries(table_group_id, test_suite_name, selected_set)
diff --git a/testgen/ui/views/hygiene_issues.py b/testgen/ui/views/hygiene_issues.py
index 5efd81c3..2facaac4 100644
--- a/testgen/ui/views/hygiene_issues.py
+++ b/testgen/ui/views/hygiene_issues.py
@@ -13,7 +13,13 @@
from testgen.common import date_service
from testgen.common.mixpanel_service import MixpanelService
from testgen.ui.components import widgets as testgen
-from testgen.ui.components.widgets.download_dialog import FILE_DATA_TYPE, download_dialog, zip_multi_file_data
+from testgen.ui.components.widgets.download_dialog import (
+ FILE_DATA_TYPE,
+ PROGRESS_UPDATE_TYPE,
+ download_dialog,
+ get_excel_file_data,
+ zip_multi_file_data,
+)
from testgen.ui.navigation.page import Page
from testgen.ui.pdf.hygiene_issue_report import create_report
from testgen.ui.services import project_service, user_session_service
@@ -178,21 +184,12 @@ def render(
)
with export_button_column:
- lst_export_columns = [
- "schema_name",
- "table_name",
- "column_name",
- "anomaly_name",
- "issue_likelihood",
- "anomaly_description",
- "action",
- "detail",
- "suggested_action",
- ]
- lst_wrap_columns = ["anomaly_description", "suggested_action"]
- fm.render_excel_export(
- df_pa, lst_export_columns, "Hygiene Screen", "{TIMESTAMP}", lst_wrap_columns
- )
+ if st.button(label=":material/download: Export", help="Download filtered hygiene issues to Excel"):
+ download_dialog(
+ dialog_title="Download Excel Report",
+ file_content_func=get_excel_report_data,
+ args=(df_pa, run_df["table_groups_name"], run_date),
+ )
if selected:
# Always show details for last selected row
@@ -489,6 +486,32 @@ def get_profiling_anomaly_summary(str_profile_run_id):
]
+def get_excel_report_data(
+    update_progress: PROGRESS_UPDATE_TYPE,
+    data: pd.DataFrame,
+    table_group: str,
+    run_date: str,
+) -> FILE_DATA_TYPE:
+    """Produce the "Hygiene Issues" Excel file for the given rows.
+
+    Delegates to get_excel_file_data with the hygiene export columns and
+    passes the table group and profiling run date as the details section.
+    """
+    report_details = {"Table group": table_group, "Profiling run date": run_date}
+    # Key order is the export column order.
+    column_options = dict(
+        schema_name={"header": "Schema"},
+        table_name={"header": "Table"},
+        column_name={"header": "Column"},
+        anomaly_name={"header": "Issue name"},
+        issue_likelihood={"header": "Likelihood"},
+        anomaly_description={"header": "Description", "wrap": True},
+        action={},
+        detail={},
+        suggested_action={"wrap": True},
+    )
+    return get_excel_file_data(
+        data,
+        "Hygiene Issues",
+        details=report_details,
+        columns=column_options,
+        update_progress=update_progress,
+    )
+
+
@st.cache_data(show_spinner=False)
def get_source_data(hi_data):
return get_source_data_uncached(hi_data)
diff --git a/testgen/ui/views/profiling_results.py b/testgen/ui/views/profiling_results.py
index c09dd7ce..5c4ad506 100644
--- a/testgen/ui/views/profiling_results.py
+++ b/testgen/ui/views/profiling_results.py
@@ -1,6 +1,8 @@
import json
import typing
+from datetime import datetime
+import pandas as pd
import streamlit as st
import testgen.ui.queries.profiling_queries as profiling_queries
@@ -8,6 +10,12 @@
import testgen.ui.services.form_service as fm
from testgen.common import date_service
from testgen.ui.components import widgets as testgen
+from testgen.ui.components.widgets.download_dialog import (
+ FILE_DATA_TYPE,
+ PROGRESS_UPDATE_TYPE,
+ download_dialog,
+ get_excel_file_data,
+)
from testgen.ui.components.widgets.testgen_component import testgen_component
from testgen.ui.navigation.page import Page
from testgen.ui.services import project_service, user_session_service
@@ -116,7 +124,12 @@ def render(self, run_id: str, table_name: str | None = None, column_name: str |
with export_button_column:
testgen.flex_row_end()
- render_export_button(df)
+ if st.button(label=":material/download: Export", help="Download filtered profiling results to Excel"):
+ download_dialog(
+ dialog_title="Download Excel Report",
+ file_content_func=get_excel_report_data,
+ args=(df, run_df["table_groups_name"], run_date),
+ )
# Display profiling for selected row
if not selected_row:
@@ -138,75 +151,102 @@ def render(self, run_id: str, table_name: str | None = None, column_name: str |
)
-def render_export_button(df):
- export_columns = [
- "schema_name",
- "table_name",
- "column_name",
- "position",
- "hygiene_issues",
- # Characteristics
- "general_type",
- "column_type",
- "semantic_table_type",
- "semantic_data_type",
- "datatype_suggestion",
- # Value Counts
- "record_ct",
- "value_ct",
- "distinct_value_ct",
- "null_value_ct",
- "zero_value_ct",
- # Alpha
- "zero_length_ct",
- "filled_value_ct",
- "includes_digit_ct",
- "numeric_ct",
- "date_ct",
- "quoted_value_ct",
- "lead_space_ct",
- "embedded_space_ct",
- "avg_embedded_spaces",
- "min_length",
- "max_length",
- "avg_length",
- "min_text",
- "max_text",
- "distinct_std_value_ct",
- "distinct_pattern_ct",
- "std_pattern_match",
- "top_freq_values",
- "top_patterns",
- # Numeric
- "min_value",
- "min_value_over_0",
- "max_value",
- "avg_value",
- "stdev_value",
- "percentile_25",
- "percentile_50",
- "percentile_75",
- # Date
- "min_date",
- "max_date",
- "before_1yr_date_ct",
- "before_5yr_date_ct",
- "before_20yr_date_ct",
- "within_1yr_date_ct",
- "within_1mo_date_ct",
- "future_date_ct",
- # Boolean
- "boolean_true_ct",
- # Extra
- "distinct_value_hash",
- "fractional_sum",
- "date_days_present",
- "date_weeks_present",
- "date_months_present",
- ]
- wrap_columns = ["top_freq_values", "top_patterns"]
- caption = "{TIMESTAMP}"
- fm.render_excel_export(df, export_columns, "Profiling Results", caption, wrap_columns)
+def get_excel_report_data(
+    update_progress: PROGRESS_UPDATE_TYPE,
+    data: pd.DataFrame,
+    table_group: str,
+    run_date: str,
+) -> FILE_DATA_TYPE:
+    """Produce the "Profiling Results" Excel file for the given rows.
+
+    Works on a copy of ``data``: normalizes several columns for display
+    (lower-cased types, rounded averages, formatted dates, readable general
+    type, reformatted frequent values/patterns) and delegates to
+    get_excel_file_data with the table group and run date as details.
+
+    Null values in the normalized columns are exported as empty cells
+    instead of raising.
+    """
+    data = data.copy()
+
+    # Null-safe: e.g. a column without a suggested data type must not break the export.
+    for key in ["column_type", "datatype_suggestion"]:
+        data[key] = data[key].apply(lambda val: val.lower() if not pd.isna(val) else None)
+
+    for key in ["avg_embedded_spaces", "avg_length", "avg_value", "stdev_value"]:
+        data[key] = data[key].apply(lambda val: round(val, 2) if not pd.isna(val) else None)
+
+    # NOTE: the "%-d" / "%-I" directives (no zero padding) are platform-specific strftime extensions.
+    for key in ["min_date", "max_date"]:
+        data[key] = data[key].apply(
+            lambda val: datetime.strptime(val, "%Y-%m-%dT%H:%M:%S").strftime("%b %-d %Y, %-I:%M %p")
+            if not pd.isna(val) and val != "NaT"
+            else None
+        )
+
+    # NOTE(review): "hygiene_issues" is normalized here but is not listed in `columns` below -
+    # confirm whether it is meant to be exported.
+    data["hygiene_issues"] = data["hygiene_issues"].apply(lambda val: "Yes" if val else None)
+
+    type_map = {"A": "Alpha", "B": "Boolean", "D": "Datetime", "N": "Numeric"}
+    data["general_type"] = data["general_type"].apply(lambda val: type_map.get(val))
+
+    # Drops the first two characters, splits entries on "\n| ", and swaps the two " | "-separated parts of each.
+    data["top_freq_values"] = data["top_freq_values"].apply(
+        lambda val: "\n".join([ f"{part.split(" | ")[1]} | {part.split(" | ")[0]}" for part in val[2:].split("\n| ") ])
+        if val
+        else None
+    )
+    # Re-joins the " | "-separated parts in pairs: "a | b" on each line.
+    data["top_patterns"] = data["top_patterns"].apply(
+        lambda val: "".join([ f"{part}{'\n' if index % 2 else ' | '}" for index, part in enumerate(val.split(" | ")) ])
+        if val
+        else None
+    )
+
+    # Key order is the export column order.
+    columns = {
+        "schema_name": {"header": "Schema"},
+        "table_name": {"header": "Table"},
+        "column_name": {"header": "Column"},
+        "position": {},
+        "general_type": {},
+        "column_type": {"header": "Data type"},
+        "datatype_suggestion": {"header": "Suggested data type"},
+        "semantic_data_type": {},
+        "record_ct": {"header": "Record count"},
+        "value_ct": {"header": "Value count"},
+        "distinct_value_ct": {"header": "Distinct values"},
+        "null_value_ct": {"header": "Null values"},
+        "zero_value_ct": {"header": "Zero values"},
+        "zero_length_ct": {"header": "Zero length"},
+        "filled_value_ct": {"header": "Dummy values"},
+        "mixed_case_ct": {"header": "Mixed case"},
+        "lower_case_ct": {"header": "Lower case"},
+        "non_alpha_ct": {"header": "Non-alpha"},
+        "includes_digit_ct": {"header": "Includes digits"},
+        "numeric_ct": {"header": "Numeric values"},
+        "date_ct": {"header": "Date values"},
+        "quoted_value_ct": {"header": "Quoted values"},
+        "lead_space_ct": {"header": "Leading spaces"},
+        "embedded_space_ct": {"header": "Embedded spaces"},
+        "avg_embedded_spaces": {"header": "Average embedded spaces"},
+        "min_length": {"header": "Minimum length"},
+        "max_length": {"header": "Maximum length"},
+        "avg_length": {"header": "Average length"},
+        "min_text": {"header": "Minimum text", "wrap": True},
+        "max_text": {"header": "Maximum text", "wrap": True},
+        "distinct_std_value_ct": {"header": "Distinct standard values"},
+        "distinct_pattern_ct": {"header": "Distinct patterns"},
+        "std_pattern_match": {"header": "Standard pattern match"},
+        "top_freq_values": {"header": "Frequent values", "wrap": True},
+        "top_patterns": {"header": "Frequent patterns", "wrap": True},
+        "min_value": {"header": "Minimum value"},
+        "min_value_over_0": {"header": "Minimum value > 0"},
+        "max_value": {"header": "Maximum value"},
+        "avg_value": {"header": "Average value"},
+        "stdev_value": {"header": "Standard deviation"},
+        "percentile_25": {"header": "25th percentile"},
+        "percentile_50": {"header": "Median value"},
+        "percentile_75": {"header": "75th percentile"},
+        "min_date": {"header": "Minimum date (UTC)"},
+        "max_date": {"header": "Maximum date (UTC)"},
+        "before_1yr_date_ct": {"header": "Before 1 year"},
+        "before_5yr_date_ct": {"header": "Before 5 years"},
+        "before_20yr_date_ct": {"header": "Before 20 years"},
+        "within_1yr_date_ct": {"header": "Within 1 year"},
+        "within_1mo_date_ct": {"header": "Within 1 month"},
+        "future_date_ct": {"header": "Future dates"},
+        "boolean_true_ct": {"header": "Boolean true values"},
+    }
+    return get_excel_file_data(
+        data,
+        "Profiling Results",
+        details={"Table group": table_group, "Profiling run date": run_date},
+        columns=columns,
+        update_progress=update_progress,
+    )
def generate_create_script(df):
diff --git a/testgen/ui/views/test_definitions.py b/testgen/ui/views/test_definitions.py
index 8c657609..82927b26 100644
--- a/testgen/ui/views/test_definitions.py
+++ b/testgen/ui/views/test_definitions.py
@@ -1,6 +1,7 @@
import logging
import time
import typing
+from datetime import datetime
import pandas as pd
import streamlit as st
@@ -14,6 +15,12 @@
import testgen.ui.services.test_suite_service as test_suite_service
from testgen.common import date_service
from testgen.ui.components import widgets as testgen
+from testgen.ui.components.widgets.download_dialog import (
+ FILE_DATA_TYPE,
+ PROGRESS_UPDATE_TYPE,
+ download_dialog,
+ get_excel_file_data,
+)
from testgen.ui.navigation.page import Page
from testgen.ui.services import project_service, user_session_service
from testgen.ui.services.string_service import empty_if_null, snake_case_to_title_case
@@ -814,43 +821,12 @@ def show_test_defs_grid(
)
with export_container:
- lst_export_columns = [
- "schema_name",
- "table_name",
- "column_name",
- "test_name_short",
- "final_test_description",
- "threshold_value",
- "export_uom",
- "test_active_display",
- "lock_refresh_display",
- "urgency",
- "profiling_as_of_date",
- "last_manual_update",
- ]
- lst_wrap_columns = ["final_test_description"]
- lst_export_headers = [
- "Schema",
- "Table Name",
- "Column/Test Focus",
- "Test Type",
- "Description",
- "Test Threshold",
- "Unit of Measure",
- "Active",
- "Locked",
- "Urgency",
- "From Profiling As-Of",
- "Last Manual Update",
- ]
- fm.render_excel_export(
- df,
- lst_export_columns,
- f"Test Definitions for Test Suite {str_test_suite}",
- "{TIMESTAMP}",
- lst_wrap_columns,
- lst_export_headers,
- )
+ if st.button(label=":material/download: Export", help="Download filtered test definitions to Excel"):
+ download_dialog(
+ dialog_title="Download Excel Report",
+ file_content_func=get_excel_report_data,
+ args=(df, str_test_suite),
+ )
if dct_selected_row:
st.html("
")
@@ -917,6 +893,40 @@ def show_test_defs_grid(
return dct_selected_row
+def get_excel_report_data(update_progress: PROGRESS_UPDATE_TYPE, data: pd.DataFrame, test_suite: str) -> FILE_DATA_TYPE:
+    """Produce the "Test Definitions" Excel file for the given rows.
+
+    Works on a copy of ``data``: the active/locked flags keep only "Yes"
+    values, the two timestamp columns are reformatted for display, and the
+    result is handed to get_excel_file_data with the test suite as details.
+    """
+
+    def keep_yes(flag):
+        # Anything other than "Yes" becomes an empty cell.
+        return flag if flag == "Yes" else None
+
+    def format_timestamp(raw):
+        if pd.isna(raw):
+            return None
+        parsed = datetime.strptime(raw, "%Y-%m-%d %H:%M:%S")
+        return parsed.strftime("%b %-d %Y, %-I:%M %p")
+
+    export_df = data.copy()
+
+    for flag_column in ("test_active_display", "lock_refresh_display"):
+        export_df[flag_column] = export_df[flag_column].apply(keep_yes)
+
+    for date_column in ("profiling_as_of_date", "last_manual_update"):
+        export_df[date_column] = export_df[date_column].apply(format_timestamp)
+
+    # Key order is the export column order.
+    column_options = dict(
+        schema_name={"header": "Schema"},
+        table_name={"header": "Table"},
+        column_name={"header": "Column/Focus"},
+        test_name_short={"header": "Test type"},
+        final_test_description={"header": "Description", "wrap": True},
+        threshold_value={},
+        export_uom={"header": "Unit of measure"},
+        test_active_display={"header": "Active"},
+        lock_refresh_display={"header": "Locked"},
+        urgency={"header": "Severity"},
+        profiling_as_of_date={"header": "From profiling as-of (UTC)"},
+        last_manual_update={"header": "Last manual update (UTC)"},
+    )
+    return get_excel_file_data(
+        export_df,
+        "Test Definitions",
+        details={"Test suite": test_suite},
+        columns=column_options,
+        update_progress=update_progress,
+    )
+
+
def generate_test_defs_help(str_test_type):
df = run_test_type_lookup_query(str_test_type)
if not df.empty:
diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py
index 38410faa..c2b01a3e 100644
--- a/testgen/ui/views/test_results.py
+++ b/testgen/ui/views/test_results.py
@@ -16,7 +16,13 @@
from testgen.common import date_service
from testgen.common.mixpanel_service import MixpanelService
from testgen.ui.components import widgets as testgen
-from testgen.ui.components.widgets.download_dialog import FILE_DATA_TYPE, download_dialog, zip_multi_file_data
+from testgen.ui.components.widgets.download_dialog import (
+ FILE_DATA_TYPE,
+ PROGRESS_UPDATE_TYPE,
+ download_dialog,
+ get_excel_file_data,
+ zip_multi_file_data,
+)
from testgen.ui.navigation.page import Page
from testgen.ui.pdf.test_result_report import create_report
from testgen.ui.services import project_service, test_definition_service, test_results_service, user_session_service
@@ -158,7 +164,16 @@ def render(
# Display main grid and retrieve selection
selected = show_result_detail(
- run_id, export_button_column, status, test_type, table_name, column_name, sorting_columns, do_multi_select
+ run_id,
+ run_date,
+ run_df["test_suite"],
+ export_button_column,
+ status,
+ test_type,
+ table_name,
+ column_name,
+ sorting_columns,
+ do_multi_select,
)
# Need to render toolbar buttons after grid, so selection status is maintained
@@ -458,6 +473,8 @@ def show_test_def_detail(str_test_def_id):
def show_result_detail(
run_id: str,
+ run_date: str,
+ test_suite: str,
export_container: DeltaGenerator,
test_status: str | None = None,
test_type_id: str | None = None,
@@ -504,42 +521,12 @@ def show_result_detail(
)
with export_container:
- lst_export_columns = [
- "schema_name",
- "table_name",
- "column_names",
- "test_name_short",
- "test_description",
- "dq_dimension",
- "measure_uom",
- "measure_uom_description",
- "threshold_value",
- "severity",
- "result_measure",
- "result_status",
- "result_message",
- "action",
- ]
- lst_wrap_colunns = ["test_description"]
- lst_export_headers = [
- "Schema Name",
- "Table Name",
- "Columns/Focus",
- "Test Type",
- "Test Description",
- "DQ Dimension",
- "UOM",
- "UOM Description",
- "Threshold Value",
- "Severity",
- "Result Measure",
- "Status",
- "Message",
- "Action",
- ]
- fm.render_excel_export(
- df, lst_export_columns, "Test Results", "{TIMESTAMP}", lst_wrap_colunns, lst_export_headers
- )
+ if st.button(label=":material/download: Export", help="Download filtered test results to Excel"):
+ download_dialog(
+ dialog_title="Download Excel Report",
+ file_content_func=get_excel_report_data,
+ args=(df, test_suite, run_date),
+ )
# Display history and detail for selected row
if not selected_rows:
@@ -636,6 +623,37 @@ def show_result_detail(
return selected_rows
+def get_excel_report_data(
+    update_progress: PROGRESS_UPDATE_TYPE,
+    data: pd.DataFrame,
+    test_suite: str,
+    run_date: str,
+) -> FILE_DATA_TYPE:
+    """Produce the "Test Results" Excel file for the given rows.
+
+    Delegates to get_excel_file_data with the test result export columns and
+    passes the test suite and test run date as the details section.
+    """
+    report_details = {"Test suite": test_suite, "Test run date": run_date}
+    # Key order is the export column order.
+    column_options = dict(
+        schema_name={"header": "Schema"},
+        table_name={"header": "Table"},
+        column_names={"header": "Columns/Focus"},
+        test_name_short={"header": "Test type"},
+        test_description={"header": "Description", "wrap": True},
+        dq_dimension={"header": "Quality dimension"},
+        measure_uom={"header": "Unit of measure (UOM)"},
+        measure_uom_description={"header": "UOM description"},
+        threshold_value={},
+        severity={},
+        result_measure={},
+        result_status={"header": "Status"},
+        result_message={"header": "Message"},
+        action={},
+    )
+    return get_excel_file_data(
+        data,
+        "Test Results",
+        details=report_details,
+        columns=column_options,
+        update_progress=update_progress,
+    )
+
+
def write_history_graph(dfh):
y_min = min(dfh["result_measure"].min(), dfh["threshold_value"].min())
y_max = max(dfh["result_measure"].max(), dfh["threshold_value"].max())
From e044caf82414b7c65af8fcfc1785812f19bf4094 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Tue, 20 May 2025 01:10:58 -0400
Subject: [PATCH 12/33] feat(data-catalog): add export to excel
---
.../js/data_profiling/column_distribution.js | 4 +-
.../frontend/js/pages/data_catalog.js | 22 +++
testgen/ui/queries/profiling_queries.py | 31 ++--
testgen/ui/views/data_catalog.py | 143 +++++++++++++++++-
4 files changed, 181 insertions(+), 19 deletions(-)
diff --git a/testgen/ui/components/frontend/js/data_profiling/column_distribution.js b/testgen/ui/components/frontend/js/data_profiling/column_distribution.js
index e40ca9f7..fc3b9db2 100644
--- a/testgen/ui/components/frontend/js/data_profiling/column_distribution.js
+++ b/testgen/ui/components/frontend/js/data_profiling/column_distribution.js
@@ -196,8 +196,8 @@ function DatetimeColumn(/** @type ColumnProfile */ item) {
div(
{ class: 'flex-column fx-gap-3 tg-profile--percent-column' },
PercentBar({ label: 'Before 1 Year', value: item.before_1yr_date_ct, total, width: percentWidth }),
- PercentBar({ label: 'Before 5 Year', value: item.before_5yr_date_ct, total, width: percentWidth }),
- PercentBar({ label: 'Before 20 Year', value: item.before_20yr_date_ct, total, width: percentWidth }),
+ PercentBar({ label: 'Before 5 Years', value: item.before_5yr_date_ct, total, width: percentWidth }),
+ PercentBar({ label: 'Before 20 Years', value: item.before_20yr_date_ct, total, width: percentWidth }),
),
div(
{ class: 'flex-column fx-gap-3 tg-profile--percent-column' },
diff --git a/testgen/ui/components/frontend/js/pages/data_catalog.js b/testgen/ui/components/frontend/js/pages/data_catalog.js
index 56cb882f..a80b4777 100644
--- a/testgen/ui/components/frontend/js/pages/data_catalog.js
+++ b/testgen/ui/components/frontend/js/pages/data_catalog.js
@@ -172,6 +172,28 @@ const DataCatalog = (/** @type Properties */ props) => {
testId: 'table-group-filter',
onChange: (value) => emitEvent('TableGroupSelected', {payload: value}),
}),
+ Button({
+ icon: 'download',
+ type: 'stroked',
+ label: 'Export',
+ tooltip: 'Download filtered columns to Excel',
+ tooltipPosition: 'left',
+ width: 'fit-content',
+ style: 'background: var(--dk-card-background);',
+ onclick: () => {
+ const columnIds = treeNodes.val.reduce((ids, table) => {
+ if (!table.hidden.val) {
+ table.children.forEach(column => {
+ if (!column.hidden.val) {
+ ids.push(column.id);
+ }
+ });
+ }
+ return ids;
+ }, []);
+ emitEvent('ExportClicked', { payload: columnIds });
+ },
+ }),
),
() => treeNodes.val.length
? div(
diff --git a/testgen/ui/queries/profiling_queries.py b/testgen/ui/queries/profiling_queries.py
index 21b3dc1f..73c5c663 100644
--- a/testgen/ui/queries/profiling_queries.py
+++ b/testgen/ui/queries/profiling_queries.py
@@ -140,7 +140,7 @@ def get_profiling_results(profiling_run_id: str, table_name: str, column_name: s
@st.cache_data(show_spinner=False)
-def get_table_by_id(table_id: str, table_group_id: str) -> dict | None:
+def get_table_by_id(table_id: str) -> dict | None:
if not is_uuid4(table_id):
return None
@@ -185,8 +185,7 @@ def get_table_by_id(table_id: str, table_group_id: str) -> dict | None:
LEFT JOIN {schema}.table_groups ON (
table_chars.table_groups_id = table_groups.id
)
- WHERE table_id = '{table_id}'
- AND table_chars.table_groups_id = '{table_group_id}';
+ WHERE table_id = '{table_id}';
"""
results = db.retrieve_data(query)
@@ -198,20 +197,15 @@ def get_table_by_id(table_id: str, table_group_id: str) -> dict | None:
@st.cache_data(show_spinner=False)
def get_column_by_id(
column_id: str,
- table_group_id: str,
include_tags: bool = False,
include_has_test_runs: bool = False,
include_scores: bool = False,
) -> dict | None:
-
if not is_uuid4(column_id):
return None
- condition = f"""
- column_chars.column_id = '{column_id}'
- AND column_chars.table_groups_id = '{table_group_id}'
- """
- return get_column_by_condition(condition, include_tags, include_has_test_runs, include_scores)
+ condition = f"column_chars.column_id = '{column_id}'"
+ return get_columns_by_condition(condition, include_tags, include_has_test_runs, include_scores)[0]
@st.cache_data(show_spinner="Loading data ...")
@@ -229,10 +223,20 @@ def get_column_by_name(
AND column_chars.table_name = '{table_name}'
AND column_chars.table_groups_id = '{table_group_id}'
"""
- return get_column_by_condition(condition, include_tags, include_has_test_runs, include_scores)
+ return get_columns_by_condition(condition, include_tags, include_has_test_runs, include_scores)[0]
+
+
+def get_columns_by_id(
+ column_ids: list[str],
+ include_tags: bool = False,
+ include_has_test_runs: bool = False,
+ include_scores: bool = False,
+) -> dict | None:
+ condition = f"column_chars.column_id IN ('{"', '".join([ col for col in column_ids if is_uuid4(col) ])}')"
+ return get_columns_by_condition(condition, include_tags, include_has_test_runs, include_scores)
-def get_column_by_condition(
+def get_columns_by_condition(
filter_condition: str,
include_tags: bool = False,
include_has_test_runs: bool = False,
@@ -248,6 +252,7 @@ def get_column_by_condition(
column_chars.table_name,
column_chars.schema_name,
column_chars.table_groups_id::VARCHAR AS table_group_id,
+ column_chars.ordinal_position,
-- Characteristics
column_chars.general_type,
column_chars.column_type,
@@ -306,7 +311,7 @@ def get_column_by_condition(
results = db.retrieve_data(query)
if not results.empty:
# to_json converts datetimes, NaN, etc, to JSON-safe values (Note: to_dict does not)
- return json.loads(results.to_json(orient="records"))[0]
+ return json.loads(results.to_json(orient="records"))
@st.cache_data(show_spinner=False)
diff --git a/testgen/ui/views/data_catalog.py b/testgen/ui/views/data_catalog.py
index bf37dd72..8d91581e 100644
--- a/testgen/ui/views/data_catalog.py
+++ b/testgen/ui/views/data_catalog.py
@@ -12,11 +12,23 @@
import testgen.ui.services.query_service as dq
from testgen.ui.components import widgets as testgen
from testgen.ui.components.widgets import testgen_component
+from testgen.ui.components.widgets.download_dialog import (
+ FILE_DATA_TYPE,
+ PROGRESS_UPDATE_TYPE,
+ download_dialog,
+ get_excel_file_data,
+)
from testgen.ui.navigation.menu import MenuItem
from testgen.ui.navigation.page import Page
from testgen.ui.navigation.router import Router
from testgen.ui.queries import project_queries
-from testgen.ui.queries.profiling_queries import TAG_FIELDS, get_column_by_id, get_hygiene_issues, get_table_by_id
+from testgen.ui.queries.profiling_queries import (
+ TAG_FIELDS,
+ get_column_by_id,
+ get_columns_by_id,
+ get_hygiene_issues,
+ get_table_by_id,
+)
from testgen.ui.services import user_session_service
from testgen.ui.session import session
from testgen.ui.views.dialogs.data_preview_dialog import data_preview_dialog
@@ -103,6 +115,11 @@ def render(self, project_code: str, table_group_id: str | None = None, selected:
),
"TableGroupSelected": on_table_group_selected,
"ItemSelected": on_item_selected,
+ "ExportClicked": lambda columns: download_dialog(
+ dialog_title="Download Excel Report",
+ file_content_func=get_excel_report_data,
+ args=(selected_table_group["table_groups_name"], columns),
+ ),
"DataPreviewClicked": lambda item: data_preview_dialog(
item["table_group_id"],
item["schema_name"],
@@ -121,8 +138,126 @@ def on_table_group_selected(table_group_id: str | None) -> None:
def on_item_selected(item_id: str | None) -> None:
Router().set_query_params({ "selected": item_id })
+
+def get_excel_report_data(update_progress: PROGRESS_UPDATE_TYPE, table_group: str, columns: list[str]) -> None:
+ data = get_columns_by_id(
+ [ col.split("_")[1] for col in columns ],
+ include_tags=True,
+ )
+ data = pd.DataFrame(data)
+
+ for key in ["column_type", "datatype_suggestion"]:
+ data[key] = data[key].apply(lambda val: val.lower())
+
+ for key in ["avg_embedded_spaces", "avg_length", "avg_value", "stdev_value"]:
+ data[key] = data[key].apply(lambda val: round(val, 2))
+
+ for key in ["min_date", "max_date", "add_date", "last_mod_date", "drop_date"]:
+ data[key] = data[key].apply(
+ lambda val: datetime.fromtimestamp(val / 1000).strftime("%b %-d %Y, %-I:%M %p") if not pd.isna(val) else None
+ )
+
+ for key in ["data_source", "source_system", "source_process", "business_domain", "stakeholder_group", "transform_level", "aggregation_level", "data_product"]:
+ data[key] = data.apply(
+ lambda col: col[key] or col[f"table_{key}"] or col.get(f"table_group_{key}"),
+ axis=1,
+ )
+
+ type_map = {"A": "Alpha", "B": "Boolean", "D": "Datetime", "N": "Numeric"}
+ data["general_type"] = data["general_type"].apply(lambda val: type_map.get(val))
+
+ data["critical_data_element"] = data.apply(
+ lambda col: "Yes" if col["critical_data_element"] or col["table_critical_data_element"] else None,
+ axis=1,
+ )
+ data["top_freq_values"] = data["top_freq_values"].apply(
+ lambda val: "\n".join([ f"{part.split(" | ")[1]} | {part.split(" | ")[0]}" for part in val[2:].split("\n| ") ])
+ if val
+ else None
+ )
+ data["top_patterns"] = data["top_patterns"].apply(
+ lambda val: "".join([ f"{part}{'\n' if index % 2 else ' | '}" for index, part in enumerate(val.split(" | ")) ])
+ if val
+ else None
+ )
+
+ file_columns = {
+ "schema_name": {"header": "Schema"},
+ "table_name": {"header": "Table"},
+ "column_name": {"header": "Column"},
+ "critical_data_element": {},
+ "ordinal_position": {"header": "Position"},
+ "general_type": {},
+ "column_type": {"header": "Data type"},
+ "datatype_suggestion": {"header": "Suggested data type"},
+ "functional_data_type": {"header": "Semantic data type"},
+ "add_date": {"header": "First detected"},
+ "last_mod_date": {"header": "Modification detected"},
+ "drop_date": {"header": "Drop detected"},
+ "record_ct": {"header": "Record count"},
+ "value_ct": {"header": "Value count"},
+ "distinct_value_ct": {"header": "Distinct values"},
+ "null_value_ct": {"header": "Null values"},
+ "zero_value_ct": {"header": "Zero values"},
+ "zero_length_ct": {"header": "Zero length"},
+ "filled_value_ct": {"header": "Dummy values"},
+ "mixed_case_ct": {"header": "Mixed case"},
+ "lower_case_ct": {"header": "Lower case"},
+ "non_alpha_ct": {"header": "Non-alpha"},
+ "includes_digit_ct": {"header": "Includes digits"},
+ "numeric_ct": {"header": "Numeric values"},
+ "date_ct": {"header": "Date values"},
+ "quoted_value_ct": {"header": "Quoted values"},
+ "lead_space_ct": {"header": "Leading spaces"},
+ "embedded_space_ct": {"header": "Embedded spaces"},
+ "avg_embedded_spaces": {"header": "Average embedded spaces"},
+ "min_length": {"header": "Minimum length"},
+ "max_length": {"header": "Maximum length"},
+ "avg_length": {"header": "Average length"},
+ "min_text": {"header": "Minimum text", "wrap": True},
+ "max_text": {"header": "Maximum text", "wrap": True},
+ "distinct_std_value_ct": {"header": "Distinct standard values"},
+ "distinct_pattern_ct": {"header": "Distinct patterns"},
+ "std_pattern_match": {"header": "Standard pattern match"},
+ "top_freq_values": {"header": "Frequent values", "wrap": True},
+ "top_patterns": {"header": "Frequent patterns", "wrap": True},
+ "min_value": {"header": "Minimum value"},
+ "min_value_over_0": {"header": "Minimum value > 0"},
+ "max_value": {"header": "Maximum value"},
+ "avg_value": {"header": "Average value"},
+ "stdev_value": {"header": "Standard deviation"},
+ "percentile_25": {"header": "25th percentile"},
+ "percentile_50": {"header": "Median value"},
+ "percentile_75": {"header": "75th percentile"},
+ "min_date": {"header": "Minimum date (UTC)"},
+ "max_date": {"header": "Maximum date (UTC)"},
+ "before_1yr_date_ct": {"header": "Before 1 year"},
+ "before_5yr_date_ct": {"header": "Before 5 years"},
+ "before_20yr_date_ct": {"header": "Before 20 years"},
+ "within_1yr_date_ct": {"header": "Within 1 year"},
+ "within_1mo_date_ct": {"header": "Within 1 month"},
+ "future_date_ct": {"header": "Future dates"},
+ "boolean_true_ct": {"header": "Boolean true values"},
+ "description": {"wrap": True},
+ "data_source": {},
+ "source_system": {},
+ "source_process": {},
+ "business_domain": {},
+ "stakeholder_group": {},
+ "transform_level": {},
+ "aggregation_level": {},
+ "data_product": {},
+ }
+ return get_excel_file_data(
+ data,
+ "Data Catalog Columns",
+ details={"Table group": table_group},
+ columns=file_columns,
+ update_progress=update_progress,
+ )
+
-def on_tags_changed(spinner_container: DeltaGenerator, payload: dict) -> None:
+def on_tags_changed(spinner_container: DeltaGenerator, payload: dict) -> FILE_DATA_TYPE:
attributes = ["description"]
attributes.extend(TAG_FIELDS)
cde_value_map = {
@@ -208,9 +343,9 @@ def get_selected_item(selected: str, table_group_id: str) -> dict | None:
item_type, item_id = selected.split("_", 2)
if item_type == "table":
- item = get_table_by_id(item_id, table_group_id)
+ item = get_table_by_id(item_id)
elif item_type == "column":
- item = get_column_by_id(item_id, table_group_id, include_tags=True, include_has_test_runs=True, include_scores=True)
+ item = get_column_by_id(item_id, include_tags=True, include_has_test_runs=True, include_scores=True)
else:
return None
From 813898bfaffc29f3f64d98dfc7277fdbb44a20b0 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Thu, 22 May 2025 00:51:49 -0400
Subject: [PATCH 13/33] feat(data-column): add column history dialog
---
testgen/ui/assets/style.css | 4 +
.../js/data_profiling/column_distribution.js | 33 +++++--
.../column_profiling_history.js | 85 +++++++++++++++++
.../column_profiling_results.js | 2 +
testgen/ui/components/frontend/js/main.js | 2 +
.../frontend/js/pages/data_catalog.js | 2 +-
testgen/ui/views/data_catalog.py | 8 ++
.../ui/views/dialogs/column_history_dialog.py | 94 +++++++++++++++++++
8 files changed, 219 insertions(+), 11 deletions(-)
create mode 100644 testgen/ui/components/frontend/js/data_profiling/column_profiling_history.js
create mode 100644 testgen/ui/views/dialogs/column_history_dialog.py
diff --git a/testgen/ui/assets/style.css b/testgen/ui/assets/style.css
index 4744e722..a57b453c 100644
--- a/testgen/ui/assets/style.css
+++ b/testgen/ui/assets/style.css
@@ -100,6 +100,10 @@ div[data-testid="stDialog"] div[role="dialog"]:has(i.s-dialog) {
width: calc(35rem);
}
+div[data-testid="stDialog"] div[role="dialog"]:has(i.l-dialog) {
+ width: calc(75rem);
+}
+
div[data-testid="stDialog"] div[role="dialog"]:has(i.xl-dialog) {
width: calc(95rem);
}
diff --git a/testgen/ui/components/frontend/js/data_profiling/column_distribution.js b/testgen/ui/components/frontend/js/data_profiling/column_distribution.js
index fc3b9db2..8cea04eb 100644
--- a/testgen/ui/components/frontend/js/data_profiling/column_distribution.js
+++ b/testgen/ui/components/frontend/js/data_profiling/column_distribution.js
@@ -5,6 +5,7 @@
* @type {object}
* @property {boolean?} border
* @property {boolean?} dataPreview
+ * @property {boolean?} history
*/
import van from '../van.min.js';
import { Card } from '../components/card.js';
@@ -38,20 +39,32 @@ const ColumnDistributionCard = (/** @type Properties */ props, /** @type Column
border: props.border,
title: `Value Distribution ${item.is_latest_profile ? '*' : ''}`,
content: item.profile_run_id && columnFunction ? columnFunction(item) : null,
- actionContent: item.profile_run_id
- ? (getValue(props.dataPreview)
+ actionContent: div(
+ { class: 'flex-row fx-gap-3' },
+ item.profile_run_id
+ ? (getValue(props.dataPreview)
+ ? Button({
+ type: 'stroked',
+ label: 'Data Preview',
+ icon: 'pageview',
+ width: 'auto',
+ onclick: () => emitEvent('DataPreviewClicked', { payload: item }),
+ })
+ : null)
+ : span(
+ { class: 'text-secondary' },
+ 'No profiling data available',
+ ),
+ getValue(props.history)
? Button({
type: 'stroked',
- label: 'Data Preview',
- icon: 'pageview',
+ label: 'History',
+ icon: 'history',
width: 'auto',
- onclick: () => emitEvent('DataPreviewClicked', { payload: item }),
+ onclick: () => emitEvent('HistoryClicked', { payload: item }),
})
- : null)
- : span(
- { class: 'text-secondary' },
- 'No profiling data available',
- ),
+ : null,
+ ),
})
};
diff --git a/testgen/ui/components/frontend/js/data_profiling/column_profiling_history.js b/testgen/ui/components/frontend/js/data_profiling/column_profiling_history.js
new file mode 100644
index 00000000..06d3f426
--- /dev/null
+++ b/testgen/ui/components/frontend/js/data_profiling/column_profiling_history.js
@@ -0,0 +1,85 @@
+/**
+ * @import { Column } from './data_profiling_utils.js';
+ *
+ * @typedef ProfilingRun
+ * @type {object}
+ * @property {string} run_id
+ * @property {number} run_date
+ *
+ * @typedef Properties
+ * @type {object}
+ * @property {ProfilingRun[]} profiling_runs
+ * @property {Column} selected_item
+ */
+import van from '../van.min.js';
+import { Streamlit } from '../streamlit.js';
+import { emitEvent, getValue, loadStylesheet } from '../utils.js';
+import { formatTimestamp } from '../display_utils.js';
+import { ColumnDistributionCard } from './column_distribution.js';
+
+const { div, span } = van.tags;
+
+const ColumnProfilingHistory = (/** @type Properties */ props) => {
+ loadStylesheet('column-profiling-history', stylesheet);
+ Streamlit.setFrameHeight(600);
+ window.testgen.isPage = true;
+
+ return div(
+ { class: 'column-history flex-row fx-align-stretch' },
+ () => div(
+ { class: 'column-history--list' },
+ getValue(props.profiling_runs).map(({ run_id, run_date }, index) => div(
+ {
+ class: () => `column-history--item clickable ${getValue(props.selected_item).profile_run_id === run_id ? 'selected' : ''}`,
+ onclick: () => emitEvent('RunSelected', { payload: run_id }),
+ },
+ div(formatTimestamp(run_date)),
+ index === 0 ? span({ class: 'text-caption' }, 'Latest run') : null,
+ )),
+ ),
+ span({class: 'column-history--divider'}),
+ () => div(
+ { class: 'column-history--details' },
+ ColumnDistributionCard({}, getValue(props.selected_item)),
+ ),
+ );
+}
+
+// Styles for the column history view; registered by ColumnProfilingHistory
+// through loadStylesheet('column-profiling-history', stylesheet).
+const stylesheet = new CSSStyleSheet();
+stylesheet.replace(`
+.column-history {
+ height: 100%;
+}
+
+.column-history--list {
+ flex: 150px 1 1;
+}
+
+.column-history--item {
+ padding: 8px;
+}
+
+.column-history--item:hover {
+ background-color: var(--sidebar-item-hover-color);
+}
+
+.column-history--item.selected {
+ background-color: #06a04a17;
+}
+
+.column-history--item.selected > div {
+ font-weight: 500;
+}
+
+.column-history--details {
+ overflow: auto;
+}
+
+.column-history--divider {
+ width: 1px;
+ background-color: var(--grey);
+ margin: 0 10px;
+}
+`);
+
+export { ColumnProfilingHistory };
diff --git a/testgen/ui/components/frontend/js/data_profiling/column_profiling_results.js b/testgen/ui/components/frontend/js/data_profiling/column_profiling_results.js
index 8f40ec28..98f4a6e1 100644
--- a/testgen/ui/components/frontend/js/data_profiling/column_profiling_results.js
+++ b/testgen/ui/components/frontend/js/data_profiling/column_profiling_results.js
@@ -1,4 +1,6 @@
/**
+ * @import { Column } from './data_profiling_utils.js';
+ *
* @typedef Properties
* @type {object}
* @property {Column} column
diff --git a/testgen/ui/components/frontend/js/main.js b/testgen/ui/components/frontend/js/main.js
index 3d265a2d..0061db66 100644
--- a/testgen/ui/components/frontend/js/main.js
+++ b/testgen/ui/components/frontend/js/main.js
@@ -25,6 +25,7 @@ import { QualityDashboard } from './pages/quality_dashboard.js';
import { ScoreDetails } from './pages/score_details.js';
import { ScoreExplorer } from './pages/score_explorer.js';
import { ColumnProfilingResults } from './data_profiling/column_profiling_results.js';
+import { ColumnProfilingHistory } from './data_profiling/column_profiling_history.js';
import { ScheduleList } from './pages/schedule_list.js';
let currentWindowVan = van;
@@ -44,6 +45,7 @@ const TestGenComponent = (/** @type {string} */ id, /** @type {object} */ props)
database_flavor_selector: DatabaseFlavorSelector,
data_catalog: DataCatalog,
column_profiling_results: ColumnProfilingResults,
+ column_profiling_history: ColumnProfilingHistory,
project_dashboard: ProjectDashboard,
test_suites: TestSuites,
quality_dashboard: QualityDashboard,
diff --git a/testgen/ui/components/frontend/js/pages/data_catalog.js b/testgen/ui/components/frontend/js/pages/data_catalog.js
index a80b4777..5419036b 100644
--- a/testgen/ui/components/frontend/js/pages/data_catalog.js
+++ b/testgen/ui/components/frontend/js/pages/data_catalog.js
@@ -298,7 +298,7 @@ const SelectedDetails = (/** @type Properties */ props, /** @type Table | Column
),
DataCharacteristicsCard({ scores: true }, item),
item.type === 'column'
- ? ColumnDistributionCard({ dataPreview: true }, item)
+ ? ColumnDistributionCard({ dataPreview: true, history: true }, item)
: TableSizeCard({}, item),
TagsCard({ tagOptions: getValue(props.tag_values), editable: userCanEdit }, item),
PotentialPIICard({ noLinks: !userCanNavigate }, item),
diff --git a/testgen/ui/views/data_catalog.py b/testgen/ui/views/data_catalog.py
index 8d91581e..e1c549ec 100644
--- a/testgen/ui/views/data_catalog.py
+++ b/testgen/ui/views/data_catalog.py
@@ -31,6 +31,7 @@
)
from testgen.ui.services import user_session_service
from testgen.ui.session import session
+from testgen.ui.views.dialogs.column_history_dialog import column_history_dialog
from testgen.ui.views.dialogs.data_preview_dialog import data_preview_dialog
from testgen.ui.views.dialogs.run_profiling_dialog import run_profiling_dialog
from testgen.utils import format_field, friendly_score, is_uuid4, score
@@ -126,6 +127,13 @@ def render(self, project_code: str, table_group_id: str | None = None, selected:
item["table_name"],
item.get("column_name"),
),
+ "HistoryClicked": lambda item: column_history_dialog(
+ item["table_group_id"],
+ item["schema_name"],
+ item["table_name"],
+ item["column_name"],
+ item["add_date"],
+ ),
},
event_handlers={ "TagsChanged": partial(on_tags_changed, spinner_container) },
)
diff --git a/testgen/ui/views/dialogs/column_history_dialog.py b/testgen/ui/views/dialogs/column_history_dialog.py
new file mode 100644
index 00000000..6a224004
--- /dev/null
+++ b/testgen/ui/views/dialogs/column_history_dialog.py
@@ -0,0 +1,94 @@
+import json
+
+import pandas as pd
+import streamlit as st
+
+import testgen.ui.services.database_service as db
+from testgen.ui.components import widgets as testgen
+from testgen.ui.components.widgets import testgen_component
+from testgen.ui.queries.profiling_queries import COLUMN_PROFILING_FIELDS
+from testgen.utils import format_field
+
+
+def column_history_dialog(*args) -> None:
+ """Open the column history dialog after clearing any previously selected run.
+
+ All positional arguments are forwarded unchanged to ``_column_history_dialog``.
+ """
+ # Clear the stored run id so the dialog falls back to the first run it loads.
+ st.session_state["column_history_dialog:run_id"] = None
+ _column_history_dialog(*args)
+
+
+@st.dialog(title="Column History")
+def _column_history_dialog(
+ table_group_id: str,
+ schema_name: str,
+ table_name: str,
+ column_name: str,
+ add_date: int,
+) -> None:
+ testgen.css_class("l-dialog")
+ caption_column, loading_column = st.columns([ 0.8, 0.2 ], vertical_alignment="bottom")
+
+ with caption_column:
+ testgen.caption(f"Table > Column: {table_name} > {column_name}")
+
+ with loading_column:
+ with st.spinner("Loading data ..."):
+ profiling_runs = get_profiling_runs(table_group_id, add_date)
+ run_id = st.session_state.get("column_history_dialog:run_id") or profiling_runs.iloc[0]["id"]
+ selected_item = get_run_column(run_id, schema_name, table_name, column_name)
+
+ testgen_component(
+ "column_profiling_history",
+ props={
+ "profiling_runs": [
+ {
+ "run_id": format_field(run["id"]),
+ "run_date": format_field(run["profiling_starttime"]),
+ } for _, run in profiling_runs.iterrows()
+ ],
+ "selected_item": selected_item,
+ },
+ on_change_handlers={
+ "RunSelected": on_run_selected,
+ }
+ )
+
+
+def on_run_selected(run_id: str) -> None:
+ """Store the run id received from the ``RunSelected`` event in session state."""
+ st.session_state["column_history_dialog:run_id"] = run_id
+
+
+@st.cache_data(show_spinner=False)
+def get_profiling_runs(
+ table_group_id: str,
+ after_date: int,
+) -> pd.DataFrame:
+ schema: str = st.session_state["dbschema"]
+ query = f"""
+ SELECT
+ id::VARCHAR,
+ profiling_starttime
+ FROM {schema}.profiling_runs
+ WHERE table_groups_id = '{table_group_id}'
+ AND profiling_starttime >= TO_TIMESTAMP({after_date / 1000})
+ ORDER BY profiling_starttime DESC;
+ """
+ return db.retrieve_data(query)
+
+
+@st.cache_data(show_spinner=False)
+def get_run_column(run_id: str, schema_name: str, table_name: str, column_name: str) -> dict:
+ schema: str = st.session_state["dbschema"]
+ query = f"""
+ SELECT
+ profile_run_id::VARCHAR,
+ general_type,
+ {COLUMN_PROFILING_FIELDS}
+ FROM {schema}.profile_results
+ WHERE profile_run_id = '{run_id}'
+ AND schema_name = '{schema_name}'
+ AND table_name = '{table_name}'
+ AND column_name = '{column_name}';
+ """
+ results = db.retrieve_data(query)
+ if not results.empty:
+ # to_json converts datetimes, NaN, etc, to JSON-safe values (Note: to_dict does not)
+ return json.loads(results.to_json(orient="records"))[0]
From 1c800d1ce68662eb03ca7dc7d0079a4b20ee74bf Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Fri, 23 May 2025 16:13:25 -0400
Subject: [PATCH 14/33] feat(data-catalog): add duplicates bar
---
.../frontend/js/components/summary_bar.js | 22 +++++----
.../js/data_profiling/column_distribution.js | 45 +++++++++++++++----
testgen/ui/components/frontend/js/utils.js | 3 ++
3 files changed, 53 insertions(+), 17 deletions(-)
diff --git a/testgen/ui/components/frontend/js/components/summary_bar.js b/testgen/ui/components/frontend/js/components/summary_bar.js
index 449efcde..2c791913 100644
--- a/testgen/ui/components/frontend/js/components/summary_bar.js
+++ b/testgen/ui/components/frontend/js/components/summary_bar.js
@@ -4,6 +4,7 @@
* @property {string} value
* @property {string} color
* @property {string} label
+ * @property {boolean?} showPercent
*
* @typedef Properties
* @type {object}
@@ -13,7 +14,7 @@
* @property {number?} width
*/
import van from '../van.min.js';
-import { getValue, loadStylesheet } from '../utils.js';
+import { friendlyPercent, getValue, loadStylesheet } from '../utils.js';
import { colorMap } from '../display_utils.js';
const { div, span } = van.tags;
@@ -42,14 +43,17 @@ const SummaryBar = (/** @type Properties */ props) => {
),
() => total.val ? div(
{ class: 'tg-summary-bar--caption flex-row fx-flex-wrap text-caption mt-1' },
- getValue(props.items).map(item => div(
- { class: 'tg-summary-bar--legend flex-row' },
- span({
- class: 'dot',
- style: `color: ${colorMap[item.color] || item.color};`,
- }),
- `${item.label}: ${item.value || 0}`,
- )),
+ getValue(props.items).map(item => item.label
+ ? div(
+ { class: 'tg-summary-bar--legend flex-row' },
+ span({
+ class: 'dot',
+ style: `color: ${colorMap[item.color] || item.color};`,
+ }),
+ `${item.label}: ${item.value || 0}` + (item.showPercent ? ` (${friendlyPercent(item.value * 100 / total.val)}%)` : '')
+ )
+ : null,
+ ),
) : '',
);
};
diff --git a/testgen/ui/components/frontend/js/data_profiling/column_distribution.js b/testgen/ui/components/frontend/js/data_profiling/column_distribution.js
index 8cea04eb..f0a515e0 100644
--- a/testgen/ui/components/frontend/js/data_profiling/column_distribution.js
+++ b/testgen/ui/components/frontend/js/data_profiling/column_distribution.js
@@ -15,7 +15,7 @@ import { SummaryBar } from '../components/summary_bar.js';
import { PercentBar } from '../components/percent_bar.js';
import { FrequencyBars } from '../components/frequency_bars.js';
import { BoxPlot } from '../components/box_plot.js';
-import { loadStylesheet, emitEvent, getValue } from '../utils.js';
+import { loadStylesheet, emitEvent, friendlyPercent, getValue } from '../utils.js';
import { formatTimestamp, roundDigits } from '../display_utils.js';
const { div, span } = van.tags;
@@ -88,6 +88,9 @@ function AlphaColumn(/** @type ColumnProfile */ item) {
}
const total = item.record_ct;
+ const missing = item.null_value_ct + item.zero_length_ct + item.filled_value_ct;
+ const duplicates = item.value_ct - item.distinct_value_ct;
+ const duplicatesStandardized = item.value_ct - item.distinct_std_value_ct;
return div(
{ class: 'flex-column fx-gap-5' },
@@ -97,14 +100,36 @@ function AlphaColumn(/** @type ColumnProfile */ item) {
SummaryBar({
height: summaryHeight,
width: summaryWidth,
- label: `Missing Values: ${item.null_value_ct + item.filled_value_ct + item.filled_value_ct}`,
+ label: `Missing Values: ${missing} (${friendlyPercent(missing * 100 / total)}%)`,
items: [
- { label: 'Actual Values', value: item.value_ct - item.filled_value_ct, color: 'green' },
- { label: 'Null', value: item.null_value_ct, color: 'brownLight' },
+ { label: 'Actual Values', value: item.value_ct - item.zero_length_ct - item.filled_value_ct, color: 'green' },
+ { label: 'Null', value: item.null_value_ct, color: 'brownLight', showPercent: true },
{ label: 'Zero Length', value: item.zero_length_ct, color: 'yellow' },
{ label: 'Dummy Values', value: item.filled_value_ct, color: 'orange' },
],
}),
+ SummaryBar({
+ height: summaryHeight,
+ width: summaryWidth,
+ label: `Duplicate Values: ${duplicates} (${friendlyPercent(duplicates * 100 / item.value_ct)}%)`,
+ items: [
+ { label: 'Distinct', value: item.distinct_value_ct, color: 'indigo' },
+ { label: 'Duplicates', value: duplicates, color: 'orange' },
+ { value: item.null_value_ct, color: 'empty' },
+ ],
+ }),
+ item.distinct_std_value_ct != item.distinct_value_ct
+ ? SummaryBar({
+ height: summaryHeight,
+ width: summaryWidth,
+ label: `Duplicate Values, Standardized: ${duplicatesStandardized} (${friendlyPercent(duplicatesStandardized * 100 / item.value_ct)}%)`,
+ items: [
+ { label: 'Distinct', value: item.distinct_std_value_ct, color: 'indigo' },
+ { label: 'Duplicates', value: duplicatesStandardized, color: 'orange' },
+ { value: item.null_value_ct, color: 'empty' },
+ ],
+ })
+ : null,
SummaryBar({
height: summaryHeight,
width: summaryWidth,
@@ -114,7 +139,7 @@ function AlphaColumn(/** @type ColumnProfile */ item) {
{ label: 'Lower Case', value: item.lower_case_ct, color: 'blueLight' },
{ label: 'Upper Case', value: item.upper_case_ct, color: 'blue' },
{ label: 'Non-Alpha', value: item.non_alpha_ct, color: 'brown' },
- { label: 'Null', value: item.null_value_ct, color: 'brownLight' },
+ { value: item.null_value_ct, color: 'empty' },
],
}),
),
@@ -159,15 +184,19 @@ function AlphaColumn(/** @type ColumnProfile */ item) {
),
),
div(
- { class: 'flex-row fx-flex-wrap fx-align-flex-start fx-gap-4 tg-profile--attribute-block' },
+ { class: 'flex-row fx-flex-wrap fx-align-flex-start fx-gap-4' },
Attribute({ label: 'Minimum Length', value: item.min_length, width: attributeWidth }),
Attribute({ label: 'Maximum Length', value: item.max_length, width: attributeWidth }),
Attribute({ label: 'Average Length', value: roundDigits(item.avg_length), width: attributeWidth }),
+ ),
+ div(
+ { class: 'flex-row fx-flex-wrap fx-align-flex-start fx-gap-4' },
Attribute({ label: 'Minimum Text', value: item.min_text, width: attributeWidth }),
Attribute({ label: 'Maximum Text', value: item.max_text, width: attributeWidth }),
+ ),
+ div(
+ { class: 'flex-row fx-flex-wrap fx-align-flex-start fx-gap-4' },
Attribute({ label: 'Standard Pattern Match', value: standardPattern, width: attributeWidth }),
- Attribute({ label: 'Distinct Values', value: item.distinct_value_ct, width: attributeWidth }),
- Attribute({ label: 'Distinct Standard Values', value: item.distinct_std_value_ct, width: attributeWidth }),
Attribute({ label: 'Distinct Patterns', value: item.distinct_pattern_ct, width: attributeWidth }),
),
);
diff --git a/testgen/ui/components/frontend/js/utils.js b/testgen/ui/components/frontend/js/utils.js
index 655cc437..caab512e 100644
--- a/testgen/ui/components/frontend/js/utils.js
+++ b/testgen/ui/components/frontend/js/utils.js
@@ -105,6 +105,9 @@ function getParents(/** @type HTMLElement*/ element) {
}
function friendlyPercent(/** @type number */ value) {
+ if (Number.isNaN(value)) {
+ return 0;
+ }
const rounded = Math.round(value);
if (rounded === 0 && value > 0) {
return '< 0';
From 221366234b746a9c31b8cbcfe1a17300b6ab49d5 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Thu, 22 May 2025 20:51:31 -0400
Subject: [PATCH 15/33] feat(data-catalog): add search options and make case
insensitive
---
.../components/frontend/js/components/tree.js | 223 +++++++++++-------
.../frontend/js/pages/data_catalog.js | 105 ++++++---
2 files changed, 213 insertions(+), 115 deletions(-)
diff --git a/testgen/ui/components/frontend/js/components/tree.js b/testgen/ui/components/frontend/js/components/tree.js
index 7a486ed8..184bdff4 100644
--- a/testgen/ui/components/frontend/js/components/tree.js
+++ b/testgen/ui/components/frontend/js/components/tree.js
@@ -27,9 +27,12 @@
* @property {function(string)?} onSelect
* @property {boolean?} multiSelect
* @property {boolean?} multiSelectToggle
+ * @property {string?} multiSelectToggleLabel
* @property {function(SelectedNode[] | null)?} onMultiSelect
- * @property {(function(TreeNode): boolean) | null} isNodeHidden
+ * @property {(function(TreeNode, string): boolean) | null} isNodeHidden
+ * @property {function()?} onApplySearchOptions
* @property {(function(): boolean) | null} hasActiveFilters
+ * @property {function()?} onApplyFilters
* @property {function()?} onResetFilters
*/
import van from '../van.min.js';
@@ -44,7 +47,7 @@ import { Toggle } from './toggle.js';
const { div, h3, span } = van.tags;
const levelOffset = 14;
-const Tree = (/** @type Properties */ props, /** @type any? */ filtersContent) => {
+const Tree = (/** @type Properties */ props, /** @type any? */ searchOptionsContent, /** @type any? */ filtersContent) => {
loadStylesheet('tree', stylesheet);
// Use only initial prop value as default and maintain internal state
@@ -82,17 +85,7 @@ const Tree = (/** @type Properties */ props, /** @type any? */ filtersContent) =
id: props.id,
class: () => `flex-column ${getValue(props.classes)}`,
},
- Toolbar(treeNodes, props, filtersContent),
- props.multiSelectToggle
- ? div(
- { class: 'mt-1 mb-2 ml-1 text-secondary' },
- Toggle({
- label: 'Select multiple',
- checked: multiSelect,
- onChange: (/** @type boolean */ checked) => multiSelect.val = checked,
- }),
- )
- : null,
+ Toolbar(treeNodes, multiSelect, props, searchOptionsContent, filtersContent),
div(
{ class: 'tg-tree' },
() => div(
@@ -111,95 +104,152 @@ const Tree = (/** @type Properties */ props, /** @type any? */ filtersContent) =
const Toolbar = (
/** @type { val: TreeNode[] } */ nodes,
+ /** @type object */ multiSelect,
/** @type Properties */ props,
+ /** @type any? */ searchOptionsContent,
/** @type any? */ filtersContent,
) => {
const search = van.state('');
+ const searchOptionsDomId = `tree-search-options-${getRandomId()}`;
+ const searchOptionsOpened = van.state(false);
+
const filterDomId = `tree-filters-${getRandomId()}`;
const filtersOpened = van.state(false);
const filtersActive = van.state(false);
- const isNodeHidden = (/** @type TreeNode */ node) => !node.label.includes(search.val) || props.isNodeHidden?.(node);
+ const isNodeHidden = (/** @type TreeNode */ node) => props.isNodeHidden
+ ? props.isNodeHidden?.(node, search.val)
+ : !node.label.toLowerCase().includes(search.val.toLowerCase());
return div(
- { class: 'flex-row fx-gap-1 tg-tree--actions' },
- Input({
- icon: 'search',
- clearable: true,
- onChange: (/** @type string */ value) => {
- search.val = value;
- filterTree(nodes.val, isNodeHidden);
- if (value) {
- expandOrCollapseTree(nodes.val, true);
- }
- },
- }),
- filtersContent ? [
- div(
- { class: () => `tg-tree--filter-button ${filtersActive.val ? 'active' : ''}` },
- Button({
- id: filterDomId,
- type: 'icon',
- icon: 'filter_list',
- style: 'width: 24px; height: 24px; padding: 4px;',
- tooltip: () => filtersActive.val ? 'Filters active' : 'Filters',
- tooltipPosition: 'bottom',
- onclick: () => filtersOpened.val = !filtersOpened.val,
- }),
- ),
- Portal(
- { target: filterDomId, opened: filtersOpened },
- () => div(
- { class: 'tg-tree--filters' },
- h3(
- { class: 'flex-row fx-justify-space-between'},
- 'Filters',
- Button({
- type: 'icon',
- icon: 'close',
- iconSize: 22,
- onclick: () => filtersOpened.val = false,
- }),
- ),
- filtersContent,
- div(
- { class: 'flex-row fx-justify-space-between mt-4' },
- Button({
- label: 'Reset filters',
- width: '110px',
- disabled: () => !props.hasActiveFilters(),
- onclick: props.onResetFilters,
- }),
+ { class: 'tg-tree--actions' },
+ div(
+ { class: 'flex-row fx-gap-1 mb-1' },
+ Input({
+ icon: 'search',
+ clearable: true,
+ onChange: (/** @type string */ value) => {
+ search.val = value;
+ filterTree(nodes.val, isNodeHidden);
+ if (value) {
+ expandOrCollapseTree(nodes.val, true);
+ }
+ },
+ }),
+ searchOptionsContent ? [
+ div(
+ { class: 'tg-tree--search-options' },
+ Button({
+ id: searchOptionsDomId,
+ type: 'icon',
+ icon: 'settings',
+ style: 'width: 24px; height: 24px; padding: 4px;',
+ tooltip: 'Search options',
+ tooltipPosition: 'bottom',
+ onclick: () => searchOptionsOpened.val = !searchOptionsOpened.val,
+ }),
+ ),
+ Portal(
+ { target: searchOptionsDomId, opened: searchOptionsOpened },
+ () => div(
+ { class: 'tg-tree--portal' },
+ searchOptionsContent,
Button({
type: 'stroked',
color: 'primary',
label: 'Apply',
- width: '80px',
+ style: 'width: 80px; margin-top: 12px; margin-left: auto;',
onclick: () => {
+ props.onApplySearchOptions?.();
filterTree(nodes.val, isNodeHidden);
- filtersActive.val = props.hasActiveFilters();
- filtersOpened.val = false;
+ searchOptionsOpened.val = false;
},
}),
),
+ )
+ ] : null,
+ Button({
+ type: 'icon',
+ icon: 'expand_all',
+ style: 'width: 24px; height: 24px; padding: 4px;',
+ tooltip: 'Expand All',
+ tooltipPosition: 'bottom',
+ onclick: () => expandOrCollapseTree(nodes.val, true),
+ }),
+ Button({
+ type: 'icon',
+ icon: 'collapse_all',
+ style: 'width: 24px; height: 24px; padding: 4px;',
+ tooltip: 'Collapse All',
+ tooltipPosition: 'bottom',
+ onclick: () => expandOrCollapseTree(nodes.val, false),
+ }),
+ ),
+ div(
+ { class: 'flex-row fx-justify-space-between mb-1' },
+ div(
+ { class: 'text-secondary' },
+ props.multiSelectToggle
+ ? Toggle({
+ label: props.multiSelectToggleLabel ?? 'Select multiple',
+ checked: multiSelect,
+ onChange: (/** @type boolean */ checked) => multiSelect.val = checked,
+ })
+ : null,
+ ),
+ filtersContent ? [
+ div(
+ { class: () => `tg-tree--filter-button ${filtersActive.val ? 'active' : ''}` },
+ Button({
+ id: filterDomId,
+ type: 'basic',
+ label: 'Filters',
+ icon: 'filter_list',
+ style: 'height: 24px; padding: 4px;',
+ tooltip: () => filtersActive.val ? 'Filters active' : null,
+ tooltipPosition: 'bottom',
+ onclick: () => filtersOpened.val = !filtersOpened.val,
+ }),
),
- )
- ] : null,
- Button({
- type: 'icon',
- icon: 'expand_all',
- style: 'width: 24px; height: 24px; padding: 4px;',
- tooltip: 'Expand All',
- tooltipPosition: 'bottom',
- onclick: () => expandOrCollapseTree(nodes.val, true),
- }),
- Button({
- type: 'icon',
- icon: 'collapse_all',
- style: 'width: 24px; height: 24px; padding: 4px;',
- tooltip: 'Collapse All',
- tooltipPosition: 'bottom',
- onclick: () => expandOrCollapseTree(nodes.val, false),
- }),
+ Portal(
+ { target: filterDomId, opened: filtersOpened },
+ () => div(
+ { class: 'tg-tree--portal' },
+ h3(
+ { class: 'flex-row fx-justify-space-between'},
+ 'Filters',
+ Button({
+ type: 'icon',
+ icon: 'close',
+ iconSize: 22,
+ onclick: () => filtersOpened.val = false,
+ }),
+ ),
+ filtersContent,
+ div(
+ { class: 'flex-row fx-justify-space-between mt-4' },
+ Button({
+ label: 'Reset filters',
+ width: '110px',
+ disabled: () => !props.hasActiveFilters(),
+ onclick: props.onResetFilters,
+ }),
+ Button({
+ type: 'stroked',
+ color: 'primary',
+ label: 'Apply',
+ width: '80px',
+ onclick: () => {
+ props.onApplyFilters?.();
+ filterTree(nodes.val, isNodeHidden);
+ filtersActive.val = props.hasActiveFilters();
+ filtersOpened.val = false;
+ },
+ }),
+ ),
+ ),
+ )
+ ] : null,
+ )
);
};
@@ -394,9 +444,10 @@ stylesheet.replace(`
.tg-tree--actions {
margin: 4px;
+ border-bottom: 1px solid var(--border-color);
}
-.tg-tree--actions > label {
+.tg-tree--actions > div > label {
flex: auto;
}
@@ -411,7 +462,7 @@ stylesheet.replace(`
border-color: var(--primary-color);
}
-.tg-tree--filters {
+.tg-tree--portal {
border-radius: 8px;
background: var(--dk-card-background);
box-shadow: var(--portal-box-shadow);
@@ -420,7 +471,7 @@ stylesheet.replace(`
z-index: 99;
}
-.tg-tree--filters > h3 {
+.tg-tree--portal > h3 {
margin: 0 0 12px;
font-size: 18px;
font-weight: 500;
diff --git a/testgen/ui/components/frontend/js/pages/data_catalog.js b/testgen/ui/components/frontend/js/pages/data_catalog.js
index 5419036b..9fdea9f6 100644
--- a/testgen/ui/components/frontend/js/pages/data_catalog.js
+++ b/testgen/ui/components/frontend/js/pages/data_catalog.js
@@ -151,9 +151,24 @@ const DataCatalog = (/** @type Properties */ props) => {
}
};
+ const searchOptions = {
+ tableName: van.state(true),
+ columnName: van.state(true),
+ };
const filters = { criticalDataElement: van.state(false) };
TAG_KEYS.forEach(key => filters[key] = van.state(null));
+ // To hold temporary state within the portals, which might be discarded by clicking outside
+ const tempSearchOptions = {};
+ const tempFilters = {};
+
+ const copyState = (fromObject, toObject) => {
+ Object.entries(fromObject).forEach(([ key, state ]) => {
+ toObject[key] = toObject[key] ?? van.state();
+ toObject[key].val = state.val;
+ });
+ };
+
const userCanEdit = getValue(props.permissions)?.can_edit ?? false;
const userCanNavigate = getValue(props.permissions)?.can_navigate ?? false;
const projectSummary = getValue(props.project_summary);
@@ -211,44 +226,76 @@ const DataCatalog = (/** @type Properties */ props) => {
onSelect: (/** @type string */ selected) => emitEvent('ItemSelected', { payload: selected }),
multiSelect: multiEditMode,
multiSelectToggle: userCanEdit,
+ multiSelectToggleLabel: 'Edit multiple',
onMultiSelect: (/** @type string[] | null */ selected) => multiSelectedItems.val = selected,
- isNodeHidden: (/** @type TreeNode */ node) => {
- let hidden = ![ node.criticalDataElement, false ].includes(filters.criticalDataElement.val);
- hidden = hidden || TAG_KEYS.some(key => ![ node[key], null ].includes(filters[key].val));
- return hidden;
+ isNodeHidden: (/** @type TreeNode */ node, /** @type string */ search) =>
+ !node.label.toLowerCase().includes(search.toLowerCase())
+ || (!!node.children && !searchOptions.tableName.val)
+ || (!node.children && !searchOptions.columnName.val)
+ || ![ node.criticalDataElement, false ].includes(filters.criticalDataElement.val)
+ || TAG_KEYS.some(key => ![ node[key], null ].includes(filters[key].val)),
+ onApplySearchOptions: () => {
+ copyState(tempSearchOptions, searchOptions);
+ // If both were unselected, reset their values
+ // Otherwise, nothing will be matched and the user might not realize why
+ if (!searchOptions.tableName.val && !searchOptions.columnName.val) {
+ searchOptions.tableName.val = true;
+ searchOptions.columnName.val = true
+ }
},
hasActiveFilters: () => filters.criticalDataElement.val || TAG_KEYS.some(key => !!filters[key].val),
+ onApplyFilters: () => copyState(tempFilters, filters),
onResetFilters: () => {
- filters.criticalDataElement.val = false;
- TAG_KEYS.forEach(key => filters[key].val = null);
+ tempFilters.criticalDataElement.val = false;
+ TAG_KEYS.forEach(key => tempFilters[key].val = null);
},
},
+ () => {
+ copyState(searchOptions, tempSearchOptions);
+ return div(
+ { class: 'flex-column fx-gap-2' },
+ span({ class: 'text-caption' }, 'Search by'),
+ Checkbox({
+ label: 'Table name',
+ checked: tempSearchOptions.tableName,
+ onChange: (checked) => tempSearchOptions.tableName.val = checked,
+ }),
+ Checkbox({
+ label: 'Column name',
+ checked: tempSearchOptions.columnName,
+ onChange: (checked) => tempSearchOptions.columnName.val = checked,
+ }),
+ );
+ },
// Pass as a function that will be called when the filter portal is opened
// Otherwise state bindings get garbage collected and Select dropdowns won't open
// https://vanjs.org/advanced#gc
- () => div(
- Checkbox({
- label: 'Only critical data elements (CDEs)',
- checked: filters.criticalDataElement,
- onChange: (checked) => filters.criticalDataElement.val = checked,
- }),
- div(
- {
- class: 'flex-row fx-flex-wrap fx-gap-4 fx-justify-space-between mt-4',
- style: 'max-width: 420px;',
- },
- TAG_KEYS.map(key => Select({
- id: `data-catalog-${key}`,
- label: capitalize(key.replaceAll('_', ' ')),
- height: 32,
- value: filters[key],
- options: getValue(props.tag_values)?.[key]?.map(key => ({ label: key, value: key })),
- allowNull: true,
- disabled: !getValue(props.tag_values)?.[key]?.length,
- onChange: v => filters[key].val = v,
- })),
- ),
- ),
+ () => {
+ copyState(filters, tempFilters);
+ return div(
+ Checkbox({
+ label: 'Only critical data elements (CDEs)',
+ checked: tempFilters.criticalDataElement,
+ onChange: (checked) => tempFilters.criticalDataElement.val = checked,
+ }),
+ div(
+ {
+ class: 'flex-row fx-flex-wrap fx-gap-4 fx-justify-space-between mt-4',
+ style: 'max-width: 420px;',
+ },
+ TAG_KEYS.map(key => Select({
+ id: `data-catalog-${key}`,
+ label: capitalize(key.replaceAll('_', ' ')),
+ height: 32,
+ value: tempFilters[key],
+ options: getValue(props.tag_values)?.[key]?.map(key => ({ label: key, value: key })),
+ allowNull: true,
+ disabled: !getValue(props.tag_values)?.[key]?.length,
+ onChange: (value) => tempFilters[key].val = value,
+ })),
+ ),
+ );
+ },
),
div(
{
From 2882b52fa25d53a9ff5c04c26af8ba26ca4e4dca Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Fri, 23 May 2025 16:14:24 -0400
Subject: [PATCH 16/33] fix(data-catalog): remove description field from
multi-edit mode
---
testgen/ui/components/frontend/js/pages/data_catalog.js | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/testgen/ui/components/frontend/js/pages/data_catalog.js b/testgen/ui/components/frontend/js/pages/data_catalog.js
index 9fdea9f6..b9587b5f 100644
--- a/testgen/ui/components/frontend/js/pages/data_catalog.js
+++ b/testgen/ui/components/frontend/js/pages/data_catalog.js
@@ -478,7 +478,6 @@ const MultiEdit = (/** @type Properties */ props, /** @type Object */ selectedIt
const columnCount = van.derive(() => selectedItems.val?.reduce((count, { children }) => count + children.length, 0));
const attributes = [
- 'description',
'critical_data_element',
...TAG_KEYS,
].map(key => ({
@@ -496,7 +495,6 @@ const MultiEdit = (/** @type Properties */ props, /** @type Object */ selectedIt
];
const tagOptions = getValue(props.tag_values) ?? {};
const width = 400;
- const descriptionWidth = 800;
return div(
{ class: 'tg-dh--details flex-column' },
@@ -529,8 +527,7 @@ const MultiEdit = (/** @type Properties */ props, /** @type Object */ selectedIt
onChange: (value) => valueState.val = value,
})
: Input({
- label, help,
- width: key === 'description' ? descriptionWidth : width,
+ label, help, width,
placeholder: () => checkedState.val ? null : '(keep current values)',
autocompleteOptions: tagOptions[key],
onChange: (value) => valueState.val = value || null,
From b8602e245557b082974056078976429b2b127db2 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Wed, 21 May 2025 18:06:12 -0400
Subject: [PATCH 17/33] fix(run-dialog): cli command error in run tests
---
testgen/ui/components/frontend/js/components/expander_toggle.js | 2 +-
testgen/ui/views/dialogs/run_tests_dialog.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/testgen/ui/components/frontend/js/components/expander_toggle.js b/testgen/ui/components/frontend/js/components/expander_toggle.js
index f95a76d2..72aab775 100644
--- a/testgen/ui/components/frontend/js/components/expander_toggle.js
+++ b/testgen/ui/components/frontend/js/components/expander_toggle.js
@@ -21,7 +21,7 @@ const ExpanderToggle = (/** @type Properties */ props) => {
Streamlit.setFrameHeight(24);
}
- const expandedState = van.state(!!props.default);
+ const expandedState = van.state(!!getValue(props.default));
const expandLabel = getValue(props.expandLabel) || 'Expand';
const collapseLabel = getValue(props.collapseLabel) || 'Collapse';
diff --git a/testgen/ui/views/dialogs/run_tests_dialog.py b/testgen/ui/views/dialogs/run_tests_dialog.py
index d0cb0ada..212c1361 100644
--- a/testgen/ui/views/dialogs/run_tests_dialog.py
+++ b/testgen/ui/views/dialogs/run_tests_dialog.py
@@ -36,7 +36,7 @@ def run_tests_dialog(project_code: str, test_suite: pd.Series | None = None, def
if testgen.expander_toggle(expand_label="Show CLI command", key="run_tests_dialog:keys:show-cli"):
st.code(
- f"testgen run-tests --project-key {project_code} --test-suite-key {test_suite['test_suite']}",
+ f"testgen run-tests --project-key {project_code} --test-suite-key {test_suite_name}",
language="shellSession"
)
From f0aba170f8bea2b6367dec82f5d6253af06c991b Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Wed, 21 May 2025 18:50:29 -0400
Subject: [PATCH 18/33] fix(users): password required when editing user
---
.../030_initialize_new_schema_structure.sql | 1 -
.../dbupgrade/0139_incremental_upgrade.sql | 3 +++
testgen/ui/queries/user_queries.py | 17 ++++++++++-------
testgen/ui/services/user_session_service.py | 6 +-----
testgen/ui/views/login.py | 1 -
5 files changed, 14 insertions(+), 14 deletions(-)
create mode 100644 testgen/template/dbupgrade/0139_incremental_upgrade.sql
diff --git a/testgen/template/dbsetup/030_initialize_new_schema_structure.sql b/testgen/template/dbsetup/030_initialize_new_schema_structure.sql
index f1ff5b9d..6b46af6d 100644
--- a/testgen/template/dbsetup/030_initialize_new_schema_structure.sql
+++ b/testgen/template/dbsetup/030_initialize_new_schema_structure.sql
@@ -622,7 +622,6 @@ CREATE TABLE auth_users (
email VARCHAR(120),
name VARCHAR(120),
password VARCHAR(120),
- preauthorized BOOLEAN default false,
role VARCHAR(20)
);
diff --git a/testgen/template/dbupgrade/0139_incremental_upgrade.sql b/testgen/template/dbupgrade/0139_incremental_upgrade.sql
new file mode 100644
index 00000000..bc19f48b
--- /dev/null
+++ b/testgen/template/dbupgrade/0139_incremental_upgrade.sql
@@ -0,0 +1,3 @@
+SET SEARCH_PATH TO {SCHEMA_NAME};
+
+ALTER TABLE auth_users DROP COLUMN preauthorized;
diff --git a/testgen/ui/queries/user_queries.py b/testgen/ui/queries/user_queries.py
index c9953b4c..d245dbeb 100644
--- a/testgen/ui/queries/user_queries.py
+++ b/testgen/ui/queries/user_queries.py
@@ -5,12 +5,16 @@
@st.cache_data(show_spinner=False)
-def get_users():
+def get_users(include_password: bool=False):
schema: str = st.session_state["dbschema"]
- sql = f"""SELECT
- id::VARCHAR(50),
- username, email, "name", "password", preauthorized, role
- FROM {schema}.auth_users"""
+ sql = f"""
+ SELECT
+ id::VARCHAR(50),
+ username, email, "name",
+ {"password," if include_password else ""}
+ role
+ FROM {schema}.auth_users
+ """
return db.retrieve_data(sql)
@@ -42,12 +46,11 @@ def add_user(user):
def edit_user(user):
schema: str = st.session_state["dbschema"]
- encrypted_password = encrypt_ui_password(user["password"])
sql = f"""UPDATE {schema}.auth_users SET
username = '{user["username"]}',
email = '{user["email"]}',
name = '{user["name"]}',
- password = '{encrypted_password}',
+ {f"password = '{encrypt_ui_password(user["password"])}'," if user["password"] else ""}
role = '{user["role"]}'
WHERE id = '{user["user_id"]}';"""
db.execute_sql(sql)
diff --git a/testgen/ui/services/user_session_service.py b/testgen/ui/services/user_session_service.py
index 4d2dc840..463f454a 100644
--- a/testgen/ui/services/user_session_service.py
+++ b/testgen/ui/services/user_session_service.py
@@ -72,10 +72,9 @@ def end_user_session() -> None:
def get_auth_data():
- auth_data = user_queries.get_users()
+ auth_data = user_queries.get_users(include_password=True)
usernames = {}
- preauthorized_list = []
for item in auth_data.itertuples():
usernames[item.username.lower()] = {
@@ -84,8 +83,6 @@ def get_auth_data():
"password": item.password,
"role": item.role,
}
- if item.preauthorized:
- preauthorized_list.append(item.email)
return {
"credentials": {"usernames": usernames},
@@ -94,7 +91,6 @@ def get_auth_data():
"key": _get_jwt_hashing_key(),
"name": AUTH_TOKEN_COOKIE_NAME,
},
- "preauthorized": {"emails": preauthorized_list},
}
diff --git a/testgen/ui/views/login.py b/testgen/ui/views/login.py
index 3f08d190..14bda7c0 100644
--- a/testgen/ui/views/login.py
+++ b/testgen/ui/views/login.py
@@ -27,7 +27,6 @@ def render(self, **_kwargs) -> None:
auth_data["cookie"]["name"],
auth_data["cookie"]["key"],
auth_data["cookie"]["expiry_days"],
- auth_data["preauthorized"],
)
_, login_column, links_column = st.columns([0.25, 0.5, 0.25])
From 6f8c16b0a0b5162011b8ab0bbbab3d87364ed7a4 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Tue, 27 May 2025 16:17:24 -0400
Subject: [PATCH 19/33] misc(security): upgrading tornado and xz
---
deploy/testgen-base.dockerfile | 4 +++-
deploy/testgen.dockerfile | 2 +-
pyproject.toml | 1 -
3 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/deploy/testgen-base.dockerfile b/deploy/testgen-base.dockerfile
index 1c7d7ea7..f04aa3ba 100644
--- a/deploy/testgen-base.dockerfile
+++ b/deploy/testgen-base.dockerfile
@@ -23,7 +23,9 @@ RUN apk update && apk upgrade && apk add --no-cache \
openblas=0.3.28-r0 \
openblas-dev=0.3.28-r0 \
unixodbc=2.3.12-r0 \
- unixodbc-dev=2.3.12-r0
+ unixodbc-dev=2.3.12-r0 \
+ # Pinned versions for security
+ xz=5.6.2-r1
RUN apk add --no-cache \
--repository https://dl-cdn.alpinelinux.org/alpine/v3.21/community \
diff --git a/deploy/testgen.dockerfile b/deploy/testgen.dockerfile
index 415bc91c..f8ba88fd 100644
--- a/deploy/testgen.dockerfile
+++ b/deploy/testgen.dockerfile
@@ -1,4 +1,4 @@
-ARG TESTGEN_BASE_LABEL=v5
+ARG TESTGEN_BASE_LABEL=v6
FROM datakitchen/dataops-testgen-base:${TESTGEN_BASE_LABEL} AS release-image
diff --git a/pyproject.toml b/pyproject.toml
index dded5064..1f5b19c1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -64,7 +64,6 @@ dependencies = [
"snowflake-connector-python==3.13.1",
"matplotlib==3.9.2",
"scipy==1.14.1",
- "tornado==6.4.2",
"jinja2==3.1.6",
]
From ebf4a78762391173859fb5be623e6510e335880e Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Thu, 29 May 2025 23:13:49 -0400
Subject: [PATCH 20/33] fix(test-definitions): test results should not prevent
manual deletion of tests
---
.../frontend/js/pages/test_suites.js | 2 +-
testgen/ui/queries/test_definition_queries.py | 8 --
.../ui/services/test_definition_service.py | 8 +-
testgen/ui/views/test_definitions.py | 79 +++++++------------
4 files changed, 31 insertions(+), 66 deletions(-)
diff --git a/testgen/ui/components/frontend/js/pages/test_suites.js b/testgen/ui/components/frontend/js/pages/test_suites.js
index 91ce0929..923e9de3 100644
--- a/testgen/ui/components/frontend/js/pages/test_suites.js
+++ b/testgen/ui/components/frontend/js/pages/test_suites.js
@@ -146,7 +146,7 @@ const TestSuites = (/** @type Properties */ props) => {
Link({
href: 'test-suites:definitions',
params: { test_suite_id: testSuite.id },
- label: `${testSuite.test_ct ?? 0} test definitions`,
+ label: `View ${testSuite.test_ct ?? 0} test definitions`,
right_icon: 'chevron_right',
right_icon_size: 20,
class: 'mb-4',
diff --git a/testgen/ui/queries/test_definition_queries.py b/testgen/ui/queries/test_definition_queries.py
index 67a36f87..5af277fd 100644
--- a/testgen/ui/queries/test_definition_queries.py
+++ b/testgen/ui/queries/test_definition_queries.py
@@ -237,14 +237,6 @@ def add(schema, test_definition):
st.cache_data.clear()
-def get_test_definition_usage(schema, test_definition_ids):
- ids_str = ",".join([f"'{item}'" for item in test_definition_ids])
- sql = f"""
- select distinct test_definition_id from {schema}.test_results where test_definition_id in ({ids_str});
- """
- return db.retrieve_data(sql)
-
-
def delete(schema, test_definition_ids):
if test_definition_ids is None or len(test_definition_ids) == 0:
raise ValueError("No Test Definition is specified.")
diff --git a/testgen/ui/services/test_definition_service.py b/testgen/ui/services/test_definition_service.py
index 7f224f71..452e7cda 100644
--- a/testgen/ui/services/test_definition_service.py
+++ b/testgen/ui/services/test_definition_service.py
@@ -43,13 +43,9 @@ def get_test_definition(db_schema, test_def_id):
return database_service.retrieve_data(str_sql)
-def delete(test_definition_ids, dry_run=False):
+def delete(test_definition_ids):
schema = st.session_state["dbschema"]
- usage_result = test_definition_queries.get_test_definition_usage(schema, test_definition_ids)
- can_be_deleted = usage_result.empty
- if not dry_run and can_be_deleted:
- test_definition_queries.delete(schema, test_definition_ids)
- return can_be_deleted
+ test_definition_queries.delete(schema, test_definition_ids)
def cascade_delete(test_suite_ids: list[str]):
diff --git a/testgen/ui/views/test_definitions.py b/testgen/ui/views/test_definitions.py
index 82927b26..429943d0 100644
--- a/testgen/ui/views/test_definitions.py
+++ b/testgen/ui/views/test_definitions.py
@@ -24,7 +24,7 @@
from testgen.ui.navigation.page import Page
from testgen.ui.services import project_service, user_session_service
from testgen.ui.services.string_service import empty_if_null, snake_case_to_title_case
-from testgen.ui.session import session
+from testgen.ui.session import session, temp_value
from testgen.ui.views.dialogs.profiling_results_dialog import view_profiling_button
LOG = logging.getLogger("testgen")
@@ -134,71 +134,48 @@ def render(self, test_suite_id: str, table_name: str | None = None, column_name:
if user_can_edit:
if actions_column.button(
":material/edit: Edit",
- help="Edit the Test Definition",
disabled=not selected,
):
edit_test_dialog(project_code, table_group, test_suite, table_name, column_name, selected_test_def)
if actions_column.button(
":material/file_copy: Copy/Move",
- help="Copy or Move the Test Definition",
disabled=not selected,
):
copy_move_test_dialog(project_code, table_group, test_suite, selected)
if actions_column.button(
":material/delete: Delete",
- help="Delete the selected Test Definition",
disabled=not selected,
):
- delete_test_dialog(selected_test_def)
-
-
-@st.dialog("Delete Test")
-def delete_test_dialog(selected_test_definition):
- test_definition_id = selected_test_definition["id"]
- test_name_short = selected_test_definition["test_name_short"]
-
- can_be_deleted = test_definition_service.delete([test_definition_id], dry_run=True)
-
- fm.render_html_list(
- selected_test_definition,
- [
- "id",
- "project_code",
- "schema_name",
- "table_name",
- "column_name",
- "test_name_short",
- "table_groups_id",
- "test_suite",
- "test_active_display",
- "test_description",
- "last_manual_update",
- ],
- "Test Definition Information",
- int_data_width=700,
- )
-
- with st.form("Delete Test Definition", clear_on_submit=True, border=False):
- _, button_column = st.columns([.85, .15])
- with button_column:
- delete = st.form_submit_button(
- "Delete",
- disabled=not can_be_deleted,
- type="primary",
- use_container_width=True,
- )
-
- if delete:
- test_definition_service.delete([test_definition_id])
- success_message = f"Test Definition {test_name_short} has been deleted. "
- st.success(success_message)
- time.sleep(1)
- st.rerun()
+ delete_test_dialog(selected)
+
+
+@st.dialog("Delete Tests")
+def delete_test_dialog(test_definitions: list[dict]):
+ delete_clicked, set_delete_clicked = temp_value("test-definitions:confirm-delete-tests-val")
+ st.html(f"""
+ Are you sure you want to delete
+ {f"{len(test_definitions)} selected test definitions?"
+ if len(test_definitions) > 1
+ else "the selected test definition?"}
+ """)
+
+ _, button_column = st.columns([.85, .15])
+ with button_column:
+ testgen.button(
+ label="Delete",
+ type_="flat",
+ color="warn",
+ key="test-definitions:confirm-delete-tests-btn",
+ on_click=lambda: set_delete_clicked(True),
+ )
- if not can_be_deleted:
- st.markdown(":orange[This Test Definition cannot be deleted because it is being used in existing tests.]")
+ if delete_clicked():
+ test_definition_service.delete([ item["id"] for item in test_definitions ])
+ st.success("Test definitions have been deleted.")
+ time.sleep(1)
+ st.rerun()
def show_test_form_by_id(test_definition_id):
From f98d48df37c0a95fc157a3efc175180c41dcedfb Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Thu, 29 May 2025 23:15:30 -0400
Subject: [PATCH 21/33] feat(data-catalog): add related test suites card
---
.../js/data_profiling/data_profiling_utils.js | 10 +++++
.../frontend/js/pages/data_catalog.js | 43 +++++++++++++++++++
testgen/ui/views/data_catalog.py | 29 +++++++++++++
3 files changed, 82 insertions(+)
diff --git a/testgen/ui/components/frontend/js/data_profiling/data_profiling_utils.js b/testgen/ui/components/frontend/js/data_profiling/data_profiling_utils.js
index 0aafa3d2..ea62143a 100644
--- a/testgen/ui/components/frontend/js/data_profiling/data_profiling_utils.js
+++ b/testgen/ui/components/frontend/js/data_profiling/data_profiling_utils.js
@@ -17,6 +17,12 @@
* @property {string} test_suite
* @property {string} test_run_id
* @property {number} test_run_date
+ *
+ * @typedef TestSuite
+ * @type {object}
+ * @property {string} id
+ * @property {string} name
+ * @property {number} test_count
*
* @typedef Column
* @type {object}
@@ -127,6 +133,8 @@
* * Issues
* @property {HygieneIssue[]?} hygiene_issues
* @property {TestIssue[]?} test_issues
+ * * Test Suites
+ * @property {TestSuite[]?} test_suites
*
* @typedef Table
* @type {object}
@@ -175,6 +183,8 @@
* * Issues
* @property {HygieneIssue[]?} hygiene_issues
* @property {TestIssue[]?} test_issues
+ * * Test Suites
+ * @property {TestSuite[]?} test_suites
*/
import van from '../van.min.js';
import { Link } from '../components/link.js';
diff --git a/testgen/ui/components/frontend/js/pages/data_catalog.js b/testgen/ui/components/frontend/js/pages/data_catalog.js
index b9587b5f..0d083a4a 100644
--- a/testgen/ui/components/frontend/js/pages/data_catalog.js
+++ b/testgen/ui/components/frontend/js/pages/data_catalog.js
@@ -56,6 +56,7 @@ import { capitalize } from '../display_utils.js';
import { TableSizeCard } from '../data_profiling/table_size.js';
import { Card } from '../components/card.js';
import { Button } from '../components/button.js';
+import { Link } from '../components/link.js';
import { EMPTY_STATE_MESSAGE, EmptyState } from '../components/empty_state.js';
const { div, h2, span, i } = van.tags;
@@ -351,6 +352,7 @@ const SelectedDetails = (/** @type Properties */ props, /** @type Table | Column
PotentialPIICard({ noLinks: !userCanNavigate }, item),
HygieneIssuesCard({ noLinks: !userCanNavigate }, item),
TestIssuesCard({ noLinks: !userCanNavigate }, item),
+ TestSuitesCard(item),
)
: ItemEmptyState(
'Select a table or column on the left to view its details.',
@@ -473,6 +475,47 @@ const TagsCard = (/** @type TagProperties */ props, /** @type Table | Column */
});
};
+const TestSuitesCard = (/** @type Table | Column */ item) => {
+ return Card({
+ title: 'Related Test Suites',
+ content: div(
+ { class: 'flex-column fx-gap-2' },
+ item.test_suites.map(({ id, name, test_count }) => div(
+ { class: 'flex-row fx-gap-1' },
+ Link({
+ href: 'test-suites:definitions',
+ params: {
+ test_suite_id: id,
+ table_name: item.table_name,
+ column_name: item.column_name,
+ },
+ open_new: true,
+ label: name,
+ }),
+ span({ class: 'text-caption' }, `(${test_count} test definitions)`),
+ ))
+ ),
+ actionContent: item.test_suites.length
+ ? null
+ : item.drop_date
+ ? span({ class: 'text-secondary' }, `No test definitions for ${item.type}`)
+ : span(
+ { class: 'text-secondary flex-row fx-gap-1 fx-justify-content-flex-end' },
+ `No test definitions yet for ${item.type}.`,
+ Link({
+ href: 'test-suites',
+ params: {
+ project_code: item.project_code,
+ table_group_id: item.table_group_id,
+ },
+ open_new: true,
+ label: 'Go to Test Suites',
+ right_icon: 'chevron_right',
+ }),
+ ),
+ });
+};
+
const MultiEdit = (/** @type Properties */ props, /** @type Object */ selectedItems, /** @type Object */ multiEditMode) => {
const hasSelection = van.derive(() => selectedItems.val?.length);
const columnCount = van.derive(() => selectedItems.val?.reduce((count, { children }) => count + children.length, 0));
diff --git a/testgen/ui/views/data_catalog.py b/testgen/ui/views/data_catalog.py
index e1c549ec..5b9b70c8 100644
--- a/testgen/ui/views/data_catalog.py
+++ b/testgen/ui/views/data_catalog.py
@@ -363,6 +363,7 @@ def get_selected_item(selected: str, table_group_id: str) -> dict | None:
item["dq_score_testing"] = friendly_score(item["dq_score_testing"])
item["hygiene_issues"] = get_hygiene_issues(item["profile_run_id"], item["table_name"], item.get("column_name"))
item["test_issues"] = get_latest_test_issues(item["table_group_id"], item["table_name"], item.get("column_name"))
+ item["test_suites"] = get_related_test_suites(item["table_group_id"], item["table_name"], item.get("column_name"))
return item
@@ -411,6 +412,34 @@ def get_latest_test_issues(table_group_id: str, table_name: str, column_name: st
return [row.to_dict() for _, row in df.iterrows()]
+@st.cache_data(show_spinner=False)
+def get_related_test_suites(table_group_id: str, table_name: str, column_name: str | None = None) -> dict | None:
+ schema = st.session_state["dbschema"]
+
+ column_condition = ""
+ if column_name:
+ column_condition = f"AND column_name = '{column_name}'"
+
+ sql = f"""
+ SELECT
+ test_suites.id::VARCHAR,
+ test_suite AS name,
+ COUNT(*) AS test_count
+ FROM {schema}.test_definitions
+ LEFT JOIN {schema}.test_suites ON (
+ test_definitions.test_suite_id = test_suites.id
+ )
+ WHERE test_suites.table_groups_id = '{table_group_id}'
+ AND table_name = '{table_name}'
+ {column_condition}
+ GROUP BY test_suites.id
+ ORDER BY test_suite;
+ """
+
+ df = db.retrieve_data(sql)
+ return [row.to_dict() for _, row in df.iterrows()]
+
+
@st.cache_data(show_spinner=False)
def get_tag_values() -> dict[str, list[str]]:
schema = st.session_state["dbschema"]
From d6239514243ec99ea08d8431a2f4e6ef1d62fccc Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Thu, 29 May 2025 23:17:19 -0400
Subject: [PATCH 22/33] feat(data-catalog): display as-of date for table size
---
.../js/data_profiling/data_characteristics.js | 8 ++++++--
.../js/data_profiling/data_profiling_utils.js | 1 +
.../frontend/js/data_profiling/table_size.js | 12 ++++++++----
testgen/ui/queries/profiling_queries.py | 1 +
4 files changed, 16 insertions(+), 6 deletions(-)
diff --git a/testgen/ui/components/frontend/js/data_profiling/data_characteristics.js b/testgen/ui/components/frontend/js/data_profiling/data_characteristics.js
index 6aeb3aa2..136a8f1b 100644
--- a/testgen/ui/components/frontend/js/data_profiling/data_characteristics.js
+++ b/testgen/ui/components/frontend/js/data_profiling/data_characteristics.js
@@ -23,9 +23,13 @@ const DataCharacteristicsCard = (/** @type Properties */ props, /** @type Column
if (item.type === 'column') {
attributes.push(
{ key: 'column_type', label: 'Data Type' },
- { key: 'datatype_suggestion', label: `Suggested Data Type ${item.is_latest_profile ? '*' : ''}` },
{ key: 'functional_data_type', label: `Semantic Data Type ${item.is_latest_profile ? '*' : ''}` },
);
+ if (item.datatype_suggestion && item.datatype_suggestion.toLowerCase() !== item.column_type.toLowerCase()) {
+ attributes.push(
+ { key: 'datatype_suggestion', label: `Suggested Data Type ${item.is_latest_profile ? '*' : ''}` },
+ );
+ }
} else {
attributes.push(
{ key: 'functional_table_type', label: `Semantic Table Type ${item.is_latest_profile ? '*' : ''}` },
@@ -34,7 +38,7 @@ const DataCharacteristicsCard = (/** @type Properties */ props, /** @type Column
if (item.add_date) {
attributes.push({ key: 'add_date', label: 'First Detected' });
}
- if (item.last_mod_date !== item.add_date) {
+ if (item.last_mod_date && item.last_mod_date !== item.add_date) {
attributes.push({ key: 'last_mod_date', label: 'Modification Detected' });
}
if (item.drop_date) {
diff --git a/testgen/ui/components/frontend/js/data_profiling/data_profiling_utils.js b/testgen/ui/components/frontend/js/data_profiling/data_profiling_utils.js
index ea62143a..84fe407e 100644
--- a/testgen/ui/components/frontend/js/data_profiling/data_profiling_utils.js
+++ b/testgen/ui/components/frontend/js/data_profiling/data_profiling_utils.js
@@ -151,6 +151,7 @@
* @property {number} column_ct
* @property {number} data_point_ct
* @property {number} add_date
+ * @property {number} last_refresh_date
* @property {number} drop_date
* * Table Tags
* @property {string} description
diff --git a/testgen/ui/components/frontend/js/data_profiling/table_size.js b/testgen/ui/components/frontend/js/data_profiling/table_size.js
index 3f7af986..56307626 100644
--- a/testgen/ui/components/frontend/js/data_profiling/table_size.js
+++ b/testgen/ui/components/frontend/js/data_profiling/table_size.js
@@ -9,8 +9,9 @@ import { Card } from '../components/card.js';
import { Attribute } from '../components/attribute.js';
import { Button } from '../components/button.js';
import { emitEvent } from '../utils.js';
+import { formatTimestamp } from '../display_utils.js';
-const { div } = van.tags;
+const { div, span } = van.tags;
const TableSizeCard = (/** @type Properties */ _props, /** @type Table */ item) => {
const attributes = [
@@ -20,10 +21,13 @@ const TableSizeCard = (/** @type Properties */ _props, /** @type Table */ item)
]
return Card({
- title: 'Table Size',
+ title: 'Table Size **',
content: div(
- { class: 'flex-row fx-flex-wrap fx-gap-4' },
- attributes.map(({ key, label }) => Attribute({ label, value: item[key], width: 250 })),
+ div(
+ { class: 'flex-row fx-flex-wrap fx-gap-4' },
+ attributes.map(({ key, label }) => Attribute({ label, value: item[key], width: 250 })),
+ ),
+ span({ class: 'text-caption flex-row fx-justify-content-flex-end mt-2' }, `** as of ${formatTimestamp(item.last_refresh_date)}`),
),
actionContent: Button({
type: 'stroked',
diff --git a/testgen/ui/queries/profiling_queries.py b/testgen/ui/queries/profiling_queries.py
index 73c5c663..b7cda3f4 100644
--- a/testgen/ui/queries/profiling_queries.py
+++ b/testgen/ui/queries/profiling_queries.py
@@ -158,6 +158,7 @@ def get_table_by_id(table_id: str) -> dict | None:
table_chars.column_ct,
data_point_ct,
add_date,
+ last_refresh_date,
drop_date,
-- Table Tags
table_chars.description,
From 0288325c5afb4e0db26da6e46ead391bc2f00116 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Thu, 29 May 2025 23:17:36 -0400
Subject: [PATCH 23/33] feat(data-catalog): support removing dropped tables
---
.../js/data_profiling/data_characteristics.js | 78 ++++++++++++-------
.../frontend/js/pages/data_catalog.js | 2 +-
testgen/ui/views/data_catalog.py | 41 +++++++++-
3 files changed, 88 insertions(+), 33 deletions(-)
diff --git a/testgen/ui/components/frontend/js/data_profiling/data_characteristics.js b/testgen/ui/components/frontend/js/data_profiling/data_characteristics.js
index 136a8f1b..7a0f5f20 100644
--- a/testgen/ui/components/frontend/js/data_profiling/data_characteristics.js
+++ b/testgen/ui/components/frontend/js/data_profiling/data_characteristics.js
@@ -5,13 +5,15 @@
* @type {object}
* @property {boolean?} scores
* @property {boolean?} border
+ * @property {boolean?} allowRemove
*/
import van from '../van.min.js';
import { Card } from '../components/card.js';
import { Attribute } from '../components/attribute.js';
+import { Button } from '../components/button.js';
import { ScoreMetric } from '../components/score_metric.js';
import { formatTimestamp } from '../display_utils.js';
-import { loadStylesheet } from '../utils.js';
+import { emitEvent, loadStylesheet } from '../utils.js';
import { getColumnIcon } from './data_profiling_utils.js';
const { div, span, i } = van.tags;
@@ -51,37 +53,53 @@ const DataCharacteristicsCard = (/** @type Properties */ props, /** @type Column
content: div(
{ class: 'flex-row fx-gap-4 fx-justify-space-between' },
div(
- { class: 'flex-row fx-flex-wrap fx-gap-4' },
- attributes.map(({ key, label }) => {
- let value = item[key];
- if (key === 'column_type') {
- const { icon, iconSize } = getColumnIcon(item);
- value = div(
- { class: 'flex-row' },
- i(
- {
- class: 'material-symbols-rounded tg-data-chars--column-icon',
- style: `font-size: ${iconSize || 24}px;`,
- },
- icon,
- ),
- (value || 'unknown').toLowerCase(),
- );
- } else if (key === 'datatype_suggestion') {
- value = (value || '').toLowerCase();
- } else if (key === 'functional_table_type') {
- value = (value || '').split('-')
- .map(word => word ? (word[0].toUpperCase() + word.substring(1)) : '')
- .join(' ');
- } else if (['add_date', 'last_mod_date', 'drop_date'].includes(key)) {
- value = formatTimestamp(value, true);
- if (key === 'drop_date') {
- label = span({ class: 'text-error' }, label);
+ { class: 'flex-column fx-align-flex-start fx-gap-3' },
+ div(
+ { class: 'flex-row fx-flex-wrap fx-gap-4' },
+ attributes.map(({ key, label }) => {
+ let value = item[key];
+ if (key === 'column_type') {
+ const { icon, iconSize } = getColumnIcon(item);
+ value = div(
+ { class: 'flex-row' },
+ i(
+ {
+ class: 'material-symbols-rounded tg-data-chars--column-icon',
+ style: `font-size: ${iconSize || 24}px;`,
+ },
+ icon,
+ ),
+ (value || 'unknown').toLowerCase(),
+ );
+ } else if (key === 'datatype_suggestion') {
+ value = (value || '').toLowerCase();
+ } else if (key === 'functional_table_type') {
+ value = (value || '').split('-')
+ .map(word => word ? (word[0].toUpperCase() + word.substring(1)) : '')
+ .join(' ');
+ } else if (['add_date', 'last_mod_date', 'drop_date'].includes(key)) {
+ value = formatTimestamp(value, true);
+ if (key === 'drop_date') {
+ label = span({ class: 'text-error' }, label);
+ }
}
- }
- return Attribute({ label, value, width: 250 });
- }),
+ return Attribute({ label, value, width: 250 });
+ }),
+ ),
+ props.allowRemove && item.drop_date && item.type === 'table'
+ ? Button({
+ type: 'stroked',
+ color: 'warn',
+ label: 'Remove from Catalog',
+ icon: 'delete',
+ width: 'auto',
+ disabled: item.test_suites.length,
+ tooltip: item.test_suites.length ? 'The table has associated test definitions and cannot be removed from Data Catalog. Delete the test definitions first.' : 'Remove the table and its columns from Data Catalog',
+ tooltipPosition: 'right',
+ onclick: () => emitEvent('RemoveTableClicked', { payload: item }),
+ })
+ : null,
),
props.scores ? div(
{ style: 'margin-top: -40px;' },
diff --git a/testgen/ui/components/frontend/js/pages/data_catalog.js b/testgen/ui/components/frontend/js/pages/data_catalog.js
index 0d083a4a..78252ec3 100644
--- a/testgen/ui/components/frontend/js/pages/data_catalog.js
+++ b/testgen/ui/components/frontend/js/pages/data_catalog.js
@@ -344,7 +344,7 @@ const SelectedDetails = (/** @type Properties */ props, /** @type Table | Column
),
LatestProfilingTime({ noLinks: !userCanNavigate }, item),
),
- DataCharacteristicsCard({ scores: true }, item),
+ DataCharacteristicsCard({ scores: true, allowRemove: true }, item),
item.type === 'column'
? ColumnDistributionCard({ dataPreview: true, history: true }, item)
: TableSizeCard({}, item),
diff --git a/testgen/ui/views/data_catalog.py b/testgen/ui/views/data_catalog.py
index 5b9b70c8..3d8d45d4 100644
--- a/testgen/ui/views/data_catalog.py
+++ b/testgen/ui/views/data_catalog.py
@@ -1,4 +1,5 @@
import json
+import time
import typing
from collections import defaultdict
from datetime import datetime
@@ -30,7 +31,7 @@
get_table_by_id,
)
from testgen.ui.services import user_session_service
-from testgen.ui.session import session
+from testgen.ui.session import session, temp_value
from testgen.ui.views.dialogs.column_history_dialog import column_history_dialog
from testgen.ui.views.dialogs.data_preview_dialog import data_preview_dialog
from testgen.ui.views.dialogs.run_profiling_dialog import run_profiling_dialog
@@ -121,6 +122,7 @@ def render(self, project_code: str, table_group_id: str | None = None, selected:
file_content_func=get_excel_report_data,
args=(selected_table_group["table_groups_name"], columns),
),
+ "RemoveTableClicked": remove_table_dialog,
"DataPreviewClicked": lambda item: data_preview_dialog(
item["table_group_id"],
item["schema_name"],
@@ -264,7 +266,57 @@ def get_excel_report_data(update_progress: PROGRESS_UPDATE_TYPE, table_group: st
update_progress=update_progress,
)
-
+
+@st.dialog(title="Remove Table from Catalog")
+def remove_table_dialog(item: dict) -> None:
+    """Confirm and carry out the removal of a table from the data catalog.
+
+    Args:
+        item: Table details delivered with the ``RemoveTableClicked`` event;
+            only its ``id`` and ``table_name`` entries are read here.
+    """
+    import html
+
+    remove_clicked, set_remove_clicked = temp_value("data-catalog:confirm-remove-table-val")
+    # st.html renders raw markup, so escape the table name before interpolating it.
+    table_name = html.escape(str(item["table_name"]))
+    st.html(f"Are you sure you want to remove the table {table_name} from the data catalog?")
+    st.warning("This action cannot be undone.")
+
+    _, button_column = st.columns([.85, .15])
+    with button_column:
+        testgen.button(
+            label="Remove",
+            type_="flat",
+            color="warn",
+            key="data-catalog:confirm-remove-table-btn",
+            on_click=lambda: set_remove_clicked(True),
+        )
+
+    if remove_clicked():
+        schema = st.session_state["dbschema"]
+        # The id comes from the client-side event payload and is interpolated into
+        # SQL, so double embedded single quotes to keep it inside the literal.
+        table_id = str(item["id"]).replace("'", "''")
+        # Column rows are removed before the table row.
+        db.execute_sql(f"""
+            DELETE FROM {schema}.data_column_chars
+            WHERE table_id = '{table_id}';
+        """)
+        db.execute_sql(f"""
+            DELETE FROM {schema}.data_table_chars
+            WHERE table_id = '{table_id}';
+        """)
+
+        st.success("Table has been removed.")
+        # Presumably a pause so the message is visible before the rerun.
+        time.sleep(1)
+        for func in [get_table_group_columns, get_tag_values]:
+            func.clear()
+        st.session_state["data_catalog:last_saved_timestamp"] = datetime.now().timestamp()
+        st.rerun()
+
+
def on_tags_changed(spinner_container: DeltaGenerator, payload: dict) -> FILE_DATA_TYPE:
attributes = ["description"]
attributes.extend(TAG_FIELDS)
From a721d8600dc405f17d9de407824c2486053fdd79 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Fri, 30 May 2025 16:05:51 -0400
Subject: [PATCH 24/33] feat(data-catalog): indicate empty tables on tree icon
---
testgen/ui/components/frontend/css/shared.css | 3 +++
.../components/frontend/js/components/tooltip.js | 2 +-
.../ui/components/frontend/js/components/tree.js | 15 ++++++++++++++-
.../js/data_profiling/data_profiling_utils.js | 12 ++++++------
.../components/frontend/js/pages/data_catalog.js | 5 ++++-
testgen/ui/views/data_catalog.py | 1 +
6 files changed, 29 insertions(+), 9 deletions(-)
diff --git a/testgen/ui/components/frontend/css/shared.css b/testgen/ui/components/frontend/css/shared.css
index a918e619..50d55ffc 100644
--- a/testgen/ui/components/frontend/css/shared.css
+++ b/testgen/ui/components/frontend/css/shared.css
@@ -30,6 +30,7 @@ body {
--form-field-color: rgb(240, 242, 246); /* Match Streamlit's form field color */
--border-color: rgba(0, 0, 0, .12);
--tooltip-color: #333d;
+ --tooltip-text-color: #fff;
--dk-card-background: #fff;
--sidebar-background-color: white;
@@ -89,6 +90,8 @@ body {
--caption-text-color: rgba(250, 250, 250, .6); /* Match Streamlit's caption color */
--form-field-color: rgb(38, 39, 48); /* Match Streamlit's form field color */
--border-color: rgba(255, 255, 255, .25);
+ --tooltip-color: #eee;
+ --tooltip-text-color: #000;
--dk-card-background: #14181f;
--sidebar-background-color: #14181f;
diff --git a/testgen/ui/components/frontend/js/components/tooltip.js b/testgen/ui/components/frontend/js/components/tooltip.js
index 38a814e2..6663afb1 100644
--- a/testgen/ui/components/frontend/js/components/tooltip.js
+++ b/testgen/ui/components/frontend/js/components/tooltip.js
@@ -50,7 +50,7 @@ stylesheet.replace(`
border-radius: 4px;
background-color: var(--tooltip-color);
padding: 4px 8px;
- color: white;
+ color: var(--tooltip-text-color);
font-size: 13px;
font-family: 'Roboto', 'Helvetica Neue', sans-serif;
text-align: center;
diff --git a/testgen/ui/components/frontend/js/components/tree.js b/testgen/ui/components/frontend/js/components/tree.js
index 184bdff4..8f8e95b0 100644
--- a/testgen/ui/components/frontend/js/components/tree.js
+++ b/testgen/ui/components/frontend/js/components/tree.js
@@ -6,6 +6,8 @@
* @property {string?} classes
* @property {string?} icon
* @property {number?} iconSize
+ * @property {'red'?} iconColor
+ * @property {string?} iconTooltip
* @property {TreeNode[]?} children
* @property {number?} level
* @property {boolean?} expanded
@@ -43,6 +45,7 @@ import { Portal } from './portal.js';
import { Icon } from './icon.js';
import { Checkbox } from './checkbox.js';
import { Toggle } from './toggle.js';
+import { withTooltip } from './tooltip.js';
const { div, h3, span } = van.tags;
const levelOffset = 14;
@@ -307,7 +310,13 @@ const TreeNode = (
span({ class: 'mr-1' }),
]
: null,
- node.icon ? Icon({ size: 24, classes: 'tg-tree--row-icon' }, node.icon) : null,
+ () => {
+ if (node.icon) {
+ const icon = Icon({ size: node.iconSize, classes: `tg-tree--row-icon ${node.iconColor}` }, node.icon);
+ return node.iconTooltip ? withTooltip(icon, { text: node.iconTooltip, position: 'right' }) : icon;
+ }
+ return null;
+ },
node.label,
),
hasChildren ? div(
@@ -507,6 +516,10 @@ stylesheet.replace(`
color: #B0BEC5;
text-align: center;
}
+
+.tg-tree--row-icon.red {
+ color: var(--red);
+}
`);
export { Tree };
diff --git a/testgen/ui/components/frontend/js/data_profiling/data_profiling_utils.js b/testgen/ui/components/frontend/js/data_profiling/data_profiling_utils.js
index 84fe407e..df3cbf17 100644
--- a/testgen/ui/components/frontend/js/data_profiling/data_profiling_utils.js
+++ b/testgen/ui/components/frontend/js/data_profiling/data_profiling_utils.js
@@ -193,13 +193,13 @@ import { formatTimestamp } from '../display_utils.js';
const { span, b } = van.tags;
-const TABLE_ICON = { icon: 'table', iconSize: 20 };
+const TABLE_ICON = { icon: 'table' };
const COLUMN_ICONS = {
- A: { icon: 'abc' },
- B: { icon: 'toggle_off', iconSize: 20 },
- D: { icon: 'calendar_clock', iconSize: 20 },
- N: { icon: '123' },
- T: { icon: 'calendar_clock', iconSize: 20 },
+ A: { icon: 'abc', iconSize: 24 },
+ B: { icon: 'toggle_off' },
+ D: { icon: 'calendar_clock' },
+ N: { icon: '123', iconSize: 24 },
+ T: { icon: 'calendar_clock' },
X: { icon: 'question_mark', iconSize: 18 },
};
const BOOLEAN_TYPE = 'Boolean';
diff --git a/testgen/ui/components/frontend/js/pages/data_catalog.js b/testgen/ui/components/frontend/js/pages/data_catalog.js
index 78252ec3..ca6b0108 100644
--- a/testgen/ui/components/frontend/js/pages/data_catalog.js
+++ b/testgen/ui/components/frontend/js/pages/data_catalog.js
@@ -17,6 +17,7 @@
* @property {string} table_name
* @property {'A' | 'B' | 'D' | 'N' | 'T' | 'X'} general_type
* @property {string} functional_data_type
+ * @property {number} record_ct
* @property {number} drop_date
* @property {number} table_drop_date
* @property {boolean} critical_data_element
@@ -102,13 +103,15 @@ const DataCatalog = (/** @type Properties */ props) => {
const tables = {};
columns.forEach((item) => {
- const { column_id, table_id, column_name, table_name, drop_date, table_drop_date } = item;
+ const { column_id, table_id, column_name, table_name, record_ct, drop_date, table_drop_date } = item;
if (!tables[table_id]) {
tables[table_id] = {
id: table_id,
label: table_name,
classes: table_drop_date ? 'text-disabled' : '',
...TABLE_ICON,
+ iconColor: record_ct === 0 ? 'red' : null,
+ iconTooltip: record_ct === 0 ? 'No records detected' : null,
criticalDataElement: !!item.table_critical_data_element,
children: [],
};
diff --git a/testgen/ui/views/data_catalog.py b/testgen/ui/views/data_catalog.py
index 3d8d45d4..7f1894d8 100644
--- a/testgen/ui/views/data_catalog.py
+++ b/testgen/ui/views/data_catalog.py
@@ -365,6 +365,7 @@ def get_table_group_columns(table_group_id: str) -> pd.DataFrame:
table_chars.table_name,
column_chars.general_type,
column_chars.functional_data_type,
+ table_chars.record_ct,
column_chars.drop_date,
table_chars.drop_date AS table_drop_date,
column_chars.critical_data_element,
From a1de489fb1996e5db2f6a45a2945d90a5f2f39ca Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Fri, 30 May 2025 17:06:55 -0400
Subject: [PATCH 25/33] feat(data-catalog): highlight zero record/value counts
---
.../js/data_profiling/column_distribution.js | 55 ++++++++++++-------
.../frontend/js/data_profiling/table_size.js | 8 ++-
2 files changed, 40 insertions(+), 23 deletions(-)
diff --git a/testgen/ui/components/frontend/js/data_profiling/column_distribution.js b/testgen/ui/components/frontend/js/data_profiling/column_distribution.js
index f0a515e0..bcd0ad55 100644
--- a/testgen/ui/components/frontend/js/data_profiling/column_distribution.js
+++ b/testgen/ui/components/frontend/js/data_profiling/column_distribution.js
@@ -38,32 +38,38 @@ const ColumnDistributionCard = (/** @type Properties */ props, /** @type Column
return Card({
border: props.border,
title: `Value Distribution ${item.is_latest_profile ? '*' : ''}`,
- content: item.profile_run_id && columnFunction ? columnFunction(item) : null,
+ content: item.profile_run_id
+ ? (item.record_ct === 0
+ ? BaseCounts(item)
+ : columnFunction?.(item))
+ : null,
actionContent: div(
{ class: 'flex-row fx-gap-3' },
item.profile_run_id
- ? (getValue(props.dataPreview)
- ? Button({
- type: 'stroked',
- label: 'Data Preview',
- icon: 'pageview',
- width: 'auto',
- onclick: () => emitEvent('DataPreviewClicked', { payload: item }),
- })
- : null)
+ ? ([
+ getValue(props.dataPreview)
+ ? Button({
+ type: 'stroked',
+ label: 'Data Preview',
+ icon: 'pageview',
+ width: 'auto',
+ onclick: () => emitEvent('DataPreviewClicked', { payload: item }),
+ })
+ : null,
+ getValue(props.history)
+ ? Button({
+ type: 'stroked',
+ label: 'History',
+ icon: 'history',
+ width: 'auto',
+ onclick: () => emitEvent('HistoryClicked', { payload: item }),
+ })
+ : null,
+ ])
: span(
{ class: 'text-secondary' },
'No profiling data available',
),
- getValue(props.history)
- ? Button({
- type: 'stroked',
- label: 'History',
- icon: 'history',
- width: 'auto',
- onclick: () => emitEvent('HistoryClicked', { payload: item }),
- })
- : null,
),
})
};
@@ -302,10 +308,17 @@ function NumericColumn(/** @type ColumnProfile */ item) {
}
const BaseCounts = (/** @type ColumnProfile */ item) => {
+ const attributes = [
+ { key: 'record_ct', label: 'Record Count' },
+ { key: 'value_ct', label: 'Value Count' },
+ ];
return div(
{ class: 'flex-row fx-gap-4' },
- Attribute({ label: 'Record Count', value: item.record_ct, width: attributeWidth }),
- Attribute({ label: 'Value Count', value: item.value_ct, width: attributeWidth }),
+ attributes.map(({ key, label }) => Attribute({
+ label: item[key] === 0 ? span({ class: 'text-error' }, label) : label,
+ value: item[key],
+ width: attributeWidth,
+ })),
);
};
diff --git a/testgen/ui/components/frontend/js/data_profiling/table_size.js b/testgen/ui/components/frontend/js/data_profiling/table_size.js
index 56307626..9c5055b1 100644
--- a/testgen/ui/components/frontend/js/data_profiling/table_size.js
+++ b/testgen/ui/components/frontend/js/data_profiling/table_size.js
@@ -18,14 +18,18 @@ const TableSizeCard = (/** @type Properties */ _props, /** @type Table */ item)
{ key: 'column_ct', label: 'Column Count' },
{ key: 'record_ct', label: 'Row Count' },
{ key: 'data_point_ct', label: 'Data Point Count' },
- ]
+ ];
return Card({
title: 'Table Size **',
content: div(
div(
{ class: 'flex-row fx-flex-wrap fx-gap-4' },
- attributes.map(({ key, label }) => Attribute({ label, value: item[key], width: 250 })),
+ attributes.map(({ key, label }) => Attribute({
+ label: item[key] === 0 ? span({ class: 'text-error' }, label) : label,
+ value: item[key],
+ width: 250,
+ })),
),
span({ class: 'text-caption flex-row fx-justify-content-flex-end mt-2' }, `** as of ${formatTimestamp(item.last_refresh_date)}`),
),
From 95c7ef4d1f02c6da1bf6ab845b7fafe7f8ecc31a Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Mon, 2 Jun 2025 12:24:26 -0400
Subject: [PATCH 26/33] feat(data-catalog): indicate null and other values in
frequency bars
---
testgen/ui/components/frontend/css/shared.css | 2 +
.../frontend/js/components/frequency_bars.js | 45 +++++++++++++++----
.../js/data_profiling/column_distribution.js | 2 +
.../components/frontend/js/display_utils.js | 1 +
4 files changed, 41 insertions(+), 9 deletions(-)
diff --git a/testgen/ui/components/frontend/css/shared.css b/testgen/ui/components/frontend/css/shared.css
index 50d55ffc..20452b94 100644
--- a/testgen/ui/components/frontend/css/shared.css
+++ b/testgen/ui/components/frontend/css/shared.css
@@ -22,6 +22,7 @@ body {
--grey: #BDBDBD;
--empty: #EEEEEE;
--empty-light: #FAFAFA;
+ --empty-teal: #E7F1F0;
--primary-text-color: #000000de;
--secondary-text-color: #0000008a;
@@ -83,6 +84,7 @@ body {
body {
--empty: #424242;
--empty-light: #212121;
+ --empty-teal: #242E2D;
--primary-text-color: rgba(255, 255, 255);
--secondary-text-color: rgba(255, 255, 255, .7);
diff --git a/testgen/ui/components/frontend/js/components/frequency_bars.js b/testgen/ui/components/frontend/js/components/frequency_bars.js
index ed49bf5b..c3ad64a3 100644
--- a/testgen/ui/components/frontend/js/components/frequency_bars.js
+++ b/testgen/ui/components/frontend/js/components/frequency_bars.js
@@ -8,6 +8,7 @@
* @type {object}
* @property {FrequencyItem[]} items
* @property {number} total
+ * @property {number} nullCount
* @property {string} title
* @property {string?} color
*/
@@ -17,11 +18,14 @@ import { colorMap } from '../display_utils.js';
const { div, span } = van.tags;
const defaultColor = 'teal';
+const otherColor = colorMap['emptyTeal'];
+const nullColor = colorMap['emptyLight'];
const FrequencyBars = (/** @type Properties */ props) => {
loadStylesheet('frequencyBars', stylesheet);
const total = van.derive(() => getValue(props.total));
+ const nullCount = van.derive(() => getValue(props.nullCount));
const color = van.derive(() => {
const colorValue = getValue(props.color) || defaultColor;
return colorMap[colorValue] || colorValue;
@@ -41,7 +45,16 @@ const FrequencyBars = (/** @type Properties */ props) => {
{ class: 'flex-row fx-gap-2' },
div(
{ class: 'tg-frequency-bars' },
- span({ class: 'tg-frequency-bars--empty' }),
+ span({
+ class: 'tg-frequency-bars--fill',
+ style: `width: 100%; background-color: ${nullColor};`,
+ }),
+ span({
+ class: 'tg-frequency-bars--fill',
+ style: () => `width: ${(total.val - nullCount.val) * 100 / total.val}%;
+ ${(total.val - nullCount.val) ? 'min-width: 1px;' : ''}
+ background-color: ${otherColor};`,
+ }),
span({
class: 'tg-frequency-bars--fill',
style: () => `width: ${count * 100 / total.val}%;
@@ -59,6 +72,15 @@ const FrequencyBars = (/** @type Properties */ props) => {
div(value),
);
}),
+ div(
+ { class: 'tg-frequency-bars--legend flex-row fx-flex-wrap text-caption mt-1' },
+ span({ class: 'dot', style: `color: ${color.val};` }),
+ 'Value',
+ span({ class: 'dot', style: `color: ${otherColor};` }),
+ 'Other',
+ span({ class: 'dot', style: `color: ${nullColor};` }),
+ 'Null',
+ ),
);
};
@@ -71,14 +93,6 @@ stylesheet.replace(`
position: relative;
}
-.tg-frequency-bars--empty {
- position: absolute;
- width: 100%;
- height: 100%;
- border-radius: 4px;
- background-color: ${colorMap['emptyLight']}
-}
-
.tg-frequency-bars--fill {
position: absolute;
border-radius: 4px;
@@ -89,6 +103,19 @@ stylesheet.replace(`
flex-shrink: 0;
text-align: right;
}
+
+.tg-frequency-bars--legend {
+ font-style: italic;
+}
+
+.tg-frequency-bars--legend span {
+ margin-right: 2px;
+ font-size: 4px;
+}
+
+.tg-frequency-bars--legend span:not(:first-child) {
+ margin-left: 8px;
+}
`);
export { FrequencyBars };
diff --git a/testgen/ui/components/frontend/js/data_profiling/column_distribution.js b/testgen/ui/components/frontend/js/data_profiling/column_distribution.js
index bcd0ad55..a0810553 100644
--- a/testgen/ui/components/frontend/js/data_profiling/column_distribution.js
+++ b/testgen/ui/components/frontend/js/data_profiling/column_distribution.js
@@ -154,6 +154,7 @@ function AlphaColumn(/** @type ColumnProfile */ item) {
item.top_freq_values ? FrequencyBars({
title: 'Frequent Values',
total: item.record_ct,
+ nullCount: item.null_value_ct,
items: item.top_freq_values.substring(2).split('\n| ').map(parts => {
const [value, count] = parts.split(' | ');
return { value, count: Number(count) };
@@ -162,6 +163,7 @@ function AlphaColumn(/** @type ColumnProfile */ item) {
item.top_patterns ? FrequencyBars({
title: 'Frequent Patterns',
total: item.record_ct,
+ nullCount: item.null_value_ct,
items: item.top_patterns.split(' | ').reduce((array, item, index) => {
if (index % 2) {
array[(index - 1) / 2].value = item;
diff --git a/testgen/ui/components/frontend/js/display_utils.js b/testgen/ui/components/frontend/js/display_utils.js
index e5fb11a2..652d3822 100644
--- a/testgen/ui/components/frontend/js/display_utils.js
+++ b/testgen/ui/components/frontend/js/display_utils.js
@@ -63,6 +63,7 @@ const colorMap = {
grey: '#BDBDBD', // Gray 400
empty: 'var(--empty)', // Light: Gray 200, Dark: Gray 800
emptyLight: 'var(--empty-light)', // Light: Gray 50, Dark: Gray 900
+ emptyTeal: 'var(--empty-teal)',
}
const DISABLED_ACTION_TEXT = 'You do not have permissions to perform this action. Contact your administrator.';
From 467ef02ac7d5d2a27498a648f470548c6f284852 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Thu, 5 Jun 2025 17:10:17 -0400
Subject: [PATCH 27/33] feat(data-preview): use select distinct
---
testgen/ui/views/dialogs/data_preview_dialog.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/testgen/ui/views/dialogs/data_preview_dialog.py b/testgen/ui/views/dialogs/data_preview_dialog.py
index dd8f6195..20ccb4bb 100644
--- a/testgen/ui/views/dialogs/data_preview_dialog.py
+++ b/testgen/ui/views/dialogs/data_preview_dialog.py
@@ -65,7 +65,7 @@ def get_preview_data(
if not connection_df.empty:
use_top = connection_df["sql_flavor"] == "mssql"
query = f"""
- SELECT
+ SELECT DISTINCT
{"TOP 100" if use_top else ""}
{column_name or "*"}
FROM {schema_name}.{table_name}
From 00ba4f1585737b9aec633771cddaf030c859d64f Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Mon, 16 Jun 2025 10:32:53 -0400
Subject: [PATCH 28/33] fix(export): round error when value is null
---
testgen/ui/views/data_catalog.py | 4 ++--
testgen/ui/views/profiling_results.py | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/testgen/ui/views/data_catalog.py b/testgen/ui/views/data_catalog.py
index 7f1894d8..b5ad9a96 100644
--- a/testgen/ui/views/data_catalog.py
+++ b/testgen/ui/views/data_catalog.py
@@ -157,10 +157,10 @@ def get_excel_report_data(update_progress: PROGRESS_UPDATE_TYPE, table_group: st
data = pd.DataFrame(data)
for key in ["column_type", "datatype_suggestion"]:
- data[key] = data[key].apply(lambda val: val.lower())
+ data[key] = data[key].apply(lambda val: val.lower() if not pd.isna(val) else None)
for key in ["avg_embedded_spaces", "avg_length", "avg_value", "stdev_value"]:
- data[key] = data[key].apply(lambda val: round(val, 2))
+ data[key] = data[key].apply(lambda val: round(val, 2) if not pd.isna(val) else None)
for key in ["min_date", "max_date", "add_date", "last_mod_date", "drop_date"]:
data[key] = data[key].apply(
diff --git a/testgen/ui/views/profiling_results.py b/testgen/ui/views/profiling_results.py
index 5c4ad506..4a97ad7a 100644
--- a/testgen/ui/views/profiling_results.py
+++ b/testgen/ui/views/profiling_results.py
@@ -160,10 +160,10 @@ def get_excel_report_data(
data = data.copy()
for key in ["column_type", "datatype_suggestion"]:
- data[key] = data[key].apply(lambda val: val.lower())
+ data[key] = data[key].apply(lambda val: val.lower() if not pd.isna(val) else None)
for key in ["avg_embedded_spaces", "avg_length", "avg_value", "stdev_value"]:
- data[key] = data[key].apply(lambda val: round(val, 2))
+ data[key] = data[key].apply(lambda val: round(val, 2) if not pd.isna(val) else None)
for key in ["min_date", "max_date"]:
data[key] = data[key].apply(
From 1125db634968755c0bedd8d647fd1e9960163417 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Mon, 16 Jun 2025 10:33:44 -0400
Subject: [PATCH 29/33] fix: use spinner to prevent grid intermittently
refreshing
---
testgen/ui/queries/profiling_queries.py | 2 +-
testgen/ui/views/hygiene_issues.py | 24 ++++++++++++++----------
testgen/ui/views/profiling_results.py | 5 ++++-
testgen/ui/views/profiling_runs.py | 2 +-
testgen/ui/views/test_definitions.py | 14 ++++++++------
testgen/ui/views/test_results.py | 25 +++++++++++++++----------
testgen/ui/views/test_runs.py | 2 +-
7 files changed, 44 insertions(+), 30 deletions(-)
diff --git a/testgen/ui/queries/profiling_queries.py b/testgen/ui/queries/profiling_queries.py
index b7cda3f4..4893e0ec 100644
--- a/testgen/ui/queries/profiling_queries.py
+++ b/testgen/ui/queries/profiling_queries.py
@@ -93,7 +93,7 @@ def get_run_by_id(profile_run_id: str) -> pd.Series:
return pd.Series()
-@st.cache_data(show_spinner="Loading data ...")
+@st.cache_data(show_spinner=False)
def get_profiling_results(profiling_run_id: str, table_name: str, column_name: str, sorting_columns = None):
order_by = ""
if sorting_columns is None:
diff --git a/testgen/ui/views/hygiene_issues.py b/testgen/ui/views/hygiene_issues.py
index 2facaac4..f14d0f70 100644
--- a/testgen/ui/views/hygiene_issues.py
+++ b/testgen/ui/views/hygiene_issues.py
@@ -130,15 +130,17 @@ def render(
str_help = "Toggle on to perform actions on multiple Hygiene Issues"
do_multi_select = st.toggle("Multi-Select", help=str_help)
+ with st.container():
+ with st.spinner("Loading data ..."):
+ # Get hygiene issue list
+ df_pa = get_profiling_anomalies(run_id, issue_class, issue_type_id, table_name, column_name, sorting_columns)
- # Get hygiene issue list
- df_pa = get_profiling_anomalies(run_id, issue_class, issue_type_id, table_name, column_name, sorting_columns)
+ # Retrieve disposition action (cache refreshed)
+ df_action = get_anomaly_disposition(run_id)
- # Retrieve disposition action (cache refreshed)
- df_action = get_anomaly_disposition(run_id)
- # Update action from disposition df
- action_map = df_action.set_index("id")["action"].to_dict()
- df_pa["action"] = df_pa["id"].map(action_map).fillna(df_pa["action"])
+ # Update action from disposition df
+ action_map = df_action.set_index("id")["action"].to_dict()
+ df_pa["action"] = df_pa["id"].map(action_map).fillna(df_pa["action"])
if not df_pa.empty:
summaries = get_profiling_anomaly_summary(run_id)
@@ -324,7 +326,7 @@ def refresh_score(project_code: str, run_id: str, table_group_id: str | None) ->
st.cache_data.clear()
-@st.cache_data(show_spinner="False")
+@st.cache_data(show_spinner=False)
def get_profiling_run_columns(profiling_run_id: str) -> pd.DataFrame:
schema: str = st.session_state["dbschema"]
sql = f"""
@@ -336,7 +338,7 @@ def get_profiling_run_columns(profiling_run_id: str) -> pd.DataFrame:
return db.retrieve_data(sql)
-@st.cache_data(show_spinner="Retrieving Data")
+@st.cache_data(show_spinner=False)
def get_profiling_anomalies(
profile_run_id: str,
likelihood: str | None,
@@ -423,7 +425,7 @@ def get_profiling_anomalies(
return df
-@st.cache_data(show_spinner="Retrieving Status")
+@st.cache_data(show_spinner=False)
def get_anomaly_disposition(str_profile_run_id):
str_schema = st.session_state["dbschema"]
str_sql = f"""
@@ -540,6 +542,8 @@ def source_data_dialog(selected_row):
# Pretify the dataframe
df_bad.columns = [col.replace("_", " ").title() for col in df_bad.columns]
df_bad.fillna("[NULL]", inplace=True)
+ if len(df_bad) == 500:
+ testgen.caption("* Top 500 records displayed", "text-align: right;")
# Display the dataframe
st.dataframe(df_bad, height=500, width=1050, hide_index=True)
diff --git a/testgen/ui/views/profiling_results.py b/testgen/ui/views/profiling_results.py
index 4a97ad7a..cc656ce0 100644
--- a/testgen/ui/views/profiling_results.py
+++ b/testgen/ui/views/profiling_results.py
@@ -100,7 +100,10 @@ def render(self, run_id: str, table_name: str | None = None, column_name: str |
column_name = "%%"
# Display main results grid
- df = profiling_queries.get_profiling_results(run_id, table_name, column_name, sorting_columns)
+ with st.container():
+ with st.spinner("Loading data ..."):
+ df = profiling_queries.get_profiling_results(run_id, table_name, column_name, sorting_columns)
+
show_columns = [
"schema_name",
"table_name",
diff --git a/testgen/ui/views/profiling_runs.py b/testgen/ui/views/profiling_runs.py
index e67b58aa..cd3c8fe0 100644
--- a/testgen/ui/views/profiling_runs.py
+++ b/testgen/ui/views/profiling_runs.py
@@ -181,7 +181,7 @@ def get_db_table_group_choices(project_code: str) -> pd.DataFrame:
return dq.run_table_groups_lookup_query(schema, project_code)
-@st.cache_data(show_spinner="Retrieving Data")
+@st.cache_data(show_spinner="Loading data ...")
def get_db_profiling_runs(project_code: str, table_group_id: str | None = None) -> pd.DataFrame:
schema = st.session_state["dbschema"]
table_group_condition = f" AND v_profiling_runs.table_groups_id = '{table_group_id}' " if table_group_id else ""
diff --git a/testgen/ui/views/test_definitions.py b/testgen/ui/views/test_definitions.py
index 429943d0..8f4e6ad8 100644
--- a/testgen/ui/views/test_definitions.py
+++ b/testgen/ui/views/test_definitions.py
@@ -752,13 +752,15 @@ def show_test_defs_grid(
str_project_code, str_test_suite, str_table_name, str_column_name, do_multi_select, export_container,
str_table_groups_id
):
- df = test_definition_service.get_test_definitions(
- str_project_code, str_test_suite, str_table_name, str_column_name
- )
- date_service.accommodate_dataframe_to_timezone(df, st.session_state)
+ with st.container():
+ with st.spinner("Loading data ..."):
+ df = test_definition_service.get_test_definitions(
+ str_project_code, str_test_suite, str_table_name, str_column_name
+ )
+ date_service.accommodate_dataframe_to_timezone(df, st.session_state)
- for col in df.select_dtypes(include=["datetime"]).columns:
- df[col] = df[col].astype(str).replace("NaT", "")
+ for col in df.select_dtypes(include=["datetime"]).columns:
+ df[col] = df[col].astype(str).replace("NaT", "")
lst_show_columns = [
"schema_name",
diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py
index c2b01a3e..1397f18b 100644
--- a/testgen/ui/views/test_results.py
+++ b/testgen/ui/views/test_results.py
@@ -274,7 +274,7 @@ def get_test_types():
return df
-@st.cache_data(show_spinner="False")
+@st.cache_data(show_spinner=False)
def get_test_run_columns(test_run_id: str) -> pd.DataFrame:
schema: str = st.session_state["dbschema"]
sql = f"""
@@ -286,7 +286,7 @@ def get_test_run_columns(test_run_id: str) -> pd.DataFrame:
return db.retrieve_data(sql)
-@st.cache_data(show_spinner="Retrieving Results")
+@st.cache_data(show_spinner=False)
def get_test_results(
run_id: str,
test_status: str | None = None,
@@ -299,7 +299,7 @@ def get_test_results(
return test_results_service.get_test_results(schema, run_id, test_status, test_type_id, table_name, column_name, sorting_columns)
-@st.cache_data(show_spinner="Retrieving Status")
+@st.cache_data(show_spinner=False)
def get_test_disposition(str_run_id):
str_schema = st.session_state["dbschema"]
str_sql = f"""
@@ -483,13 +483,16 @@ def show_result_detail(
sorting_columns: list[str] | None = None,
do_multi_select: bool = False,
):
- # Retrieve test results (always cached, action as null)
- df = get_test_results(run_id, test_status, test_type_id, table_name, column_name, sorting_columns)
- # Retrieve disposition action (cache refreshed)
- df_action = get_test_disposition(run_id)
- # Update action from disposition df
- action_map = df_action.set_index("id")["action"].to_dict()
- df["action"] = df["test_result_id"].map(action_map).fillna(df["action"])
+ with st.container():
+ with st.spinner("Loading data ..."):
+ # Retrieve test results (always cached, action as null)
+ df = get_test_results(run_id, test_status, test_type_id, table_name, column_name, sorting_columns)
+ # Retrieve disposition action (cache refreshed)
+ df_action = get_test_disposition(run_id)
+
+ # Update action from disposition df
+ action_map = df_action.set_index("id")["action"].to_dict()
+ df["action"] = df["test_result_id"].map(action_map).fillna(df["action"])
lst_show_columns = [
"table_name",
@@ -775,6 +778,8 @@ def source_data_dialog(selected_row):
# Pretify the dataframe
df_bad.columns = [col.replace("_", " ").title() for col in df_bad.columns]
df_bad.fillna("[NULL]", inplace=True)
+ if len(df_bad) == 500:
+ testgen.caption("* Top 500 records displayed", "text-align: right;")
# Display the dataframe
st.dataframe(df_bad, height=500, width=1050, hide_index=True)
diff --git a/testgen/ui/views/test_runs.py b/testgen/ui/views/test_runs.py
index 6806fe37..5bd8888f 100644
--- a/testgen/ui/views/test_runs.py
+++ b/testgen/ui/views/test_runs.py
@@ -218,7 +218,7 @@ def get_db_test_suite_choices(project_code: str, table_groups_id: str | None = N
return run_test_suite_lookup_query(schema, project_code, table_groups_id)
-# @st.cache_data(show_spinner="Retrieving Data")
+@st.cache_data(show_spinner="Loading data ...")
def get_db_test_runs(project_code: str, table_groups_id: str | None = None, test_suite_id: str | None = None) -> pd.DataFrame:
schema = st.session_state["dbschema"]
table_group_condition = f" AND test_suites.table_groups_id = '{table_groups_id}' " if table_groups_id else ""
From 8d14b65bb65e95219457520dbf8a079f53701e43 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Mon, 16 Jun 2025 10:34:32 -0400
Subject: [PATCH 30/33] fix: use unnest array for better update performance
---
testgen/ui/queries/test_definition_queries.py | 39 +++++++++++--------
testgen/ui/views/data_catalog.py | 21 +++++++---
2 files changed, 38 insertions(+), 22 deletions(-)
diff --git a/testgen/ui/queries/test_definition_queries.py b/testgen/ui/queries/test_definition_queries.py
index 5af277fd..ae16d4c6 100644
--- a/testgen/ui/queries/test_definition_queries.py
+++ b/testgen/ui/queries/test_definition_queries.py
@@ -4,14 +4,17 @@
def update_attribute(schema, test_definition_ids, attribute, value):
- sql = f"""UPDATE {schema}.test_definitions
- SET
- {attribute}='{value}'
- where
- id in ({"'" + "','".join(test_definition_ids) + "'"})
- ;
- """
- db.execute_sql(sql)
+ sql = f"""
+ WITH selected as (
+ SELECT UNNEST(ARRAY [{", ".join([ f"'{item}'" for item in test_definition_ids ])}]) AS id
+ )
+ UPDATE {schema}.test_definitions
+ SET {attribute}='{value}'
+ FROM {schema}.test_definitions td
+ INNER JOIN selected ON (td.id = selected.id::UUID)
+ WHERE td.id = test_definitions.id;
+ """
+ db.execute_sql_raw(sql)
st.cache_data.clear()
@@ -260,17 +263,19 @@ def cascade_delete(schema, test_suite_ids):
def move(schema, test_definitions, target_table_group, target_test_suite):
- test_definition_ids = [f"'{td['id']}'" for td in test_definitions]
sql = f"""
- UPDATE {schema}.test_definitions
- SET
- table_groups_id = '{target_table_group}'::UUID,
- test_suite_id = '{target_test_suite}'::UUID
- WHERE
- id in ({",".join(test_definition_ids)})
- ;
+ WITH selected as (
+ SELECT UNNEST(ARRAY [{", ".join([ f"'{td['id']}'" for td in test_definitions ])}]) AS id
+ )
+ UPDATE {schema}.test_definitions
+ SET
+ table_groups_id = '{target_table_group}'::UUID,
+ test_suite_id = '{target_test_suite}'::UUID
+ FROM {schema}.test_definitions td
+ INNER JOIN selected ON (td.id = selected.id::UUID)
+ WHERE td.id = test_definitions.id;
"""
- db.execute_sql(sql)
+ db.execute_sql_raw(sql)
st.cache_data.clear()
diff --git a/testgen/ui/views/data_catalog.py b/testgen/ui/views/data_catalog.py
index b5ad9a96..be9dab0b 100644
--- a/testgen/ui/views/data_catalog.py
+++ b/testgen/ui/views/data_catalog.py
@@ -327,18 +327,29 @@ def on_tags_changed(spinner_container: DeltaGenerator, payload: dict) -> FILE_DA
with spinner_container:
with st.spinner("Saving tags"):
if tables:
- db.execute_sql(f"""
+ db.execute_sql_raw(f"""
+ WITH selected as (
+ SELECT UNNEST(ARRAY [{", ".join([ f"'{item}'" for item in tables ])}]) AS table_id
+ )
UPDATE {schema}.data_table_chars
SET {', '.join(set_attributes)}
- WHERE table_id IN ({", ".join([ f"'{item}'" for item in tables ])});
+ FROM {schema}.data_table_chars dtc
+ INNER JOIN selected ON (dtc.table_id = selected.table_id::UUID)
+ WHERE dtc.table_id = data_table_chars.table_id;
""")
+
if columns:
- db.execute_sql(f"""
+ db.execute_sql_raw(f"""
+ WITH selected as (
+ SELECT UNNEST(ARRAY [{", ".join([ f"'{item}'" for item in columns ])}]) AS column_id
+ )
UPDATE {schema}.data_column_chars
SET {', '.join(set_attributes)}
- WHERE column_id IN ({", ".join([ f"'{item}'" for item in columns ])});
- """)
+ FROM {schema}.data_column_chars dcc
+ INNER JOIN selected ON (dcc.column_id = selected.column_id::UUID)
+ WHERE dcc.column_id = data_column_chars.column_id;
+ """)
for func in [ get_table_group_columns, get_table_by_id, get_column_by_id, get_tag_values ]:
func.clear()
From fdcde39283f593d97f381b1e39e14db385277c79 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Mon, 16 Jun 2025 10:35:07 -0400
Subject: [PATCH 31/33] feat(data-catalog): highlight columns with no values
---
.../components/frontend/js/components/sorting_selector.js | 4 ++--
testgen/ui/components/frontend/js/pages/data_catalog.js | 5 ++++-
testgen/ui/views/data_catalog.py | 6 ++++++
3 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/testgen/ui/components/frontend/js/components/sorting_selector.js b/testgen/ui/components/frontend/js/components/sorting_selector.js
index 60b9afa6..824e118a 100644
--- a/testgen/ui/components/frontend/js/components/sorting_selector.js
+++ b/testgen/ui/components/frontend/js/components/sorting_selector.js
@@ -45,8 +45,8 @@ const SortingSelector = (/** @type {Properties} */ props) => {
);
const directionIcons = {
- ASC: `arrow_downward`,
- DESC: `arrow_upward`,
+ ASC: `arrow_upward`,
+ DESC: `arrow_downward`,
}
const activeColumnItem = (colId) => {
diff --git a/testgen/ui/components/frontend/js/pages/data_catalog.js b/testgen/ui/components/frontend/js/pages/data_catalog.js
index ca6b0108..1641ca5e 100644
--- a/testgen/ui/components/frontend/js/pages/data_catalog.js
+++ b/testgen/ui/components/frontend/js/pages/data_catalog.js
@@ -18,6 +18,7 @@
* @property {'A' | 'B' | 'D' | 'N' | 'T' | 'X'} general_type
* @property {string} functional_data_type
* @property {number} record_ct
+ * @property {number} value_ct
* @property {number} drop_date
* @property {number} table_drop_date
* @property {boolean} critical_data_element
@@ -103,7 +104,7 @@ const DataCatalog = (/** @type Properties */ props) => {
const tables = {};
columns.forEach((item) => {
- const { column_id, table_id, column_name, table_name, record_ct, drop_date, table_drop_date } = item;
+ const { column_id, table_id, column_name, table_name, record_ct, value_ct, drop_date, table_drop_date } = item;
if (!tables[table_id]) {
tables[table_id] = {
id: table_id,
@@ -122,6 +123,8 @@ const DataCatalog = (/** @type Properties */ props) => {
label: column_name,
classes: drop_date ? 'text-disabled' : '',
...getColumnIcon(item),
+ iconColor: value_ct === 0 ? 'red' : null,
+ iconTooltip: value_ct === 0 ? 'No non-null values detected' : null,
criticalDataElement: !!(item.critical_data_element ?? item.table_critical_data_element),
};
TAG_KEYS.forEach(key => columnNode[key] = item[key] ?? item[`table_${key}`]);
diff --git a/testgen/ui/views/data_catalog.py b/testgen/ui/views/data_catalog.py
index be9dab0b..8eff9142 100644
--- a/testgen/ui/views/data_catalog.py
+++ b/testgen/ui/views/data_catalog.py
@@ -377,6 +377,7 @@ def get_table_group_columns(table_group_id: str) -> pd.DataFrame:
column_chars.general_type,
column_chars.functional_data_type,
table_chars.record_ct,
+ profile_results.value_ct,
column_chars.drop_date,
table_chars.drop_date AS table_drop_date,
column_chars.critical_data_element,
@@ -387,6 +388,11 @@ def get_table_group_columns(table_group_id: str) -> pd.DataFrame:
LEFT JOIN {schema}.data_table_chars table_chars ON (
column_chars.table_id = table_chars.table_id
)
+ LEFT JOIN {schema}.profile_results ON (
+ column_chars.last_complete_profile_run_id = profile_results.profile_run_id
+ AND column_chars.table_name = profile_results.table_name
+ AND column_chars.column_name = profile_results.column_name
+ )
WHERE column_chars.table_groups_id = '{table_group_id}'
ORDER BY table_name, ordinal_position;
"""
From e3c6ca58f352d7d3bdedaf1827c5b01c041384a8 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Mon, 16 Jun 2025 12:11:28 -0400
Subject: [PATCH 32/33] fix: update null value representation in source data
displays
---
testgen/ui/views/dialogs/data_preview_dialog.py | 1 +
testgen/ui/views/hygiene_issues.py | 2 +-
testgen/ui/views/test_results.py | 2 +-
3 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/testgen/ui/views/dialogs/data_preview_dialog.py b/testgen/ui/views/dialogs/data_preview_dialog.py
index 20ccb4bb..6911c3d6 100644
--- a/testgen/ui/views/dialogs/data_preview_dialog.py
+++ b/testgen/ui/views/dialogs/data_preview_dialog.py
@@ -92,6 +92,7 @@ def get_preview_data(
return pd.DataFrame()
else:
df.index = df.index + 1
+ df.fillna("", inplace=True)
return df
else:
return pd.DataFrame()
diff --git a/testgen/ui/views/hygiene_issues.py b/testgen/ui/views/hygiene_issues.py
index f14d0f70..3cf0fe3d 100644
--- a/testgen/ui/views/hygiene_issues.py
+++ b/testgen/ui/views/hygiene_issues.py
@@ -541,7 +541,7 @@ def source_data_dialog(selected_row):
st.info(bad_data_msg)
# Pretify the dataframe
df_bad.columns = [col.replace("_", " ").title() for col in df_bad.columns]
- df_bad.fillna("[NULL]", inplace=True)
+ df_bad.fillna("", inplace=True)
if len(df_bad) == 500:
testgen.caption("* Top 500 records displayed", "text-align: right;")
# Display the dataframe
diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py
index 1397f18b..39373a40 100644
--- a/testgen/ui/views/test_results.py
+++ b/testgen/ui/views/test_results.py
@@ -777,7 +777,7 @@ def source_data_dialog(selected_row):
st.info(bad_data_msg)
# Pretify the dataframe
df_bad.columns = [col.replace("_", " ").title() for col in df_bad.columns]
- df_bad.fillna("[NULL]", inplace=True)
+ df_bad.fillna("", inplace=True)
if len(df_bad) == 500:
testgen.caption("* Top 500 records displayed", "text-align: right;")
# Display the dataframe
From 70fd2f6049ad35979d1fabef6d64af061f554c7c Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Tue, 17 Jun 2025 12:56:06 -0400
Subject: [PATCH 33/33] release: 4.0.12 -> 4.1.1
---
pyproject.toml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pyproject.toml b/pyproject.toml
index 1f5b19c1..b0f36486 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,7 +8,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "dataops-testgen"
-version = "4.0.12"
+version = "4.1.1"
description = "DataKitchen's Data Quality DataOps TestGen"
authors = [
{ "name" = "DataKitchen, Inc.", "email" = "info@datakitchen.io" },