Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
eb2ae8a
Merge branch 'main' into 'enterprise'
aarthy-dk May 23, 2025
f0a97c0
fix(scoring): add missing label for data product category
luis-dk May 6, 2025
684f1c9
refactor(scoring): display loading message for long wait periods
luis-dk May 7, 2025
1157d07
misc(scoring): display info alert when no filters are applied
luis-dk May 7, 2025
60ede57
refactor(scoring): use scorecard category as default breakdown grouping
luis-dk May 8, 2025
7d10838
fix(scoring): replace grid with wrap-enabled columns in scorecard cat…
luis-dk May 8, 2025
24f0668
feat(scoring): enable per-column filtering for score cards
luis-dk May 14, 2025
7090038
Merge branch 'scoring-fndfs' into 'enterprise'
May 23, 2025
c9826f2
fix(data-catalog): update drop date only once
aarthy-dk May 9, 2025
2a5fe7f
fix: improve text and tooltip in data catalog and test suites
aarthy-dk May 9, 2025
6b6437f
feat(issue-report): add column tags to pdf reports
aarthy-dk May 9, 2025
b3ae976
refactor(data-catalog): move filter and empty states to vanjs
aarthy-dk May 16, 2025
0f206f3
refactor: download excel dialogs
aarthy-dk May 20, 2025
e044caf
feat(data-catalog): add export to excel
aarthy-dk May 20, 2025
813898b
feat(data-column): add column history dialog
aarthy-dk May 22, 2025
1c800d1
feat(data-catalog): add duplicates bar
aarthy-dk May 23, 2025
2213662
feat(data-catalog): add search options and make case insensitive
aarthy-dk May 23, 2025
2882b52
fix(data-catalog): remove description field from multi-edit mode
aarthy-dk May 23, 2025
b8602e2
fix(run-dialog): cli command error in run tests
aarthy-dk May 21, 2025
f0aba17
fix(users): password required when editing user
aarthy-dk May 21, 2025
6c38d41
Merge branch 'data-catalog-fixes' into 'enterprise'
May 27, 2025
6f8c16b
misc(security): upgrading tornado and xz
aarthy-dk May 27, 2025
fef4c56
Merge branch 'base-v6' into 'enterprise'
Jun 2, 2025
ebf4a78
fix(test-definitions): test results should not prevent manual deletio…
aarthy-dk May 30, 2025
f98d48d
feat(data-catalog): add related test suites card
aarthy-dk May 30, 2025
d623951
feat(data-catalog): display as-of date for table size
aarthy-dk May 30, 2025
0288325
feat(data-catalog): support removing dropped tables
aarthy-dk May 30, 2025
a721d86
feat(data-catalog): indicate empty tables on tree icon
aarthy-dk May 30, 2025
a1de489
feat(data-catalog): highlight zero record/value counts
aarthy-dk May 30, 2025
95c7ef4
feat(data-catalog): indicate null and other values in frequency bars
aarthy-dk Jun 2, 2025
467ef02
feat(data-preview): use select distinct
aarthy-dk Jun 5, 2025
f138065
Merge branch 'data-catalog' into 'enterprise'
Jun 6, 2025
00ba4f1
fix(export): round error when value is null
aarthy-dk Jun 16, 2025
1125db6
fix: use spinner to prevent grid intermittently refreshing
aarthy-dk Jun 16, 2025
8d14b65
fix: use unnest array for better update performance
aarthy-dk Jun 16, 2025
fdcde39
feat(data-catalog): highlight columns with no values
aarthy-dk Jun 16, 2025
e3c6ca5
fix: update null value representation in source data displays
aarthy-dk Jun 16, 2025
4c2867c
Merge branch 'qa-fixes' into 'enterprise'
Jun 16, 2025
70fd2f6
release: 4.0.12 -> 4.1.1
aarthy-dk Jun 17, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion deploy/testgen-base.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ RUN apk update && apk upgrade && apk add --no-cache \
openblas=0.3.28-r0 \
openblas-dev=0.3.28-r0 \
unixodbc=2.3.12-r0 \
unixodbc-dev=2.3.12-r0
unixodbc-dev=2.3.12-r0 \
# Pinned versions for security
xz=5.6.2-r1

RUN apk add --no-cache \
--repository https://dl-cdn.alpinelinux.org/alpine/v3.21/community \
Expand Down
2 changes: 1 addition & 1 deletion deploy/testgen.dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ARG TESTGEN_BASE_LABEL=v5
ARG TESTGEN_BASE_LABEL=v6

FROM datakitchen/dataops-testgen-base:${TESTGEN_BASE_LABEL} AS release-image

Expand Down
3 changes: 1 addition & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "dataops-testgen"
version = "4.0.12"
version = "4.1.2"
description = "DataKitchen's Data Quality DataOps TestGen"
authors = [
{ "name" = "DataKitchen, Inc.", "email" = "info@datakitchen.io" },
Expand Down Expand Up @@ -64,7 +64,6 @@ dependencies = [
"snowflake-connector-python==3.13.1",
"matplotlib==3.9.2",
"scipy==1.14.1",
"tornado==6.4.2",
"jinja2==3.1.6",
]

Expand Down
168 changes: 150 additions & 18 deletions testgen/common/models/scores.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import enum
import uuid
from collections import defaultdict
from collections.abc import Iterable
from datetime import UTC, datetime
from itertools import groupby
from typing import Literal, Self, TypedDict

import pandas as pd
Expand Down Expand Up @@ -69,15 +69,17 @@ class ScoreDefinition(Base):
cde_score: bool = Column(Boolean, default=False, nullable=False)
category: ScoreCategory | None = Column(Enum(ScoreCategory), nullable=True)

results: Iterable["ScoreDefinitionResult"] = relationship(
"ScoreDefinitionResult",
criteria: "ScoreDefinitionCriteria" = relationship(
"ScoreDefinitionCriteria",
cascade="all, delete-orphan",
order_by="ScoreDefinitionResult.category",
lazy="joined",
uselist=False,
single_parent=True,
)
filters: Iterable["ScoreDefinitionFilter"] = relationship(
"ScoreDefinitionFilter",
results: Iterable["ScoreDefinitionResult"] = relationship(
"ScoreDefinitionResult",
cascade="all, delete-orphan",
order_by="ScoreDefinitionResult.category",
lazy="joined",
)
breakdown: Iterable["ScoreDefinitionBreakdownItem"] = relationship(
Expand All @@ -102,9 +104,12 @@ def from_table_group(cls, table_group: dict) -> Self:
definition.total_score = True
definition.cde_score = True
definition.category = ScoreCategory.dq_dimension
definition.filters = [
ScoreDefinitionFilter(field="table_groups_name", value=table_group["table_groups_name"]),
]
definition.criteria = ScoreDefinitionCriteria(
operand="AND",
filters=[
ScoreDefinitionFilter(field="table_groups_name", value=table_group["table_groups_name"]),
],
)
return definition

@classmethod
Expand Down Expand Up @@ -159,7 +164,7 @@ def as_score_card(self) -> "ScoreCard":
score_cards/get_category_scores_by_column.sql
score_cards/get_category_scores_by_dimension.sql
"""
if len(self.filters) <= 0:
if not self.criteria.has_filters():
return {
"id": self.id,
"project_code": self.project_code,
Expand Down Expand Up @@ -378,15 +383,15 @@ def recalculate_scores_history(self) -> None:
self.history = list(current_history.values())

def _get_raw_query_filters(self, cde_only: bool = False, prefix: str | None = None) -> list[str]:
values_by_field = defaultdict(list)
for filter_ in self.filters:
values_by_field[filter_.field].append(f"'{filter_.value}'")
values_by_field["project_code"].append(f"'{self.project_code}'")
extra_filters = [
f"{prefix or ''}project_code = '{self.project_code}'"
]
if cde_only:
values_by_field["critical_data_element"].append("true")
extra_filters.append(f"{prefix or ''}critical_data_element = true")

return [
f"{prefix or ''}{field} IN ({', '.join(values)})" for field, values in values_by_field.items()
*extra_filters,
self.criteria.get_as_sql(prefix=prefix),
]

def to_dict(self) -> dict:
Expand All @@ -397,17 +402,144 @@ def to_dict(self) -> dict:
"total_score": self.total_score,
"cde_score": self.cde_score,
"category": self.category.value if self.category else None,
"filters": [{"field": f.field, "value": f.value} for f in self.filters],
"filters": list(self.criteria),
"filter_by_columns": (not self.criteria.group_by_field)
if self.criteria.group_by_field is not None else None,
}


class ScoreDefinitionCriteria(Base):
    """
    Hold the filter conditions applied to a given scorecard.

    Properties are as follows:

    :param operand: boolean operand used to join the final filters

        Either `AND` or `OR`. The operand is used to join the filters
        after they have been individually processed, grouped and
        formatted into valid SQL expressions.

    :param group_by_field: boolean to group filters by field name

        Boolean indicating that filters on the same field must be
        combined to produce the intermediary filters that will later be
        joined with :attr:`operand`.

        When false, filters are individually converted to valid SQL and
        then joined with :attr:`operand`.

        When true, filters are sorted and grouped by field name, all
        filters for a given field name are combined with an `OR` boolean
        condition into a single filter. Then, the resulting filters
        are joined with :attr:`operand`.

    :param filters: a list of :class:`ScoreDefinitionFilter` objects
    """

    __tablename__ = "score_definition_criteria"

    id: str = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    definition_id: str = Column(UUID(as_uuid=True), ForeignKey("score_definitions.id", ondelete="CASCADE"))
    operand: Literal["AND", "OR"] = Column(String, nullable=False, default="AND")
    group_by_field: bool = Column(Boolean, nullable=False, default=True)
    filters: list["ScoreDefinitionFilter"] = relationship(
        "ScoreDefinitionFilter",
        cascade="all, delete-orphan",
        lazy="joined",
    )

    def __str__(self) -> str:
        # __str__ must return a str; get_as_sql() returns None when there are
        # no filters, which would otherwise raise TypeError.
        return self.get_as_sql() or ""

    def get_as_sql(
        self,
        prefix: str | None = None,
    ) -> str | None:
        """
        Render the criteria as a single SQL boolean expression.

        :param prefix: optional text prepended to every field name
            (forwarded to :meth:`ScoreDefinitionFilter.get_as_sql`)
        :return: the SQL expression, or ``None`` when there are no filters
        """
        if not self.filters:
            return None

        if self.group_by_field:
            filters_sql = []
            # groupby only merges adjacent items, so sort by the same key first.
            sorted_filters = sorted(self.filters, key=lambda f: f.field)
            for _, field_filters in groupby(sorted_filters, key=lambda f: f.field):
                field_filters_sql = [f.get_as_sql(prefix=prefix, operand="AND") for f in field_filters]
                if len(field_filters_sql) > 1:
                    # Several filters on the same field are alternatives.
                    filters_sql.append(f"({' OR '.join(field_filters_sql)})")
                else:
                    filters_sql.append(field_filters_sql[0])
        else:
            filters_sql = [f.get_as_sql(prefix=prefix, operand="AND") for f in self.filters]

        if len(filters_sql) == 1:
            return filters_sql[0]
        joiner = f" {self.operand} "
        return f"({joiner.join(filters_sql)})"

    def __iter__(self):
        """
        Yield one dict per root filter with keys ``field``, ``value`` and
        ``others``; ``others`` lists the filters chained after the root.
        """
        for filter_ in self.filters:
            linked_filters = filter_.next_filter
            yield {
                "field": filter_.field,
                "value": filter_.value,
                # Iterating a filter walks it and everything chained after it.
                "others": [
                    {"field": linked_filter.field, "value": linked_filter.value}
                    for linked_filter in linked_filters
                ] if linked_filters else [],
            }

    def has_filters(self) -> bool:
        """Return whether at least one filter is attached to the criteria."""
        return len(self.filters) > 0

    @classmethod
    def from_filters(cls, filters: list[dict], group_by_field: bool = True) -> "ScoreDefinitionCriteria":
        """
        Build a criteria from plain dicts shaped like the items yielded by
        :meth:`__iter__`.

        Each dict provides ``field`` and ``value`` and, optionally, ``others``:
        a list of ``field``/``value`` dicts chained after the root filter via
        ``next_filter``. The operand is ``AND`` when ``group_by_field`` is true
        and ``OR`` otherwise.
        """
        chained_filters: list[ScoreDefinitionFilter] = []
        for filter_ in filters:
            root_filter = current_filter = ScoreDefinitionFilter(
                field=filter_["field"],
                value=filter_["value"],
                next_filter=None,
            )
            for linked_filter in (filter_.get("others") or []):
                current_filter.next_filter = ScoreDefinitionFilter(
                    field=linked_filter["field"],
                    value=linked_filter["value"],
                    next_filter=None,
                )
                current_filter = current_filter.next_filter
            chained_filters.append(root_filter)
        return cls(operand="AND" if group_by_field else "OR", filters=chained_filters, group_by_field=group_by_field)


class ScoreDefinitionFilter(Base):
    """
    A single ``field = value`` condition of a scorecard criteria.

    Filters can be chained through ``next_filter``; iterating a filter yields
    the filter itself followed by every filter chained after it.
    """

    __tablename__ = "score_definition_filters"

    id: str = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    # No definition_id mapping: upgrade script 0138 drops that column from
    # score_definition_filters, so a filter reaches its definition only
    # through its criteria.
    criteria_id = Column(
        UUID(as_uuid=True),
        ForeignKey("score_definition_criteria.id", ondelete="CASCADE"),
        nullable=True,
        default=None,
    )
    field: str = Column(String, nullable=False)
    value: str = Column(String, nullable=False)
    next_filter_id = Column(
        UUID(as_uuid=True),
        ForeignKey("score_definition_filters.id", ondelete="CASCADE"),
        nullable=True,
        default=None,
    )
    next_filter: "ScoreDefinitionFilter" = relationship(
        "ScoreDefinitionFilter",
        cascade="all, delete-orphan",
        lazy="joined",
        uselist=False,
        single_parent=True,
    )

    def __iter__(self):
        """Yield this filter, then each filter chained after it, in order."""
        current_filter = self
        yield current_filter
        while current_filter.next_filter:
            yield current_filter.next_filter
            current_filter = current_filter.next_filter

    def get_as_sql(self, prefix: str | None = None, operand: Literal["AND", "OR"] = "AND") -> str:
        """
        Render this filter and its chained filters as a parenthesized SQL
        expression of ``field = 'value'`` comparisons joined with ``operand``.

        :param prefix: optional text prepended to every field name
        :param operand: boolean operand placed between the comparisons
        :return: the SQL expression, always wrapped in parentheses
        """
        sql_filters = []
        for f in self:
            # Double embedded single quotes so a value such as O'Brien stays
            # inside the string literal instead of terminating it.
            # NOTE(review): field names are interpolated as-is and must come
            # from a trusted set of column names.
            escaped_value = str(f.value).replace("'", "''")
            sql_filters.append(f"{prefix or ''}{f.field} = '{escaped_value}'")
        joiner = f" {operand} "
        return f"({joiner.join(sql_filters)})"


class ScoreDefinitionResult(Base):
Expand Down
2 changes: 2 additions & 0 deletions testgen/template/data_chars/data_chars_update.sql
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ FROM last_run l
AND d.table_name = n.table_name
)
WHERE data_table_chars.table_id = d.table_id
AND d.drop_date IS NULL
AND n.table_name IS NULL;

-- ==============================================================================
Expand Down Expand Up @@ -221,4 +222,5 @@ FROM last_run l
)
WHERE data_column_chars.table_id = d.table_id
AND data_column_chars.column_name = d.column_name
AND d.drop_date IS NULL
AND n.column_name IS NULL;
23 changes: 16 additions & 7 deletions testgen/template/dbsetup/030_initialize_new_schema_structure.sql
Original file line number Diff line number Diff line change
Expand Up @@ -622,7 +622,6 @@ CREATE TABLE auth_users (
email VARCHAR(120),
name VARCHAR(120),
password VARCHAR(120),
preauthorized BOOLEAN default false,
role VARCHAR(20)
);

Expand Down Expand Up @@ -657,13 +656,23 @@ CREATE TABLE IF NOT EXISTS score_definitions (
category VARCHAR(30) DEFAULT NULL
);

CREATE TABLE IF NOT EXISTS score_definition_criteria (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
definition_id UUID NOT NULL REFERENCES score_definitions(id) ON DELETE CASCADE,
operand VARCHAR NOT NULL DEFAULT 'AND',
group_by_field BOOLEAN NOT NULL DEFAULT true
);

CREATE TABLE IF NOT EXISTS score_definition_filters (
id UUID DEFAULT gen_random_uuid() PRIMARY KEY,
definition_id UUID CONSTRAINT score_definitions_filters_score_definitions_definition_id_fk
REFERENCES score_definitions (id)
ON DELETE CASCADE,
field TEXT DEFAULT NULL,
value TEXT DEFAULT NULL
id UUID DEFAULT gen_random_uuid() PRIMARY KEY,
criteria_id UUID DEFAULT NULL CONSTRAINT score_definitions_filters_score_definition_criteria_fk
REFERENCES score_definition_criteria (id)
ON DELETE CASCADE,
next_filter_id UUID DEFAULT NULL CONSTRAINT score_definitions_filters_score_definitions_filters_fk
REFERENCES score_definition_filters (id)
ON DELETE CASCADE,
field TEXT DEFAULT NULL,
value TEXT DEFAULT NULL
);

CREATE TABLE IF NOT EXISTS score_definition_results (
Expand Down
1 change: 1 addition & 0 deletions testgen/template/dbsetup/075_grant_role_rights.sql
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ GRANT SELECT, INSERT, DELETE, UPDATE ON
{SCHEMA_NAME}.data_column_chars,
{SCHEMA_NAME}.auth_users,
{SCHEMA_NAME}.score_definitions,
{SCHEMA_NAME}.score_definition_criteria,
{SCHEMA_NAME}.score_definition_filters,
{SCHEMA_NAME}.score_definition_results,
{SCHEMA_NAME}.score_definition_results_breakdown,
Expand Down
2 changes: 1 addition & 1 deletion testgen/template/dbupgrade/0137_incremental_upgrade.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ SET SEARCH_PATH TO {SCHEMA_NAME};

UPDATE job_schedules
SET kwargs = kwargs - 'project_code' || jsonb_build_object('project_key', kwargs->'project_code')
WHERE key = 'run-tests';
WHERE key = 'run-tests';
38 changes: 38 additions & 0 deletions testgen/template/dbupgrade/0138_incremental_upgrade.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
-- Introduces score_definition_criteria and re-parents existing scorecard filters onto it
SET SEARCH_PATH TO {SCHEMA_NAME};

-- Criteria row owned by a score definition, holding the operand and grouping mode
-- used to combine its filters
CREATE TABLE score_definition_criteria (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
definition_id UUID NOT NULL REFERENCES score_definitions(id) ON DELETE CASCADE,
operand VARCHAR NOT NULL DEFAULT 'AND',
group_by_field BOOLEAN NOT NULL DEFAULT true
);

-- criteria_id links a filter to its criteria
-- next_filter_id is a self-reference linking a filter to another filter row
ALTER TABLE score_definition_filters
ADD COLUMN criteria_id UUID DEFAULT NULL,
ADD COLUMN next_filter_id UUID DEFAULT NULL,
ADD CONSTRAINT score_definitions_filters_score_definition_criteria_fk FOREIGN KEY (criteria_id) REFERENCES score_definition_criteria (id) ON DELETE CASCADE,
ADD CONSTRAINT score_definitions_filters_score_definitions_filters_fk FOREIGN KEY (next_filter_id) REFERENCES score_definition_filters (id) ON DELETE CASCADE;

-- Backfill step
-- Every existing score definition gets one AND criteria with group_by_field = true,
-- and all filters of that definition are attached to the new criteria
DO $$
DECLARE
current_definition_id UUID;
new_criteria_id UUID;
definition_filter RECORD;
BEGIN
FOR current_definition_id IN SELECT id FROM score_definitions LOOP
-- The id is generated here so the same value can be reused when linking filters below
new_criteria_id := gen_random_uuid();
RAISE NOTICE 'Definition = %', current_definition_id;
RAISE NOTICE 'Create Score Criteria (AND)';
EXECUTE format(
'INSERT INTO score_definition_criteria (id, definition_id, operand, group_by_field) VALUES (%L, %L, %L, %L)',
new_criteria_id, current_definition_id, 'AND', true
);

-- Filters are still matched through definition_id at this point
FOR definition_filter IN SELECT id, field, value FROM score_definition_filters WHERE definition_id = current_definition_id LOOP
RAISE NOTICE 'Link filter to Score Criteria Field=% Value=%', definition_filter.field, definition_filter.value;
EXECUTE format('UPDATE score_definition_filters SET criteria_id = %L WHERE id = %L', new_criteria_id, definition_filter.id);
END LOOP;
END LOOP;
END $$;

-- Filters are now reached through criteria_id, so the direct link to score_definitions is dropped
-- This must stay after the backfill above, which reads definition_id
ALTER TABLE score_definition_filters DROP COLUMN definition_id;
3 changes: 3 additions & 0 deletions testgen/template/dbupgrade/0139_incremental_upgrade.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Removes the preauthorized column from auth_users
SET SEARCH_PATH TO {SCHEMA_NAME};

ALTER TABLE auth_users DROP COLUMN preauthorized;
8 changes: 8 additions & 0 deletions testgen/ui/assets/style.css
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,10 @@ div[data-testid="stDialog"] div[role="dialog"]:has(i.s-dialog) {
width: calc(35rem);
}

/* Large dialog width, applied when the dialog contains an i.l-dialog marker element */
div[data-testid="stDialog"] div[role="dialog"]:has(i.l-dialog) {
width: calc(75rem);
}

div[data-testid="stDialog"] div[role="dialog"]:has(i.xl-dialog) {
width: calc(95rem);
}
Expand All @@ -112,6 +116,10 @@ div[data-testid="stSpinner"] > div > i {
border-color: var(--primary-color) rgba(49, 51, 63, 0.2) rgba(49, 51, 63, 0.2);
}

/* Takes the data catalog spinner container out of normal flow
   (presumably so showing it does not shift the page layout - TODO confirm) */
div.st-key-data_catalog-spinner {
position: absolute;
}

/* Theming for buttons, tabs and form inputs */
button[data-testid="stBaseButton-secondary"]:hover,
button[data-testid="stBaseButton-secondary"]:focus:not(:active),
Expand Down
Loading