Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions google/cloud/bigtable/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -782,13 +782,13 @@ def sample_row_keys(self):
or by casting to a :class:`list` and can be cancelled by
calling ``cancel()``.
"""
data_client = self._instance._client.table_data_client
response_iterator = data_client.sample_row_keys(
request={"table_name": self.name, "app_profile_id": self._app_profile_id}
return (
data_messages_v2_pb2.SampleRowKeysResponse(
row_key=row_key, offset_bytes=offset_bytes
)
for row_key, offset_bytes in self._table_impl.sample_row_keys()
)
Comment on lines +785 to 790

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

This change alters the behavior of `sample_row_keys` from streaming results to buffering them in memory. The new implementation, which uses `self._table_impl.sample_row_keys()`, collects all row key samples into a list before this method yields any of them, whereas the previous implementation returned a streaming iterator directly from the gRPC call.

This can lead to significantly increased memory consumption for tables with a large number of sample keys.

Additionally, the docstring for this method is now inaccurate. It states that the method returns a cancellable iterator (`GrpcRendezvous`), but the method now returns a generator that cannot be cancelled and is backed by an in-memory list. The docstring should be updated to reflect this new behavior and to warn about the potential for increased memory usage.


return response_iterator

def truncate(self, timeout=None):
"""Truncate the table

Expand Down
23 changes: 18 additions & 5 deletions tests/unit/v2_client/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1202,18 +1202,31 @@ def test_table_yield_rows_with_row_set():


def test_table_sample_row_keys():
from google.cloud.bigtable_v2 import SampleRowKeysResponse

credentials = _make_credentials()
client = _make_client(project="project-id", credentials=credentials, admin=True)
data_api = _make_data_api(client)
instance = client.instance(instance_id=INSTANCE_ID)
table = _make_table(TABLE_ID, instance)
response_iterator = object()

gapic_api = _make_gapic_api(client)
gapic_api.sample_row_keys.return_value = [response_iterator]
data_api.get_table.return_value.sample_row_keys.return_value = [
(b"row-1", 1),
(b"row-2", 2),
(b"row-3", 3),
(b"", 5),
]

expected_sample_row_keys = [
SampleRowKeysResponse(row_key=b"row-1", offset_bytes=1),
SampleRowKeysResponse(row_key=b"row-2", offset_bytes=2),
SampleRowKeysResponse(row_key=b"row-3", offset_bytes=3),
SampleRowKeysResponse(row_key=b"", offset_bytes=5),
]

result = table.sample_row_keys()
result = list(table.sample_row_keys())

assert result[0] == response_iterator
assert result == expected_sample_row_keys


def test_table_truncate():
Expand Down
Loading