Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ CLI Help output::
github-backup [-h] [-t TOKEN_CLASSIC] [-f TOKEN_FINE] [-q] [--as-app]
[-o OUTPUT_DIRECTORY] [-l LOG_LEVEL] [-i]
[--incremental-by-files]
[--starred] [--all-starred]
[--starred] [--all-starred] [--starred-skip-size-over MB]
[--watched] [--followers] [--following] [--all]
[--issues] [--issue-comments] [--issue-events] [--pulls]
[--pull-comments] [--pull-commits] [--pull-details]
Expand Down Expand Up @@ -84,6 +84,8 @@ CLI Help output::
incremental backup based on modification date of files
--starred include JSON output of starred repositories in backup
--all-starred include starred repositories in backup [*]
--starred-skip-size-over MB
skip starred repositories larger than this size in MB
--watched include JSON output of watched repositories in backup
--followers include JSON output of followers in backup
--following include JSON output of following users in backup
Expand Down Expand Up @@ -292,10 +294,20 @@ All is not everything

The ``--all`` argument does not include: cloning private repos (``-P, --private``), cloning forks (``-F, --fork``), cloning starred repositories (``--all-starred``), ``--pull-details``, cloning LFS repositories (``--lfs``), cloning gists (``--gists``) or cloning starred gist repos (``--starred-gists``). See examples for more.

Cloning all starred size
------------------------
Starred repository size
-----------------------

Using the ``--all-starred`` argument to clone all starred repositories may use a large amount of storage space.

To see your starred repositories sorted by size (requires `GitHub CLI <https://cli.github.com>`_)::

gh api user/starred --paginate --jq 'sort_by(-.size)[]|"\(.full_name) \(.size/1024|round)MB"'

To limit which starred repositories are cloned, use ``--starred-skip-size-over MB`` where MB is the size limit in megabytes. For example, ``--starred-skip-size-over 500`` will skip any starred repository where the git repository size (code and history) exceeds 500 MB. Note that this size limit only applies to the repository itself, not issues, release assets or other metadata. This filter only affects starred repositories; your own repositories are always included regardless of size.

For finer control, avoid using ``--assets`` with starred repos, or use ``--skip-assets-on`` for specific repositories with large release binaries.

Using the ``--all-starred`` argument to clone all starred repositories may use a large amount of storage space, especially if ``--all`` or more arguments are used. e.g. commonly starred repos can have tens of thousands of issues, many large assets and the repo itself etc. Consider just storing links to starred repos in JSON format with ``--starred``.
Alternatively, consider just storing links to starred repos in JSON format with ``--starred``.

Incremental Backup
------------------
Expand Down
26 changes: 26 additions & 0 deletions github_backup/github_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,13 @@ def parse_args(args=None):
dest="all_starred",
help="include starred repositories in backup [*]",
)
parser.add_argument(
"--starred-skip-size-over",
type=int,
metavar="MB",
dest="starred_skip_size_over",
help="skip starred repositories larger than this size in MB",
)
parser.add_argument(
"--watched",
action="store_true",
Expand Down Expand Up @@ -1570,6 +1577,25 @@ def filter_repositories(args, unfiltered_repositories):
]
if args.skip_archived:
repositories = [r for r in repositories if not r.get("archived")]
if args.starred_skip_size_over is not None:
if args.starred_skip_size_over <= 0:
logger.warning(
"--starred-skip-size-over must be greater than 0, ignoring"
)
else:
size_limit_kb = args.starred_skip_size_over * 1024
filtered = []
for r in repositories:
if r.get("is_starred") and r.get("size", 0) > size_limit_kb:
size_mb = r.get("size", 0) / 1024
logger.info(
"Skipping starred repo {0} ({1:.0f} MB) due to --starred-skip-size-over {2}".format(
r.get("full_name", r.get("name")), size_mb, args.starred_skip_size_over
)
)
else:
filtered.append(r)
repositories = filtered
if args.exclude:
repositories = [
r for r in repositories if "name" not in r or r["name"] not in args.exclude
Expand Down
6 changes: 6 additions & 0 deletions tests/test_case_sensitivity.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ def test_filter_repositories_case_insensitive_user(self):
args.private = False
args.public = False
args.all = True
args.skip_archived = False
args.starred_skip_size_over = None

# Simulate GitHub API returning canonical case
repos = [
Expand Down Expand Up @@ -65,6 +67,8 @@ def test_filter_repositories_case_insensitive_org(self):
args.private = False
args.public = False
args.all = True
args.skip_archived = False
args.starred_skip_size_over = None

repos = [
{
Expand Down Expand Up @@ -93,6 +97,8 @@ def test_filter_repositories_case_variations(self):
args.private = False
args.public = False
args.all = True
args.skip_archived = False
args.starred_skip_size_over = None

repos = [
{"name": "repo1", "owner": {"login": "test-user"}, "private": False, "fork": False},
Expand Down
224 changes: 224 additions & 0 deletions tests/test_starred_skip_size_over.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
"""Tests for --starred-skip-size-over flag behavior (issue #108)."""

import pytest
from unittest.mock import Mock

from github_backup import github_backup


class TestStarredSkipSizeOver:
    """Shared base for the --starred-skip-size-over test classes.

    Issue #108 asks for a way to restrict starred repositories by size
    before cloning. The GitHub API reports a repository's 'size' in KB,
    while the command-line argument takes MB for user convenience.
    """

    def _create_mock_args(self, **overrides):
        """Return a Mock args object, applying ``overrides`` over the defaults."""
        settings = {
            "user": "testuser",
            "repository": None,
            "name_regex": None,
            "languages": None,
            "fork": False,
            "private": False,
            "skip_archived": False,
            "starred_skip_size_over": None,
            "exclude": None,
        }
        # Caller-supplied values win over the defaults above.
        settings.update(overrides)

        mock_args = Mock()
        for attr_name, attr_value in settings.items():
            setattr(mock_args, attr_name, attr_value)
        return mock_args


class TestStarredSkipSizeOverArgumentParsing(TestStarredSkipSizeOver):
    """Checks how parse_args handles the --starred-skip-size-over option."""

    def test_starred_skip_size_over_not_set_defaults_to_none(self):
        """Omitting the option leaves ``starred_skip_size_over`` as None."""
        parsed = github_backup.parse_args(["testuser"])
        assert parsed.starred_skip_size_over is None

    def test_starred_skip_size_over_accepts_integer(self):
        """An integer argument is parsed into an int value."""
        argv = ["testuser", "--starred-skip-size-over", "500"]
        parsed = github_backup.parse_args(argv)
        assert parsed.starred_skip_size_over == 500

    def test_starred_skip_size_over_rejects_non_integer(self):
        """A non-integer argument makes parse_args raise SystemExit."""
        argv = ["testuser", "--starred-skip-size-over", "abc"]
        with pytest.raises(SystemExit):
            github_backup.parse_args(argv)


class TestStarredSkipSizeOverFiltering(TestStarredSkipSizeOver):
    """Checks how filter_repositories applies --starred-skip-size-over."""

    @staticmethod
    def _repo(name, owner, size_mb=None, starred=False):
        """Build a repository dict for these tests.

        ``size`` is stored in KB (``size_mb * 1024``) and left out entirely
        when ``size_mb`` is None; ``is_starred`` is only present for starred
        repositories.
        """
        repo = {"name": name, "owner": {"login": owner}}
        if size_mb is not None:
            repo["size"] = size_mb * 1024
        if starred:
            repo["is_starred"] = True
        return repo

    def _filter(self, limit_mb, repos):
        """Run filter_repositories with the given size limit (MB or None)."""
        args = self._create_mock_args(starred_skip_size_over=limit_mb)
        return github_backup.filter_repositories(args, repos)

    def test_starred_repo_under_limit_is_kept(self):
        """A 100 MB starred repo survives a 500 MB limit."""
        kept = self._filter(
            500, [self._repo("small-repo", "otheruser", 100, starred=True)]
        )
        assert len(kept) == 1
        assert kept[0]["name"] == "small-repo"

    def test_starred_repo_over_limit_is_filtered(self):
        """A 600 MB starred repo is dropped by a 500 MB limit."""
        kept = self._filter(
            500, [self._repo("huge-repo", "otheruser", 600, starred=True)]
        )
        assert len(kept) == 0

    def test_own_repo_over_limit_is_kept(self):
        """The user's own 600 MB repo (no is_starred flag) is not filtered."""
        kept = self._filter(500, [self._repo("my-huge-repo", "testuser", 600)])
        assert len(kept) == 1
        assert kept[0]["name"] == "my-huge-repo"

    def test_starred_repo_at_exact_limit_is_kept(self):
        """A starred repo of exactly 500 MB is kept with a 500 MB limit."""
        kept = self._filter(
            500, [self._repo("exact-limit-repo", "otheruser", 500, starred=True)]
        )
        assert len(kept) == 1
        assert kept[0]["name"] == "exact-limit-repo"

    def test_mixed_repos_filtered_correctly(self):
        """Only the oversized starred repo is removed from a mixed list."""
        candidates = [
            # 1 GB, owned by the user: kept regardless of size
            self._repo("my-huge-repo", "testuser", 1000),
            # 100 MB starred: below the limit
            self._repo("starred-small", "otheruser", 100, starred=True),
            # 2 GB starred: above the limit
            self._repo("starred-huge", "anotheruser", 2000, starred=True),
        ]

        kept = self._filter(500, candidates)
        assert len(kept) == 2
        kept_names = [repo["name"] for repo in kept]
        assert "my-huge-repo" in kept_names
        assert "starred-small" in kept_names
        assert "starred-huge" not in kept_names

    def test_no_size_limit_keeps_all_starred(self):
        """With the limit unset (None), a 10 GB starred repo is kept."""
        kept = self._filter(
            None,
            [self._repo("huge-starred-repo", "otheruser", 10000, starred=True)],
        )
        assert len(kept) == 1

    def test_repo_without_size_field_is_kept(self):
        """A starred repo with no 'size' key is kept."""
        kept = self._filter(
            500, [self._repo("no-size-repo", "otheruser", starred=True)]
        )
        assert len(kept) == 1

    def test_zero_value_warns_and_is_ignored(self, caplog):
        """A limit of 0 logs a warning and filters nothing."""
        kept = self._filter(
            0, [self._repo("huge-starred-repo", "otheruser", 10000, starred=True)]
        )
        assert len(kept) == 1
        assert "must be greater than 0" in caplog.text

    def test_negative_value_warns_and_is_ignored(self, caplog):
        """A negative limit logs a warning and filters nothing."""
        kept = self._filter(
            -5, [self._repo("huge-starred-repo", "otheruser", 10000, starred=True)]
        )
        assert len(kept) == 1
        assert "must be greater than 0" in caplog.text


if __name__ == "__main__":
    # Allow running this test file directly, in verbose mode.
    pytest_argv = [__file__, "-v"]
    pytest.main(pytest_argv)