Merge pull request #468 from Iamrodos/feature/135-skip-assets-on

Add --skip-assets-on flag to skip release asset downloads (#135)
This commit is contained in:
Jose Diaz-Gonzalez
2025-12-12 06:05:47 -05:00
committed by GitHub
3 changed files with 397 additions and 32 deletions

View File

@@ -50,8 +50,8 @@ CLI Help output::
[--keychain-name OSX_KEYCHAIN_ITEM_NAME]
[--keychain-account OSX_KEYCHAIN_ITEM_ACCOUNT]
[--releases] [--latest-releases NUMBER_OF_LATEST_RELEASES]
[--skip-prerelease] [--assets] [--attachments]
[--exclude [REPOSITORY [REPOSITORY ...]]
[--skip-prerelease] [--assets] [--skip-assets-on [REPO ...]]
[--attachments] [--exclude [REPOSITORY [REPOSITORY ...]]
[--throttle-limit THROTTLE_LIMIT] [--throttle-pause THROTTLE_PAUSE]
USER
@@ -133,6 +133,9 @@ CLI Help output::
--skip-prerelease skip prerelease and draft versions; only applies if including releases
--assets include assets alongside release information; only
applies if including releases
--skip-assets-on [REPO ...]
skip asset downloads for these repositories (e.g.
--skip-assets-on repo1 owner/repo2)
--attachments download user-attachments from issues and pull requests
to issues/attachments/{issue_number}/ and
pulls/attachments/{pull_number}/ directories

View File

@@ -440,6 +440,12 @@ def parse_args(args=None):
dest="include_assets",
help="include assets alongside release information; only applies if including releases",
)
parser.add_argument(
"--skip-assets-on",
dest="skip_assets_on",
nargs="*",
help="skip asset downloads for these repositories",
)
parser.add_argument(
"--attachments",
action="store_true",
@@ -561,7 +567,7 @@ def get_github_host(args):
def read_file_contents(file_uri):
return open(file_uri[len(FILE_URI_PREFIX):], "rt").readline().strip()
return open(file_uri[len(FILE_URI_PREFIX) :], "rt").readline().strip()
def get_github_repo_url(args, repository):
@@ -631,7 +637,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False):
pass
raise RepositoryUnavailableError(
"Repository unavailable due to legal reasons (HTTP 451)",
dmca_url=dmca_url
dmca_url=dmca_url,
)
# Check if we got correct data
@@ -709,7 +715,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False):
# Parse Link header: <https://api.github.com/...?per_page=100&after=cursor>; rel="next"
for link in link_header.split(","):
if 'rel="next"' in link:
next_url = link[link.find("<") + 1:link.find(">")]
next_url = link[link.find("<") + 1 : link.find(">")]
break
if not next_url:
break
@@ -763,9 +769,7 @@ def _get_response(request, auth, template):
return r, errors
def _construct_request(
per_page, query_args, template, auth, as_app=None, fine=False
):
def _construct_request(per_page, query_args, template, auth, as_app=None, fine=False):
# If template is already a full URL with query params (from Link header), use it directly
if "?" in template and template.startswith("http"):
request_url = template
@@ -1480,9 +1484,11 @@ def download_attachments(
manifest = {
"issue_number": number,
"issue_type": item_type,
"repository": f"{args.user}/{args.repository}"
if hasattr(args, "repository") and args.repository
else args.user,
"repository": (
f"{args.user}/{args.repository}"
if hasattr(args, "repository") and args.repository
else args.user
),
"manifest_updated_at": datetime.now(timezone.utc).isoformat(),
"attachments": attachment_metadata_list,
}
@@ -1538,9 +1544,7 @@ def retrieve_repositories(args, authenticated_user):
else:
repo_path = "{0}/{1}".format(args.user, args.repository)
single_request = True
template = "https://{0}/repos/{1}".format(
get_github_api_host(args), repo_path
)
template = "https://{0}/repos/{1}".format(get_github_api_host(args), repo_path)
repos = retrieve_data(args, template, single_request=single_request)
@@ -1565,7 +1569,10 @@ def retrieve_repositories(args, authenticated_user):
repos.extend(gists)
if args.include_starred_gists:
if not authenticated_user.get("login") or args.user.lower() != authenticated_user["login"].lower():
if (
not authenticated_user.get("login")
or args.user.lower() != authenticated_user["login"].lower()
):
logger.warning(
"Cannot retrieve starred gists for '%s'. GitHub only allows access to the authenticated user's starred gists.",
args.user,
@@ -1673,9 +1680,11 @@ def backup_repositories(args, output_directory, repositories):
include_gists = args.include_gists or args.include_starred_gists
include_starred = args.all_starred and repository.get("is_starred")
if (args.include_repository or args.include_everything) or (
include_gists and repository.get("is_gist")
) or include_starred:
if (
(args.include_repository or args.include_everything)
or (include_gists and repository.get("is_gist"))
or include_starred
):
repo_name = (
repository.get("name")
if not repository.get("is_gist")
@@ -1735,7 +1744,9 @@ def backup_repositories(args, output_directory, repositories):
include_assets=args.include_assets or args.include_everything,
)
except RepositoryUnavailableError as e:
logger.warning(f"Repository {repository['full_name']} is unavailable (HTTP 451)")
logger.warning(
f"Repository {repository['full_name']} is unavailable (HTTP 451)"
)
if e.dmca_url:
logger.warning(f"DMCA notice: {e.dmca_url}")
logger.info(f"Skipping remaining resources for {repository['full_name']}")
@@ -1795,7 +1806,11 @@ def backup_issues(args, repo_cwd, repository, repos_template):
modified = os.path.getmtime(issue_file)
modified = datetime.fromtimestamp(modified).strftime("%Y-%m-%dT%H:%M:%SZ")
if modified > issue["updated_at"]:
logger.info("Skipping issue {0} because it wasn't modified since last backup".format(number))
logger.info(
"Skipping issue {0} because it wasn't modified since last backup".format(
number
)
)
continue
if args.include_issue_comments or args.include_everything:
@@ -1869,7 +1884,11 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
modified = os.path.getmtime(pull_file)
modified = datetime.fromtimestamp(modified).strftime("%Y-%m-%dT%H:%M:%SZ")
if modified > pull["updated_at"]:
logger.info("Skipping pull request {0} because it wasn't modified since last backup".format(number))
logger.info(
"Skipping pull request {0} because it wasn't modified since last backup".format(
number
)
)
continue
if args.include_pull_comments or args.include_everything:
template = comments_regular_template.format(number)
@@ -1919,9 +1938,11 @@ def backup_milestones(args, repo_cwd, repository, repos_template):
elif written_count == 0:
logger.info("{0} milestones unchanged, skipped write".format(total))
else:
logger.info("Saved {0} of {1} milestones to disk ({2} unchanged)".format(
written_count, total, total - written_count
))
logger.info(
"Saved {0} of {1} milestones to disk ({2} unchanged)".format(
written_count, total, total - written_count
)
)
def backup_labels(args, repo_cwd, repository, repos_template):
@@ -1975,6 +1996,20 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F
)
releases = releases[: args.number_of_latest_releases]
# Check if this repo should skip asset downloads (case-insensitive)
skip_assets = False
if include_assets:
repo_name = repository.get("name", "").lower()
repo_full_name = repository.get("full_name", "").lower()
skip_repos = [r.lower() for r in (args.skip_assets_on or [])]
skip_assets = repo_name in skip_repos or repo_full_name in skip_repos
if skip_assets:
logger.info(
"Skipping assets for {0} ({1} releases) due to --skip-assets-on".format(
repository.get("name"), len(releases)
)
)
# for each release, store it
written_count = 0
for release in releases:
@@ -1986,7 +2021,7 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F
if json_dump_if_changed(release, output_filepath):
written_count += 1
if include_assets:
if include_assets and not skip_assets:
assets = retrieve_data(args, release["assets_url"])
if len(assets) > 0:
# give release asset files somewhere to live & download them (not including source archives)
@@ -2008,9 +2043,11 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F
elif written_count == 0:
logger.info("{0} releases unchanged, skipped write".format(total))
else:
logger.info("Saved {0} of {1} releases to disk ({2} unchanged)".format(
written_count, total, total - written_count
))
logger.info(
"Saved {0} of {1} releases to disk ({2} unchanged)".format(
written_count, total, total - written_count
)
)
def fetch_repository(
@@ -2024,9 +2061,12 @@ def fetch_repository(
):
if bare_clone:
if os.path.exists(local_dir):
clone_exists = subprocess.check_output(
["git", "rev-parse", "--is-bare-repository"], cwd=local_dir
) == b"true\n"
clone_exists = (
subprocess.check_output(
["git", "rev-parse", "--is-bare-repository"], cwd=local_dir
)
== b"true\n"
)
else:
clone_exists = False
else:
@@ -2047,7 +2087,9 @@ def fetch_repository(
)
else:
logger.info(
"Skipping {0} (repository not accessible - may be empty, private, or credentials invalid)".format(name)
"Skipping {0} (repository not accessible - may be empty, private, or credentials invalid)".format(
name
)
)
return

View File

@@ -0,0 +1,320 @@
"""Tests for --skip-assets-on flag behavior (issue #135)."""
import pytest
from unittest.mock import Mock, patch
from github_backup import github_backup
class TestSkipAssetsOn:
"""Test suite for --skip-assets-on flag.
Issue #135: Allow skipping asset downloads for specific repositories
while still backing up release metadata.
"""
def _create_mock_args(self, **overrides):
"""Create a mock args object with sensible defaults."""
args = Mock()
args.user = "testuser"
args.output_directory = "/tmp/backup"
args.include_repository = False
args.include_everything = False
args.include_gists = False
args.include_starred_gists = False
args.all_starred = False
args.skip_existing = False
args.bare_clone = False
args.lfs_clone = False
args.no_prune = False
args.include_wiki = False
args.include_issues = False
args.include_issue_comments = False
args.include_issue_events = False
args.include_pulls = False
args.include_pull_comments = False
args.include_pull_commits = False
args.include_pull_details = False
args.include_labels = False
args.include_hooks = False
args.include_milestones = False
args.include_releases = True
args.include_assets = True
args.skip_assets_on = []
args.include_attachments = False
args.incremental = False
args.incremental_by_files = False
args.github_host = None
args.prefer_ssh = False
args.token_classic = "test-token"
args.token_fine = None
args.username = None
args.password = None
args.as_app = False
args.osx_keychain_item_name = None
args.osx_keychain_item_account = None
args.skip_prerelease = False
args.number_of_latest_releases = None
for key, value in overrides.items():
setattr(args, key, value)
return args
def _create_mock_repository(self, name="test-repo", owner="testuser"):
"""Create a mock repository object."""
return {
"name": name,
"full_name": f"{owner}/{name}",
"owner": {"login": owner},
"private": False,
"fork": False,
"has_wiki": False,
}
def _create_mock_release(self, tag="v1.0.0"):
"""Create a mock release object."""
return {
"tag_name": tag,
"name": tag,
"prerelease": False,
"draft": False,
"assets_url": f"https://api.github.com/repos/testuser/test-repo/releases/{tag}/assets",
}
def _create_mock_asset(self, name="asset.zip"):
"""Create a mock asset object."""
return {
"name": name,
"url": f"https://api.github.com/repos/testuser/test-repo/releases/assets/{name}",
}
class TestSkipAssetsOnArgumentParsing(TestSkipAssetsOn):
"""Tests for --skip-assets-on argument parsing."""
def test_skip_assets_on_not_set_defaults_to_none(self):
"""When --skip-assets-on is not specified, it should default to None."""
args = github_backup.parse_args(["testuser"])
assert args.skip_assets_on is None
def test_skip_assets_on_single_repo(self):
"""Single --skip-assets-on should create list with one item."""
args = github_backup.parse_args(["testuser", "--skip-assets-on", "big-repo"])
assert args.skip_assets_on == ["big-repo"]
def test_skip_assets_on_multiple_repos(self):
"""Multiple repos can be specified space-separated (like --exclude)."""
args = github_backup.parse_args(
[
"testuser",
"--skip-assets-on",
"big-repo",
"another-repo",
"owner/third-repo",
]
)
assert args.skip_assets_on == ["big-repo", "another-repo", "owner/third-repo"]
class TestSkipAssetsOnBehavior(TestSkipAssetsOn):
"""Tests for --skip-assets-on behavior in backup_releases."""
@patch("github_backup.github_backup.download_file")
@patch("github_backup.github_backup.retrieve_data")
@patch("github_backup.github_backup.mkdir_p")
@patch("github_backup.github_backup.json_dump_if_changed")
def test_assets_downloaded_when_not_skipped(
self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download
):
"""Assets should be downloaded when repo is not in skip list."""
args = self._create_mock_args(skip_assets_on=[])
repository = self._create_mock_repository(name="normal-repo")
release = self._create_mock_release()
asset = self._create_mock_asset()
mock_json_dump.return_value = True
mock_retrieve.side_effect = [
[release], # First call: get releases
[asset], # Second call: get assets
]
with patch("os.path.join", side_effect=lambda *args: "/".join(args)):
github_backup.backup_releases(
args,
"/tmp/backup/repositories/normal-repo",
repository,
"https://api.github.com/repos/{owner}/{repo}",
include_assets=True,
)
# download_file should have been called for the asset
mock_download.assert_called_once()
@patch("github_backup.github_backup.download_file")
@patch("github_backup.github_backup.retrieve_data")
@patch("github_backup.github_backup.mkdir_p")
@patch("github_backup.github_backup.json_dump_if_changed")
def test_assets_skipped_when_repo_name_matches(
self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download
):
"""Assets should be skipped when repo name is in skip list."""
args = self._create_mock_args(skip_assets_on=["big-repo"])
repository = self._create_mock_repository(name="big-repo")
release = self._create_mock_release()
mock_json_dump.return_value = True
mock_retrieve.return_value = [release]
github_backup.backup_releases(
args,
"/tmp/backup/repositories/big-repo",
repository,
"https://api.github.com/repos/{owner}/{repo}",
include_assets=True,
)
# download_file should NOT have been called
mock_download.assert_not_called()
@patch("github_backup.github_backup.download_file")
@patch("github_backup.github_backup.retrieve_data")
@patch("github_backup.github_backup.mkdir_p")
@patch("github_backup.github_backup.json_dump_if_changed")
def test_assets_skipped_when_full_name_matches(
self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download
):
"""Assets should be skipped when owner/repo format matches."""
args = self._create_mock_args(skip_assets_on=["otheruser/big-repo"])
repository = self._create_mock_repository(name="big-repo", owner="otheruser")
release = self._create_mock_release()
mock_json_dump.return_value = True
mock_retrieve.return_value = [release]
github_backup.backup_releases(
args,
"/tmp/backup/repositories/big-repo",
repository,
"https://api.github.com/repos/{owner}/{repo}",
include_assets=True,
)
# download_file should NOT have been called
mock_download.assert_not_called()
@patch("github_backup.github_backup.download_file")
@patch("github_backup.github_backup.retrieve_data")
@patch("github_backup.github_backup.mkdir_p")
@patch("github_backup.github_backup.json_dump_if_changed")
def test_case_insensitive_matching(
self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download
):
"""Skip matching should be case-insensitive."""
# User types uppercase, repo name is lowercase
args = self._create_mock_args(skip_assets_on=["BIG-REPO"])
repository = self._create_mock_repository(name="big-repo")
release = self._create_mock_release()
mock_json_dump.return_value = True
mock_retrieve.return_value = [release]
github_backup.backup_releases(
args,
"/tmp/backup/repositories/big-repo",
repository,
"https://api.github.com/repos/{owner}/{repo}",
include_assets=True,
)
# download_file should NOT have been called (case-insensitive match)
assert not mock_download.called
@patch("github_backup.github_backup.download_file")
@patch("github_backup.github_backup.retrieve_data")
@patch("github_backup.github_backup.mkdir_p")
@patch("github_backup.github_backup.json_dump_if_changed")
def test_multiple_skip_repos(
self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download
):
"""Multiple repos in skip list should all be skipped."""
args = self._create_mock_args(skip_assets_on=["repo1", "repo2", "repo3"])
repository = self._create_mock_repository(name="repo2")
release = self._create_mock_release()
mock_json_dump.return_value = True
mock_retrieve.return_value = [release]
github_backup.backup_releases(
args,
"/tmp/backup/repositories/repo2",
repository,
"https://api.github.com/repos/{owner}/{repo}",
include_assets=True,
)
# download_file should NOT have been called
mock_download.assert_not_called()
@patch("github_backup.github_backup.download_file")
@patch("github_backup.github_backup.retrieve_data")
@patch("github_backup.github_backup.mkdir_p")
@patch("github_backup.github_backup.json_dump_if_changed")
def test_release_metadata_still_saved_when_assets_skipped(
self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download
):
"""Release JSON should still be saved even when assets are skipped."""
args = self._create_mock_args(skip_assets_on=["big-repo"])
repository = self._create_mock_repository(name="big-repo")
release = self._create_mock_release()
mock_json_dump.return_value = True
mock_retrieve.return_value = [release]
github_backup.backup_releases(
args,
"/tmp/backup/repositories/big-repo",
repository,
"https://api.github.com/repos/{owner}/{repo}",
include_assets=True,
)
# json_dump_if_changed should have been called for release metadata
mock_json_dump.assert_called_once()
# But download_file should NOT have been called
mock_download.assert_not_called()
@patch("github_backup.github_backup.download_file")
@patch("github_backup.github_backup.retrieve_data")
@patch("github_backup.github_backup.mkdir_p")
@patch("github_backup.github_backup.json_dump_if_changed")
def test_non_matching_repo_still_downloads_assets(
self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download
):
"""Repos not in skip list should still download assets."""
args = self._create_mock_args(skip_assets_on=["other-repo"])
repository = self._create_mock_repository(name="normal-repo")
release = self._create_mock_release()
asset = self._create_mock_asset()
mock_json_dump.return_value = True
mock_retrieve.side_effect = [
[release], # First call: get releases
[asset], # Second call: get assets
]
with patch("os.path.join", side_effect=lambda *args: "/".join(args)):
github_backup.backup_releases(
args,
"/tmp/backup/repositories/normal-repo",
repository,
"https://api.github.com/repos/{owner}/{repo}",
include_assets=True,
)
# download_file SHOULD have been called
mock_download.assert_called_once()
if __name__ == "__main__":
pytest.main([__file__, "-v"])