diff --git a/README.rst b/README.rst index f292c87..506b67b 100644 --- a/README.rst +++ b/README.rst @@ -50,8 +50,8 @@ CLI Help output:: [--keychain-name OSX_KEYCHAIN_ITEM_NAME] [--keychain-account OSX_KEYCHAIN_ITEM_ACCOUNT] [--releases] [--latest-releases NUMBER_OF_LATEST_RELEASES] - [--skip-prerelease] [--assets] [--attachments] - [--exclude [REPOSITORY [REPOSITORY ...]] + [--skip-prerelease] [--assets] [--skip-assets-on [REPO ...]] + [--attachments] [--exclude [REPOSITORY [REPOSITORY ...]] [--throttle-limit THROTTLE_LIMIT] [--throttle-pause THROTTLE_PAUSE] USER @@ -133,6 +133,9 @@ CLI Help output:: --skip-prerelease skip prerelease and draft versions; only applies if including releases --assets include assets alongside release information; only applies if including releases + --skip-assets-on [REPO ...] + skip asset downloads for these repositories (e.g. + --skip-assets-on repo1 owner/repo2) --attachments download user-attachments from issues and pull requests to issues/attachments/{issue_number}/ and pulls/attachments/{pull_number}/ directories diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 0782514..b9c23a7 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -440,6 +440,12 @@ def parse_args(args=None): dest="include_assets", help="include assets alongside release information; only applies if including releases", ) + parser.add_argument( + "--skip-assets-on", + dest="skip_assets_on", + nargs="*", + help="skip asset downloads for these repositories", + ) parser.add_argument( "--attachments", action="store_true", @@ -561,7 +567,7 @@ def get_github_host(args): def read_file_contents(file_uri): - return open(file_uri[len(FILE_URI_PREFIX):], "rt").readline().strip() + return open(file_uri[len(FILE_URI_PREFIX) :], "rt").readline().strip() def get_github_repo_url(args, repository): @@ -631,7 +637,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): pass raise RepositoryUnavailableError( "Repository unavailable due to legal reasons (HTTP 451)", - dmca_url=dmca_url + dmca_url=dmca_url, ) # Check if we got correct data @@ -709,7 +715,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): # Parse Link header: ; rel="next" for link in link_header.split(","): if 'rel="next"' in link: - next_url = link[link.find("<") + 1:link.find(">")] + next_url = link[link.find("<") + 1 : link.find(">")] break if not next_url: break @@ -763,9 +769,7 @@ def _get_response(request, auth, template): return r, errors -def _construct_request( - per_page, query_args, template, auth, as_app=None, fine=False -): +def _construct_request(per_page, query_args, template, auth, as_app=None, fine=False): # If template is already a full URL with query params (from Link header), use it directly if "?" in template and template.startswith("http"): request_url = template @@ -1480,9 +1484,11 @@ def download_attachments( manifest = { "issue_number": number, "issue_type": item_type, - "repository": f"{args.user}/{args.repository}" - if hasattr(args, "repository") and args.repository - else args.user, + "repository": ( + f"{args.user}/{args.repository}" + if hasattr(args, "repository") and args.repository + else args.user + ), "manifest_updated_at": datetime.now(timezone.utc).isoformat(), "attachments": attachment_metadata_list, } @@ -1538,9 +1544,7 @@ def retrieve_repositories(args, authenticated_user): else: repo_path = "{0}/{1}".format(args.user, args.repository) single_request = True - template = "https://{0}/repos/{1}".format( - get_github_api_host(args), repo_path - ) + template = "https://{0}/repos/{1}".format(get_github_api_host(args), repo_path) repos = retrieve_data(args, template, single_request=single_request) @@ -1565,7 +1569,10 @@ def retrieve_repositories(args, authenticated_user): repos.extend(gists) if args.include_starred_gists: - if not authenticated_user.get("login") or args.user.lower() != authenticated_user["login"].lower(): + if ( + not authenticated_user.get("login") + or args.user.lower() != authenticated_user["login"].lower() + ): logger.warning( "Cannot retrieve starred gists for '%s'. GitHub only allows access to the authenticated user's starred gists.", args.user, @@ -1673,9 +1680,11 @@ def backup_repositories(args, output_directory, repositories): include_gists = args.include_gists or args.include_starred_gists include_starred = args.all_starred and repository.get("is_starred") - if (args.include_repository or args.include_everything) or ( - include_gists and repository.get("is_gist") - ) or include_starred: + if ( + (args.include_repository or args.include_everything) + or (include_gists and repository.get("is_gist")) + or include_starred + ): repo_name = ( repository.get("name") if not repository.get("is_gist") @@ -1735,7 +1744,9 @@ def backup_repositories(args, output_directory, repositories): include_assets=args.include_assets or args.include_everything, ) except RepositoryUnavailableError as e: - logger.warning(f"Repository {repository['full_name']} is unavailable (HTTP 451)") + logger.warning( + f"Repository {repository['full_name']} is unavailable (HTTP 451)" + ) if e.dmca_url: logger.warning(f"DMCA notice: {e.dmca_url}") logger.info(f"Skipping remaining resources for {repository['full_name']}") @@ -1795,7 +1806,11 @@ def backup_issues(args, repo_cwd, repository, repos_template): modified = os.path.getmtime(issue_file) modified = datetime.fromtimestamp(modified).strftime("%Y-%m-%dT%H:%M:%SZ") if modified > issue["updated_at"]: - logger.info("Skipping issue {0} because it wasn't modified since last backup".format(number)) + logger.info( + "Skipping issue {0} because it wasn't modified since last backup".format( + number + ) + ) continue if args.include_issue_comments or args.include_everything: @@ -1869,7 +1884,11 @@ def backup_pulls(args, repo_cwd, repository, repos_template): modified = os.path.getmtime(pull_file) modified = datetime.fromtimestamp(modified).strftime("%Y-%m-%dT%H:%M:%SZ") if modified > pull["updated_at"]: - logger.info("Skipping pull request {0} because it wasn't modified since last backup".format(number)) + logger.info( + "Skipping pull request {0} because it wasn't modified since last backup".format( + number + ) + ) continue if args.include_pull_comments or args.include_everything: template = comments_regular_template.format(number) @@ -1919,9 +1938,11 @@ def backup_milestones(args, repo_cwd, repository, repos_template): elif written_count == 0: logger.info("{0} milestones unchanged, skipped write".format(total)) else: - logger.info("Saved {0} of {1} milestones to disk ({2} unchanged)".format( - written_count, total, total - written_count - )) + logger.info( + "Saved {0} of {1} milestones to disk ({2} unchanged)".format( + written_count, total, total - written_count + ) + ) def backup_labels(args, repo_cwd, repository, repos_template): @@ -1975,6 +1996,20 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F ) releases = releases[: args.number_of_latest_releases] + # Check if this repo should skip asset downloads (case-insensitive) + skip_assets = False + if include_assets: + repo_name = repository.get("name", "").lower() + repo_full_name = repository.get("full_name", "").lower() + skip_repos = [r.lower() for r in (args.skip_assets_on or [])] + skip_assets = repo_name in skip_repos or repo_full_name in skip_repos + if skip_assets: + logger.info( + "Skipping assets for {0} ({1} releases) due to --skip-assets-on".format( + repository.get("name"), len(releases) + ) + ) + # for each release, store it written_count = 0 for release in releases: @@ -1986,7 +2021,7 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F if json_dump_if_changed(release, output_filepath): written_count += 1 - if include_assets: + if include_assets and not skip_assets: assets = retrieve_data(args, release["assets_url"]) if len(assets) > 0: # give release asset files somewhere to live & download them (not including source archives) @@ -2008,9 +2043,11 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F elif written_count == 0: logger.info("{0} releases unchanged, skipped write".format(total)) else: - logger.info("Saved {0} of {1} releases to disk ({2} unchanged)".format( - written_count, total, total - written_count - )) + logger.info( + "Saved {0} of {1} releases to disk ({2} unchanged)".format( + written_count, total, total - written_count + ) + ) def fetch_repository( @@ -2024,9 +2061,12 @@ def fetch_repository( ): if bare_clone: if os.path.exists(local_dir): - clone_exists = subprocess.check_output( - ["git", "rev-parse", "--is-bare-repository"], cwd=local_dir - ) == b"true\n" + clone_exists = ( + subprocess.check_output( + ["git", "rev-parse", "--is-bare-repository"], cwd=local_dir + ) + == b"true\n" + ) else: clone_exists = False else: @@ -2047,7 +2087,9 @@ def fetch_repository( ) else: logger.info( - "Skipping {0} (repository not accessible - may be empty, private, or credentials invalid)".format(name) + "Skipping {0} (repository not accessible - may be empty, private, or credentials invalid)".format( + name + ) ) return diff --git a/tests/test_skip_assets_on.py b/tests/test_skip_assets_on.py new file mode 100644 index 0000000..2437e05 --- /dev/null +++ b/tests/test_skip_assets_on.py @@ -0,0 +1,320 @@ +"""Tests for --skip-assets-on flag behavior (issue #135).""" + +import pytest +from unittest.mock import Mock, patch + +from github_backup import github_backup + + +class TestSkipAssetsOn: + """Test suite for --skip-assets-on flag. + + Issue #135: Allow skipping asset downloads for specific repositories + while still backing up release metadata. + """ + + def _create_mock_args(self, **overrides): + """Create a mock args object with sensible defaults.""" + args = Mock() + args.user = "testuser" + args.output_directory = "/tmp/backup" + args.include_repository = False + args.include_everything = False + args.include_gists = False + args.include_starred_gists = False + args.all_starred = False + args.skip_existing = False + args.bare_clone = False + args.lfs_clone = False + args.no_prune = False + args.include_wiki = False + args.include_issues = False + args.include_issue_comments = False + args.include_issue_events = False + args.include_pulls = False + args.include_pull_comments = False + args.include_pull_commits = False + args.include_pull_details = False + args.include_labels = False + args.include_hooks = False + args.include_milestones = False + args.include_releases = True + args.include_assets = True + args.skip_assets_on = [] + args.include_attachments = False + args.incremental = False + args.incremental_by_files = False + args.github_host = None + args.prefer_ssh = False + args.token_classic = "test-token" + args.token_fine = None + args.username = None + args.password = None + args.as_app = False + args.osx_keychain_item_name = None + args.osx_keychain_item_account = None + args.skip_prerelease = False + args.number_of_latest_releases = None + + for key, value in overrides.items(): + setattr(args, key, value) + + return args + + def _create_mock_repository(self, name="test-repo", owner="testuser"): + """Create a mock repository object.""" + return { + "name": name, + "full_name": f"{owner}/{name}", + "owner": {"login": owner}, + "private": False, + "fork": False, + "has_wiki": False, + } + + def _create_mock_release(self, tag="v1.0.0"): + """Create a mock release object.""" + return { + "tag_name": tag, + "name": tag, + "prerelease": False, + "draft": False, + "assets_url": f"https://api.github.com/repos/testuser/test-repo/releases/{tag}/assets", + } + + def _create_mock_asset(self, name="asset.zip"): + """Create a mock asset object.""" + return { + "name": name, + "url": f"https://api.github.com/repos/testuser/test-repo/releases/assets/{name}", + } + + +class TestSkipAssetsOnArgumentParsing(TestSkipAssetsOn): + """Tests for --skip-assets-on argument parsing.""" + + def test_skip_assets_on_not_set_defaults_to_none(self): + """When --skip-assets-on is not specified, it should default to None.""" + args = github_backup.parse_args(["testuser"]) + assert args.skip_assets_on is None + + def test_skip_assets_on_single_repo(self): + """Single --skip-assets-on should create list with one item.""" + args = github_backup.parse_args(["testuser", "--skip-assets-on", "big-repo"]) + assert args.skip_assets_on == ["big-repo"] + + def test_skip_assets_on_multiple_repos(self): + """Multiple repos can be specified space-separated (like --exclude).""" + args = github_backup.parse_args( + [ + "testuser", + "--skip-assets-on", + "big-repo", + "another-repo", + "owner/third-repo", + ] + ) + assert args.skip_assets_on == ["big-repo", "another-repo", "owner/third-repo"] + + +class TestSkipAssetsOnBehavior(TestSkipAssetsOn): + """Tests for --skip-assets-on behavior in backup_releases.""" + + @patch("github_backup.github_backup.download_file") + @patch("github_backup.github_backup.retrieve_data") + @patch("github_backup.github_backup.mkdir_p") + @patch("github_backup.github_backup.json_dump_if_changed") + def test_assets_downloaded_when_not_skipped( + self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download + ): + """Assets should be downloaded when repo is not in skip list.""" + args = self._create_mock_args(skip_assets_on=[]) + repository = self._create_mock_repository(name="normal-repo") + release = self._create_mock_release() + asset = self._create_mock_asset() + + mock_json_dump.return_value = True + mock_retrieve.side_effect = [ + [release], # First call: get releases + [asset], # Second call: get assets + ] + + with patch("os.path.join", side_effect=lambda *args: "/".join(args)): + github_backup.backup_releases( + args, + "/tmp/backup/repositories/normal-repo", + repository, + "https://api.github.com/repos/{owner}/{repo}", + include_assets=True, + ) + + # download_file should have been called for the asset + mock_download.assert_called_once() + + @patch("github_backup.github_backup.download_file") + @patch("github_backup.github_backup.retrieve_data") + @patch("github_backup.github_backup.mkdir_p") + @patch("github_backup.github_backup.json_dump_if_changed") + def test_assets_skipped_when_repo_name_matches( + self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download + ): + """Assets should be skipped when repo name is in skip list.""" + args = self._create_mock_args(skip_assets_on=["big-repo"]) + repository = self._create_mock_repository(name="big-repo") + release = self._create_mock_release() + + mock_json_dump.return_value = True + mock_retrieve.return_value = [release] + + github_backup.backup_releases( + args, + "/tmp/backup/repositories/big-repo", + repository, + "https://api.github.com/repos/{owner}/{repo}", + include_assets=True, + ) + + # download_file should NOT have been called + mock_download.assert_not_called() + + @patch("github_backup.github_backup.download_file") + @patch("github_backup.github_backup.retrieve_data") + @patch("github_backup.github_backup.mkdir_p") + @patch("github_backup.github_backup.json_dump_if_changed") + def test_assets_skipped_when_full_name_matches( + self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download + ): + """Assets should be skipped when owner/repo format matches.""" + args = self._create_mock_args(skip_assets_on=["otheruser/big-repo"]) + repository = self._create_mock_repository(name="big-repo", owner="otheruser") + release = self._create_mock_release() + + mock_json_dump.return_value = True + mock_retrieve.return_value = [release] + + github_backup.backup_releases( + args, + "/tmp/backup/repositories/big-repo", + repository, + "https://api.github.com/repos/{owner}/{repo}", + include_assets=True, + ) + + # download_file should NOT have been called + mock_download.assert_not_called() + + @patch("github_backup.github_backup.download_file") + @patch("github_backup.github_backup.retrieve_data") + @patch("github_backup.github_backup.mkdir_p") + @patch("github_backup.github_backup.json_dump_if_changed") + def test_case_insensitive_matching( + self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download + ): + """Skip matching should be case-insensitive.""" + # User types uppercase, repo name is lowercase + args = self._create_mock_args(skip_assets_on=["BIG-REPO"]) + repository = self._create_mock_repository(name="big-repo") + release = self._create_mock_release() + + mock_json_dump.return_value = True + mock_retrieve.return_value = [release] + + github_backup.backup_releases( + args, + "/tmp/backup/repositories/big-repo", + repository, + "https://api.github.com/repos/{owner}/{repo}", + include_assets=True, + ) + + # download_file should NOT have been called (case-insensitive match) + assert not mock_download.called + + @patch("github_backup.github_backup.download_file") + @patch("github_backup.github_backup.retrieve_data") + @patch("github_backup.github_backup.mkdir_p") + @patch("github_backup.github_backup.json_dump_if_changed") + def test_multiple_skip_repos( + self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download + ): + """Multiple repos in skip list should all be skipped.""" + args = self._create_mock_args(skip_assets_on=["repo1", "repo2", "repo3"]) + repository = self._create_mock_repository(name="repo2") + release = self._create_mock_release() + + mock_json_dump.return_value = True + mock_retrieve.return_value = [release] + + github_backup.backup_releases( + args, + "/tmp/backup/repositories/repo2", + repository, + "https://api.github.com/repos/{owner}/{repo}", + include_assets=True, + ) + + # download_file should NOT have been called + mock_download.assert_not_called() + + @patch("github_backup.github_backup.download_file") + @patch("github_backup.github_backup.retrieve_data") + @patch("github_backup.github_backup.mkdir_p") + @patch("github_backup.github_backup.json_dump_if_changed") + def test_release_metadata_still_saved_when_assets_skipped( + self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download + ): + """Release JSON should still be saved even when assets are skipped.""" + args = self._create_mock_args(skip_assets_on=["big-repo"]) + repository = self._create_mock_repository(name="big-repo") + release = self._create_mock_release() + + mock_json_dump.return_value = True + mock_retrieve.return_value = [release] + + github_backup.backup_releases( + args, + "/tmp/backup/repositories/big-repo", + repository, + "https://api.github.com/repos/{owner}/{repo}", + include_assets=True, + ) + + # json_dump_if_changed should have been called for release metadata + mock_json_dump.assert_called_once() + # But download_file should NOT have been called + mock_download.assert_not_called() + + @patch("github_backup.github_backup.download_file") + @patch("github_backup.github_backup.retrieve_data") + @patch("github_backup.github_backup.mkdir_p") + @patch("github_backup.github_backup.json_dump_if_changed") + def test_non_matching_repo_still_downloads_assets( + self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download + ): + """Repos not in skip list should still download assets.""" + args = self._create_mock_args(skip_assets_on=["other-repo"]) + repository = self._create_mock_repository(name="normal-repo") + release = self._create_mock_release() + asset = self._create_mock_asset() + + mock_json_dump.return_value = True + mock_retrieve.side_effect = [ + [release], # First call: get releases + [asset], # Second call: get assets + ] + + with patch("os.path.join", side_effect=lambda *args: "/".join(args)): + github_backup.backup_releases( + args, + "/tmp/backup/repositories/normal-repo", + repository, + "https://api.github.com/repos/{owner}/{repo}", + include_assets=True, + ) + + # download_file SHOULD have been called + mock_download.assert_called_once() + + +if __name__ == "__main__": + pytest.main([__file__, "-v"])