diff --git a/README.rst b/README.rst index e2c8fc2..c23027d 100644 --- a/README.rst +++ b/README.rst @@ -281,6 +281,8 @@ The tool automatically extracts file extensions from HTTP headers to ensure file **Repository filtering** for repo files/assets handles renamed and transferred repositories gracefully. URLs are included if they either match the current repository name directly, or redirect to it (e.g., ``willmcgugan/rich`` redirects to ``Textualize/rich`` after transfer). +**Fine-grained token limitation:** Due to a GitHub platform limitation, fine-grained personal access tokens (``github_pat_...``) cannot download attachments from private repositories directly. This affects both ``/assets/`` (images) and ``/files/`` (documents) URLs. The tool implements a workaround for image attachments using GitHub's Markdown API, which converts URLs to temporary JWT-signed URLs that can be downloaded. However, this workaround only works for images - document attachments (PDFs, text files, etc.) will fail with 404 errors when using fine-grained tokens on private repos. For full attachment support on private repositories, use a classic token (``-t``) instead of a fine-grained token (``-f``). See `#477 `_ for details. + Run in Docker container ----------------------- diff --git a/github_backup/cli.py b/github_backup/cli.py index 54849d4..987ae71 100644 --- a/github_backup/cli.py +++ b/github_backup/cli.py @@ -46,6 +46,16 @@ def main(): "Use -t/--token or -f/--token-fine to authenticate." ) + # Issue #477: Fine-grained PATs cannot download all attachment types from + # private repos. Image attachments will be retried via Markdown API workaround. + if args.include_attachments and args.token_fine: + logger.warning( + "Using --attachments with fine-grained token. Due to GitHub platform " + "limitations, file attachments (PDFs, etc.) from private repos may fail. " + "Image attachments will be retried via workaround. For full attachment " + "support, use --token-classic instead." + ) + if args.quiet: logger.setLevel(logging.WARNING) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 8a60f66..705f013 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -1062,6 +1062,65 @@ def download_attachment_file(url, path, auth, as_app=False, fine=False): return metadata +def get_jwt_signed_url_via_markdown_api(url, token, repo_context): + """Convert a user-attachments/assets URL to a JWT-signed URL via Markdown API. + + GitHub's Markdown API renders image URLs and returns HTML containing + JWT-signed private-user-images.githubusercontent.com URLs that work + without token authentication. + + This is a workaround for issue #477 where fine-grained PATs cannot + download user-attachments URLs from private repos directly. + + Limitations: + - Only works for /assets/ URLs (images) + - Does NOT work for /files/ URLs (PDFs, text files, etc.) + - JWT URLs expire after ~5 minutes + + Args: + url: The github.com/user-attachments/assets/UUID URL + token: Raw fine-grained PAT (github_pat_...) + repo_context: Repository context as "owner/repo" + + Returns: + str: JWT-signed URL from private-user-images.githubusercontent.com + None: If conversion fails + """ + + try: + payload = json.dumps( + {"text": f"![img]({url})", "mode": "gfm", "context": repo_context} + ).encode("utf-8") + + request = Request("https://api.github.com/markdown", data=payload, method="POST") + request.add_header("Authorization", f"token {token}") + request.add_header("Content-Type", "application/json") + request.add_header("Accept", "application/vnd.github+json") + + html = urlopen(request, timeout=30).read().decode("utf-8") + + # Parse JWT-signed URL from HTML response + # Format: + if match := re.search( + r'src="(https://private-user-images\.githubusercontent\.com/[^"]+)"', html + ): + jwt_url = match.group(1) + logger.debug("Converted attachment URL to JWT-signed URL via Markdown API") + return jwt_url + + logger.debug("Markdown API response did not contain JWT-signed URL") + return None + + except HTTPError as e: + logger.debug( + "Markdown API request failed with HTTP {0}: {1}".format(e.code, e.reason) + ) + return None + except Exception as e: + logger.debug("Markdown API request failed: {0}".format(str(e))) + return None + + def extract_attachment_urls(item_data, issue_number=None, repository_full_name=None): """Extract GitHub-hosted attachment URLs from issue/PR body and comments. @@ -1415,15 +1474,46 @@ def download_attachments( filename = get_attachment_filename(url) filepath = os.path.join(attachments_dir, filename) - # Download and get metadata - metadata = download_attachment_file( - url, - filepath, - get_auth(args, encode=not args.as_app), - as_app=args.as_app, - fine=args.token_fine is not None, + # Issue #477: Fine-grained PATs cannot download user-attachments/assets + # from private repos directly (404). Use Markdown API workaround to get + # a JWT-signed URL. Only works for /assets/ (images), not /files/. + needs_jwt = ( + args.token_fine is not None + and repository.get("private", False) + and "github.com/user-attachments/assets/" in url ) + if not needs_jwt: + # NORMAL download path + metadata = download_attachment_file( + url, + filepath, + get_auth(args, encode=not args.as_app), + as_app=args.as_app, + fine=args.token_fine is not None, + ) + elif jwt_url := get_jwt_signed_url_via_markdown_api( + url, args.token_fine, repository["full_name"] + ): + # JWT needed and extracted, download via JWT + metadata = download_attachment_file( + jwt_url, filepath, auth=None, as_app=False, fine=False + ) + metadata["url"] = url # Apply back the original URL + metadata["jwt_workaround"] = True + else: + # Markdown API workaround failed - skip download we know will fail + metadata = { + "url": url, + "success": False, + "skipped_at": datetime.now(timezone.utc).isoformat(), + "error": "Fine-grained token cannot download private repo attachments. " + "Markdown API workaround failed. Use --token-classic instead.", + } + logger.warning( + "Skipping attachment {0}: {1}".format(url, metadata["error"]) + ) + # If download succeeded but we got an extension from Content-Disposition, # we may need to rename the file to add the extension if metadata["success"] and metadata.get("original_filename"): @@ -1951,7 +2041,9 @@ def backup_security_advisories(args, repo_cwd, repository, repos_template): logger.info("Retrieving {0} security advisories".format(repository["full_name"])) mkdir_p(repo_cwd, advisory_cwd) - template = "{0}/{1}/security-advisories".format(repos_template, repository["full_name"]) + template = "{0}/{1}/security-advisories".format( + repos_template, repository["full_name"] + ) _advisories = retrieve_data(args, template) diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..b36fe64 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,25 @@ +"""Shared pytest fixtures for github-backup tests.""" + +import pytest + +from github_backup.github_backup import parse_args + + +@pytest.fixture +def create_args(): + """Factory fixture that creates args with real CLI defaults. + + Uses the actual argument parser so new CLI args are automatically + available with their defaults - no test updates needed. + + Usage: + def test_something(self, create_args): + args = create_args(include_releases=True, user="myuser") + """ + def _create(**overrides): + # Use real parser to get actual defaults + args = parse_args(["testuser"]) + for key, value in overrides.items(): + setattr(args, key, value) + return args + return _create diff --git a/tests/test_all_starred.py b/tests/test_all_starred.py index 297d148..9776926 100644 --- a/tests/test_all_starred.py +++ b/tests/test_all_starred.py @@ -1,7 +1,7 @@ """Tests for --all-starred flag behavior (issue #225).""" import pytest -from unittest.mock import Mock, patch +from unittest.mock import patch from github_backup import github_backup @@ -12,58 +12,14 @@ class TestAllStarredCloning: Issue #225: --all-starred should clone starred repos without requiring --repositories. """ - def _create_mock_args(self, **overrides): - """Create a mock args object with sensible defaults.""" - args = Mock() - args.user = "testuser" - args.output_directory = "/tmp/backup" - args.include_repository = False - args.include_everything = False - args.include_gists = False - args.include_starred_gists = False - args.all_starred = False - args.skip_existing = False - args.bare_clone = False - args.lfs_clone = False - args.no_prune = False - args.include_wiki = False - args.include_issues = False - args.include_issue_comments = False - args.include_issue_events = False - args.include_pulls = False - args.include_pull_comments = False - args.include_pull_commits = False - args.include_pull_details = False - args.include_labels = False - args.include_hooks = False - args.include_milestones = False - args.include_security_advisories = False - args.include_releases = False - args.include_assets = False - args.include_attachments = False - args.incremental = False - args.incremental_by_files = False - args.github_host = None - args.prefer_ssh = False - args.token_classic = None - args.token_fine = None - args.as_app = False - args.osx_keychain_item_name = None - args.osx_keychain_item_account = None - - for key, value in overrides.items(): - setattr(args, key, value) - - return args - @patch('github_backup.github_backup.fetch_repository') @patch('github_backup.github_backup.get_github_repo_url') - def test_all_starred_clones_without_repositories_flag(self, mock_get_url, mock_fetch): + def test_all_starred_clones_without_repositories_flag(self, mock_get_url, mock_fetch, create_args): """--all-starred should clone starred repos without --repositories flag. This is the core fix for issue #225. """ - args = self._create_mock_args(all_starred=True) + args = create_args(all_starred=True) mock_get_url.return_value = "https://github.com/otheruser/awesome-project.git" # A starred repository (is_starred flag set by retrieve_repositories) @@ -88,9 +44,9 @@ class TestAllStarredCloning: @patch('github_backup.github_backup.fetch_repository') @patch('github_backup.github_backup.get_github_repo_url') - def test_starred_repo_not_cloned_without_all_starred_flag(self, mock_get_url, mock_fetch): + def test_starred_repo_not_cloned_without_all_starred_flag(self, mock_get_url, mock_fetch, create_args): """Starred repos should NOT be cloned if --all-starred is not set.""" - args = self._create_mock_args(all_starred=False) + args = create_args(all_starred=False) mock_get_url.return_value = "https://github.com/otheruser/awesome-project.git" starred_repo = { @@ -111,9 +67,9 @@ class TestAllStarredCloning: @patch('github_backup.github_backup.fetch_repository') @patch('github_backup.github_backup.get_github_repo_url') - def test_non_starred_repo_not_cloned_with_only_all_starred(self, mock_get_url, mock_fetch): + def test_non_starred_repo_not_cloned_with_only_all_starred(self, mock_get_url, mock_fetch, create_args): """Non-starred repos should NOT be cloned when only --all-starred is set.""" - args = self._create_mock_args(all_starred=True) + args = create_args(all_starred=True) mock_get_url.return_value = "https://github.com/testuser/my-project.git" # A regular (non-starred) repository @@ -135,9 +91,9 @@ class TestAllStarredCloning: @patch('github_backup.github_backup.fetch_repository') @patch('github_backup.github_backup.get_github_repo_url') - def test_repositories_flag_still_works(self, mock_get_url, mock_fetch): + def test_repositories_flag_still_works(self, mock_get_url, mock_fetch, create_args): """--repositories flag should still clone repos as before.""" - args = self._create_mock_args(include_repository=True) + args = create_args(include_repository=True) mock_get_url.return_value = "https://github.com/testuser/my-project.git" regular_repo = { diff --git a/tests/test_attachments.py b/tests/test_attachments.py index b338caf..241a08f 100644 --- a/tests/test_attachments.py +++ b/tests/test_attachments.py @@ -4,7 +4,7 @@ import json import os import tempfile from pathlib import Path -from unittest.mock import Mock +from unittest.mock import Mock, patch import pytest @@ -12,22 +12,13 @@ from github_backup import github_backup @pytest.fixture -def attachment_test_setup(tmp_path): +def attachment_test_setup(tmp_path, create_args): """Fixture providing setup and helper for attachment download tests.""" - from unittest.mock import patch - issue_cwd = tmp_path / "issues" issue_cwd.mkdir() - # Mock args - args = Mock() - args.as_app = False - args.token_fine = None - args.token_classic = None - args.osx_keychain_item_name = None - args.osx_keychain_item_account = None - args.user = "testuser" - args.repository = "testrepo" + # Create args using shared fixture + args = create_args(user="testuser", repository="testrepo") repository = {"full_name": "testuser/testrepo"} @@ -349,3 +340,146 @@ class TestManifestDuplicatePrevention: downloaded_urls[0] == "https://github.com/user-attachments/assets/unavailable" ) + + +class TestJWTWorkaround: + """Test JWT workaround for fine-grained tokens on private repos (issue #477).""" + + def test_markdown_api_extracts_jwt_url(self): + """Markdown API response with JWT URL is extracted correctly.""" + html_response = ( + '

' + ) + + mock_response = Mock() + mock_response.read.return_value = html_response.encode("utf-8") + + with patch("github_backup.github_backup.urlopen", return_value=mock_response): + result = github_backup.get_jwt_signed_url_via_markdown_api( + "https://github.com/user-attachments/assets/abc123", + "github_pat_token", + "owner/repo" + ) + + expected = ( + "https://private-user-images.githubusercontent.com" + "/123/abc.png?jwt=eyJhbGciOiJ" + ) + assert result == expected + + def test_markdown_api_returns_none_on_http_error(self): + """HTTP errors return None.""" + from urllib.error import HTTPError + + error = HTTPError("http://test", 403, "Forbidden", {}, None) + with patch("github_backup.github_backup.urlopen", side_effect=error): + result = github_backup.get_jwt_signed_url_via_markdown_api( + "https://github.com/user-attachments/assets/abc123", + "github_pat_token", + "owner/repo" + ) + + assert result is None + + def test_markdown_api_returns_none_when_no_jwt_url(self): + """Response without JWT URL returns None.""" + mock_response = Mock() + mock_response.read.return_value = b"

No image here

" + + with patch("github_backup.github_backup.urlopen", return_value=mock_response): + result = github_backup.get_jwt_signed_url_via_markdown_api( + "https://github.com/user-attachments/assets/abc123", + "github_pat_token", + "owner/repo" + ) + + assert result is None + + def test_needs_jwt_only_for_fine_grained_private_assets(self): + """needs_jwt is True only for fine-grained + private + /assets/ URL.""" + assets_url = "https://github.com/user-attachments/assets/abc123" + files_url = "https://github.com/user-attachments/files/123/doc.pdf" + token_fine = "github_pat_test" + private = True + public = False + + # Fine-grained + private + assets = True + needs_jwt = ( + token_fine is not None + and private + and "github.com/user-attachments/assets/" in assets_url + ) + assert needs_jwt is True + + # Fine-grained + private + files = False + needs_jwt = ( + token_fine is not None + and private + and "github.com/user-attachments/assets/" in files_url + ) + assert needs_jwt is False + + # Fine-grained + public + assets = False + needs_jwt = ( + token_fine is not None + and public + and "github.com/user-attachments/assets/" in assets_url + ) + assert needs_jwt is False + + def test_jwt_workaround_sets_manifest_flag(self, attachment_test_setup): + """Successful JWT workaround sets jwt_workaround flag in manifest.""" + setup = attachment_test_setup + setup["args"].token_fine = "github_pat_test" + setup["repository"]["private"] = True + + issue_data = {"body": "https://github.com/user-attachments/assets/abc123"} + + jwt_url = "https://private-user-images.githubusercontent.com/123/abc.png?jwt=token" + + with patch( + "github_backup.github_backup.get_jwt_signed_url_via_markdown_api", + return_value=jwt_url + ), patch( + "github_backup.github_backup.download_attachment_file", + return_value={"success": True, "http_status": 200, "url": jwt_url} + ): + github_backup.download_attachments( + setup["args"], setup["issue_cwd"], issue_data, 123, setup["repository"] + ) + + manifest_path = os.path.join(setup["issue_cwd"], "attachments", "123", "manifest.json") + with open(manifest_path) as f: + manifest = json.load(f) + + assert manifest["attachments"][0]["jwt_workaround"] is True + assert manifest["attachments"][0]["url"] == "https://github.com/user-attachments/assets/abc123" + + def test_jwt_workaround_failure_uses_skipped_at(self, attachment_test_setup): + """Failed JWT workaround uses skipped_at instead of downloaded_at.""" + setup = attachment_test_setup + setup["args"].token_fine = "github_pat_test" + setup["repository"]["private"] = True + + issue_data = {"body": "https://github.com/user-attachments/assets/abc123"} + + with patch( + "github_backup.github_backup.get_jwt_signed_url_via_markdown_api", + return_value=None # Markdown API failed + ): + github_backup.download_attachments( + setup["args"], setup["issue_cwd"], issue_data, 123, setup["repository"] + ) + + manifest_path = os.path.join(setup["issue_cwd"], "attachments", "123", "manifest.json") + with open(manifest_path) as f: + manifest = json.load(f) + + attachment = manifest["attachments"][0] + assert attachment["success"] is False + assert "skipped_at" in attachment + assert "downloaded_at" not in attachment + assert "Use --token-classic" in attachment["error"] diff --git a/tests/test_case_sensitivity.py b/tests/test_case_sensitivity.py index 058a7df..795c14b 100644 --- a/tests/test_case_sensitivity.py +++ b/tests/test_case_sensitivity.py @@ -1,7 +1,6 @@ """Tests for case-insensitive username/organization filtering.""" import pytest -from unittest.mock import Mock from github_backup import github_backup @@ -9,25 +8,14 @@ from github_backup import github_backup class TestCaseSensitivity: """Test suite for case-insensitive username matching in filter_repositories.""" - def test_filter_repositories_case_insensitive_user(self): + def test_filter_repositories_case_insensitive_user(self, create_args): """Should filter repositories case-insensitively for usernames. Reproduces issue #198 where typing 'iamrodos' fails to match repositories with owner.login='Iamrodos' (the canonical case from GitHub API). """ # Simulate user typing lowercase username - args = Mock() - args.user = "iamrodos" # lowercase (what user typed) - args.repository = None - args.name_regex = None - args.languages = None - args.exclude = None - args.fork = False - args.private = False - args.public = False - args.all = True - args.skip_archived = False - args.starred_skip_size_over = None + args = create_args(user="iamrodos") # Simulate GitHub API returning canonical case repos = [ @@ -52,23 +40,12 @@ class TestCaseSensitivity: assert filtered[0]["name"] == "repo1" assert filtered[1]["name"] == "repo2" - def test_filter_repositories_case_insensitive_org(self): + def test_filter_repositories_case_insensitive_org(self, create_args): """Should filter repositories case-insensitively for organizations. Tests the example from issue #198 where 'prai-org' doesn't match 'PRAI-Org'. """ - args = Mock() - args.user = "prai-org" # lowercase (what user typed) - args.repository = None - args.name_regex = None - args.languages = None - args.exclude = None - args.fork = False - args.private = False - args.public = False - args.all = True - args.skip_archived = False - args.starred_skip_size_over = None + args = create_args(user="prai-org") repos = [ { @@ -85,20 +62,9 @@ class TestCaseSensitivity: assert len(filtered) == 1 assert filtered[0]["name"] == "repo1" - def test_filter_repositories_case_variations(self): + def test_filter_repositories_case_variations(self, create_args): """Should handle various case combinations correctly.""" - args = Mock() - args.user = "TeSt-UsEr" # Mixed case - args.repository = None - args.name_regex = None - args.languages = None - args.exclude = None - args.fork = False - args.private = False - args.public = False - args.all = True - args.skip_archived = False - args.starred_skip_size_over = None + args = create_args(user="TeSt-UsEr") repos = [ {"name": "repo1", "owner": {"login": "test-user"}, "private": False, "fork": False}, diff --git a/tests/test_http_451.py b/tests/test_http_451.py index bb825f7..b556069 100644 --- a/tests/test_http_451.py +++ b/tests/test_http_451.py @@ -11,17 +11,9 @@ from github_backup import github_backup class TestHTTP451Exception: """Test suite for HTTP 451 DMCA takedown exception handling.""" - def test_repository_unavailable_error_raised(self): + def test_repository_unavailable_error_raised(self, create_args): """HTTP 451 should raise RepositoryUnavailableError with DMCA URL.""" - args = Mock() - args.as_app = False - args.token_fine = None - args.token_classic = None - args.osx_keychain_item_name = None - args.osx_keychain_item_account = None - args.throttle_limit = None - args.throttle_pause = 0 - args.max_retries = 5 + args = create_args() mock_response = Mock() mock_response.getcode.return_value = 451 @@ -53,17 +45,9 @@ class TestHTTP451Exception: ) assert "451" in str(exc_info.value) - def test_repository_unavailable_error_without_dmca_url(self): + def test_repository_unavailable_error_without_dmca_url(self, create_args): """HTTP 451 without DMCA details should still raise exception.""" - args = Mock() - args.as_app = False - args.token_fine = None - args.token_classic = None - args.osx_keychain_item_name = None - args.osx_keychain_item_account = None - args.throttle_limit = None - args.throttle_pause = 0 - args.max_retries = 5 + args = create_args() mock_response = Mock() mock_response.getcode.return_value = 451 @@ -83,17 +67,9 @@ class TestHTTP451Exception: assert exc_info.value.dmca_url is None assert "451" in str(exc_info.value) - def test_repository_unavailable_error_with_malformed_json(self): + def test_repository_unavailable_error_with_malformed_json(self, create_args): """HTTP 451 with malformed JSON should still raise exception.""" - args = Mock() - args.as_app = False - args.token_fine = None - args.token_classic = None - args.osx_keychain_item_name = None - args.osx_keychain_item_account = None - args.throttle_limit = None - args.throttle_pause = 0 - args.max_retries = 5 + args = create_args() mock_response = Mock() mock_response.getcode.return_value = 451 diff --git a/tests/test_pagination.py b/tests/test_pagination.py index e35ff38..1931042 100644 --- a/tests/test_pagination.py +++ b/tests/test_pagination.py @@ -1,9 +1,7 @@ """Tests for Link header pagination handling.""" import json -from unittest.mock import Mock, patch - -import pytest +from unittest.mock import patch from github_backup import github_backup @@ -38,23 +36,9 @@ class MockHTTPResponse: return headers -@pytest.fixture -def mock_args(): - """Mock args for retrieve_data.""" - args = Mock() - args.as_app = False - args.token_fine = None - args.token_classic = "fake_token" - args.osx_keychain_item_name = None - args.osx_keychain_item_account = None - args.throttle_limit = None - args.throttle_pause = 0 - args.max_retries = 5 - return args - - -def test_cursor_based_pagination(mock_args): +def test_cursor_based_pagination(create_args): """Link header with 'after' cursor parameter works correctly.""" + args = create_args(token_classic="fake_token") # Simulate issues endpoint behavior: returns cursor in Link header responses = [ @@ -77,7 +61,7 @@ def test_cursor_based_pagination(mock_args): with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): results = github_backup.retrieve_data( - mock_args, "https://api.github.com/repos/owner/repo/issues" + args, "https://api.github.com/repos/owner/repo/issues" ) # Verify all items retrieved and cursor was used in second request @@ -86,8 +70,9 @@ def test_cursor_based_pagination(mock_args): assert "after=ABC123" in requests_made[1] -def test_page_based_pagination(mock_args): +def test_page_based_pagination(create_args): """Link header with 'page' parameter works correctly.""" + args = create_args(token_classic="fake_token") # Simulate pulls/repos endpoint behavior: returns page numbers in Link header responses = [ @@ -110,7 +95,7 @@ def test_page_based_pagination(mock_args): with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): results = github_backup.retrieve_data( - mock_args, "https://api.github.com/repos/owner/repo/pulls" + args, "https://api.github.com/repos/owner/repo/pulls" ) # Verify all items retrieved and page parameter was used (not cursor) @@ -120,8 +105,9 @@ def test_page_based_pagination(mock_args): assert "after" not in requests_made[1] -def test_no_link_header_stops_pagination(mock_args): +def test_no_link_header_stops_pagination(create_args): """Pagination stops when Link header is absent.""" + args = create_args(token_classic="fake_token") # Simulate endpoint with results that fit in a single page responses = [ @@ -138,7 +124,7 @@ def test_no_link_header_stops_pagination(mock_args): with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): results = github_backup.retrieve_data( - mock_args, "https://api.github.com/repos/owner/repo/labels" + args, "https://api.github.com/repos/owner/repo/labels" ) # Verify pagination stopped after first request diff --git a/tests/test_retrieve_data.py b/tests/test_retrieve_data.py index fa82bd7..159f06e 100644 --- a/tests/test_retrieve_data.py +++ b/tests/test_retrieve_data.py @@ -63,21 +63,9 @@ class TestCalculateRetryDelay: class TestRetrieveDataRetry: """Tests for retry behavior in retrieve_data.""" - @pytest.fixture - def mock_args(self): - args = Mock() - args.as_app = False - args.token_fine = None - args.token_classic = "fake_token" - args.osx_keychain_item_name = None - args.osx_keychain_item_account = None - args.throttle_limit = None - args.throttle_pause = 0 - args.max_retries = DEFAULT_MAX_RETRIES - return args - - def test_json_parse_error_retries_and_fails(self, mock_args): + def test_json_parse_error_retries_and_fails(self, create_args): """HTTP 200 with invalid JSON should retry and eventually fail.""" + args = create_args(token_classic="fake_token") mock_response = Mock() mock_response.getcode.return_value = 200 mock_response.read.return_value = b"not valid json {" @@ -85,7 +73,7 @@ class TestRetrieveDataRetry: call_count = 0 - def mock_make_request(*args, **kwargs): + def mock_make_request(*a, **kw): nonlocal call_count call_count += 1 return mock_response @@ -99,7 +87,7 @@ class TestRetrieveDataRetry: ): # No delay in tests with pytest.raises(Exception) as exc_info: github_backup.retrieve_data( - mock_args, "https://api.github.com/repos/test/repo/issues" + args, "https://api.github.com/repos/test/repo/issues" ) assert "Failed to read response after" in str(exc_info.value) @@ -107,8 +95,9 @@ class TestRetrieveDataRetry: call_count == DEFAULT_MAX_RETRIES + 1 ) # 1 initial + 5 retries = 6 attempts - def test_json_parse_error_recovers_on_retry(self, mock_args): + def test_json_parse_error_recovers_on_retry(self, create_args): """HTTP 200 with invalid JSON should succeed if retry returns valid JSON.""" + args = create_args(token_classic="fake_token") bad_response = Mock() bad_response.getcode.return_value = 200 bad_response.read.return_value = b"not valid json {" @@ -122,7 +111,7 @@ class TestRetrieveDataRetry: responses = [bad_response, bad_response, good_response] call_count = 0 - def mock_make_request(*args, **kwargs): + def mock_make_request(*a, **kw): nonlocal call_count result = responses[call_count] call_count += 1 @@ -136,14 +125,15 @@ class TestRetrieveDataRetry: "github_backup.github_backup.calculate_retry_delay", return_value=0 ): result = github_backup.retrieve_data( - mock_args, "https://api.github.com/repos/test/repo/issues" + args, "https://api.github.com/repos/test/repo/issues" ) assert result == [{"id": 1}] assert call_count == 3 # Failed twice, succeeded on third - def test_http_error_raises_exception(self, mock_args): + def test_http_error_raises_exception(self, create_args): """Non-success HTTP status codes should raise Exception.""" + args = create_args(token_classic="fake_token") mock_response = Mock() mock_response.getcode.return_value = 404 mock_response.read.return_value = b'{"message": "Not Found"}' @@ -156,7 +146,7 @@ class TestRetrieveDataRetry: ): with pytest.raises(Exception) as exc_info: github_backup.retrieve_data( - mock_args, "https://api.github.com/repos/test/notfound/issues" + args, "https://api.github.com/repos/test/notfound/issues" ) assert not isinstance( @@ -346,21 +336,13 @@ class TestMakeRequestWithRetry: class TestRetrieveDataThrottling: """Tests for throttling behavior in retrieve_data.""" - @pytest.fixture - def mock_args(self): - args = Mock() - args.as_app = False - args.token_fine = None - args.token_classic = "fake_token" - args.osx_keychain_item_name = None - args.osx_keychain_item_account = None - args.throttle_limit = 10 # Throttle when remaining <= 10 - args.throttle_pause = 5 # Pause 5 seconds - args.max_retries = DEFAULT_MAX_RETRIES - return args - - def test_throttling_pauses_when_rate_limit_low(self, mock_args): + def test_throttling_pauses_when_rate_limit_low(self, create_args): """Should pause when x-ratelimit-remaining is at or below throttle_limit.""" + args = create_args( + token_classic="fake_token", + throttle_limit=10, + throttle_pause=5, + ) mock_response = Mock() mock_response.getcode.return_value = 200 mock_response.read.return_value = json.dumps([{"id": 1}]).encode("utf-8") @@ -375,7 +357,7 @@ class TestRetrieveDataThrottling: ): with patch("github_backup.github_backup.time.sleep") as mock_sleep: github_backup.retrieve_data( - mock_args, "https://api.github.com/repos/test/repo/issues" + args, "https://api.github.com/repos/test/repo/issues" ) mock_sleep.assert_called_once_with(5) # throttle_pause value @@ -384,21 +366,9 @@ class TestRetrieveDataThrottling: class TestRetrieveDataSingleItem: """Tests for single item (dict) responses in retrieve_data.""" - @pytest.fixture - def mock_args(self): - args = Mock() - args.as_app = False - args.token_fine = None - args.token_classic = "fake_token" - args.osx_keychain_item_name = None - args.osx_keychain_item_account = None - args.throttle_limit = None - args.throttle_pause = 0 - args.max_retries = DEFAULT_MAX_RETRIES - return args - - def test_dict_response_returned_as_list(self, mock_args): + def test_dict_response_returned_as_list(self, create_args): """Single dict response should be returned as a list with one item.""" + args = create_args(token_classic="fake_token") mock_response = Mock() mock_response.getcode.return_value = 200 mock_response.read.return_value = json.dumps( @@ -411,7 +381,7 @@ class TestRetrieveDataSingleItem: return_value=mock_response, ): result = github_backup.retrieve_data( - mock_args, "https://api.github.com/user" + args, "https://api.github.com/user" ) assert result == [{"login": "testuser", "id": 123}] @@ -474,17 +444,12 @@ class TestRetriesCliArgument: assert result == good_response assert call_count == 2 # 1 initial + 1 retry = 2 attempts - def test_custom_retry_count_limits_attempts(self): + def test_custom_retry_count_limits_attempts(self, create_args): """Custom --retries value should limit actual retry attempts.""" - args = Mock() - args.as_app = False - args.token_fine = None - args.token_classic = "fake_token" - args.osx_keychain_item_name = None - args.osx_keychain_item_account = None - args.throttle_limit = None - args.throttle_pause = 0 - args.max_retries = 2 # 2 retries = 3 total attempts (1 initial + 2 retries) + args = create_args( + token_classic="fake_token", + max_retries=2, # 2 retries = 3 total attempts (1 initial + 2 retries) + ) mock_response = Mock() mock_response.getcode.return_value = 200 diff --git a/tests/test_skip_assets_on.py b/tests/test_skip_assets_on.py index ce28287..519750e 100644 --- a/tests/test_skip_assets_on.py +++ b/tests/test_skip_assets_on.py @@ -1,7 +1,7 @@ """Tests for --skip-assets-on flag behavior (issue #135).""" import pytest -from unittest.mock import Mock, patch +from unittest.mock import patch from github_backup import github_backup @@ -13,52 +13,6 @@ class TestSkipAssetsOn: while still backing up release metadata. """ - def _create_mock_args(self, **overrides): - """Create a mock args object with sensible defaults.""" - args = Mock() - args.user = "testuser" - args.output_directory = "/tmp/backup" - args.include_repository = False - args.include_everything = False - args.include_gists = False - args.include_starred_gists = False - args.all_starred = False - args.skip_existing = False - args.bare_clone = False - args.lfs_clone = False - args.no_prune = False - args.include_wiki = False - args.include_issues = False - args.include_issue_comments = False - args.include_issue_events = False - args.include_pulls = False - args.include_pull_comments = False - args.include_pull_commits = False - args.include_pull_details = False - args.include_labels = False - args.include_hooks = False - args.include_milestones = False - args.include_releases = True - args.include_assets = True - args.skip_assets_on = [] - args.include_attachments = False - args.incremental = False - args.incremental_by_files = False - args.github_host = None - args.prefer_ssh = False - args.token_classic = "test-token" - args.token_fine = None - args.as_app = False - args.osx_keychain_item_name = None - args.osx_keychain_item_account = None - args.skip_prerelease = False - args.number_of_latest_releases = None - - for key, value in overrides.items(): - setattr(args, key, value) - - return args - def _create_mock_repository(self, name="test-repo", owner="testuser"): """Create a mock repository object.""" return { @@ -123,10 +77,10 @@ class TestSkipAssetsOnBehavior(TestSkipAssetsOn): @patch("github_backup.github_backup.mkdir_p") @patch("github_backup.github_backup.json_dump_if_changed") def test_assets_downloaded_when_not_skipped( - self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download + self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download, create_args ): """Assets should be downloaded when repo is not in skip list.""" - args = self._create_mock_args(skip_assets_on=[]) + args = create_args(skip_assets_on=[]) repository = self._create_mock_repository(name="normal-repo") release = self._create_mock_release() asset = self._create_mock_asset() @@ -154,10 +108,10 @@ class TestSkipAssetsOnBehavior(TestSkipAssetsOn): @patch("github_backup.github_backup.mkdir_p") @patch("github_backup.github_backup.json_dump_if_changed") def test_assets_skipped_when_repo_name_matches( - self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download + self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download, create_args ): """Assets should be skipped when repo name is in skip list.""" - args = self._create_mock_args(skip_assets_on=["big-repo"]) + args = create_args(skip_assets_on=["big-repo"]) repository = self._create_mock_repository(name="big-repo") release = self._create_mock_release() @@ -180,10 +134,10 @@ class TestSkipAssetsOnBehavior(TestSkipAssetsOn): @patch("github_backup.github_backup.mkdir_p") @patch("github_backup.github_backup.json_dump_if_changed") def test_assets_skipped_when_full_name_matches( - self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download + self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download, create_args ): """Assets should be skipped when owner/repo format matches.""" - args = self._create_mock_args(skip_assets_on=["otheruser/big-repo"]) + args = create_args(skip_assets_on=["otheruser/big-repo"]) repository = self._create_mock_repository(name="big-repo", owner="otheruser") release = self._create_mock_release() @@ -206,11 +160,11 @@ class TestSkipAssetsOnBehavior(TestSkipAssetsOn): @patch("github_backup.github_backup.mkdir_p") @patch("github_backup.github_backup.json_dump_if_changed") def test_case_insensitive_matching( - self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download + self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download, create_args ): """Skip matching should be case-insensitive.""" # User types uppercase, repo name is lowercase - args = self._create_mock_args(skip_assets_on=["BIG-REPO"]) + args = create_args(skip_assets_on=["BIG-REPO"]) repository = self._create_mock_repository(name="big-repo") release = self._create_mock_release() @@ -233,10 +187,10 @@ class TestSkipAssetsOnBehavior(TestSkipAssetsOn): @patch("github_backup.github_backup.mkdir_p") @patch("github_backup.github_backup.json_dump_if_changed") def test_multiple_skip_repos( - self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download + self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download, create_args ): """Multiple repos in skip list should all be skipped.""" - args = self._create_mock_args(skip_assets_on=["repo1", "repo2", "repo3"]) + args = create_args(skip_assets_on=["repo1", "repo2", "repo3"]) repository = self._create_mock_repository(name="repo2") release = self._create_mock_release() @@ -259,10 +213,10 @@ class TestSkipAssetsOnBehavior(TestSkipAssetsOn): @patch("github_backup.github_backup.mkdir_p") @patch("github_backup.github_backup.json_dump_if_changed") def test_release_metadata_still_saved_when_assets_skipped( - self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download + self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download, create_args ): """Release JSON should still be saved even when assets are skipped.""" - args = self._create_mock_args(skip_assets_on=["big-repo"]) + args = create_args(skip_assets_on=["big-repo"]) repository = self._create_mock_repository(name="big-repo") release = self._create_mock_release() @@ -287,10 +241,10 @@ class TestSkipAssetsOnBehavior(TestSkipAssetsOn): @patch("github_backup.github_backup.mkdir_p") @patch("github_backup.github_backup.json_dump_if_changed") def test_non_matching_repo_still_downloads_assets( - self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download + self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download, create_args ): """Repos not in skip list should still download assets.""" - args = self._create_mock_args(skip_assets_on=["other-repo"]) + args = create_args(skip_assets_on=["other-repo"]) repository = self._create_mock_repository(name="normal-repo") release = self._create_mock_release() asset = self._create_mock_asset() diff --git a/tests/test_starred_skip_size_over.py b/tests/test_starred_skip_size_over.py index 2deb72a..250d191 100644 --- a/tests/test_starred_skip_size_over.py +++ b/tests/test_starred_skip_size_over.py @@ -1,39 +1,11 @@ """Tests for --starred-skip-size-over flag behavior (issue #108).""" import pytest -from unittest.mock import Mock from github_backup import github_backup -class TestStarredSkipSizeOver: - """Test suite for --starred-skip-size-over flag. - - Issue #108: Allow restricting size of starred repositories before cloning. - The size is based on the GitHub API's 'size' field (in KB), but the CLI - argument accepts MB for user convenience. - """ - - def _create_mock_args(self, **overrides): - """Create a mock args object with sensible defaults.""" - args = Mock() - args.user = "testuser" - args.repository = None - args.name_regex = None - args.languages = None - args.fork = False - args.private = False - args.skip_archived = False - args.starred_skip_size_over = None - args.exclude = None - - for key, value in overrides.items(): - setattr(args, key, value) - - return args - - -class TestStarredSkipSizeOverArgumentParsing(TestStarredSkipSizeOver): +class TestStarredSkipSizeOverArgumentParsing: """Tests for --starred-skip-size-over argument parsing.""" def test_starred_skip_size_over_not_set_defaults_to_none(self): @@ -52,12 +24,17 @@ class TestStarredSkipSizeOverArgumentParsing(TestStarredSkipSizeOver): github_backup.parse_args(["testuser", "--starred-skip-size-over", "abc"]) -class TestStarredSkipSizeOverFiltering(TestStarredSkipSizeOver): - """Tests for --starred-skip-size-over filtering behavior.""" +class TestStarredSkipSizeOverFiltering: + """Tests for --starred-skip-size-over filtering behavior. - def test_starred_repo_under_limit_is_kept(self): + Issue #108: Allow restricting size of starred repositories before cloning. + The size is based on the GitHub API's 'size' field (in KB), but the CLI + argument accepts MB for user convenience. + """ + + def test_starred_repo_under_limit_is_kept(self, create_args): """Starred repos under the size limit should be kept.""" - args = self._create_mock_args(starred_skip_size_over=500) + args = create_args(starred_skip_size_over=500) repos = [ { @@ -72,9 +49,9 @@ class TestStarredSkipSizeOverFiltering(TestStarredSkipSizeOver): assert len(result) == 1 assert result[0]["name"] == "small-repo" - def test_starred_repo_over_limit_is_filtered(self): + def test_starred_repo_over_limit_is_filtered(self, create_args): """Starred repos over the size limit should be filtered out.""" - args = self._create_mock_args(starred_skip_size_over=500) + args = create_args(starred_skip_size_over=500) repos = [ { @@ -88,9 +65,9 @@ class TestStarredSkipSizeOverFiltering(TestStarredSkipSizeOver): result = github_backup.filter_repositories(args, repos) assert len(result) == 0 - def test_own_repo_over_limit_is_kept(self): + def test_own_repo_over_limit_is_kept(self, create_args): """User's own repos should not be affected by the size limit.""" - args = self._create_mock_args(starred_skip_size_over=500) + args = create_args(starred_skip_size_over=500) repos = [ { @@ -105,9 +82,9 @@ class TestStarredSkipSizeOverFiltering(TestStarredSkipSizeOver): assert len(result) == 1 assert result[0]["name"] == "my-huge-repo" - def test_starred_repo_at_exact_limit_is_kept(self): + def test_starred_repo_at_exact_limit_is_kept(self, create_args): """Starred repos at exactly the size limit should be kept.""" - args = self._create_mock_args(starred_skip_size_over=500) + args = create_args(starred_skip_size_over=500) repos = [ { @@ -122,9 +99,9 @@ class TestStarredSkipSizeOverFiltering(TestStarredSkipSizeOver): assert len(result) == 1 assert result[0]["name"] == "exact-limit-repo" - def test_mixed_repos_filtered_correctly(self): + def test_mixed_repos_filtered_correctly(self, create_args): """Mix of own and starred repos should be filtered correctly.""" - args = self._create_mock_args(starred_skip_size_over=500) + args = create_args(starred_skip_size_over=500) repos = [ { @@ -153,9 +130,9 @@ class TestStarredSkipSizeOverFiltering(TestStarredSkipSizeOver): assert "starred-small" in names assert "starred-huge" not in names - def test_no_size_limit_keeps_all_starred(self): + def test_no_size_limit_keeps_all_starred(self, create_args): """When no size limit is set, all starred repos should be kept.""" - args = self._create_mock_args(starred_skip_size_over=None) + args = create_args(starred_skip_size_over=None) repos = [ { @@ -169,9 +146,9 @@ class TestStarredSkipSizeOverFiltering(TestStarredSkipSizeOver): result = github_backup.filter_repositories(args, repos) assert len(result) == 1 - def test_repo_without_size_field_is_kept(self): + def test_repo_without_size_field_is_kept(self, create_args): """Repos without a size field should be kept (size defaults to 0).""" - args = self._create_mock_args(starred_skip_size_over=500) + args = create_args(starred_skip_size_over=500) repos = [ { @@ -185,9 +162,9 @@ class TestStarredSkipSizeOverFiltering(TestStarredSkipSizeOver): result = github_backup.filter_repositories(args, repos) assert len(result) == 1 - def test_zero_value_warns_and_is_ignored(self, caplog): + def test_zero_value_warns_and_is_ignored(self, create_args, caplog): """Zero value should warn and keep all repos.""" - args = self._create_mock_args(starred_skip_size_over=0) + args = create_args(starred_skip_size_over=0) repos = [ { @@ -202,9 +179,9 @@ class TestStarredSkipSizeOverFiltering(TestStarredSkipSizeOver): assert len(result) == 1 assert "must be greater than 0" in caplog.text - def test_negative_value_warns_and_is_ignored(self, caplog): + def test_negative_value_warns_and_is_ignored(self, create_args, caplog): """Negative value should warn and keep all repos.""" - args = self._create_mock_args(starred_skip_size_over=-5) + args = create_args(starred_skip_size_over=-5) repos = [ {