From 44b0003ec9766759f39e23084db1ba152d90d1a1 Mon Sep 17 00:00:00 2001 From: michaelmartinez Date: Tue, 23 Dec 2025 14:07:38 -0800 Subject: [PATCH] updates to the tests, and fixes to the retry --- github_backup/github_backup.py | 59 ++++++--- tests/test_http_451.py | 39 ++++-- tests/test_pagination.py | 1 + tests/test_retrieve_data.py | 233 +++++++++++++++++++++++++++------ 4 files changed, 264 insertions(+), 68 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 7aaf722..12b354b 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -141,6 +141,17 @@ def mask_password(url, secret="*****"): return url.replace(parsed.password, secret) +def non_negative_int(value): + """Argparse type validator for non-negative integers.""" + try: + ivalue = int(value) + except ValueError: + raise argparse.ArgumentTypeError(f"'{value}' is not a valid integer") + if ivalue < 0: + raise argparse.ArgumentTypeError(f"{value} must be 0 or greater") + return ivalue + + def parse_args(args=None): parser = argparse.ArgumentParser(description="Backup a github account") parser.add_argument("user", metavar="USER", type=str, help="github username") @@ -468,7 +479,7 @@ def parse_args(args=None): parser.add_argument( "--retries", dest="max_retries", - type=int, + type=non_negative_int, default=5, help="maximum number of retries for API calls (default: 5)", ) @@ -626,7 +637,7 @@ def retrieve_data(args, template, query_args=None, paginated=True): def _extract_next_page_url(link_header): for link in link_header.split(","): if 'rel="next"' in link: - return link[link.find("<") + 1:link.find(">")] + return link[link.find("<") + 1 : link.find(">")] return None def fetch_all() -> Generator[dict, None, None]: @@ -635,7 +646,7 @@ def retrieve_data(args, template, query_args=None, paginated=True): while True: # FIRST: Fetch response - for attempt in range(args.max_retries): + for attempt in range(args.max_retries + 1): request = _construct_request( per_page=per_page if paginated else None, query_args=query_args, @@ -658,10 +669,10 @@ def retrieve_data(args, template, query_args=None, paginated=True): TimeoutError, ) as e: logger.warning(f"{type(e).__name__} reading response") - if attempt < args.max_retries - 1: + if attempt < args.max_retries: delay = calculate_retry_delay(attempt, {}) logger.warning( - f"Retrying read in {delay:.1f}s (attempt {attempt + 1}/{args.max_retries})" + f"Retrying read in {delay:.1f}s (attempt {attempt + 1}/{args.max_retries + 1})" ) time.sleep(delay) continue # Next retry attempt @@ -687,10 +698,10 @@ def retrieve_data(args, template, query_args=None, paginated=True): ) else: logger.error( - f"Failed to read response after {args.max_retries} attempts for {next_url or template}" + f"Failed to read response after {args.max_retries + 1} attempts for {next_url or template}" ) raise Exception( - f"Failed to read response after {args.max_retries} attempts for {next_url or template}" + f"Failed to read response after {args.max_retries + 1} attempts for {next_url or template}" ) # SECOND: Process and paginate @@ -734,41 +745,49 @@ def make_request_with_retry(request, auth, max_retries=5): return int(headers.get("x-ratelimit-remaining", 1)) < 1 return False - for attempt in range(max_retries): + for attempt in range(max_retries + 1): try: return urlopen(request, context=https_ctx) except HTTPError as exc: # HTTPError can be used as a response-like object if not is_retryable_status(exc.code, exc.headers): - logger.error(f"API Error: 
{exc.code} {exc.reason} for {request.full_url}") + logger.error( + f"API Error: {exc.code} {exc.reason} for {request.full_url}" + ) raise # Non-retryable error - if attempt >= max_retries - 1: - logger.error(f"HTTP {exc.code} failed after {max_retries} attempts for {request.full_url}") + if attempt >= max_retries: + logger.error( + f"HTTP {exc.code} failed after {max_retries + 1} attempts for {request.full_url}" + ) raise delay = calculate_retry_delay(attempt, exc.headers) logger.warning( f"HTTP {exc.code} ({exc.reason}), retrying in {delay:.1f}s " - f"(attempt {attempt + 1}/{max_retries}) for {request.full_url}" + f"(attempt {attempt + 1}/{max_retries + 1}) for {request.full_url}" ) if auth is None and exc.code in (403, 429): logger.info("Hint: Authenticate to raise your GitHub rate limit") time.sleep(delay) except (URLError, socket.error) as e: - if attempt >= max_retries - 1: - logger.error(f"Connection error failed after {max_retries} attempts: {e} for {request.full_url}") + if attempt >= max_retries: + logger.error( + f"Connection error failed after {max_retries + 1} attempts: {e} for {request.full_url}" + ) raise delay = calculate_retry_delay(attempt, {}) logger.warning( f"Connection error: {e}, retrying in {delay:.1f}s " - f"(attempt {attempt + 1}/{max_retries}) for {request.full_url}" + f"(attempt {attempt + 1}/{max_retries + 1}) for {request.full_url}" ) time.sleep(delay) - raise Exception(f"Request failed after {max_retries} attempts") # pragma: no cover + raise Exception( + f"Request failed after {max_retries + 1} attempts" + ) # pragma: no cover def _construct_request(per_page, query_args, template, auth, as_app=None, fine=False): @@ -1584,9 +1603,7 @@ def filter_repositories(args, unfiltered_repositories): repositories = [r for r in repositories if not r.get("archived")] if args.starred_skip_size_over is not None: if args.starred_skip_size_over <= 0: - logger.warning( - "--starred-skip-size-over must be greater than 0, ignoring" - ) + logger.warning("--starred-skip-size-over must be greater than 0, ignoring") else: size_limit_kb = args.starred_skip_size_over * 1024 filtered = [] @@ -1595,7 +1612,9 @@ def filter_repositories(args, unfiltered_repositories): size_mb = r.get("size", 0) / 1024 logger.info( "Skipping starred repo {0} ({1:.0f} MB) due to --starred-skip-size-over {2}".format( - r.get("full_name", r.get("name")), size_mb, args.starred_skip_size_over + r.get("full_name", r.get("name")), + size_mb, + args.starred_skip_size_over, ) ) else: diff --git a/tests/test_http_451.py b/tests/test_http_451.py index d53d65c..bb825f7 100644 --- a/tests/test_http_451.py +++ b/tests/test_http_451.py @@ -21,6 +21,7 @@ class TestHTTP451Exception: args.osx_keychain_item_account = None args.throttle_limit = None args.throttle_pause = 0 + args.max_retries = 5 mock_response = Mock() mock_response.getcode.return_value = 451 @@ -30,18 +31,26 @@ class TestHTTP451Exception: "block": { "reason": "dmca", "created_at": "2024-11-12T14:38:04Z", - "html_url": "https://github.com/github/dmca/blob/master/2024/11/2024-11-04-source-code.md" - } + "html_url": "https://github.com/github/dmca/blob/master/2024/11/2024-11-04-source-code.md", + }, } mock_response.read.return_value = json.dumps(dmca_data).encode("utf-8") mock_response.headers = {"x-ratelimit-remaining": "5000"} mock_response.reason = "Unavailable For Legal Reasons" - with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response): + with patch( + "github_backup.github_backup.make_request_with_retry", + 
return_value=mock_response, + ): with pytest.raises(github_backup.RepositoryUnavailableError) as exc_info: - github_backup.retrieve_data(args, "https://api.github.com/repos/test/dmca/issues") + github_backup.retrieve_data( + args, "https://api.github.com/repos/test/dmca/issues" + ) - assert exc_info.value.dmca_url == "https://github.com/github/dmca/blob/master/2024/11/2024-11-04-source-code.md" + assert ( + exc_info.value.dmca_url + == "https://github.com/github/dmca/blob/master/2024/11/2024-11-04-source-code.md" + ) assert "451" in str(exc_info.value) def test_repository_unavailable_error_without_dmca_url(self): @@ -54,6 +63,7 @@ class TestHTTP451Exception: args.osx_keychain_item_account = None args.throttle_limit = None args.throttle_pause = 0 + args.max_retries = 5 mock_response = Mock() mock_response.getcode.return_value = 451 @@ -61,9 +71,14 @@ class TestHTTP451Exception: mock_response.headers = {"x-ratelimit-remaining": "5000"} mock_response.reason = "Unavailable For Legal Reasons" - with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response): + with patch( + "github_backup.github_backup.make_request_with_retry", + return_value=mock_response, + ): with pytest.raises(github_backup.RepositoryUnavailableError) as exc_info: - github_backup.retrieve_data(args, "https://api.github.com/repos/test/dmca/issues") + github_backup.retrieve_data( + args, "https://api.github.com/repos/test/dmca/issues" + ) assert exc_info.value.dmca_url is None assert "451" in str(exc_info.value) @@ -78,6 +93,7 @@ class TestHTTP451Exception: args.osx_keychain_item_account = None args.throttle_limit = None args.throttle_pause = 0 + args.max_retries = 5 mock_response = Mock() mock_response.getcode.return_value = 451 @@ -85,9 +101,14 @@ class TestHTTP451Exception: mock_response.headers = {"x-ratelimit-remaining": "5000"} mock_response.reason = "Unavailable For Legal Reasons" - with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response): + with patch( + "github_backup.github_backup.make_request_with_retry", + return_value=mock_response, + ): with pytest.raises(github_backup.RepositoryUnavailableError): - github_backup.retrieve_data(args, "https://api.github.com/repos/test/dmca/issues") + github_backup.retrieve_data( + args, "https://api.github.com/repos/test/dmca/issues" + ) if __name__ == "__main__": diff --git a/tests/test_pagination.py b/tests/test_pagination.py index 831b913..e35ff38 100644 --- a/tests/test_pagination.py +++ b/tests/test_pagination.py @@ -49,6 +49,7 @@ def mock_args(): args.osx_keychain_item_account = None args.throttle_limit = None args.throttle_pause = 0 + args.max_retries = 5 return args diff --git a/tests/test_retrieve_data.py b/tests/test_retrieve_data.py index adb1152..fa82bd7 100644 --- a/tests/test_retrieve_data.py +++ b/tests/test_retrieve_data.py @@ -9,26 +9,27 @@ import pytest from github_backup import github_backup from github_backup.github_backup import ( - MAX_RETRIES, calculate_retry_delay, make_request_with_retry, ) +# Default retry count used in tests (matches argparse default) +# With max_retries=5, total attempts = 6 (1 initial + 5 retries) +DEFAULT_MAX_RETRIES = 5 + class TestCalculateRetryDelay: def test_respects_retry_after_header(self): - headers = {'retry-after': '30'} + headers = {"retry-after": "30"} assert calculate_retry_delay(0, headers) == 30 def test_respects_rate_limit_reset(self): import time import calendar + # Set reset time 60 seconds in the future future_reset = calendar.timegm(time.gmtime()) + 
60 - headers = { - 'x-ratelimit-remaining': '0', - 'x-ratelimit-reset': str(future_reset) - } + headers = {"x-ratelimit-remaining": "0", "x-ratelimit-reset": str(future_reset)} delay = calculate_retry_delay(0, headers) # Should be approximately 60 seconds (with some tolerance for execution time) assert 55 <= delay <= 65 @@ -50,12 +51,10 @@ class TestCalculateRetryDelay: def test_minimum_rate_limit_delay(self): import time import calendar + # Set reset time in the past (already reset) past_reset = calendar.timegm(time.gmtime()) - 100 - headers = { - 'x-ratelimit-remaining': '0', - 'x-ratelimit-reset': str(past_reset) - } + headers = {"x-ratelimit-remaining": "0", "x-ratelimit-reset": str(past_reset)} delay = calculate_retry_delay(0, headers) # Should be minimum 10 seconds even if reset time is in past assert delay >= 10 @@ -74,6 +73,7 @@ class TestRetrieveDataRetry: args.osx_keychain_item_account = None args.throttle_limit = None args.throttle_pause = 0 + args.max_retries = DEFAULT_MAX_RETRIES return args def test_json_parse_error_retries_and_fails(self, mock_args): @@ -90,13 +90,22 @@ class TestRetrieveDataRetry: call_count += 1 return mock_response - with patch("github_backup.github_backup.make_request_with_retry", side_effect=mock_make_request): - with patch("github_backup.github_backup.calculate_retry_delay", return_value=0): # No delay in tests + with patch( + "github_backup.github_backup.make_request_with_retry", + side_effect=mock_make_request, + ): + with patch( + "github_backup.github_backup.calculate_retry_delay", return_value=0 + ): # No delay in tests with pytest.raises(Exception) as exc_info: - github_backup.retrieve_data(mock_args, "https://api.github.com/repos/test/repo/issues") + github_backup.retrieve_data( + mock_args, "https://api.github.com/repos/test/repo/issues" + ) assert "Failed to read response after" in str(exc_info.value) - assert call_count == MAX_RETRIES + assert ( + call_count == DEFAULT_MAX_RETRIES + 1 + ) # 1 initial + 5 retries = 6 attempts def test_json_parse_error_recovers_on_retry(self, mock_args): """HTTP 200 with invalid JSON should succeed if retry returns valid JSON.""" @@ -119,9 +128,16 @@ class TestRetrieveDataRetry: call_count += 1 return result - with patch("github_backup.github_backup.make_request_with_retry", side_effect=mock_make_request): - with patch("github_backup.github_backup.calculate_retry_delay", return_value=0): - result = github_backup.retrieve_data(mock_args, "https://api.github.com/repos/test/repo/issues") + with patch( + "github_backup.github_backup.make_request_with_retry", + side_effect=mock_make_request, + ): + with patch( + "github_backup.github_backup.calculate_retry_delay", return_value=0 + ): + result = github_backup.retrieve_data( + mock_args, "https://api.github.com/repos/test/repo/issues" + ) assert result == [{"id": 1}] assert call_count == 3 # Failed twice, succeeded on third @@ -134,11 +150,18 @@ class TestRetrieveDataRetry: mock_response.headers = {"x-ratelimit-remaining": "5000"} mock_response.reason = "Not Found" - with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response): + with patch( + "github_backup.github_backup.make_request_with_retry", + return_value=mock_response, + ): with pytest.raises(Exception) as exc_info: - github_backup.retrieve_data(mock_args, "https://api.github.com/repos/test/notfound/issues") + github_backup.retrieve_data( + mock_args, "https://api.github.com/repos/test/notfound/issues" + ) - assert not isinstance(exc_info.value, 
github_backup.RepositoryUnavailableError) + assert not isinstance( + exc_info.value, github_backup.RepositoryUnavailableError + ) assert "404" in str(exc_info.value) @@ -151,7 +174,7 @@ class TestMakeRequestWithRetry: good_response.read.return_value = b'{"ok": true}' call_count = 0 - fail_count = MAX_RETRIES - 1 # Fail all but last attempt + fail_count = DEFAULT_MAX_RETRIES # Fail all retries, succeed on last attempt def mock_urlopen(*args, **kwargs): nonlocal call_count @@ -167,14 +190,18 @@ class TestMakeRequestWithRetry: return good_response with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): - with patch("github_backup.github_backup.calculate_retry_delay", return_value=0): + with patch( + "github_backup.github_backup.calculate_retry_delay", return_value=0 + ): result = make_request_with_retry(Mock(), None) assert result == good_response - assert call_count == MAX_RETRIES + assert ( + call_count == DEFAULT_MAX_RETRIES + 1 + ) # 1 initial + 5 retries = 6 attempts def test_503_error_retries_until_exhausted(self): - """HTTP 503 should retry MAX_RETRIES times then raise.""" + """HTTP 503 should make 1 initial + DEFAULT_MAX_RETRIES retry attempts then raise.""" call_count = 0 def mock_urlopen(*args, **kwargs): @@ -189,12 +216,16 @@ class TestMakeRequestWithRetry: ) with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): - with patch("github_backup.github_backup.calculate_retry_delay", return_value=0): + with patch( + "github_backup.github_backup.calculate_retry_delay", return_value=0 + ): with pytest.raises(HTTPError) as exc_info: make_request_with_retry(Mock(), None) assert exc_info.value.code == 503 - assert call_count == MAX_RETRIES + assert ( + call_count == DEFAULT_MAX_RETRIES + 1 + ) # 1 initial + 5 retries = 6 attempts def test_404_error_not_retried(self): """HTTP 404 should not be retried - raise immediately.""" @@ -237,7 +268,9 @@ class TestMakeRequestWithRetry: return good_response with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): - with patch("github_backup.github_backup.calculate_retry_delay", return_value=0): + with patch( + "github_backup.github_backup.calculate_retry_delay", return_value=0 + ): result = make_request_with_retry(Mock(), None) assert result == good_response @@ -269,7 +302,7 @@ class TestMakeRequestWithRetry: """URLError (connection error) should retry and succeed if subsequent request works.""" good_response = Mock() call_count = 0 - fail_count = MAX_RETRIES - 1 # Fail all but last attempt + fail_count = DEFAULT_MAX_RETRIES # Fail all retries, succeed on last attempt def mock_urlopen(*args, **kwargs): nonlocal call_count @@ -279,14 +312,18 @@ class TestMakeRequestWithRetry: return good_response with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): - with patch("github_backup.github_backup.calculate_retry_delay", return_value=0): + with patch( + "github_backup.github_backup.calculate_retry_delay", return_value=0 + ): result = make_request_with_retry(Mock(), None) assert result == good_response - assert call_count == MAX_RETRIES + assert ( + call_count == DEFAULT_MAX_RETRIES + 1 + ) # 1 initial + 5 retries = 6 attempts def test_socket_error_retries_until_exhausted(self): - """socket.error should retry MAX_RETRIES times then raise.""" + """socket.error should make 1 initial + DEFAULT_MAX_RETRIES retry attempts then raise.""" call_count = 0 def mock_urlopen(*args, **kwargs): @@ -295,11 +332,15 @@ class TestMakeRequestWithRetry: raise socket.error("Connection reset by 
peer") with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): - with patch("github_backup.github_backup.calculate_retry_delay", return_value=0): + with patch( + "github_backup.github_backup.calculate_retry_delay", return_value=0 + ): with pytest.raises(socket.error): make_request_with_retry(Mock(), None) - assert call_count == MAX_RETRIES + assert ( + call_count == DEFAULT_MAX_RETRIES + 1 + ) # 1 initial + 5 retries = 6 attempts class TestRetrieveDataThrottling: @@ -315,6 +356,7 @@ class TestRetrieveDataThrottling: args.osx_keychain_item_account = None args.throttle_limit = 10 # Throttle when remaining <= 10 args.throttle_pause = 5 # Pause 5 seconds + args.max_retries = DEFAULT_MAX_RETRIES return args def test_throttling_pauses_when_rate_limit_low(self, mock_args): @@ -322,11 +364,19 @@ class TestRetrieveDataThrottling: mock_response = Mock() mock_response.getcode.return_value = 200 mock_response.read.return_value = json.dumps([{"id": 1}]).encode("utf-8") - mock_response.headers = {"x-ratelimit-remaining": "5", "Link": ""} # Below throttle_limit + mock_response.headers = { + "x-ratelimit-remaining": "5", + "Link": "", + } # Below throttle_limit - with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response): + with patch( + "github_backup.github_backup.make_request_with_retry", + return_value=mock_response, + ): with patch("github_backup.github_backup.time.sleep") as mock_sleep: - github_backup.retrieve_data(mock_args, "https://api.github.com/repos/test/repo/issues") + github_backup.retrieve_data( + mock_args, "https://api.github.com/repos/test/repo/issues" + ) mock_sleep.assert_called_once_with(5) # throttle_pause value @@ -344,16 +394,121 @@ class TestRetrieveDataSingleItem: args.osx_keychain_item_account = None args.throttle_limit = None args.throttle_pause = 0 + args.max_retries = DEFAULT_MAX_RETRIES return args def test_dict_response_returned_as_list(self, mock_args): """Single dict response should be returned as a list with one item.""" mock_response = Mock() mock_response.getcode.return_value = 200 - mock_response.read.return_value = json.dumps({"login": "testuser", "id": 123}).encode("utf-8") + mock_response.read.return_value = json.dumps( + {"login": "testuser", "id": 123} + ).encode("utf-8") mock_response.headers = {"x-ratelimit-remaining": "5000", "Link": ""} - with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response): - result = github_backup.retrieve_data(mock_args, "https://api.github.com/user") + with patch( + "github_backup.github_backup.make_request_with_retry", + return_value=mock_response, + ): + result = github_backup.retrieve_data( + mock_args, "https://api.github.com/user" + ) assert result == [{"login": "testuser", "id": 123}] + + +class TestRetriesCliArgument: + """Tests for --retries CLI argument validation and behavior.""" + + def test_retries_argument_accepted(self): + """--retries flag should be accepted and parsed correctly.""" + args = github_backup.parse_args(["--retries", "3", "testuser"]) + assert args.max_retries == 3 + + def test_retries_default_value(self): + """--retries should default to 5 if not specified.""" + args = github_backup.parse_args(["testuser"]) + assert args.max_retries == 5 + + def test_retries_zero_is_valid(self): + """--retries 0 should be valid and mean 1 attempt (no retries).""" + args = github_backup.parse_args(["--retries", "0", "testuser"]) + assert args.max_retries == 0 + + def test_retries_negative_rejected(self): + """--retries with 
negative value should be rejected by argparse.""" + with pytest.raises(SystemExit): + github_backup.parse_args(["--retries", "-1", "testuser"]) + + def test_retries_non_integer_rejected(self): + """--retries with non-integer value should be rejected by argparse.""" + with pytest.raises(SystemExit): + github_backup.parse_args(["--retries", "abc", "testuser"]) + + def test_retries_one_with_transient_error_succeeds(self): + """--retries 1 should allow one retry after initial failure.""" + good_response = Mock() + good_response.read.return_value = b'{"ok": true}' + + call_count = 0 + + def mock_urlopen(*args, **kwargs): + nonlocal call_count + call_count += 1 + if call_count == 1: + raise HTTPError( + url="https://api.github.com/test", + code=502, + msg="Bad Gateway", + hdrs={"x-ratelimit-remaining": "5000"}, + fp=None, + ) + return good_response + + with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): + with patch( + "github_backup.github_backup.calculate_retry_delay", return_value=0 + ): + result = make_request_with_retry(Mock(), None, max_retries=1) + + assert result == good_response + assert call_count == 2 # 1 initial + 1 retry = 2 attempts + + def test_custom_retry_count_limits_attempts(self): + """Custom --retries value should limit actual retry attempts.""" + args = Mock() + args.as_app = False + args.token_fine = None + args.token_classic = "fake_token" + args.osx_keychain_item_name = None + args.osx_keychain_item_account = None + args.throttle_limit = None + args.throttle_pause = 0 + args.max_retries = 2 # 2 retries = 3 total attempts (1 initial + 2 retries) + + mock_response = Mock() + mock_response.getcode.return_value = 200 + mock_response.read.return_value = b"not valid json {" + mock_response.headers = {"x-ratelimit-remaining": "5000"} + + call_count = 0 + + def mock_make_request(*args, **kwargs): + nonlocal call_count + call_count += 1 + return mock_response + + with patch( + "github_backup.github_backup.make_request_with_retry", + side_effect=mock_make_request, + ): + with patch( + "github_backup.github_backup.calculate_retry_delay", return_value=0 + ): + with pytest.raises(Exception) as exc_info: + github_backup.retrieve_data( + args, "https://api.github.com/repos/test/repo/issues" + ) + + assert "Failed to read response after 3 attempts" in str(exc_info.value) + assert call_count == 3 # 1 initial + 2 retries = 3 attempts
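
Note on the retry semantics exercised above (a reviewer sketch, not part of the patch): every retry loop in this change uses range(max_retries + 1), so "--retries N" means N retries after the initial attempt, i.e. N + 1 total attempts, and "--retries 0" still performs the single initial request. A minimal standalone illustration of that contract, with a hypothetical run_with_retries/operation pair standing in for make_request_with_retry (the real code catches specific exception types and sleeps for calculate_retry_delay between attempts):

    def run_with_retries(operation, max_retries=5):
        """Call operation(); retry up to max_retries times on failure."""
        for attempt in range(max_retries + 1):    # 1 initial try + N retries
            try:
                return operation()
            except Exception:                      # patched code narrows this to HTTPError/URLError/etc.
                if attempt >= max_retries:         # retries exhausted: re-raise to the caller
                    raise
                # patched code sleeps here: time.sleep(calculate_retry_delay(attempt, headers))

    # With the default of 5 this yields 6 attempts, which is what the updated
    # tests assert via call_count == DEFAULT_MAX_RETRIES + 1.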