Fix retry logic for HTTP 5xx errors and network failures

Refactors error handling to retry all 5xx errors (not just 502), network errors (URLError, socket.error, IncompleteRead), and JSON parse errors with exponential backoff and jitter. Respects retry-after and rate limit headers per GitHub API requirements. Consolidates retry logic into make_request_with_retry() wrapper and adds clear logging for retry attempts and failures. Removes dead code from 2016 (errors list, _request_http_error, _request_url_error) that was intentionally disabled in commit 1e5a9048 to fix #29.

Fixes #140, #110, #138
This commit is contained in:
Rodos
2025-12-16 21:44:16 +11:00
parent 02dd902b67
commit 46140b0ff1
4 changed files with 541 additions and 260 deletions

View File

@@ -12,6 +12,7 @@ import json
import logging import logging
import os import os
import platform import platform
import random
import re import re
import select import select
import socket import socket
@@ -19,6 +20,7 @@ import ssl
import subprocess import subprocess
import sys import sys
import time import time
from collections.abc import Generator
from datetime import datetime from datetime import datetime
from http.client import IncompleteRead from http.client import IncompleteRead
from urllib.error import HTTPError, URLError from urllib.error import HTTPError, URLError
@@ -74,6 +76,9 @@ else:
" 3. Debian/Ubuntu: apt-get install ca-certificates\n\n" " 3. Debian/Ubuntu: apt-get install ca-certificates\n\n"
) )
# Retry configuration
MAX_RETRIES = 5
def logging_subprocess( def logging_subprocess(
popenargs, stdout_log_level=logging.DEBUG, stderr_log_level=logging.ERROR, **kwargs popenargs, stdout_log_level=logging.DEBUG, stderr_log_level=logging.ERROR, **kwargs
@@ -603,170 +608,178 @@ def get_github_repo_url(args, repository):
return repo_url return repo_url
def retrieve_data_gen(args, template, query_args=None, single_request=False): def calculate_retry_delay(attempt, headers):
"""Calculate delay before next retry with exponential backoff."""
# Respect retry-after header if present
if retry_after := headers.get("retry-after"):
return int(retry_after)
# Respect rate limit reset time
if int(headers.get("x-ratelimit-remaining", 1)) < 1:
reset_time = int(headers.get("x-ratelimit-reset", 0))
return max(10, reset_time - calendar.timegm(time.gmtime()))
# Exponential backoff with jitter for server errors (1s base, 120s max)
delay = min(1.0 * (2**attempt), 120.0)
return delay + random.uniform(0, delay * 0.1)
def retrieve_data(args, template, query_args=None, paginated=True):
"""
Fetch the data from GitHub API.
Handle both single requests and pagination with yield of individual dicts.
Handles throttling, retries, read errors, and DMCA takedowns.
"""
query_args = query_args or {}
auth = get_auth(args, encode=not args.as_app) auth = get_auth(args, encode=not args.as_app)
query_args = get_query_args(query_args)
per_page = 100 per_page = 100
next_url = None
while True: def _extract_next_page_url(link_header):
if single_request: for link in link_header.split(","):
request_per_page = None if 'rel="next"' in link:
else: return link[link.find("<") + 1:link.find(">")]
request_per_page = per_page return None
request = _construct_request( def fetch_all() -> Generator[dict, None, None]:
request_per_page, next_url = None
query_args,
next_url or template,
auth,
as_app=args.as_app,
fine=True if args.token_fine is not None else False,
) # noqa
r, errors = _get_response(request, auth, next_url or template)
status_code = int(r.getcode()) while True:
# FIRST: Fetch response
# Handle DMCA takedown (HTTP 451) - raise exception to skip entire repository for attempt in range(MAX_RETRIES):
if status_code == 451: request = _construct_request(
dmca_url = None per_page=per_page if paginated else None,
try: query_args=query_args,
response_data = json.loads(r.read().decode("utf-8")) template=next_url or template,
dmca_url = response_data.get("block", {}).get("html_url") auth=auth,
except Exception: as_app=args.as_app,
pass fine=args.token_fine is not None,
raise RepositoryUnavailableError(
"Repository unavailable due to legal reasons (HTTP 451)",
dmca_url=dmca_url,
)
# Check if we got correct data
try:
response = json.loads(r.read().decode("utf-8"))
except IncompleteRead:
logger.warning("Incomplete read error detected")
read_error = True
except json.decoder.JSONDecodeError:
logger.warning("JSON decode error detected")
read_error = True
except TimeoutError:
logger.warning("Tiemout error detected")
read_error = True
else:
read_error = False
# be gentle with API request limit and throttle requests if remaining requests getting low
limit_remaining = int(r.headers.get("x-ratelimit-remaining", 0))
if args.throttle_limit and limit_remaining <= args.throttle_limit:
logger.info(
"API request limit hit: {} requests left, pausing further requests for {}s".format(
limit_remaining, args.throttle_pause
) )
) http_response = make_request_with_retry(request, auth)
time.sleep(args.throttle_pause)
retries = 0 match http_response.getcode():
while retries < 3 and (status_code == 502 or read_error): case 200:
logger.warning("API request failed. Retrying in 5 seconds") # Success - Parse JSON response
retries += 1 try:
time.sleep(5) response = json.loads(http_response.read().decode("utf-8"))
request = _construct_request( break # Exit retry loop and handle the data returned
request_per_page, except (
query_args, IncompleteRead,
next_url or template, json.decoder.JSONDecodeError,
auth, TimeoutError,
as_app=args.as_app, ) as e:
fine=True if args.token_fine is not None else False, logger.warning(f"{type(e).__name__} reading response")
) # noqa if attempt < MAX_RETRIES - 1:
r, errors = _get_response(request, auth, next_url or template) delay = calculate_retry_delay(attempt, {})
logger.warning(
f"Retrying in {delay:.1f}s (attempt {attempt + 1}/{MAX_RETRIES})"
)
time.sleep(delay)
continue # Next retry attempt
status_code = int(r.getcode()) case 451:
try: # DMCA takedown - extract URL if available, then raise
response = json.loads(r.read().decode("utf-8")) dmca_url = None
read_error = False try:
except IncompleteRead: response_data = json.loads(
logger.warning("Incomplete read error detected") http_response.read().decode("utf-8")
read_error = True )
except json.decoder.JSONDecodeError: dmca_url = response_data.get("block", {}).get("html_url")
logger.warning("JSON decode error detected") except Exception:
read_error = True pass
except TimeoutError: raise RepositoryUnavailableError(
logger.warning("Tiemout error detected") "Repository unavailable due to legal reasons (HTTP 451)",
read_error = True dmca_url=dmca_url,
)
if status_code != 200: case _:
template = "API request returned HTTP {0}: {1}" raise Exception(
errors.append(template.format(status_code, r.reason)) f"API request returned HTTP {http_response.getcode()}: {http_response.reason}"
raise Exception(", ".join(errors)) )
else:
logger.error(
f"Failed to read response after {MAX_RETRIES} attempts for {next_url or template}"
)
raise Exception(
f"Failed to read response after {MAX_RETRIES} attempts for {next_url or template}"
)
if read_error: # SECOND: Process and paginate
template = "API request problem reading response for {0}"
errors.append(template.format(request))
raise Exception(", ".join(errors))
if len(errors) == 0: # Pause before next request if rate limit is low
if type(response) is list: if (
for resp in response: remaining := int(http_response.headers.get("x-ratelimit-remaining", 0))
yield resp ) <= (args.throttle_limit or 0):
# Parse Link header for next page URL (cursor-based pagination) if args.throttle_limit:
link_header = r.headers.get("Link", "") logger.info(
next_url = None f"Throttling: {remaining} requests left, pausing {args.throttle_pause}s"
if link_header: )
# Parse Link header: <https://api.github.com/...?per_page=100&after=cursor>; rel="next" time.sleep(args.throttle_pause)
for link in link_header.split(","):
if 'rel="next"' in link: # Yield results
next_url = link[link.find("<") + 1 : link.find(">")] if isinstance(response, list):
break yield from response
if not next_url: elif isinstance(response, dict):
break
elif type(response) is dict and single_request:
yield response yield response
if len(errors) > 0: # Check for more pages
raise Exception(", ".join(errors)) if not paginated or not (
next_url := _extract_next_page_url(
http_response.headers.get("Link", "")
)
):
break # No more data
if single_request: return list(fetch_all())
break
def retrieve_data(args, template, query_args=None, single_request=False): def make_request_with_retry(request, auth):
return list(retrieve_data_gen(args, template, query_args, single_request)) """Make HTTP request with automatic retry for transient errors."""
def is_retryable_status(status_code, headers):
# Server errors are always retryable
if status_code in (500, 502, 503, 504):
return True
# Rate limit (403/429) is retryable if limit exhausted
if status_code in (403, 429):
return int(headers.get("x-ratelimit-remaining", 1)) < 1
return False
def get_query_args(query_args=None): for attempt in range(MAX_RETRIES):
if not query_args:
query_args = {}
return query_args
def _get_response(request, auth, template):
retry_timeout = 3
errors = []
# We'll make requests in a loop so we can
# delay and retry in the case of rate-limiting
while True:
should_continue = False
try: try:
r = urlopen(request, context=https_ctx) return urlopen(request, context=https_ctx)
except HTTPError as exc: except HTTPError as exc:
errors, should_continue = _request_http_error(exc, auth, errors) # noqa # HTTPError can be used as a response-like object
r = exc if not is_retryable_status(exc.code, exc.headers):
except URLError as e: raise # Non-retryable error
logger.warning(e.reason)
should_continue, retry_timeout = _request_url_error(template, retry_timeout) if attempt >= MAX_RETRIES - 1:
if not should_continue: logger.error(f"HTTP {exc.code} failed after {MAX_RETRIES} attempts")
raise
except socket.error as e:
logger.warning(e.strerror)
should_continue, retry_timeout = _request_url_error(template, retry_timeout)
if not should_continue:
raise raise
if should_continue: delay = calculate_retry_delay(attempt, exc.headers)
continue logger.warning(
f"HTTP {exc.code}, retrying in {delay:.1f}s "
f"(attempt {attempt + 1}/{MAX_RETRIES})"
)
if auth is None and exc.code in (403, 429):
logger.info("Hint: Authenticate to raise your GitHub rate limit")
time.sleep(delay)
break except (URLError, socket.error) as e:
return r, errors if attempt >= MAX_RETRIES - 1:
logger.error(f"Connection error failed after {MAX_RETRIES} attempts: {e}")
raise
delay = calculate_retry_delay(attempt, {})
logger.warning(
f"Connection error: {e}, retrying in {delay:.1f}s "
f"(attempt {attempt + 1}/{MAX_RETRIES})"
)
time.sleep(delay)
raise Exception(f"Request failed after {MAX_RETRIES} attempts") # pragma: no cover
def _construct_request(per_page, query_args, template, auth, as_app=None, fine=False): def _construct_request(per_page, query_args, template, auth, as_app=None, fine=False):
@@ -808,52 +821,6 @@ def _construct_request(per_page, query_args, template, auth, as_app=None, fine=F
return request return request
def _request_http_error(exc, auth, errors):
# HTTPError behaves like a Response so we can
# check the status code and headers to see exactly
# what failed.
should_continue = False
headers = exc.headers
limit_remaining = int(headers.get("x-ratelimit-remaining", 0))
if exc.code == 403 and limit_remaining < 1:
# The X-RateLimit-Reset header includes a
# timestamp telling us when the limit will reset
# so we can calculate how long to wait rather
# than inefficiently polling:
gm_now = calendar.timegm(time.gmtime())
reset = int(headers.get("x-ratelimit-reset", 0)) or gm_now
# We'll never sleep for less than 10 seconds:
delta = max(10, reset - gm_now)
limit = headers.get("x-ratelimit-limit")
logger.warning(
"Exceeded rate limit of {} requests; waiting {} seconds to reset".format(
limit, delta
)
) # noqa
if auth is None:
logger.info("Hint: Authenticate to raise your GitHub rate limit")
time.sleep(delta)
should_continue = True
return errors, should_continue
def _request_url_error(template, retry_timeout):
# In case of a connection timing out, we can retry a few time
# But we won't crash and not back-up the rest now
logger.info("'{}' timed out".format(template))
retry_timeout -= 1
if retry_timeout >= 0:
return True, retry_timeout
raise Exception("'{}' timed out to much, skipping!".format(template))
class S3HTTPRedirectHandler(HTTPRedirectHandler): class S3HTTPRedirectHandler(HTTPRedirectHandler):
""" """
A subclassed redirect handler for downloading Github assets from S3. A subclassed redirect handler for downloading Github assets from S3.
@@ -1503,7 +1470,7 @@ def download_attachments(
def get_authenticated_user(args): def get_authenticated_user(args):
template = "https://{0}/user".format(get_github_api_host(args)) template = "https://{0}/user".format(get_github_api_host(args))
data = retrieve_data(args, template, single_request=True) data = retrieve_data(args, template, paginated=False)
return data[0] return data[0]
@@ -1517,7 +1484,7 @@ def check_git_lfs_install():
def retrieve_repositories(args, authenticated_user): def retrieve_repositories(args, authenticated_user):
logger.info("Retrieving repositories") logger.info("Retrieving repositories")
single_request = False paginated = True
if args.user == authenticated_user["login"]: if args.user == authenticated_user["login"]:
# we must use the /user/repos API to be able to access private repos # we must use the /user/repos API to be able to access private repos
template = "https://{0}/user/repos".format(get_github_api_host(args)) template = "https://{0}/user/repos".format(get_github_api_host(args))
@@ -1540,16 +1507,16 @@ def retrieve_repositories(args, authenticated_user):
repo_path = args.repository repo_path = args.repository
else: else:
repo_path = "{0}/{1}".format(args.user, args.repository) repo_path = "{0}/{1}".format(args.user, args.repository)
single_request = True paginated = False
template = "https://{0}/repos/{1}".format(get_github_api_host(args), repo_path) template = "https://{0}/repos/{1}".format(get_github_api_host(args), repo_path)
repos = retrieve_data(args, template, single_request=single_request) repos = retrieve_data(args, template, paginated=paginated)
if args.all_starred: if args.all_starred:
starred_template = "https://{0}/users/{1}/starred".format( starred_template = "https://{0}/users/{1}/starred".format(
get_github_api_host(args), args.user get_github_api_host(args), args.user
) )
starred_repos = retrieve_data(args, starred_template, single_request=False) starred_repos = retrieve_data(args, starred_template)
# flag each repo as starred for downstream processing # flag each repo as starred for downstream processing
for item in starred_repos: for item in starred_repos:
item.update({"is_starred": True}) item.update({"is_starred": True})
@@ -1559,7 +1526,7 @@ def retrieve_repositories(args, authenticated_user):
gists_template = "https://{0}/users/{1}/gists".format( gists_template = "https://{0}/users/{1}/gists".format(
get_github_api_host(args), args.user get_github_api_host(args), args.user
) )
gists = retrieve_data(args, gists_template, single_request=False) gists = retrieve_data(args, gists_template)
# flag each repo as a gist for downstream processing # flag each repo as a gist for downstream processing
for item in gists: for item in gists:
item.update({"is_gist": True}) item.update({"is_gist": True})
@@ -1578,9 +1545,7 @@ def retrieve_repositories(args, authenticated_user):
starred_gists_template = "https://{0}/gists/starred".format( starred_gists_template = "https://{0}/gists/starred".format(
get_github_api_host(args) get_github_api_host(args)
) )
starred_gists = retrieve_data( starred_gists = retrieve_data(args, starred_gists_template)
args, starred_gists_template, single_request=False
)
# flag each repo as a starred gist for downstream processing # flag each repo as a starred gist for downstream processing
for item in starred_gists: for item in starred_gists:
item.update({"is_gist": True, "is_starred": True}) item.update({"is_gist": True, "is_starred": True})
@@ -1849,14 +1814,14 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
pull_states = ["open", "closed"] pull_states = ["open", "closed"]
for pull_state in pull_states: for pull_state in pull_states:
query_args["state"] = pull_state query_args["state"] = pull_state
_pulls = retrieve_data_gen(args, _pulls_template, query_args=query_args) _pulls = retrieve_data(args, _pulls_template, query_args=query_args)
for pull in _pulls: for pull in _pulls:
if args.since and pull["updated_at"] < args.since: if args.since and pull["updated_at"] < args.since:
break break
if not args.since or pull["updated_at"] >= args.since: if not args.since or pull["updated_at"] >= args.since:
pulls[pull["number"]] = pull pulls[pull["number"]] = pull
else: else:
_pulls = retrieve_data_gen(args, _pulls_template, query_args=query_args) _pulls = retrieve_data(args, _pulls_template, query_args=query_args)
for pull in _pulls: for pull in _pulls:
if args.since and pull["updated_at"] < args.since: if args.since and pull["updated_at"] < args.since:
break break
@@ -1864,7 +1829,7 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
pulls[pull["number"]] = retrieve_data( pulls[pull["number"]] = retrieve_data(
args, args,
_pulls_template + "/{}".format(pull["number"]), _pulls_template + "/{}".format(pull["number"]),
single_request=True, paginated=False,
)[0] )[0]
logger.info("Saving {0} pull requests to disk".format(len(list(pulls.keys())))) logger.info("Saving {0} pull requests to disk".format(len(list(pulls.keys()))))

View File

@@ -13,7 +13,6 @@ class TestHTTP451Exception:
def test_repository_unavailable_error_raised(self): def test_repository_unavailable_error_raised(self):
"""HTTP 451 should raise RepositoryUnavailableError with DMCA URL.""" """HTTP 451 should raise RepositoryUnavailableError with DMCA URL."""
# Create mock args
args = Mock() args = Mock()
args.as_app = False args.as_app = False
args.token_fine = None args.token_fine = None
@@ -25,7 +24,6 @@ class TestHTTP451Exception:
args.throttle_limit = None args.throttle_limit = None
args.throttle_pause = 0 args.throttle_pause = 0
# Mock HTTPError 451 response
mock_response = Mock() mock_response = Mock()
mock_response.getcode.return_value = 451 mock_response.getcode.return_value = 451
@@ -41,14 +39,10 @@ class TestHTTP451Exception:
mock_response.headers = {"x-ratelimit-remaining": "5000"} mock_response.headers = {"x-ratelimit-remaining": "5000"}
mock_response.reason = "Unavailable For Legal Reasons" mock_response.reason = "Unavailable For Legal Reasons"
def mock_get_response(request, auth, template): with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response):
return mock_response, []
with patch("github_backup.github_backup._get_response", side_effect=mock_get_response):
with pytest.raises(github_backup.RepositoryUnavailableError) as exc_info: with pytest.raises(github_backup.RepositoryUnavailableError) as exc_info:
list(github_backup.retrieve_data_gen(args, "https://api.github.com/repos/test/dmca/issues")) github_backup.retrieve_data(args, "https://api.github.com/repos/test/dmca/issues")
# Check exception has DMCA URL
assert exc_info.value.dmca_url == "https://github.com/github/dmca/blob/master/2024/11/2024-11-04-source-code.md" assert exc_info.value.dmca_url == "https://github.com/github/dmca/blob/master/2024/11/2024-11-04-source-code.md"
assert "451" in str(exc_info.value) assert "451" in str(exc_info.value)
@@ -71,14 +65,10 @@ class TestHTTP451Exception:
mock_response.headers = {"x-ratelimit-remaining": "5000"} mock_response.headers = {"x-ratelimit-remaining": "5000"}
mock_response.reason = "Unavailable For Legal Reasons" mock_response.reason = "Unavailable For Legal Reasons"
def mock_get_response(request, auth, template): with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response):
return mock_response, []
with patch("github_backup.github_backup._get_response", side_effect=mock_get_response):
with pytest.raises(github_backup.RepositoryUnavailableError) as exc_info: with pytest.raises(github_backup.RepositoryUnavailableError) as exc_info:
list(github_backup.retrieve_data_gen(args, "https://api.github.com/repos/test/dmca/issues")) github_backup.retrieve_data(args, "https://api.github.com/repos/test/dmca/issues")
# Exception raised even without DMCA URL
assert exc_info.value.dmca_url is None assert exc_info.value.dmca_url is None
assert "451" in str(exc_info.value) assert "451" in str(exc_info.value)
@@ -101,42 +91,9 @@ class TestHTTP451Exception:
mock_response.headers = {"x-ratelimit-remaining": "5000"} mock_response.headers = {"x-ratelimit-remaining": "5000"}
mock_response.reason = "Unavailable For Legal Reasons" mock_response.reason = "Unavailable For Legal Reasons"
def mock_get_response(request, auth, template): with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response):
return mock_response, []
with patch("github_backup.github_backup._get_response", side_effect=mock_get_response):
with pytest.raises(github_backup.RepositoryUnavailableError): with pytest.raises(github_backup.RepositoryUnavailableError):
list(github_backup.retrieve_data_gen(args, "https://api.github.com/repos/test/dmca/issues")) github_backup.retrieve_data(args, "https://api.github.com/repos/test/dmca/issues")
def test_other_http_errors_unchanged(self):
"""Other HTTP errors should still raise generic Exception."""
args = Mock()
args.as_app = False
args.token_fine = None
args.token_classic = None
args.username = None
args.password = None
args.osx_keychain_item_name = None
args.osx_keychain_item_account = None
args.throttle_limit = None
args.throttle_pause = 0
mock_response = Mock()
mock_response.getcode.return_value = 404
mock_response.read.return_value = b'{"message": "Not Found"}'
mock_response.headers = {"x-ratelimit-remaining": "5000"}
mock_response.reason = "Not Found"
def mock_get_response(request, auth, template):
return mock_response, []
with patch("github_backup.github_backup._get_response", side_effect=mock_get_response):
# Should raise generic Exception, not RepositoryUnavailableError
with pytest.raises(Exception) as exc_info:
list(github_backup.retrieve_data_gen(args, "https://api.github.com/repos/test/notfound/issues"))
assert not isinstance(exc_info.value, github_backup.RepositoryUnavailableError)
assert "404" in str(exc_info.value)
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -40,7 +40,7 @@ class MockHTTPResponse:
@pytest.fixture @pytest.fixture
def mock_args(): def mock_args():
"""Mock args for retrieve_data_gen.""" """Mock args for retrieve_data."""
args = Mock() args = Mock()
args.as_app = False args.as_app = False
args.token_fine = None args.token_fine = None
@@ -77,10 +77,8 @@ def test_cursor_based_pagination(mock_args):
return responses[len(requests_made) - 1] return responses[len(requests_made) - 1]
with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
results = list( results = github_backup.retrieve_data(
github_backup.retrieve_data_gen( mock_args, "https://api.github.com/repos/owner/repo/issues"
mock_args, "https://api.github.com/repos/owner/repo/issues"
)
) )
# Verify all items retrieved and cursor was used in second request # Verify all items retrieved and cursor was used in second request
@@ -112,10 +110,8 @@ def test_page_based_pagination(mock_args):
return responses[len(requests_made) - 1] return responses[len(requests_made) - 1]
with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
results = list( results = github_backup.retrieve_data(
github_backup.retrieve_data_gen( mock_args, "https://api.github.com/repos/owner/repo/pulls"
mock_args, "https://api.github.com/repos/owner/repo/pulls"
)
) )
# Verify all items retrieved and page parameter was used (not cursor) # Verify all items retrieved and page parameter was used (not cursor)
@@ -142,10 +138,8 @@ def test_no_link_header_stops_pagination(mock_args):
return responses[len(requests_made) - 1] return responses[len(requests_made) - 1]
with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
results = list( results = github_backup.retrieve_data(
github_backup.retrieve_data_gen( mock_args, "https://api.github.com/repos/owner/repo/labels"
mock_args, "https://api.github.com/repos/owner/repo/labels"
)
) )
# Verify pagination stopped after first request # Verify pagination stopped after first request

365
tests/test_retrieve_data.py Normal file
View File

@@ -0,0 +1,365 @@
"""Tests for retrieve_data function."""
import json
import socket
from unittest.mock import Mock, patch
from urllib.error import HTTPError, URLError
import pytest
from github_backup import github_backup
from github_backup.github_backup import (
MAX_RETRIES,
calculate_retry_delay,
make_request_with_retry,
)
class TestCalculateRetryDelay:
def test_respects_retry_after_header(self):
headers = {'retry-after': '30'}
assert calculate_retry_delay(0, headers) == 30
def test_respects_rate_limit_reset(self):
import time
import calendar
# Set reset time 60 seconds in the future
future_reset = calendar.timegm(time.gmtime()) + 60
headers = {
'x-ratelimit-remaining': '0',
'x-ratelimit-reset': str(future_reset)
}
delay = calculate_retry_delay(0, headers)
# Should be approximately 60 seconds (with some tolerance for execution time)
assert 55 <= delay <= 65
def test_exponential_backoff(self):
delay_0 = calculate_retry_delay(0, {})
delay_1 = calculate_retry_delay(1, {})
delay_2 = calculate_retry_delay(2, {})
# Base delay is 1s, so delays should be roughly 1, 2, 4 (plus jitter)
assert 0.9 <= delay_0 <= 1.2 # ~1s + up to 10% jitter
assert 1.8 <= delay_1 <= 2.4 # ~2s + up to 10% jitter
assert 3.6 <= delay_2 <= 4.8 # ~4s + up to 10% jitter
def test_max_delay_cap(self):
# Very high attempt number should not exceed 120s + jitter
delay = calculate_retry_delay(100, {})
assert delay <= 120 * 1.1 # 120s max + 10% jitter
def test_minimum_rate_limit_delay(self):
import time
import calendar
# Set reset time in the past (already reset)
past_reset = calendar.timegm(time.gmtime()) - 100
headers = {
'x-ratelimit-remaining': '0',
'x-ratelimit-reset': str(past_reset)
}
delay = calculate_retry_delay(0, headers)
# Should be minimum 10 seconds even if reset time is in past
assert delay >= 10
class TestRetrieveDataRetry:
"""Tests for retry behavior in retrieve_data."""
@pytest.fixture
def mock_args(self):
args = Mock()
args.as_app = False
args.token_fine = None
args.token_classic = "fake_token"
args.username = None
args.password = None
args.osx_keychain_item_name = None
args.osx_keychain_item_account = None
args.throttle_limit = None
args.throttle_pause = 0
return args
def test_json_parse_error_retries_and_fails(self, mock_args):
"""HTTP 200 with invalid JSON should retry and eventually fail."""
mock_response = Mock()
mock_response.getcode.return_value = 200
mock_response.read.return_value = b"not valid json {"
mock_response.headers = {"x-ratelimit-remaining": "5000"}
call_count = 0
def mock_make_request(*args, **kwargs):
nonlocal call_count
call_count += 1
return mock_response
with patch("github_backup.github_backup.make_request_with_retry", side_effect=mock_make_request):
with patch("github_backup.github_backup.calculate_retry_delay", return_value=0): # No delay in tests
with pytest.raises(Exception) as exc_info:
github_backup.retrieve_data(mock_args, "https://api.github.com/repos/test/repo/issues")
assert "Failed to read response after" in str(exc_info.value)
assert call_count == MAX_RETRIES
def test_json_parse_error_recovers_on_retry(self, mock_args):
"""HTTP 200 with invalid JSON should succeed if retry returns valid JSON."""
bad_response = Mock()
bad_response.getcode.return_value = 200
bad_response.read.return_value = b"not valid json {"
bad_response.headers = {"x-ratelimit-remaining": "5000"}
good_response = Mock()
good_response.getcode.return_value = 200
good_response.read.return_value = json.dumps([{"id": 1}]).encode("utf-8")
good_response.headers = {"x-ratelimit-remaining": "5000", "Link": ""}
responses = [bad_response, bad_response, good_response]
call_count = 0
def mock_make_request(*args, **kwargs):
nonlocal call_count
result = responses[call_count]
call_count += 1
return result
with patch("github_backup.github_backup.make_request_with_retry", side_effect=mock_make_request):
with patch("github_backup.github_backup.calculate_retry_delay", return_value=0):
result = github_backup.retrieve_data(mock_args, "https://api.github.com/repos/test/repo/issues")
assert result == [{"id": 1}]
assert call_count == 3 # Failed twice, succeeded on third
def test_http_error_raises_exception(self, mock_args):
"""Non-success HTTP status codes should raise Exception."""
mock_response = Mock()
mock_response.getcode.return_value = 404
mock_response.read.return_value = b'{"message": "Not Found"}'
mock_response.headers = {"x-ratelimit-remaining": "5000"}
mock_response.reason = "Not Found"
with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response):
with pytest.raises(Exception) as exc_info:
github_backup.retrieve_data(mock_args, "https://api.github.com/repos/test/notfound/issues")
assert not isinstance(exc_info.value, github_backup.RepositoryUnavailableError)
assert "404" in str(exc_info.value)
class TestMakeRequestWithRetry:
"""Tests for HTTP error retry behavior in make_request_with_retry."""
def test_502_error_retries_and_succeeds(self):
"""HTTP 502 should retry and succeed if subsequent request works."""
good_response = Mock()
good_response.read.return_value = b'{"ok": true}'
call_count = 0
fail_count = MAX_RETRIES - 1 # Fail all but last attempt
def mock_urlopen(*args, **kwargs):
nonlocal call_count
call_count += 1
if call_count <= fail_count:
raise HTTPError(
url="https://api.github.com/test",
code=502,
msg="Bad Gateway",
hdrs={"x-ratelimit-remaining": "5000"},
fp=None,
)
return good_response
with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
with patch("github_backup.github_backup.calculate_retry_delay", return_value=0):
result = make_request_with_retry(Mock(), None)
assert result == good_response
assert call_count == MAX_RETRIES
def test_503_error_retries_until_exhausted(self):
"""HTTP 503 should retry MAX_RETRIES times then raise."""
call_count = 0
def mock_urlopen(*args, **kwargs):
nonlocal call_count
call_count += 1
raise HTTPError(
url="https://api.github.com/test",
code=503,
msg="Service Unavailable",
hdrs={"x-ratelimit-remaining": "5000"},
fp=None,
)
with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
with patch("github_backup.github_backup.calculate_retry_delay", return_value=0):
with pytest.raises(HTTPError) as exc_info:
make_request_with_retry(Mock(), None)
assert exc_info.value.code == 503
assert call_count == MAX_RETRIES
def test_404_error_not_retried(self):
"""HTTP 404 should not be retried - raise immediately."""
call_count = 0
def mock_urlopen(*args, **kwargs):
nonlocal call_count
call_count += 1
raise HTTPError(
url="https://api.github.com/test",
code=404,
msg="Not Found",
hdrs={"x-ratelimit-remaining": "5000"},
fp=None,
)
with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
with pytest.raises(HTTPError) as exc_info:
make_request_with_retry(Mock(), None)
assert exc_info.value.code == 404
assert call_count == 1 # No retries
def test_rate_limit_403_retried_when_remaining_zero(self):
"""HTTP 403 with x-ratelimit-remaining=0 should retry."""
good_response = Mock()
call_count = 0
def mock_urlopen(*args, **kwargs):
nonlocal call_count
call_count += 1
if call_count == 1:
raise HTTPError(
url="https://api.github.com/test",
code=403,
msg="Forbidden",
hdrs={"x-ratelimit-remaining": "0"},
fp=None,
)
return good_response
with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
with patch("github_backup.github_backup.calculate_retry_delay", return_value=0):
result = make_request_with_retry(Mock(), None)
assert result == good_response
assert call_count == 2
def test_403_not_retried_when_remaining_nonzero(self):
"""HTTP 403 with x-ratelimit-remaining>0 should not retry (permission error)."""
call_count = 0
def mock_urlopen(*args, **kwargs):
nonlocal call_count
call_count += 1
raise HTTPError(
url="https://api.github.com/test",
code=403,
msg="Forbidden",
hdrs={"x-ratelimit-remaining": "5000"},
fp=None,
)
with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
with pytest.raises(HTTPError) as exc_info:
make_request_with_retry(Mock(), None)
assert exc_info.value.code == 403
assert call_count == 1 # No retries
def test_connection_error_retries_and_succeeds(self):
"""URLError (connection error) should retry and succeed if subsequent request works."""
good_response = Mock()
call_count = 0
fail_count = MAX_RETRIES - 1 # Fail all but last attempt
def mock_urlopen(*args, **kwargs):
nonlocal call_count
call_count += 1
if call_count <= fail_count:
raise URLError("Connection refused")
return good_response
with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
with patch("github_backup.github_backup.calculate_retry_delay", return_value=0):
result = make_request_with_retry(Mock(), None)
assert result == good_response
assert call_count == MAX_RETRIES
def test_socket_error_retries_until_exhausted(self):
"""socket.error should retry MAX_RETRIES times then raise."""
call_count = 0
def mock_urlopen(*args, **kwargs):
nonlocal call_count
call_count += 1
raise socket.error("Connection reset by peer")
with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
with patch("github_backup.github_backup.calculate_retry_delay", return_value=0):
with pytest.raises(socket.error):
make_request_with_retry(Mock(), None)
assert call_count == MAX_RETRIES
class TestRetrieveDataThrottling:
"""Tests for throttling behavior in retrieve_data."""
@pytest.fixture
def mock_args(self):
args = Mock()
args.as_app = False
args.token_fine = None
args.token_classic = "fake_token"
args.username = None
args.password = None
args.osx_keychain_item_name = None
args.osx_keychain_item_account = None
args.throttle_limit = 10 # Throttle when remaining <= 10
args.throttle_pause = 5 # Pause 5 seconds
return args
def test_throttling_pauses_when_rate_limit_low(self, mock_args):
"""Should pause when x-ratelimit-remaining is at or below throttle_limit."""
mock_response = Mock()
mock_response.getcode.return_value = 200
mock_response.read.return_value = json.dumps([{"id": 1}]).encode("utf-8")
mock_response.headers = {"x-ratelimit-remaining": "5", "Link": ""} # Below throttle_limit
with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response):
with patch("github_backup.github_backup.time.sleep") as mock_sleep:
github_backup.retrieve_data(mock_args, "https://api.github.com/repos/test/repo/issues")
mock_sleep.assert_called_once_with(5) # throttle_pause value
class TestRetrieveDataSingleItem:
"""Tests for single item (dict) responses in retrieve_data."""
@pytest.fixture
def mock_args(self):
args = Mock()
args.as_app = False
args.token_fine = None
args.token_classic = "fake_token"
args.username = None
args.password = None
args.osx_keychain_item_name = None
args.osx_keychain_item_account = None
args.throttle_limit = None
args.throttle_pause = 0
return args
def test_dict_response_returned_as_list(self, mock_args):
"""Single dict response should be returned as a list with one item."""
mock_response = Mock()
mock_response.getcode.return_value = 200
mock_response.read.return_value = json.dumps({"login": "testuser", "id": 123}).encode("utf-8")
mock_response.headers = {"x-ratelimit-remaining": "5000", "Link": ""}
with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response):
result = github_backup.retrieve_data(mock_args, "https://api.github.com/user")
assert result == [{"login": "testuser", "id": 123}]