mirror of https://github.com/josegonzalez/python-github-backup.git
synced 2025-12-18 05:31:10 +01:00
Merge pull request #471 from Iamrodos/fix/retry-logic
Fix retry logic for HTTP 5xx errors and network failures
github_backup/github_backup.py
@@ -12,6 +12,7 @@ import json
 import logging
 import os
 import platform
+import random
 import re
 import select
 import socket
@@ -19,6 +20,7 @@ import ssl
 import subprocess
 import sys
 import time
+from collections.abc import Generator
 from datetime import datetime
 from http.client import IncompleteRead
 from urllib.error import HTTPError, URLError
@@ -74,6 +76,9 @@ else:
         " 3. Debian/Ubuntu: apt-get install ca-certificates\n\n"
     )

+# Retry configuration
+MAX_RETRIES = 5
+

 def logging_subprocess(
     popenargs, stdout_log_level=logging.DEBUG, stderr_log_level=logging.ERROR, **kwargs
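
As a back-of-envelope illustration (editor's sketch, not part of the patch): with MAX_RETRIES = 5 and the exponential-backoff formula introduced further down in this diff, the worst-case time spent sleeping between attempts, ignoring jitter and rate-limit waits, is:

# Editor's sketch: cumulative backoff across the retry attempts, using the
# 1s-base/120s-cap formula added below; jitter and rate-limit waits ignored.
MAX_RETRIES = 5
total = sum(min(1.0 * (2 ** attempt), 120.0) for attempt in range(MAX_RETRIES - 1))
print(total)  # 1 + 2 + 4 + 8 = 15.0 seconds of sleeping before the final attempt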
@@ -603,170 +608,178 @@ def get_github_repo_url(args, repository):
     return repo_url


-def retrieve_data_gen(args, template, query_args=None, single_request=False):
-    auth = get_auth(args, encode=not args.as_app)
-    query_args = get_query_args(query_args)
-    per_page = 100
-    next_url = None
-
-    while True:
-        if single_request:
-            request_per_page = None
-        else:
-            request_per_page = per_page
-
-        request = _construct_request(
-            request_per_page,
-            query_args,
-            next_url or template,
-            auth,
-            as_app=args.as_app,
-            fine=True if args.token_fine is not None else False,
-        )  # noqa
-        r, errors = _get_response(request, auth, next_url or template)
-
-        status_code = int(r.getcode())
-
-        # Handle DMCA takedown (HTTP 451) - raise exception to skip entire repository
-        if status_code == 451:
-            dmca_url = None
-            try:
-                response_data = json.loads(r.read().decode("utf-8"))
-                dmca_url = response_data.get("block", {}).get("html_url")
-            except Exception:
-                pass
-            raise RepositoryUnavailableError(
-                "Repository unavailable due to legal reasons (HTTP 451)",
-                dmca_url=dmca_url,
-            )
-
-        # Check if we got correct data
-        try:
-            response = json.loads(r.read().decode("utf-8"))
-        except IncompleteRead:
-            logger.warning("Incomplete read error detected")
-            read_error = True
-        except json.decoder.JSONDecodeError:
-            logger.warning("JSON decode error detected")
-            read_error = True
-        except TimeoutError:
-            logger.warning("Tiemout error detected")
-            read_error = True
-        else:
-            read_error = False
-
-        # be gentle with API request limit and throttle requests if remaining requests getting low
-        limit_remaining = int(r.headers.get("x-ratelimit-remaining", 0))
-        if args.throttle_limit and limit_remaining <= args.throttle_limit:
-            logger.info(
-                "API request limit hit: {} requests left, pausing further requests for {}s".format(
-                    limit_remaining, args.throttle_pause
-                )
-            )
-            time.sleep(args.throttle_pause)
-
-        retries = 0
-        while retries < 3 and (status_code == 502 or read_error):
-            logger.warning("API request failed. Retrying in 5 seconds")
-            retries += 1
-            time.sleep(5)
-            request = _construct_request(
-                request_per_page,
-                query_args,
-                next_url or template,
-                auth,
-                as_app=args.as_app,
-                fine=True if args.token_fine is not None else False,
-            )  # noqa
-            r, errors = _get_response(request, auth, next_url or template)
-
-            status_code = int(r.getcode())
-            try:
-                response = json.loads(r.read().decode("utf-8"))
-                read_error = False
-            except IncompleteRead:
-                logger.warning("Incomplete read error detected")
-                read_error = True
-            except json.decoder.JSONDecodeError:
-                logger.warning("JSON decode error detected")
-                read_error = True
-            except TimeoutError:
-                logger.warning("Tiemout error detected")
-                read_error = True
-
-        if status_code != 200:
-            template = "API request returned HTTP {0}: {1}"
-            errors.append(template.format(status_code, r.reason))
-            raise Exception(", ".join(errors))
-
-        if read_error:
-            template = "API request problem reading response for {0}"
-            errors.append(template.format(request))
-            raise Exception(", ".join(errors))
-
-        if len(errors) == 0:
-            if type(response) is list:
-                for resp in response:
-                    yield resp
-                # Parse Link header for next page URL (cursor-based pagination)
-                link_header = r.headers.get("Link", "")
-                next_url = None
-                if link_header:
-                    # Parse Link header: <https://api.github.com/...?per_page=100&after=cursor>; rel="next"
-                    for link in link_header.split(","):
-                        if 'rel="next"' in link:
-                            next_url = link[link.find("<") + 1 : link.find(">")]
-                            break
-                if not next_url:
-                    break
-            elif type(response) is dict and single_request:
-                yield response
-
-        if len(errors) > 0:
-            raise Exception(", ".join(errors))
-
-        if single_request:
-            break
-
-
-def retrieve_data(args, template, query_args=None, single_request=False):
-    return list(retrieve_data_gen(args, template, query_args, single_request))
-
-
-def get_query_args(query_args=None):
-    if not query_args:
-        query_args = {}
-    return query_args
-
-
-def _get_response(request, auth, template):
-    retry_timeout = 3
-    errors = []
-    # We'll make requests in a loop so we can
-    # delay and retry in the case of rate-limiting
-    while True:
-        should_continue = False
-        try:
-            r = urlopen(request, context=https_ctx)
-        except HTTPError as exc:
-            errors, should_continue = _request_http_error(exc, auth, errors)  # noqa
-            r = exc
-        except URLError as e:
-            logger.warning(e.reason)
-            should_continue, retry_timeout = _request_url_error(template, retry_timeout)
-            if not should_continue:
-                raise
-        except socket.error as e:
-            logger.warning(e.strerror)
-            should_continue, retry_timeout = _request_url_error(template, retry_timeout)
-            if not should_continue:
-                raise
-
-        if should_continue:
-            continue
-
-        break
-    return r, errors
+def calculate_retry_delay(attempt, headers):
+    """Calculate delay before next retry with exponential backoff."""
+    # Respect retry-after header if present
+    if retry_after := headers.get("retry-after"):
+        return int(retry_after)
+
+    # Respect rate limit reset time
+    if int(headers.get("x-ratelimit-remaining", 1)) < 1:
+        reset_time = int(headers.get("x-ratelimit-reset", 0))
+        return max(10, reset_time - calendar.timegm(time.gmtime()))
+
+    # Exponential backoff with jitter for server errors (1s base, 120s max)
+    delay = min(1.0 * (2**attempt), 120.0)
+    return delay + random.uniform(0, delay * 0.1)
+
+
+def retrieve_data(args, template, query_args=None, paginated=True):
+    """
+    Fetch the data from GitHub API.
+
+    Handle both single requests and pagination with yield of individual dicts.
+    Handles throttling, retries, read errors, and DMCA takedowns.
+    """
+    query_args = query_args or {}
+    auth = get_auth(args, encode=not args.as_app)
+    per_page = 100
+
+    def _extract_next_page_url(link_header):
+        for link in link_header.split(","):
+            if 'rel="next"' in link:
+                return link[link.find("<") + 1:link.find(">")]
+        return None
+
+    def fetch_all() -> Generator[dict, None, None]:
+        next_url = None
+
+        while True:
+            # FIRST: Fetch response
+            for attempt in range(MAX_RETRIES):
+                request = _construct_request(
+                    per_page=per_page if paginated else None,
+                    query_args=query_args,
+                    template=next_url or template,
+                    auth=auth,
+                    as_app=args.as_app,
+                    fine=args.token_fine is not None,
+                )
+                http_response = make_request_with_retry(request, auth)
+
+                match http_response.getcode():
+                    case 200:
+                        # Success - Parse JSON response
+                        try:
+                            response = json.loads(http_response.read().decode("utf-8"))
+                            break  # Exit retry loop and handle the data returned
+                        except (
+                            IncompleteRead,
+                            json.decoder.JSONDecodeError,
+                            TimeoutError,
+                        ) as e:
+                            logger.warning(f"{type(e).__name__} reading response")
+                            if attempt < MAX_RETRIES - 1:
+                                delay = calculate_retry_delay(attempt, {})
+                                logger.warning(
+                                    f"Retrying in {delay:.1f}s (attempt {attempt + 1}/{MAX_RETRIES})"
+                                )
+                                time.sleep(delay)
+                                continue  # Next retry attempt
+                    case 451:
+                        # DMCA takedown - extract URL if available, then raise
+                        dmca_url = None
+                        try:
+                            response_data = json.loads(
+                                http_response.read().decode("utf-8")
+                            )
+                            dmca_url = response_data.get("block", {}).get("html_url")
+                        except Exception:
+                            pass
+                        raise RepositoryUnavailableError(
+                            "Repository unavailable due to legal reasons (HTTP 451)",
+                            dmca_url=dmca_url,
+                        )
+                    case _:
+                        raise Exception(
+                            f"API request returned HTTP {http_response.getcode()}: {http_response.reason}"
+                        )
+            else:
+                logger.error(
+                    f"Failed to read response after {MAX_RETRIES} attempts for {next_url or template}"
+                )
+                raise Exception(
+                    f"Failed to read response after {MAX_RETRIES} attempts for {next_url or template}"
+                )
+
+            # SECOND: Process and paginate
+
+            # Pause before next request if rate limit is low
+            if (
+                remaining := int(http_response.headers.get("x-ratelimit-remaining", 0))
+            ) <= (args.throttle_limit or 0):
+                if args.throttle_limit:
+                    logger.info(
+                        f"Throttling: {remaining} requests left, pausing {args.throttle_pause}s"
+                    )
+                    time.sleep(args.throttle_pause)
+
+            # Yield results
+            if isinstance(response, list):
+                yield from response
+            elif isinstance(response, dict):
+                yield response
+
+            # Check for more pages
+            if not paginated or not (
+                next_url := _extract_next_page_url(
+                    http_response.headers.get("Link", "")
+                )
+            ):
+                break  # No more data
+
+    return list(fetch_all())
+
+
+def make_request_with_retry(request, auth):
+    """Make HTTP request with automatic retry for transient errors."""
+
+    def is_retryable_status(status_code, headers):
+        # Server errors are always retryable
+        if status_code in (500, 502, 503, 504):
+            return True
+        # Rate limit (403/429) is retryable if limit exhausted
+        if status_code in (403, 429):
+            return int(headers.get("x-ratelimit-remaining", 1)) < 1
+        return False
+
+    for attempt in range(MAX_RETRIES):
+        try:
+            return urlopen(request, context=https_ctx)
+        except HTTPError as exc:
+            # HTTPError can be used as a response-like object
+            if not is_retryable_status(exc.code, exc.headers):
+                raise  # Non-retryable error
+            if attempt >= MAX_RETRIES - 1:
+                logger.error(f"HTTP {exc.code} failed after {MAX_RETRIES} attempts")
+                raise
+            delay = calculate_retry_delay(attempt, exc.headers)
+            logger.warning(
+                f"HTTP {exc.code}, retrying in {delay:.1f}s "
+                f"(attempt {attempt + 1}/{MAX_RETRIES})"
+            )
+            if auth is None and exc.code in (403, 429):
+                logger.info("Hint: Authenticate to raise your GitHub rate limit")
+            time.sleep(delay)
+        except (URLError, socket.error) as e:
+            if attempt >= MAX_RETRIES - 1:
+                logger.error(f"Connection error failed after {MAX_RETRIES} attempts: {e}")
+                raise
+            delay = calculate_retry_delay(attempt, {})
+            logger.warning(
+                f"Connection error: {e}, retrying in {delay:.1f}s "
+                f"(attempt {attempt + 1}/{MAX_RETRIES})"
+            )
+            time.sleep(delay)
+
+    raise Exception(f"Request failed after {MAX_RETRIES} attempts")  # pragma: no cover


 def _construct_request(per_page, query_args, template, auth, as_app=None, fine=False):
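
To make the new backoff policy concrete, a small illustration (editor's sketch, not part of the patch) that exercises calculate_retry_delay via the same import path the new tests use; the expected values follow directly from the code above:

import calendar
import time

from github_backup.github_backup import calculate_retry_delay

# A retry-after header wins outright.
assert calculate_retry_delay(0, {"retry-after": "30"}) == 30

# An exhausted rate limit waits for the reset timestamp (never under 10s).
reset = calendar.timegm(time.gmtime()) + 60
delay = calculate_retry_delay(0, {"x-ratelimit-remaining": "0",
                                  "x-ratelimit-reset": str(reset)})
assert 55 <= delay <= 65

# Otherwise: exponential backoff with up to 10% jitter, capped at 120s.
for attempt in range(4):
    d = calculate_retry_delay(attempt, {})
    print(f"attempt {attempt}: ~{d:.1f}s")  # ~1, 2, 4, 8 plus jitter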
@@ -808,52 +821,6 @@ def _construct_request(per_page, query_args, template, auth, as_app=None, fine=False):
     return request


-def _request_http_error(exc, auth, errors):
-    # HTTPError behaves like a Response so we can
-    # check the status code and headers to see exactly
-    # what failed.
-
-    should_continue = False
-    headers = exc.headers
-    limit_remaining = int(headers.get("x-ratelimit-remaining", 0))
-
-    if exc.code == 403 and limit_remaining < 1:
-        # The X-RateLimit-Reset header includes a
-        # timestamp telling us when the limit will reset
-        # so we can calculate how long to wait rather
-        # than inefficiently polling:
-        gm_now = calendar.timegm(time.gmtime())
-        reset = int(headers.get("x-ratelimit-reset", 0)) or gm_now
-        # We'll never sleep for less than 10 seconds:
-        delta = max(10, reset - gm_now)
-
-        limit = headers.get("x-ratelimit-limit")
-        logger.warning(
-            "Exceeded rate limit of {} requests; waiting {} seconds to reset".format(
-                limit, delta
-            )
-        )  # noqa
-
-        if auth is None:
-            logger.info("Hint: Authenticate to raise your GitHub rate limit")
-
-        time.sleep(delta)
-        should_continue = True
-    return errors, should_continue
-
-
-def _request_url_error(template, retry_timeout):
-    # In case of a connection timing out, we can retry a few time
-    # But we won't crash and not back-up the rest now
-    logger.info("'{}' timed out".format(template))
-    retry_timeout -= 1
-
-    if retry_timeout >= 0:
-        return True, retry_timeout
-
-    raise Exception("'{}' timed out to much, skipping!".format(template))
-
-
 class S3HTTPRedirectHandler(HTTPRedirectHandler):
     """
     A subclassed redirect handler for downloading Github assets from S3.
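
Both deleted helpers are subsumed by make_request_with_retry above. For reference, the classification it now applies, transcribed from the nested is_retryable_status helper into a standalone editor's sketch:

def is_retryable_status(status_code, headers):
    # Server errors are always retryable.
    if status_code in (500, 502, 503, 504):
        return True
    # 403/429 are retryable only when the rate limit is actually exhausted.
    if status_code in (403, 429):
        return int(headers.get("x-ratelimit-remaining", 1)) < 1
    return False

assert is_retryable_status(502, {})
assert is_retryable_status(403, {"x-ratelimit-remaining": "0"})
assert not is_retryable_status(403, {"x-ratelimit-remaining": "5000"})  # permission error
assert not is_retryable_status(404, {})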
@@ -1503,7 +1470,7 @@ def download_attachments(

 def get_authenticated_user(args):
     template = "https://{0}/user".format(get_github_api_host(args))
-    data = retrieve_data(args, template, single_request=True)
+    data = retrieve_data(args, template, paginated=False)
     return data[0]

@@ -1517,7 +1484,7 @@ def check_git_lfs_install():

 def retrieve_repositories(args, authenticated_user):
     logger.info("Retrieving repositories")
-    single_request = False
+    paginated = True
     if args.user == authenticated_user["login"]:
         # we must use the /user/repos API to be able to access private repos
         template = "https://{0}/user/repos".format(get_github_api_host(args))
@@ -1540,16 +1507,16 @@ def retrieve_repositories(args, authenticated_user):
         repo_path = args.repository
     else:
         repo_path = "{0}/{1}".format(args.user, args.repository)
-        single_request = True
+        paginated = False
         template = "https://{0}/repos/{1}".format(get_github_api_host(args), repo_path)

-    repos = retrieve_data(args, template, single_request=single_request)
+    repos = retrieve_data(args, template, paginated=paginated)

     if args.all_starred:
         starred_template = "https://{0}/users/{1}/starred".format(
             get_github_api_host(args), args.user
         )
-        starred_repos = retrieve_data(args, starred_template, single_request=False)
+        starred_repos = retrieve_data(args, starred_template)
         # flag each repo as starred for downstream processing
         for item in starred_repos:
             item.update({"is_starred": True})
@@ -1559,7 +1526,7 @@ def retrieve_repositories(args, authenticated_user):
         gists_template = "https://{0}/users/{1}/gists".format(
             get_github_api_host(args), args.user
         )
-        gists = retrieve_data(args, gists_template, single_request=False)
+        gists = retrieve_data(args, gists_template)
         # flag each repo as a gist for downstream processing
         for item in gists:
             item.update({"is_gist": True})
@@ -1578,9 +1545,7 @@ def retrieve_repositories(args, authenticated_user):
         starred_gists_template = "https://{0}/gists/starred".format(
             get_github_api_host(args)
         )
-        starred_gists = retrieve_data(
-            args, starred_gists_template, single_request=False
-        )
+        starred_gists = retrieve_data(args, starred_gists_template)
         # flag each repo as a starred gist for downstream processing
         for item in starred_gists:
             item.update({"is_gist": True, "is_starred": True})
@@ -1849,14 +1814,14 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
         pull_states = ["open", "closed"]
         for pull_state in pull_states:
             query_args["state"] = pull_state
-            _pulls = retrieve_data_gen(args, _pulls_template, query_args=query_args)
+            _pulls = retrieve_data(args, _pulls_template, query_args=query_args)
             for pull in _pulls:
                 if args.since and pull["updated_at"] < args.since:
                     break
                 if not args.since or pull["updated_at"] >= args.since:
                     pulls[pull["number"]] = pull
     else:
-        _pulls = retrieve_data_gen(args, _pulls_template, query_args=query_args)
+        _pulls = retrieve_data(args, _pulls_template, query_args=query_args)
         for pull in _pulls:
             if args.since and pull["updated_at"] < args.since:
                 break
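
The early break assumes pulls arrive sorted newest-first by updated_at, so everything after the first too-old item can be skipped. A minimal editor's sketch with hypothetical data (ISO-8601 timestamps compare correctly as strings):

pulls = {}
_pulls = [
    {"number": 3, "updated_at": "2024-06-01T00:00:00Z"},
    {"number": 2, "updated_at": "2024-01-01T00:00:00Z"},  # older than since
]
since = "2024-03-01T00:00:00Z"
for pull in _pulls:
    if since and pull["updated_at"] < since:
        break  # assuming newest-first order, the rest is older still
    pulls[pull["number"]] = pull
assert list(pulls) == [3]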
@@ -1864,7 +1829,7 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
             pulls[pull["number"]] = retrieve_data(
                 args,
                 _pulls_template + "/{}".format(pull["number"]),
-                single_request=True,
+                paginated=False,
             )[0]

     logger.info("Saving {0} pull requests to disk".format(len(list(pulls.keys()))))

@@ -13,7 +13,6 @@ class TestHTTP451Exception:

     def test_repository_unavailable_error_raised(self):
         """HTTP 451 should raise RepositoryUnavailableError with DMCA URL."""
-        # Create mock args
         args = Mock()
         args.as_app = False
         args.token_fine = None
@@ -25,7 +24,6 @@ class TestHTTP451Exception:
         args.throttle_limit = None
         args.throttle_pause = 0

-        # Mock HTTPError 451 response
         mock_response = Mock()
         mock_response.getcode.return_value = 451

@@ -41,14 +39,10 @@ class TestHTTP451Exception:
         mock_response.headers = {"x-ratelimit-remaining": "5000"}
         mock_response.reason = "Unavailable For Legal Reasons"

-        def mock_get_response(request, auth, template):
-            return mock_response, []
-
-        with patch("github_backup.github_backup._get_response", side_effect=mock_get_response):
+        with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response):
             with pytest.raises(github_backup.RepositoryUnavailableError) as exc_info:
-                list(github_backup.retrieve_data_gen(args, "https://api.github.com/repos/test/dmca/issues"))
+                github_backup.retrieve_data(args, "https://api.github.com/repos/test/dmca/issues")

-        # Check exception has DMCA URL
         assert exc_info.value.dmca_url == "https://github.com/github/dmca/blob/master/2024/11/2024-11-04-source-code.md"
         assert "451" in str(exc_info.value)

@@ -71,14 +65,10 @@ class TestHTTP451Exception:
         mock_response.headers = {"x-ratelimit-remaining": "5000"}
         mock_response.reason = "Unavailable For Legal Reasons"

-        def mock_get_response(request, auth, template):
-            return mock_response, []
-
-        with patch("github_backup.github_backup._get_response", side_effect=mock_get_response):
+        with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response):
             with pytest.raises(github_backup.RepositoryUnavailableError) as exc_info:
-                list(github_backup.retrieve_data_gen(args, "https://api.github.com/repos/test/dmca/issues"))
+                github_backup.retrieve_data(args, "https://api.github.com/repos/test/dmca/issues")

-        # Exception raised even without DMCA URL
         assert exc_info.value.dmca_url is None
         assert "451" in str(exc_info.value)

@@ -101,42 +91,9 @@ class TestHTTP451Exception:
         mock_response.headers = {"x-ratelimit-remaining": "5000"}
         mock_response.reason = "Unavailable For Legal Reasons"

-        def mock_get_response(request, auth, template):
-            return mock_response, []
-
-        with patch("github_backup.github_backup._get_response", side_effect=mock_get_response):
+        with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response):
             with pytest.raises(github_backup.RepositoryUnavailableError):
-                list(github_backup.retrieve_data_gen(args, "https://api.github.com/repos/test/dmca/issues"))
+                github_backup.retrieve_data(args, "https://api.github.com/repos/test/dmca/issues")
-
-    def test_other_http_errors_unchanged(self):
-        """Other HTTP errors should still raise generic Exception."""
-        args = Mock()
-        args.as_app = False
-        args.token_fine = None
-        args.token_classic = None
-        args.username = None
-        args.password = None
-        args.osx_keychain_item_name = None
-        args.osx_keychain_item_account = None
-        args.throttle_limit = None
-        args.throttle_pause = 0
-
-        mock_response = Mock()
-        mock_response.getcode.return_value = 404
-        mock_response.read.return_value = b'{"message": "Not Found"}'
-        mock_response.headers = {"x-ratelimit-remaining": "5000"}
-        mock_response.reason = "Not Found"
-
-        def mock_get_response(request, auth, template):
-            return mock_response, []
-
-        with patch("github_backup.github_backup._get_response", side_effect=mock_get_response):
-            # Should raise generic Exception, not RepositoryUnavailableError
-            with pytest.raises(Exception) as exc_info:
-                list(github_backup.retrieve_data_gen(args, "https://api.github.com/repos/test/notfound/issues"))
-
-            assert not isinstance(exc_info.value, github_backup.RepositoryUnavailableError)
-            assert "404" in str(exc_info.value)


 if __name__ == "__main__":

@@ -40,7 +40,7 @@ class MockHTTPResponse:

 @pytest.fixture
 def mock_args():
-    """Mock args for retrieve_data_gen."""
+    """Mock args for retrieve_data."""
     args = Mock()
     args.as_app = False
     args.token_fine = None
@@ -77,10 +77,8 @@ def test_cursor_based_pagination(mock_args):
         return responses[len(requests_made) - 1]

     with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
-        results = list(
-            github_backup.retrieve_data_gen(
-                mock_args, "https://api.github.com/repos/owner/repo/issues"
-            )
-        )
+        results = github_backup.retrieve_data(
+            mock_args, "https://api.github.com/repos/owner/repo/issues"
+        )

     # Verify all items retrieved and cursor was used in second request
@@ -112,10 +110,8 @@ def test_page_based_pagination(mock_args):
         return responses[len(requests_made) - 1]

     with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
-        results = list(
-            github_backup.retrieve_data_gen(
-                mock_args, "https://api.github.com/repos/owner/repo/pulls"
-            )
-        )
+        results = github_backup.retrieve_data(
+            mock_args, "https://api.github.com/repos/owner/repo/pulls"
+        )

     # Verify all items retrieved and page parameter was used (not cursor)
@@ -142,10 +138,8 @@ def test_no_link_header_stops_pagination(mock_args):
         return responses[len(requests_made) - 1]

     with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
-        results = list(
-            github_backup.retrieve_data_gen(
-                mock_args, "https://api.github.com/repos/owner/repo/labels"
-            )
-        )
+        results = github_backup.retrieve_data(
+            mock_args, "https://api.github.com/repos/owner/repo/labels"
+        )

     # Verify pagination stopped after first request
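These pagination tests exercise the Link-header parsing that now lives in the nested _extract_next_page_url helper inside retrieve_data; transcribed standalone here as an editor's sketch:

def _extract_next_page_url(link_header):
    for link in link_header.split(","):
        if 'rel="next"' in link:
            return link[link.find("<") + 1:link.find(">")]
    return None

hdr = ('<https://api.github.com/repos/o/r/issues?per_page=100&page=2>; rel="next", '
       '<https://api.github.com/repos/o/r/issues?per_page=100&page=5>; rel="last"')
assert _extract_next_page_url(hdr).endswith("page=2")
assert _extract_next_page_url("") is None
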
tests/test_retrieve_data.py (new file, 365 lines)
@@ -0,0 +1,365 @@
+"""Tests for retrieve_data function."""
+
+import json
+import socket
+from unittest.mock import Mock, patch
+from urllib.error import HTTPError, URLError
+
+import pytest
+
+from github_backup import github_backup
+from github_backup.github_backup import (
+    MAX_RETRIES,
+    calculate_retry_delay,
+    make_request_with_retry,
+)
+
+
+class TestCalculateRetryDelay:
+    def test_respects_retry_after_header(self):
+        headers = {'retry-after': '30'}
+        assert calculate_retry_delay(0, headers) == 30
+
+    def test_respects_rate_limit_reset(self):
+        import time
+        import calendar
+        # Set reset time 60 seconds in the future
+        future_reset = calendar.timegm(time.gmtime()) + 60
+        headers = {
+            'x-ratelimit-remaining': '0',
+            'x-ratelimit-reset': str(future_reset)
+        }
+        delay = calculate_retry_delay(0, headers)
+        # Should be approximately 60 seconds (with some tolerance for execution time)
+        assert 55 <= delay <= 65
+
+    def test_exponential_backoff(self):
+        delay_0 = calculate_retry_delay(0, {})
+        delay_1 = calculate_retry_delay(1, {})
+        delay_2 = calculate_retry_delay(2, {})
+        # Base delay is 1s, so delays should be roughly 1, 2, 4 (plus jitter)
+        assert 0.9 <= delay_0 <= 1.2  # ~1s + up to 10% jitter
+        assert 1.8 <= delay_1 <= 2.4  # ~2s + up to 10% jitter
+        assert 3.6 <= delay_2 <= 4.8  # ~4s + up to 10% jitter
+
+    def test_max_delay_cap(self):
+        # Very high attempt number should not exceed 120s + jitter
+        delay = calculate_retry_delay(100, {})
+        assert delay <= 120 * 1.1  # 120s max + 10% jitter
+
+    def test_minimum_rate_limit_delay(self):
+        import time
+        import calendar
+        # Set reset time in the past (already reset)
+        past_reset = calendar.timegm(time.gmtime()) - 100
+        headers = {
+            'x-ratelimit-remaining': '0',
+            'x-ratelimit-reset': str(past_reset)
+        }
+        delay = calculate_retry_delay(0, headers)
+        # Should be minimum 10 seconds even if reset time is in past
+        assert delay >= 10
+
+
+class TestRetrieveDataRetry:
+    """Tests for retry behavior in retrieve_data."""
+
+    @pytest.fixture
+    def mock_args(self):
+        args = Mock()
+        args.as_app = False
+        args.token_fine = None
+        args.token_classic = "fake_token"
+        args.username = None
+        args.password = None
+        args.osx_keychain_item_name = None
+        args.osx_keychain_item_account = None
+        args.throttle_limit = None
+        args.throttle_pause = 0
+        return args
+
+    def test_json_parse_error_retries_and_fails(self, mock_args):
+        """HTTP 200 with invalid JSON should retry and eventually fail."""
+        mock_response = Mock()
+        mock_response.getcode.return_value = 200
+        mock_response.read.return_value = b"not valid json {"
+        mock_response.headers = {"x-ratelimit-remaining": "5000"}
+
+        call_count = 0
+
+        def mock_make_request(*args, **kwargs):
+            nonlocal call_count
+            call_count += 1
+            return mock_response
+
+        with patch("github_backup.github_backup.make_request_with_retry", side_effect=mock_make_request):
+            with patch("github_backup.github_backup.calculate_retry_delay", return_value=0):  # No delay in tests
+                with pytest.raises(Exception) as exc_info:
+                    github_backup.retrieve_data(mock_args, "https://api.github.com/repos/test/repo/issues")
+
+        assert "Failed to read response after" in str(exc_info.value)
+        assert call_count == MAX_RETRIES
+
+    def test_json_parse_error_recovers_on_retry(self, mock_args):
+        """HTTP 200 with invalid JSON should succeed if retry returns valid JSON."""
+        bad_response = Mock()
+        bad_response.getcode.return_value = 200
+        bad_response.read.return_value = b"not valid json {"
+        bad_response.headers = {"x-ratelimit-remaining": "5000"}
+
+        good_response = Mock()
+        good_response.getcode.return_value = 200
+        good_response.read.return_value = json.dumps([{"id": 1}]).encode("utf-8")
+        good_response.headers = {"x-ratelimit-remaining": "5000", "Link": ""}
+
+        responses = [bad_response, bad_response, good_response]
+        call_count = 0
+
+        def mock_make_request(*args, **kwargs):
+            nonlocal call_count
+            result = responses[call_count]
+            call_count += 1
+            return result
+
+        with patch("github_backup.github_backup.make_request_with_retry", side_effect=mock_make_request):
+            with patch("github_backup.github_backup.calculate_retry_delay", return_value=0):
+                result = github_backup.retrieve_data(mock_args, "https://api.github.com/repos/test/repo/issues")
+
+        assert result == [{"id": 1}]
+        assert call_count == 3  # Failed twice, succeeded on third
+
+    def test_http_error_raises_exception(self, mock_args):
+        """Non-success HTTP status codes should raise Exception."""
+        mock_response = Mock()
+        mock_response.getcode.return_value = 404
+        mock_response.read.return_value = b'{"message": "Not Found"}'
+        mock_response.headers = {"x-ratelimit-remaining": "5000"}
+        mock_response.reason = "Not Found"
+
+        with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response):
+            with pytest.raises(Exception) as exc_info:
+                github_backup.retrieve_data(mock_args, "https://api.github.com/repos/test/notfound/issues")
+
+        assert not isinstance(exc_info.value, github_backup.RepositoryUnavailableError)
+        assert "404" in str(exc_info.value)
+
+
+class TestMakeRequestWithRetry:
+    """Tests for HTTP error retry behavior in make_request_with_retry."""
+
+    def test_502_error_retries_and_succeeds(self):
+        """HTTP 502 should retry and succeed if subsequent request works."""
+        good_response = Mock()
+        good_response.read.return_value = b'{"ok": true}'
+
+        call_count = 0
+        fail_count = MAX_RETRIES - 1  # Fail all but last attempt
+
+        def mock_urlopen(*args, **kwargs):
+            nonlocal call_count
+            call_count += 1
+            if call_count <= fail_count:
+                raise HTTPError(
+                    url="https://api.github.com/test",
+                    code=502,
+                    msg="Bad Gateway",
+                    hdrs={"x-ratelimit-remaining": "5000"},
+                    fp=None,
+                )
+            return good_response
+
+        with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
+            with patch("github_backup.github_backup.calculate_retry_delay", return_value=0):
+                result = make_request_with_retry(Mock(), None)
+
+        assert result == good_response
+        assert call_count == MAX_RETRIES
+
+    def test_503_error_retries_until_exhausted(self):
+        """HTTP 503 should retry MAX_RETRIES times then raise."""
+        call_count = 0
+
+        def mock_urlopen(*args, **kwargs):
+            nonlocal call_count
+            call_count += 1
+            raise HTTPError(
+                url="https://api.github.com/test",
+                code=503,
+                msg="Service Unavailable",
+                hdrs={"x-ratelimit-remaining": "5000"},
+                fp=None,
+            )
+
+        with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
+            with patch("github_backup.github_backup.calculate_retry_delay", return_value=0):
+                with pytest.raises(HTTPError) as exc_info:
+                    make_request_with_retry(Mock(), None)
+
+        assert exc_info.value.code == 503
+        assert call_count == MAX_RETRIES
+
+    def test_404_error_not_retried(self):
+        """HTTP 404 should not be retried - raise immediately."""
+        call_count = 0
+
+        def mock_urlopen(*args, **kwargs):
+            nonlocal call_count
+            call_count += 1
+            raise HTTPError(
+                url="https://api.github.com/test",
+                code=404,
+                msg="Not Found",
+                hdrs={"x-ratelimit-remaining": "5000"},
+                fp=None,
+            )
+
+        with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
+            with pytest.raises(HTTPError) as exc_info:
+                make_request_with_retry(Mock(), None)
+
+        assert exc_info.value.code == 404
+        assert call_count == 1  # No retries
+
+    def test_rate_limit_403_retried_when_remaining_zero(self):
+        """HTTP 403 with x-ratelimit-remaining=0 should retry."""
+        good_response = Mock()
+        call_count = 0
+
+        def mock_urlopen(*args, **kwargs):
+            nonlocal call_count
+            call_count += 1
+            if call_count == 1:
+                raise HTTPError(
+                    url="https://api.github.com/test",
+                    code=403,
+                    msg="Forbidden",
+                    hdrs={"x-ratelimit-remaining": "0"},
+                    fp=None,
+                )
+            return good_response
+
+        with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
+            with patch("github_backup.github_backup.calculate_retry_delay", return_value=0):
+                result = make_request_with_retry(Mock(), None)
+
+        assert result == good_response
+        assert call_count == 2
+
+    def test_403_not_retried_when_remaining_nonzero(self):
+        """HTTP 403 with x-ratelimit-remaining>0 should not retry (permission error)."""
+        call_count = 0
+
+        def mock_urlopen(*args, **kwargs):
+            nonlocal call_count
+            call_count += 1
+            raise HTTPError(
+                url="https://api.github.com/test",
+                code=403,
+                msg="Forbidden",
+                hdrs={"x-ratelimit-remaining": "5000"},
+                fp=None,
+            )
+
+        with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
+            with pytest.raises(HTTPError) as exc_info:
+                make_request_with_retry(Mock(), None)
+
+        assert exc_info.value.code == 403
+        assert call_count == 1  # No retries
+
+    def test_connection_error_retries_and_succeeds(self):
+        """URLError (connection error) should retry and succeed if subsequent request works."""
+        good_response = Mock()
+        call_count = 0
+        fail_count = MAX_RETRIES - 1  # Fail all but last attempt
+
+        def mock_urlopen(*args, **kwargs):
+            nonlocal call_count
+            call_count += 1
+            if call_count <= fail_count:
+                raise URLError("Connection refused")
+            return good_response
+
+        with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
+            with patch("github_backup.github_backup.calculate_retry_delay", return_value=0):
+                result = make_request_with_retry(Mock(), None)
+
+        assert result == good_response
+        assert call_count == MAX_RETRIES
+
+    def test_socket_error_retries_until_exhausted(self):
+        """socket.error should retry MAX_RETRIES times then raise."""
+        call_count = 0
+
+        def mock_urlopen(*args, **kwargs):
+            nonlocal call_count
+            call_count += 1
+            raise socket.error("Connection reset by peer")
+
+        with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
+            with patch("github_backup.github_backup.calculate_retry_delay", return_value=0):
+                with pytest.raises(socket.error):
+                    make_request_with_retry(Mock(), None)
+
+        assert call_count == MAX_RETRIES
+
+
+class TestRetrieveDataThrottling:
+    """Tests for throttling behavior in retrieve_data."""
+
+    @pytest.fixture
+    def mock_args(self):
+        args = Mock()
+        args.as_app = False
+        args.token_fine = None
+        args.token_classic = "fake_token"
+        args.username = None
+        args.password = None
+        args.osx_keychain_item_name = None
+        args.osx_keychain_item_account = None
+        args.throttle_limit = 10  # Throttle when remaining <= 10
+        args.throttle_pause = 5  # Pause 5 seconds
+        return args
+
+    def test_throttling_pauses_when_rate_limit_low(self, mock_args):
+        """Should pause when x-ratelimit-remaining is at or below throttle_limit."""
+        mock_response = Mock()
+        mock_response.getcode.return_value = 200
+        mock_response.read.return_value = json.dumps([{"id": 1}]).encode("utf-8")
+        mock_response.headers = {"x-ratelimit-remaining": "5", "Link": ""}  # Below throttle_limit
+
+        with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response):
+            with patch("github_backup.github_backup.time.sleep") as mock_sleep:
+                github_backup.retrieve_data(mock_args, "https://api.github.com/repos/test/repo/issues")
+
+        mock_sleep.assert_called_once_with(5)  # throttle_pause value
+
+
+class TestRetrieveDataSingleItem:
+    """Tests for single item (dict) responses in retrieve_data."""
+
+    @pytest.fixture
+    def mock_args(self):
+        args = Mock()
+        args.as_app = False
+        args.token_fine = None
+        args.token_classic = "fake_token"
+        args.username = None
+        args.password = None
+        args.osx_keychain_item_name = None
+        args.osx_keychain_item_account = None
+        args.throttle_limit = None
+        args.throttle_pause = 0
+        return args
+
+    def test_dict_response_returned_as_list(self, mock_args):
+        """Single dict response should be returned as a list with one item."""
+        mock_response = Mock()
+        mock_response.getcode.return_value = 200
+        mock_response.read.return_value = json.dumps({"login": "testuser", "id": 123}).encode("utf-8")
+        mock_response.headers = {"x-ratelimit-remaining": "5000", "Link": ""}
+
+        with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response):
+            result = github_backup.retrieve_data(mock_args, "https://api.github.com/user")
+
+        assert result == [{"login": "testuser", "id": 123}]