Mirror of https://github.com/josegonzalez/python-github-backup.git, synced 2025-12-17 21:21:11 +01:00
Compare commits
7 Commits
- c70cc43f57
- 27d3fcdafa
- 46140b0ff1
- 02dd902b67
- 241949137d
- 1155da849d
- 59a70ff11a
CHANGES.rst
@@ -1,9 +1,38 @@
 Changelog
 =========

-0.57.0 (2025-12-12)
--------------------
+0.58.0 (2025-12-16)
+-------------------
+- Fix retry logic for HTTP 5xx errors and network failures. [Rodos]
+
+  Refactors error handling to retry all 5xx errors (not just 502), network
+  errors (URLError, socket.error, IncompleteRead), and JSON parse errors
+  with exponential backoff and jitter. Respects retry-after and rate limit
+  headers per GitHub API requirements. Consolidates retry logic into
+  make_request_with_retry() wrapper and adds clear logging for retry
+  attempts and failures. Removes dead code from 2016 (errors list,
+  _request_http_error, _request_url_error) that was intentionally disabled
+  in commit 1e5a9048 to fix #29.
+
+  Fixes #140, #110, #138
+
+- Chore: remove transitive deps from release-requirements.txt. [Rodos]
+- Chore(deps): bump urllib3 in the python-packages group.
+  [dependabot[bot]]
+
+  Bumps the python-packages group with 1 update: [urllib3](https://github.com/urllib3/urllib3).
+
+  Updates `urllib3` from 2.6.1 to 2.6.2
+  - [Release notes](https://github.com/urllib3/urllib3/releases)
+  - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst)
+  - [Commits](https://github.com/urllib3/urllib3/compare/2.6.1...2.6.2)
+
+  ---
+  updated-dependencies:
+  - dependency-name: urllib3
+    dependency-version: 2.6.2
+    dependency-type: direct:production
+    update-type: version-update:semver-patch
+    dependency-group: python-packages
+  ...
+
+
+0.57.0 (2025-12-12)
+-------------------
 - Add GitHub Apps documentation and remove outdated header. [Rodos]

   - Add GitHub Apps authentication section with setup steps
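The backoff schedule described in the entry above is easy to sanity-check in isolation. A minimal sketch (standalone, not part of the diff; it mirrors the 1 s base, 120 s cap, and 10% jitter of the calculate_retry_delay helper shown further down):

```python
import random

MAX_RETRIES = 5  # same constant the diff introduces

def backoff_delay(attempt, base=1.0, cap=120.0):
    # Exponential backoff capped at 120s, plus up to 10% jitter
    # so concurrent clients don't retry in lockstep.
    delay = min(base * (2 ** attempt), cap)
    return delay + random.uniform(0, delay * 0.1)

for attempt in range(MAX_RETRIES):
    print(f"attempt {attempt + 1}: ~{backoff_delay(attempt):.1f}s")
# attempt 1: ~1.0s, attempt 2: ~2.1s, attempt 3: ~4.2s, ... (jitter varies)
```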
@@ -1 +1 @@
-__version__ = "0.57.0"
+__version__ = "0.58.0"
@@ -12,6 +12,7 @@ import json
 import logging
 import os
 import platform
+import random
 import re
 import select
 import socket
@@ -19,6 +20,7 @@ import ssl
 import subprocess
 import sys
 import time
+from collections.abc import Generator
 from datetime import datetime
 from http.client import IncompleteRead
 from urllib.error import HTTPError, URLError
@@ -74,6 +76,9 @@ else:
         " 3. Debian/Ubuntu: apt-get install ca-certificates\n\n"
     )

+# Retry configuration
+MAX_RETRIES = 5
+

 def logging_subprocess(
     popenargs, stdout_log_level=logging.DEBUG, stderr_log_level=logging.ERROR, **kwargs
@@ -603,170 +608,178 @@ def get_github_repo_url(args, repository):
     return repo_url


-def retrieve_data_gen(args, template, query_args=None, single_request=False):
+def calculate_retry_delay(attempt, headers):
+    """Calculate delay before next retry with exponential backoff."""
+    # Respect retry-after header if present
+    if retry_after := headers.get("retry-after"):
+        return int(retry_after)
+
+    # Respect rate limit reset time
+    if int(headers.get("x-ratelimit-remaining", 1)) < 1:
+        reset_time = int(headers.get("x-ratelimit-reset", 0))
+        return max(10, reset_time - calendar.timegm(time.gmtime()))
+
+    # Exponential backoff with jitter for server errors (1s base, 120s max)
+    delay = min(1.0 * (2**attempt), 120.0)
+    return delay + random.uniform(0, delay * 0.1)
+
+
+def retrieve_data(args, template, query_args=None, paginated=True):
     """
     Fetch the data from GitHub API.

     Handle both single requests and pagination with yield of individual dicts.
+    Handles throttling, retries, read errors, and DMCA takedowns.
     """
+    query_args = query_args or {}
     auth = get_auth(args, encode=not args.as_app)
     query_args = get_query_args(query_args)
     per_page = 100
-    next_url = None

-    while True:
-        if single_request:
-            request_per_page = None
-        else:
-            request_per_page = per_page
+    def _extract_next_page_url(link_header):
+        for link in link_header.split(","):
+            if 'rel="next"' in link:
+                return link[link.find("<") + 1:link.find(">")]
+        return None

-        request = _construct_request(
-            request_per_page,
-            query_args,
-            next_url or template,
-            auth,
-            as_app=args.as_app,
-            fine=True if args.token_fine is not None else False,
-        )  # noqa
-        r, errors = _get_response(request, auth, next_url or template)
+    def fetch_all() -> Generator[dict, None, None]:
+        next_url = None

-        status_code = int(r.getcode())
+        while True:
+            # FIRST: Fetch response

-        # Handle DMCA takedown (HTTP 451) - raise exception to skip entire repository
-        if status_code == 451:
-            dmca_url = None
-            try:
-                response_data = json.loads(r.read().decode("utf-8"))
-                dmca_url = response_data.get("block", {}).get("html_url")
-            except Exception:
-                pass
-            raise RepositoryUnavailableError(
-                "Repository unavailable due to legal reasons (HTTP 451)",
-                dmca_url=dmca_url,
-            )
-
-        # Check if we got correct data
-        try:
-            response = json.loads(r.read().decode("utf-8"))
-        except IncompleteRead:
-            logger.warning("Incomplete read error detected")
-            read_error = True
-        except json.decoder.JSONDecodeError:
-            logger.warning("JSON decode error detected")
-            read_error = True
-        except TimeoutError:
-            logger.warning("Tiemout error detected")
-            read_error = True
-        else:
-            read_error = False
-
-        # be gentle with API request limit and throttle requests if remaining requests getting low
-        limit_remaining = int(r.headers.get("x-ratelimit-remaining", 0))
-        if args.throttle_limit and limit_remaining <= args.throttle_limit:
-            logger.info(
-                "API request limit hit: {} requests left, pausing further requests for {}s".format(
-                    limit_remaining, args.throttle_pause
-                )
-            )
-            time.sleep(args.throttle_pause)
+            for attempt in range(MAX_RETRIES):
+                request = _construct_request(
+                    per_page=per_page if paginated else None,
+                    query_args=query_args,
+                    template=next_url or template,
+                    auth=auth,
+                    as_app=args.as_app,
+                    fine=args.token_fine is not None,
+                )
+                http_response = make_request_with_retry(request, auth)

-        retries = 0
-        while retries < 3 and (status_code == 502 or read_error):
-            logger.warning("API request failed. Retrying in 5 seconds")
-            retries += 1
-            time.sleep(5)
-            request = _construct_request(
-                request_per_page,
-                query_args,
-                next_url or template,
-                auth,
-                as_app=args.as_app,
-                fine=True if args.token_fine is not None else False,
-            )  # noqa
-            r, errors = _get_response(request, auth, next_url or template)
+                match http_response.getcode():
+                    case 200:
+                        # Success - Parse JSON response
+                        try:
+                            response = json.loads(http_response.read().decode("utf-8"))
+                            break  # Exit retry loop and handle the data returned
+                        except (
+                            IncompleteRead,
+                            json.decoder.JSONDecodeError,
+                            TimeoutError,
+                        ) as e:
+                            logger.warning(f"{type(e).__name__} reading response")
+                            if attempt < MAX_RETRIES - 1:
+                                delay = calculate_retry_delay(attempt, {})
+                                logger.warning(
+                                    f"Retrying in {delay:.1f}s (attempt {attempt + 1}/{MAX_RETRIES})"
+                                )
+                                time.sleep(delay)
+                            continue  # Next retry attempt

-            status_code = int(r.getcode())
-            try:
-                response = json.loads(r.read().decode("utf-8"))
-                read_error = False
-            except IncompleteRead:
-                logger.warning("Incomplete read error detected")
-                read_error = True
-            except json.decoder.JSONDecodeError:
-                logger.warning("JSON decode error detected")
-                read_error = True
-            except TimeoutError:
-                logger.warning("Tiemout error detected")
-                read_error = True
+                    case 451:
+                        # DMCA takedown - extract URL if available, then raise
+                        dmca_url = None
+                        try:
+                            response_data = json.loads(
+                                http_response.read().decode("utf-8")
+                            )
+                            dmca_url = response_data.get("block", {}).get("html_url")
+                        except Exception:
+                            pass
+                        raise RepositoryUnavailableError(
+                            "Repository unavailable due to legal reasons (HTTP 451)",
+                            dmca_url=dmca_url,
+                        )

-        if status_code != 200:
-            template = "API request returned HTTP {0}: {1}"
-            errors.append(template.format(status_code, r.reason))
-            raise Exception(", ".join(errors))
+                    case _:
+                        raise Exception(
+                            f"API request returned HTTP {http_response.getcode()}: {http_response.reason}"
+                        )
+            else:
+                logger.error(
+                    f"Failed to read response after {MAX_RETRIES} attempts for {next_url or template}"
+                )
+                raise Exception(
+                    f"Failed to read response after {MAX_RETRIES} attempts for {next_url or template}"
+                )

-        if read_error:
-            template = "API request problem reading response for {0}"
-            errors.append(template.format(request))
-            raise Exception(", ".join(errors))
+            # SECOND: Process and paginate

-        if len(errors) == 0:
-            if type(response) is list:
-                for resp in response:
-                    yield resp
-                # Parse Link header for next page URL (cursor-based pagination)
-                link_header = r.headers.get("Link", "")
-                next_url = None
-                if link_header:
-                    # Parse Link header: <https://api.github.com/...?per_page=100&after=cursor>; rel="next"
-                    for link in link_header.split(","):
-                        if 'rel="next"' in link:
-                            next_url = link[link.find("<") + 1 : link.find(">")]
-                            break
-                if not next_url:
-                    break
-            elif type(response) is dict and single_request:
+            # Pause before next request if rate limit is low
+            if (
+                remaining := int(http_response.headers.get("x-ratelimit-remaining", 0))
+            ) <= (args.throttle_limit or 0):
+                if args.throttle_limit:
+                    logger.info(
+                        f"Throttling: {remaining} requests left, pausing {args.throttle_pause}s"
+                    )
+                    time.sleep(args.throttle_pause)
+
+            # Yield results
+            if isinstance(response, list):
+                yield from response
+            elif isinstance(response, dict):
                 yield response

-        if len(errors) > 0:
-            raise Exception(", ".join(errors))
+            # Check for more pages
+            if not paginated or not (
+                next_url := _extract_next_page_url(
+                    http_response.headers.get("Link", "")
+                )
+            ):
+                break  # No more data

-        if single_request:
-            break
+    return list(fetch_all())


-def retrieve_data(args, template, query_args=None, single_request=False):
-    return list(retrieve_data_gen(args, template, query_args, single_request))
+def make_request_with_retry(request, auth):
+    """Make HTTP request with automatic retry for transient errors."""
+
+    def is_retryable_status(status_code, headers):
+        # Server errors are always retryable
+        if status_code in (500, 502, 503, 504):
+            return True
+        # Rate limit (403/429) is retryable if limit exhausted
+        if status_code in (403, 429):
+            return int(headers.get("x-ratelimit-remaining", 1)) < 1
+        return False


 def get_query_args(query_args=None):
     if not query_args:
         query_args = {}
     return query_args


-def _get_response(request, auth, template):
-    retry_timeout = 3
-    errors = []
-    # We'll make requests in a loop so we can
-    # delay and retry in the case of rate-limiting
-    while True:
-        should_continue = False
+    for attempt in range(MAX_RETRIES):
         try:
-            r = urlopen(request, context=https_ctx)
+            return urlopen(request, context=https_ctx)

         except HTTPError as exc:
-            errors, should_continue = _request_http_error(exc, auth, errors)  # noqa
-            r = exc
-        except URLError as e:
-            logger.warning(e.reason)
-            should_continue, retry_timeout = _request_url_error(template, retry_timeout)
-            if not should_continue:
-                raise
-        except socket.error as e:
-            logger.warning(e.strerror)
-            should_continue, retry_timeout = _request_url_error(template, retry_timeout)
-            if not should_continue:
+            # HTTPError can be used as a response-like object
+            if not is_retryable_status(exc.code, exc.headers):
+                raise  # Non-retryable error
+
+            if attempt >= MAX_RETRIES - 1:
+                logger.error(f"HTTP {exc.code} failed after {MAX_RETRIES} attempts")
                 raise

-        if should_continue:
-            continue
+            delay = calculate_retry_delay(attempt, exc.headers)
+            logger.warning(
+                f"HTTP {exc.code}, retrying in {delay:.1f}s "
+                f"(attempt {attempt + 1}/{MAX_RETRIES})"
+            )
+            if auth is None and exc.code in (403, 429):
+                logger.info("Hint: Authenticate to raise your GitHub rate limit")
+            time.sleep(delay)

-        break
-    return r, errors
+        except (URLError, socket.error) as e:
+            if attempt >= MAX_RETRIES - 1:
+                logger.error(f"Connection error failed after {MAX_RETRIES} attempts: {e}")
+                raise
+            delay = calculate_retry_delay(attempt, {})
+            logger.warning(
+                f"Connection error: {e}, retrying in {delay:.1f}s "
+                f"(attempt {attempt + 1}/{MAX_RETRIES})"
+            )
+            time.sleep(delay)

+    raise Exception(f"Request failed after {MAX_RETRIES} attempts")  # pragma: no cover


 def _construct_request(per_page, query_args, template, auth, as_app=None, fine=False):
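To make the pagination handoff above concrete: `_extract_next_page_url` only looks for the `rel="next"` segment of the Link header GitHub returns. A standalone sketch, with an illustrative (not real) header value:

```python
def extract_next_page_url(link_header):
    # Same logic as _extract_next_page_url above: return the URL
    # between the angle brackets of the rel="next" segment, if any.
    for link in link_header.split(","):
        if 'rel="next"' in link:
            return link[link.find("<") + 1:link.find(">")]
    return None

# Illustrative GitHub-style Link header (not captured from a real response):
header = (
    '<https://api.github.com/repos/owner/repo/issues?per_page=100&page=2>; rel="next", '
    '<https://api.github.com/repos/owner/repo/issues?per_page=100&page=6>; rel="last"'
)
assert extract_next_page_url(header).endswith("page=2")
assert extract_next_page_url("") is None  # no header: pagination stops
```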
@@ -808,52 +821,6 @@ def _construct_request(per_page, query_args, template, auth, as_app=None, fine=False):
     return request


-def _request_http_error(exc, auth, errors):
-    # HTTPError behaves like a Response so we can
-    # check the status code and headers to see exactly
-    # what failed.
-
-    should_continue = False
-    headers = exc.headers
-    limit_remaining = int(headers.get("x-ratelimit-remaining", 0))
-
-    if exc.code == 403 and limit_remaining < 1:
-        # The X-RateLimit-Reset header includes a
-        # timestamp telling us when the limit will reset
-        # so we can calculate how long to wait rather
-        # than inefficiently polling:
-        gm_now = calendar.timegm(time.gmtime())
-        reset = int(headers.get("x-ratelimit-reset", 0)) or gm_now
-        # We'll never sleep for less than 10 seconds:
-        delta = max(10, reset - gm_now)
-
-        limit = headers.get("x-ratelimit-limit")
-        logger.warning(
-            "Exceeded rate limit of {} requests; waiting {} seconds to reset".format(
-                limit, delta
-            )
-        )  # noqa
-
-        if auth is None:
-            logger.info("Hint: Authenticate to raise your GitHub rate limit")
-
-        time.sleep(delta)
-        should_continue = True
-    return errors, should_continue
-
-
-def _request_url_error(template, retry_timeout):
-    # In case of a connection timing out, we can retry a few time
-    # But we won't crash and not back-up the rest now
-    logger.info("'{}' timed out".format(template))
-    retry_timeout -= 1
-
-    if retry_timeout >= 0:
-        return True, retry_timeout
-
-    raise Exception("'{}' timed out to much, skipping!".format(template))


 class S3HTTPRedirectHandler(HTTPRedirectHandler):
     """
     A subclassed redirect handler for downloading Github assets from S3.
@@ -1503,7 +1470,7 @@ def download_attachments(

 def get_authenticated_user(args):
     template = "https://{0}/user".format(get_github_api_host(args))
-    data = retrieve_data(args, template, single_request=True)
+    data = retrieve_data(args, template, paginated=False)
     return data[0]

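The same mechanical substitution repeats in every call site below: `single_request=True` becomes `paginated=False`, and explicit `single_request=False` arguments simply disappear because pagination is now the default. Before/after, with an illustrative URL (in the real code the host comes from get_github_api_host(args)):

```python
# 0.57.0: opt in to a single, unpaginated request
user = retrieve_data(args, "https://api.github.com/user", single_request=True)[0]

# 0.58.0: pagination is the default; opt out instead
user = retrieve_data(args, "https://api.github.com/user", paginated=False)[0]
```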
@@ -1517,7 +1484,7 @@ def check_git_lfs_install():

 def retrieve_repositories(args, authenticated_user):
     logger.info("Retrieving repositories")
-    single_request = False
+    paginated = True
     if args.user == authenticated_user["login"]:
         # we must use the /user/repos API to be able to access private repos
         template = "https://{0}/user/repos".format(get_github_api_host(args))
@@ -1540,16 +1507,16 @@ def retrieve_repositories(args, authenticated_user):
             repo_path = args.repository
         else:
             repo_path = "{0}/{1}".format(args.user, args.repository)
-        single_request = True
+        paginated = False
         template = "https://{0}/repos/{1}".format(get_github_api_host(args), repo_path)

-    repos = retrieve_data(args, template, single_request=single_request)
+    repos = retrieve_data(args, template, paginated=paginated)

     if args.all_starred:
         starred_template = "https://{0}/users/{1}/starred".format(
             get_github_api_host(args), args.user
         )
-        starred_repos = retrieve_data(args, starred_template, single_request=False)
+        starred_repos = retrieve_data(args, starred_template)
         # flag each repo as starred for downstream processing
         for item in starred_repos:
             item.update({"is_starred": True})
@@ -1559,7 +1526,7 @@ def retrieve_repositories(args, authenticated_user):
         gists_template = "https://{0}/users/{1}/gists".format(
             get_github_api_host(args), args.user
         )
-        gists = retrieve_data(args, gists_template, single_request=False)
+        gists = retrieve_data(args, gists_template)
         # flag each repo as a gist for downstream processing
         for item in gists:
             item.update({"is_gist": True})
@@ -1578,9 +1545,7 @@ def retrieve_repositories(args, authenticated_user):
         starred_gists_template = "https://{0}/gists/starred".format(
             get_github_api_host(args)
         )
-        starred_gists = retrieve_data(
-            args, starred_gists_template, single_request=False
-        )
+        starred_gists = retrieve_data(args, starred_gists_template)
         # flag each repo as a starred gist for downstream processing
         for item in starred_gists:
             item.update({"is_gist": True, "is_starred": True})
@@ -1849,14 +1814,14 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
         pull_states = ["open", "closed"]
         for pull_state in pull_states:
             query_args["state"] = pull_state
-            _pulls = retrieve_data_gen(args, _pulls_template, query_args=query_args)
+            _pulls = retrieve_data(args, _pulls_template, query_args=query_args)
             for pull in _pulls:
                 if args.since and pull["updated_at"] < args.since:
                     break
                 if not args.since or pull["updated_at"] >= args.since:
                     pulls[pull["number"]] = pull
     else:
-        _pulls = retrieve_data_gen(args, _pulls_template, query_args=query_args)
+        _pulls = retrieve_data(args, _pulls_template, query_args=query_args)
         for pull in _pulls:
             if args.since and pull["updated_at"] < args.since:
                 break
@@ -1864,7 +1829,7 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
             pulls[pull["number"]] = retrieve_data(
                 args,
                 _pulls_template + "/{}".format(pull["number"]),
-                single_request=True,
+                paginated=False,
             )[0]

     logger.info("Saving {0} pull requests to disk".format(len(list(pulls.keys()))))
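One nuance of the retrieve_data_gen to retrieve_data swap in backup_pulls: the old function was a lazy generator, while the new one returns a fully materialized list (`return list(fetch_all())`), so the early `break` on `args.since` still limits processing but no longer avoids fetching the remaining pages. A simplified, hypothetical illustration:

```python
def fetch_pages():
    for page in range(1, 4):            # pretend each iteration is one HTTP request
        print(f"fetching page {page}")
        yield {"updated_at": f"2025-0{page}-01"}

# Generator (old behavior): break stops further fetches.
for pull in fetch_pages():
    break                               # only "fetching page 1" is printed

# Materialized list (new behavior): all pages fetched before the loop runs.
for pull in list(fetch_pages()):        # prints pages 1-3 up front
    break
```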
release-requirements.txt
@@ -1,40 +1,15 @@
+# Linting & Formatting
 autopep8==2.3.2
 black==25.12.0
-bleach==6.3.0
-certifi==2025.11.12
-charset-normalizer==3.4.4
-click==8.3.1
-colorama==0.4.6
-docutils==0.22.3
 flake8==7.3.0
-gitchangelog==3.0.4
+
+# Testing
+pytest==9.0.2
-idna==3.11
-importlib-metadata==8.7.0
-jaraco.classes==3.4.0
-keyring==25.7.0
-markdown-it-py==4.0.0
-mccabe==0.7.0
-mdurl==0.1.2
-more-itertools==10.8.0
-mypy-extensions==1.1.0
-packaging==25.0
-pathspec==0.12.1
-pkginfo==1.12.1.2
-platformdirs==4.5.1
-pycodestyle==2.14.0
-pyflakes==3.4.0
-Pygments==2.19.2
-readme-renderer==44.0
-requests==2.32.5
-requests-toolbelt==1.0.0
-restructuredtext-lint==2.0.2
-rfc3986==2.0.0
-rich==14.2.0
-setuptools==80.9.0
-six==1.17.0
-tqdm==4.67.1
+
+# Release & Publishing
 twine==6.2.0
-urllib3==2.6.1
-webencodings==0.5.1
-zipp==3.23.0
+gitchangelog==3.0.4
+setuptools==80.9.0
+
+# Documentation
+restructuredtext-lint==2.0.2
@@ -13,7 +13,6 @@ class TestHTTP451Exception:

     def test_repository_unavailable_error_raised(self):
         """HTTP 451 should raise RepositoryUnavailableError with DMCA URL."""
-        # Create mock args
         args = Mock()
         args.as_app = False
         args.token_fine = None
@@ -25,7 +24,6 @@ class TestHTTP451Exception:
         args.throttle_limit = None
         args.throttle_pause = 0

-        # Mock HTTPError 451 response
         mock_response = Mock()
         mock_response.getcode.return_value = 451
@@ -41,14 +39,10 @@ class TestHTTP451Exception:
         mock_response.headers = {"x-ratelimit-remaining": "5000"}
         mock_response.reason = "Unavailable For Legal Reasons"

-        def mock_get_response(request, auth, template):
-            return mock_response, []
-
-        with patch("github_backup.github_backup._get_response", side_effect=mock_get_response):
+        with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response):
             with pytest.raises(github_backup.RepositoryUnavailableError) as exc_info:
-                list(github_backup.retrieve_data_gen(args, "https://api.github.com/repos/test/dmca/issues"))
+                github_backup.retrieve_data(args, "https://api.github.com/repos/test/dmca/issues")

         # Check exception has DMCA URL
         assert exc_info.value.dmca_url == "https://github.com/github/dmca/blob/master/2024/11/2024-11-04-source-code.md"
         assert "451" in str(exc_info.value)
@@ -71,14 +65,10 @@ class TestHTTP451Exception:
         mock_response.headers = {"x-ratelimit-remaining": "5000"}
         mock_response.reason = "Unavailable For Legal Reasons"

-        def mock_get_response(request, auth, template):
-            return mock_response, []
-
-        with patch("github_backup.github_backup._get_response", side_effect=mock_get_response):
+        with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response):
             with pytest.raises(github_backup.RepositoryUnavailableError) as exc_info:
-                list(github_backup.retrieve_data_gen(args, "https://api.github.com/repos/test/dmca/issues"))
+                github_backup.retrieve_data(args, "https://api.github.com/repos/test/dmca/issues")

         # Exception raised even without DMCA URL
         assert exc_info.value.dmca_url is None
         assert "451" in str(exc_info.value)
@@ -101,42 +91,9 @@ class TestHTTP451Exception:
         mock_response.headers = {"x-ratelimit-remaining": "5000"}
         mock_response.reason = "Unavailable For Legal Reasons"

-        def mock_get_response(request, auth, template):
-            return mock_response, []
-
-        with patch("github_backup.github_backup._get_response", side_effect=mock_get_response):
+        with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response):
             with pytest.raises(github_backup.RepositoryUnavailableError):
-                list(github_backup.retrieve_data_gen(args, "https://api.github.com/repos/test/dmca/issues"))
-
-    def test_other_http_errors_unchanged(self):
-        """Other HTTP errors should still raise generic Exception."""
-        args = Mock()
-        args.as_app = False
-        args.token_fine = None
-        args.token_classic = None
-        args.username = None
-        args.password = None
-        args.osx_keychain_item_name = None
-        args.osx_keychain_item_account = None
-        args.throttle_limit = None
-        args.throttle_pause = 0
-
-        mock_response = Mock()
-        mock_response.getcode.return_value = 404
-        mock_response.read.return_value = b'{"message": "Not Found"}'
-        mock_response.headers = {"x-ratelimit-remaining": "5000"}
-        mock_response.reason = "Not Found"
-
-        def mock_get_response(request, auth, template):
-            return mock_response, []
-
-        with patch("github_backup.github_backup._get_response", side_effect=mock_get_response):
-            # Should raise generic Exception, not RepositoryUnavailableError
-            with pytest.raises(Exception) as exc_info:
-                list(github_backup.retrieve_data_gen(args, "https://api.github.com/repos/test/notfound/issues"))
-
-            assert not isinstance(exc_info.value, github_backup.RepositoryUnavailableError)
-            assert "404" in str(exc_info.value)
+                github_backup.retrieve_data(args, "https://api.github.com/repos/test/dmca/issues")


 if __name__ == "__main__":
@@ -40,7 +40,7 @@ class MockHTTPResponse:

 @pytest.fixture
 def mock_args():
-    """Mock args for retrieve_data_gen."""
+    """Mock args for retrieve_data."""
     args = Mock()
     args.as_app = False
     args.token_fine = None
@@ -77,10 +77,8 @@ def test_cursor_based_pagination(mock_args):
         return responses[len(requests_made) - 1]

     with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
-        results = list(
-            github_backup.retrieve_data_gen(
-                mock_args, "https://api.github.com/repos/owner/repo/issues"
-            )
-        )
+        results = github_backup.retrieve_data(
+            mock_args, "https://api.github.com/repos/owner/repo/issues"
+        )

     # Verify all items retrieved and cursor was used in second request
@@ -112,10 +110,8 @@ def test_page_based_pagination(mock_args):
         return responses[len(requests_made) - 1]

     with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
-        results = list(
-            github_backup.retrieve_data_gen(
-                mock_args, "https://api.github.com/repos/owner/repo/pulls"
-            )
-        )
+        results = github_backup.retrieve_data(
+            mock_args, "https://api.github.com/repos/owner/repo/pulls"
+        )

     # Verify all items retrieved and page parameter was used (not cursor)
@@ -142,10 +138,8 @@ def test_no_link_header_stops_pagination(mock_args):
         return responses[len(requests_made) - 1]

     with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
-        results = list(
-            github_backup.retrieve_data_gen(
-                mock_args, "https://api.github.com/repos/owner/repo/labels"
-            )
-        )
+        results = github_backup.retrieve_data(
+            mock_args, "https://api.github.com/repos/owner/repo/labels"
+        )

     # Verify pagination stopped after first request
tests/test_retrieve_data.py (new file, 365 lines)
@@ -0,0 +1,365 @@
"""Tests for retrieve_data function."""

import json
import socket
from unittest.mock import Mock, patch
from urllib.error import HTTPError, URLError

import pytest

from github_backup import github_backup
from github_backup.github_backup import (
    MAX_RETRIES,
    calculate_retry_delay,
    make_request_with_retry,
)


class TestCalculateRetryDelay:
    def test_respects_retry_after_header(self):
        headers = {'retry-after': '30'}
        assert calculate_retry_delay(0, headers) == 30

    def test_respects_rate_limit_reset(self):
        import time
        import calendar
        # Set reset time 60 seconds in the future
        future_reset = calendar.timegm(time.gmtime()) + 60
        headers = {
            'x-ratelimit-remaining': '0',
            'x-ratelimit-reset': str(future_reset)
        }
        delay = calculate_retry_delay(0, headers)
        # Should be approximately 60 seconds (with some tolerance for execution time)
        assert 55 <= delay <= 65

    def test_exponential_backoff(self):
        delay_0 = calculate_retry_delay(0, {})
        delay_1 = calculate_retry_delay(1, {})
        delay_2 = calculate_retry_delay(2, {})
        # Base delay is 1s, so delays should be roughly 1, 2, 4 (plus jitter)
        assert 0.9 <= delay_0 <= 1.2  # ~1s + up to 10% jitter
        assert 1.8 <= delay_1 <= 2.4  # ~2s + up to 10% jitter
        assert 3.6 <= delay_2 <= 4.8  # ~4s + up to 10% jitter

    def test_max_delay_cap(self):
        # Very high attempt number should not exceed 120s + jitter
        delay = calculate_retry_delay(100, {})
        assert delay <= 120 * 1.1  # 120s max + 10% jitter

    def test_minimum_rate_limit_delay(self):
        import time
        import calendar
        # Set reset time in the past (already reset)
        past_reset = calendar.timegm(time.gmtime()) - 100
        headers = {
            'x-ratelimit-remaining': '0',
            'x-ratelimit-reset': str(past_reset)
        }
        delay = calculate_retry_delay(0, headers)
        # Should be minimum 10 seconds even if reset time is in past
        assert delay >= 10


class TestRetrieveDataRetry:
    """Tests for retry behavior in retrieve_data."""

    @pytest.fixture
    def mock_args(self):
        args = Mock()
        args.as_app = False
        args.token_fine = None
        args.token_classic = "fake_token"
        args.username = None
        args.password = None
        args.osx_keychain_item_name = None
        args.osx_keychain_item_account = None
        args.throttle_limit = None
        args.throttle_pause = 0
        return args

    def test_json_parse_error_retries_and_fails(self, mock_args):
        """HTTP 200 with invalid JSON should retry and eventually fail."""
        mock_response = Mock()
        mock_response.getcode.return_value = 200
        mock_response.read.return_value = b"not valid json {"
        mock_response.headers = {"x-ratelimit-remaining": "5000"}

        call_count = 0

        def mock_make_request(*args, **kwargs):
            nonlocal call_count
            call_count += 1
            return mock_response

        with patch("github_backup.github_backup.make_request_with_retry", side_effect=mock_make_request):
            with patch("github_backup.github_backup.calculate_retry_delay", return_value=0):  # No delay in tests
                with pytest.raises(Exception) as exc_info:
                    github_backup.retrieve_data(mock_args, "https://api.github.com/repos/test/repo/issues")

        assert "Failed to read response after" in str(exc_info.value)
        assert call_count == MAX_RETRIES

    def test_json_parse_error_recovers_on_retry(self, mock_args):
        """HTTP 200 with invalid JSON should succeed if retry returns valid JSON."""
        bad_response = Mock()
        bad_response.getcode.return_value = 200
        bad_response.read.return_value = b"not valid json {"
        bad_response.headers = {"x-ratelimit-remaining": "5000"}

        good_response = Mock()
        good_response.getcode.return_value = 200
        good_response.read.return_value = json.dumps([{"id": 1}]).encode("utf-8")
        good_response.headers = {"x-ratelimit-remaining": "5000", "Link": ""}

        responses = [bad_response, bad_response, good_response]
        call_count = 0

        def mock_make_request(*args, **kwargs):
            nonlocal call_count
            result = responses[call_count]
            call_count += 1
            return result

        with patch("github_backup.github_backup.make_request_with_retry", side_effect=mock_make_request):
            with patch("github_backup.github_backup.calculate_retry_delay", return_value=0):
                result = github_backup.retrieve_data(mock_args, "https://api.github.com/repos/test/repo/issues")

        assert result == [{"id": 1}]
        assert call_count == 3  # Failed twice, succeeded on third

    def test_http_error_raises_exception(self, mock_args):
        """Non-success HTTP status codes should raise Exception."""
        mock_response = Mock()
        mock_response.getcode.return_value = 404
        mock_response.read.return_value = b'{"message": "Not Found"}'
        mock_response.headers = {"x-ratelimit-remaining": "5000"}
        mock_response.reason = "Not Found"

        with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response):
            with pytest.raises(Exception) as exc_info:
                github_backup.retrieve_data(mock_args, "https://api.github.com/repos/test/notfound/issues")

        assert not isinstance(exc_info.value, github_backup.RepositoryUnavailableError)
        assert "404" in str(exc_info.value)


class TestMakeRequestWithRetry:
    """Tests for HTTP error retry behavior in make_request_with_retry."""

    def test_502_error_retries_and_succeeds(self):
        """HTTP 502 should retry and succeed if subsequent request works."""
        good_response = Mock()
        good_response.read.return_value = b'{"ok": true}'

        call_count = 0
        fail_count = MAX_RETRIES - 1  # Fail all but last attempt

        def mock_urlopen(*args, **kwargs):
            nonlocal call_count
            call_count += 1
            if call_count <= fail_count:
                raise HTTPError(
                    url="https://api.github.com/test",
                    code=502,
                    msg="Bad Gateway",
                    hdrs={"x-ratelimit-remaining": "5000"},
                    fp=None,
                )
            return good_response

        with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
            with patch("github_backup.github_backup.calculate_retry_delay", return_value=0):
                result = make_request_with_retry(Mock(), None)

        assert result == good_response
        assert call_count == MAX_RETRIES

    def test_503_error_retries_until_exhausted(self):
        """HTTP 503 should retry MAX_RETRIES times then raise."""
        call_count = 0

        def mock_urlopen(*args, **kwargs):
            nonlocal call_count
            call_count += 1
            raise HTTPError(
                url="https://api.github.com/test",
                code=503,
                msg="Service Unavailable",
                hdrs={"x-ratelimit-remaining": "5000"},
                fp=None,
            )

        with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
            with patch("github_backup.github_backup.calculate_retry_delay", return_value=0):
                with pytest.raises(HTTPError) as exc_info:
                    make_request_with_retry(Mock(), None)

        assert exc_info.value.code == 503
        assert call_count == MAX_RETRIES

    def test_404_error_not_retried(self):
        """HTTP 404 should not be retried - raise immediately."""
        call_count = 0

        def mock_urlopen(*args, **kwargs):
            nonlocal call_count
            call_count += 1
            raise HTTPError(
                url="https://api.github.com/test",
                code=404,
                msg="Not Found",
                hdrs={"x-ratelimit-remaining": "5000"},
                fp=None,
            )

        with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
            with pytest.raises(HTTPError) as exc_info:
                make_request_with_retry(Mock(), None)

        assert exc_info.value.code == 404
        assert call_count == 1  # No retries

    def test_rate_limit_403_retried_when_remaining_zero(self):
        """HTTP 403 with x-ratelimit-remaining=0 should retry."""
        good_response = Mock()
        call_count = 0

        def mock_urlopen(*args, **kwargs):
            nonlocal call_count
            call_count += 1
            if call_count == 1:
                raise HTTPError(
                    url="https://api.github.com/test",
                    code=403,
                    msg="Forbidden",
                    hdrs={"x-ratelimit-remaining": "0"},
                    fp=None,
                )
            return good_response

        with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
            with patch("github_backup.github_backup.calculate_retry_delay", return_value=0):
                result = make_request_with_retry(Mock(), None)

        assert result == good_response
        assert call_count == 2

    def test_403_not_retried_when_remaining_nonzero(self):
        """HTTP 403 with x-ratelimit-remaining>0 should not retry (permission error)."""
        call_count = 0

        def mock_urlopen(*args, **kwargs):
            nonlocal call_count
            call_count += 1
            raise HTTPError(
                url="https://api.github.com/test",
                code=403,
                msg="Forbidden",
                hdrs={"x-ratelimit-remaining": "5000"},
                fp=None,
            )

        with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
            with pytest.raises(HTTPError) as exc_info:
                make_request_with_retry(Mock(), None)

        assert exc_info.value.code == 403
        assert call_count == 1  # No retries

    def test_connection_error_retries_and_succeeds(self):
        """URLError (connection error) should retry and succeed if subsequent request works."""
        good_response = Mock()
        call_count = 0
        fail_count = MAX_RETRIES - 1  # Fail all but last attempt

        def mock_urlopen(*args, **kwargs):
            nonlocal call_count
            call_count += 1
            if call_count <= fail_count:
                raise URLError("Connection refused")
            return good_response

        with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
            with patch("github_backup.github_backup.calculate_retry_delay", return_value=0):
                result = make_request_with_retry(Mock(), None)

        assert result == good_response
        assert call_count == MAX_RETRIES

    def test_socket_error_retries_until_exhausted(self):
        """socket.error should retry MAX_RETRIES times then raise."""
        call_count = 0

        def mock_urlopen(*args, **kwargs):
            nonlocal call_count
            call_count += 1
            raise socket.error("Connection reset by peer")

        with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
            with patch("github_backup.github_backup.calculate_retry_delay", return_value=0):
                with pytest.raises(socket.error):
                    make_request_with_retry(Mock(), None)

        assert call_count == MAX_RETRIES


class TestRetrieveDataThrottling:
    """Tests for throttling behavior in retrieve_data."""

    @pytest.fixture
    def mock_args(self):
        args = Mock()
        args.as_app = False
        args.token_fine = None
        args.token_classic = "fake_token"
        args.username = None
        args.password = None
        args.osx_keychain_item_name = None
        args.osx_keychain_item_account = None
        args.throttle_limit = 10  # Throttle when remaining <= 10
        args.throttle_pause = 5  # Pause 5 seconds
        return args

    def test_throttling_pauses_when_rate_limit_low(self, mock_args):
        """Should pause when x-ratelimit-remaining is at or below throttle_limit."""
        mock_response = Mock()
        mock_response.getcode.return_value = 200
        mock_response.read.return_value = json.dumps([{"id": 1}]).encode("utf-8")
        mock_response.headers = {"x-ratelimit-remaining": "5", "Link": ""}  # Below throttle_limit

        with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response):
            with patch("github_backup.github_backup.time.sleep") as mock_sleep:
                github_backup.retrieve_data(mock_args, "https://api.github.com/repos/test/repo/issues")

        mock_sleep.assert_called_once_with(5)  # throttle_pause value


class TestRetrieveDataSingleItem:
    """Tests for single item (dict) responses in retrieve_data."""

    @pytest.fixture
    def mock_args(self):
        args = Mock()
        args.as_app = False
        args.token_fine = None
        args.token_classic = "fake_token"
        args.username = None
        args.password = None
        args.osx_keychain_item_name = None
        args.osx_keychain_item_account = None
        args.throttle_limit = None
        args.throttle_pause = 0
        return args

    def test_dict_response_returned_as_list(self, mock_args):
        """Single dict response should be returned as a list with one item."""
        mock_response = Mock()
        mock_response.getcode.return_value = 200
        mock_response.read.return_value = json.dumps({"login": "testuser", "id": 123}).encode("utf-8")
        mock_response.headers = {"x-ratelimit-remaining": "5000", "Link": ""}

        with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response):
            result = github_backup.retrieve_data(mock_args, "https://api.github.com/user")

        assert result == [{"login": "testuser", "id": 123}]