mirror of
https://github.com/josegonzalez/python-github-backup.git
synced 2026-05-01 04:55:34 +02:00
Compare commits
21 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bd6eea02d5 | ||
|
|
72fa5d3dcd | ||
|
|
c5d11c000a | ||
|
|
8e76089565 | ||
|
|
d30d9bfe60 | ||
|
|
a2391a550e | ||
|
|
9340aa3aaa | ||
|
|
543d76f24b | ||
|
|
3cda5a01fd | ||
|
|
17b79fcbef | ||
|
|
f1fca0f9b7 | ||
|
|
ccc27b95f7 | ||
|
|
f3eabf0bfe | ||
|
|
b92aee6f11 | ||
|
|
4d1772319f | ||
|
|
2c7fdab54e | ||
|
|
334c6c6546 | ||
|
|
2f130ecd66 | ||
|
|
ddf7f82e65 | ||
|
|
ddf82f1115 | ||
|
|
0638666bc7 |
58
CHANGES.rst
58
CHANGES.rst
@@ -1,9 +1,65 @@
|
||||
Changelog
|
||||
=========
|
||||
|
||||
0.62.0 (2026-04-29)
|
||||
0.62.1 (2026-04-30)
|
||||
-------------------
|
||||
------------------------
|
||||
- Document that nothing is saved by default. [Changaco]
|
||||
- Eliminate trailing spaces. [Changaco]
|
||||
- Remove pointless and unsafe `export`s in examples. [Changaco]
|
||||
- Try to clarify what `--incremental` actually does. [Changaco]
|
||||
- Fix a typo in the README. [Changaco]
|
||||
- Document that `--all` doesn't imply `--attachments` [Changaco]
|
||||
- Rename a function to match what it actually does. [Changaco]
|
||||
- Don't leave files open. [Changaco]
|
||||
- Remove legacy code in `mkdir_p` function. [Changaco]
|
||||
- Don't pass stdin when doing so can't do any good. [Changaco]
|
||||
|
||||
When the child process doesn't inherit stderr, it can't ask the user for input, so it shouldn't inherit stdin either.
|
||||
- Use `subprocess.DEVNULL` instead of emulating it. [Changaco]
|
||||
- Remove bad invocation of the system shell. [Changaco]
|
||||
- Add missing `context` argument to `urlopen` call. [Changaco]
|
||||
- Suppress output of call to `git lfs version` [Changaco]
|
||||
- Handle more network errors. [Changaco]
|
||||
|
||||
```python-traceback
|
||||
Traceback (most recent call last):
|
||||
File ".local/bin/github-backup", line 6, in <module>
|
||||
sys.exit(main())
|
||||
~~~~^^
|
||||
File ".local/share/pipx/venvs/github-backup/lib/python3.14/site-packages/github_backup/cli.py", line 83, in main
|
||||
backup_repositories(args, output_directory, repositories)
|
||||
~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
File ".local/share/pipx/venvs/github-backup/lib/python3.14/site-packages/github_backup/github_backup.py", line 1845, in backup_repositories
|
||||
backup_pulls(args, repo_cwd, repository, repos_template)
|
||||
~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
File ".local/share/pipx/venvs/github-backup/lib/python3.14/site-packages/github_backup/github_backup.py", line 2019, in backup_pulls
|
||||
pulls[number]["commit_data"] = retrieve_data(args, template)
|
||||
~~~~~~~~~~~~~^^^^^^^^^^^^^^^^
|
||||
File ".local/share/pipx/venvs/github-backup/lib/python3.14/site-packages/github_backup/github_backup.py", line 766, in retrieve_data
|
||||
return list(fetch_all())
|
||||
File ".local/share/pipx/venvs/github-backup/lib/python3.14/site-packages/github_backup/github_backup.py", line 717, in fetch_all
|
||||
response = json.loads(http_response.read().decode("utf-8"))
|
||||
~~~~~~~~~~~~~~~~~~^^
|
||||
File "/usr/lib/python3.14/http/client.py", line 500, in read
|
||||
s = self._safe_read(self.length)
|
||||
File "/usr/lib/python3.14/http/client.py", line 648, in _safe_read
|
||||
data = self.fp.read(cursize)
|
||||
File "/usr/lib/python3.14/socket.py", line 725, in readinto
|
||||
return self._sock.recv_into(b)
|
||||
~~~~~~~~~~~~~~~~~~~~^^^
|
||||
File "/usr/lib/python3.14/ssl.py", line 1304, in recv_into
|
||||
return self.read(nbytes, buffer)
|
||||
~~~~~~~~~^^^^^^^^^^^^^^^^
|
||||
File "/usr/lib/python3.14/ssl.py", line 1138, in read
|
||||
return self._sslobj.read(len, buffer)
|
||||
~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^
|
||||
ConnectionResetError: [Errno 104] Connection reset by peer
|
||||
```
|
||||
|
||||
|
||||
0.62.0 (2026-04-29)
|
||||
-------------------
|
||||
- Skip checkpoint-equal incremental items. [Duncan Ogilvie]
|
||||
- Avoid redundant release asset list requests. [Duncan Ogilvie]
|
||||
- Reduce unnecessary pull requests with incremental fetching. [Duncan
|
||||
|
||||
25
README.rst
25
README.rst
@@ -22,7 +22,7 @@ Using PIP via PyPI::
|
||||
Using PIP via Github (more likely the latest version)::
|
||||
|
||||
pip install git+https://github.com/josegonzalez/python-github-backup.git#egg=github-backup
|
||||
|
||||
|
||||
*Install note for python newcomers:*
|
||||
|
||||
Python scripts are unlikely to be included in your ``$PATH`` by default. This means the script cannot be run directly in a terminal with ``$ github-backup ...``; you can either add Python's install path to your environment's ``$PATH`` or call the script directly, e.g. using ``$ ~/.local/bin/github-backup``.
|
||||
@@ -249,7 +249,7 @@ Note: When you run github-backup, you will be asked whether you want to allow "
|
||||
Github Rate-limit and Throttling
|
||||
--------------------------------
|
||||
|
||||
"github-backup" will automatically throttle itself based on feedback from the Github API.
|
||||
"github-backup" will automatically throttle itself based on feedback from the Github API.
|
||||
|
||||
Their API is usually rate-limited to 5000 calls per hour. The API will ask github-backup to pause until a specific time when the limit is reset again (at the start of the next hour). This continues until the backup is complete.
|
||||
|
||||
@@ -325,7 +325,12 @@ Gotchas / Known-issues
|
||||
All is not everything
|
||||
---------------------
|
||||
|
||||
The ``--all`` argument does not include: cloning private repos (``-P, --private``), cloning forks (``-F, --fork``), cloning starred repositories (``--all-starred``), ``--pull-details``, cloning LFS repositories (``--lfs``), cloning gists (``--gists``) or cloning starred gist repos (``--starred-gists``). See examples for more.
|
||||
The ``--all`` argument does not include: downloading attachments from issue and pull request comments (``--attachments``), cloning private repos (``-P, --private``), cloning forks (``-F, --fork``), cloning starred repositories (``--all-starred``), ``--pull-details``, cloning LFS repositories (``--lfs``), cloning gists (``--gists``) or cloning starred gist repos (``--starred-gists``). See examples for more.
|
||||
|
||||
Saves nothing if no arguments are passed
|
||||
----------------------------------------
|
||||
|
||||
At least one argument like ``--all`` or ``--repositories`` is needed for github-backup to actually save data. Without relevant arguments, github-backup fetches some data from GitHub but doesn't put any of it into files.
|
||||
|
||||
Starred repository size
|
||||
-----------------------
|
||||
@@ -363,9 +368,9 @@ This means any blocking errors on previous runs can cause missing data in backup
|
||||
|
||||
Using (``--incremental-by-files``) will request new data from the API **based on when the file was modified on filesystem**. e.g. if you modify the file yourself you may miss something.
|
||||
|
||||
Still saver than the previous version.
|
||||
Still safer than the previous version.
|
||||
|
||||
Specifically, issues and pull requests are handled like this.
|
||||
Incremental backup only changes how issue and pull request data is fetched.
|
||||
|
||||
Known blocking errors
|
||||
---------------------
|
||||
@@ -429,12 +434,12 @@ Github Backup Examples
|
||||
|
||||
Backup all repositories, including private ones using a classic token::
|
||||
|
||||
export ACCESS_TOKEN=SOME-GITHUB-TOKEN
|
||||
ACCESS_TOKEN=SOME-GITHUB-TOKEN
|
||||
github-backup WhiteHouse --token $ACCESS_TOKEN --organization --output-directory /tmp/white-house --repositories --private
|
||||
|
||||
Use a fine-grained access token to backup a single organization repository with everything else (wiki, pull requests, comments, issues etc)::
|
||||
|
||||
export FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN
|
||||
FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN
|
||||
ORGANIZATION=docker
|
||||
REPO=cli
|
||||
# e.g. git@github.com:docker/cli.git
|
||||
@@ -442,14 +447,14 @@ Use a fine-grained access token to backup a single organization repository with
|
||||
|
||||
Quietly and incrementally back up useful Github user data (public and private repos with SSH), including all issues, pulls, all public starred repos and gists (omitting "hooks", "releases" and therefore "assets" to prevent blocking). *Great for a cron job.* ::
|
||||
|
||||
export FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN
|
||||
FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN
|
||||
GH_USER=YOUR-GITHUB-USER
|
||||
|
||||
github-backup -f $FINE_ACCESS_TOKEN --prefer-ssh -o ~/github-backup/ -l error -P -i --all-starred --starred --watched --followers --following --issues --issue-comments --issue-events --pulls --pull-comments --pull-reviews --pull-commits --labels --milestones --security-advisories --discussions --repositories --wikis --releases --assets --attachments --pull-details --gists --starred-gists $GH_USER
|
||||
|
||||
|
||||
Debug an error/block or incomplete backup into a temporary directory. Omit "incremental" to fill a previous incomplete backup. ::
|
||||
|
||||
export FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN
|
||||
FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN
|
||||
GH_USER=YOUR-GITHUB-USER
|
||||
|
||||
github-backup -f $FINE_ACCESS_TOKEN -o /tmp/github-backup/ -l debug -P --all-starred --starred --watched --followers --following --issues --issue-comments --issue-events --pulls --pull-comments --pull-reviews --pull-commits --labels --milestones --discussions --repositories --wikis --releases --assets --pull-details --gists --starred-gists $GH_USER
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = "0.62.0"
|
||||
__version__ = "0.62.1"
|
||||
|
||||
@@ -6,7 +6,6 @@ import argparse
|
||||
import base64
|
||||
import calendar
|
||||
import codecs
|
||||
import errno
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
@@ -40,7 +39,6 @@ from .graphql_queries import (
|
||||
DISCUSSION_REPLIES_QUERY,
|
||||
)
|
||||
|
||||
FNULL = open(os.devnull, "w")
|
||||
FILE_URI_PREFIX = "file://"
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -128,13 +126,7 @@ def logging_subprocess(
|
||||
|
||||
def mkdir_p(*args):
|
||||
for path in args:
|
||||
try:
|
||||
os.makedirs(path)
|
||||
except OSError as exc: # Python >2.5
|
||||
if exc.errno == errno.EEXIST and os.path.isdir(path):
|
||||
pass
|
||||
else:
|
||||
raise
|
||||
os.makedirs(path, exist_ok=True)
|
||||
|
||||
|
||||
def mask_password(url, secret="*****"):
|
||||
@@ -488,7 +480,7 @@ def parse_args(args=None):
|
||||
"--attachments",
|
||||
action="store_true",
|
||||
dest="include_attachments",
|
||||
help="download user-attachments from issues, pull requests, and discussions",
|
||||
help="download user-attachments from issues, pull requests, and discussions [*]",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--throttle-limit",
|
||||
@@ -529,19 +521,18 @@ def get_auth(args, encode=True, for_git_cli=False):
|
||||
if platform.system() != "Darwin":
|
||||
raise Exception("Keychain arguments are only supported on Mac OSX")
|
||||
try:
|
||||
with open(os.devnull, "w") as devnull:
|
||||
token = subprocess.check_output(
|
||||
[
|
||||
"security",
|
||||
"find-generic-password",
|
||||
"-s",
|
||||
args.osx_keychain_item_name,
|
||||
"-a",
|
||||
args.osx_keychain_item_account,
|
||||
"-w",
|
||||
],
|
||||
stderr=devnull,
|
||||
).strip()
|
||||
token = subprocess.check_output(
|
||||
[
|
||||
"security",
|
||||
"find-generic-password",
|
||||
"-s",
|
||||
args.osx_keychain_item_name,
|
||||
"-a",
|
||||
args.osx_keychain_item_account,
|
||||
"-w",
|
||||
],
|
||||
stderr=subprocess.DEVNULL,
|
||||
).strip()
|
||||
token = token.decode("utf-8")
|
||||
auth = token + ":" + "x-oauth-basic"
|
||||
except subprocess.SubprocessError:
|
||||
@@ -554,7 +545,7 @@ def get_auth(args, encode=True, for_git_cli=False):
|
||||
)
|
||||
elif args.token_fine:
|
||||
if args.token_fine.startswith(FILE_URI_PREFIX):
|
||||
args.token_fine = read_file_contents(args.token_fine)
|
||||
args.token_fine = read_first_line(args.token_fine)
|
||||
|
||||
if args.token_fine.startswith("github_pat_"):
|
||||
auth = args.token_fine
|
||||
@@ -570,7 +561,7 @@ def get_auth(args, encode=True, for_git_cli=False):
|
||||
)
|
||||
args.token_classic = read_token_from_gh_cli(args)
|
||||
elif args.token_classic.startswith(FILE_URI_PREFIX):
|
||||
args.token_classic = read_file_contents(args.token_classic)
|
||||
args.token_classic = read_first_line(args.token_classic)
|
||||
|
||||
if not args.as_app:
|
||||
auth = args.token_classic + ":" + "x-oauth-basic"
|
||||
@@ -632,8 +623,9 @@ def get_github_host(args):
|
||||
return host
|
||||
|
||||
|
||||
def read_file_contents(file_uri):
|
||||
return open(file_uri[len(FILE_URI_PREFIX) :], "rt").readline().strip()
|
||||
def read_first_line(file_uri):
|
||||
with open(file_uri[len(FILE_URI_PREFIX) :], "rt") as f:
|
||||
return f.readline().strip()
|
||||
|
||||
|
||||
def read_token_from_gh_cli(args):
|
||||
@@ -806,6 +798,7 @@ def retrieve_data(args, template, query_args=None, paginated=True, lazy=False):
|
||||
response = json.loads(http_response.read().decode("utf-8"))
|
||||
break # Exit retry loop and handle the data returned
|
||||
except (
|
||||
ConnectionError,
|
||||
IncompleteRead,
|
||||
json.decoder.JSONDecodeError,
|
||||
TimeoutError,
|
||||
@@ -1297,7 +1290,7 @@ def get_jwt_signed_url_via_markdown_api(url, token, repo_context):
|
||||
request.add_header("Content-Type", "application/json")
|
||||
request.add_header("Accept", "application/vnd.github+json")
|
||||
|
||||
html = urlopen(request, timeout=30).read().decode("utf-8")
|
||||
html = urlopen(request, context=https_ctx, timeout=30).read().decode("utf-8")
|
||||
|
||||
# Parse JWT-signed URL from HTML response
|
||||
# Format: <img src="https://private-user-images.githubusercontent.com/...?jwt=..." ...>
|
||||
@@ -1781,7 +1774,10 @@ def get_authenticated_user(args):
|
||||
|
||||
|
||||
def check_git_lfs_install():
|
||||
exit_code = subprocess.call(["git", "lfs", "version"])
|
||||
exit_code = subprocess.call(
|
||||
["git", "lfs", "version"], stdin=subprocess.DEVNULL,
|
||||
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
|
||||
)
|
||||
if exit_code != 0:
|
||||
raise Exception(
|
||||
"The argument --lfs requires you to have Git LFS installed.\nYou can get it from https://git-lfs.github.com."
|
||||
@@ -1969,10 +1965,11 @@ def read_legacy_last_update(args, output_directory):
|
||||
return None, None
|
||||
|
||||
last_update_path = os.path.join(output_directory, INCREMENTAL_LAST_UPDATE_FILENAME)
|
||||
if os.path.exists(last_update_path):
|
||||
return last_update_path, open(last_update_path).read().strip()
|
||||
|
||||
return last_update_path, None
|
||||
try:
|
||||
with open(last_update_path) as f:
|
||||
return last_update_path, f.read().strip()
|
||||
except FileNotFoundError:
|
||||
return last_update_path, None
|
||||
|
||||
|
||||
def read_resource_last_update(args, resource_cwd, legacy_last_update=None):
|
||||
@@ -1980,13 +1977,13 @@ def read_resource_last_update(args, resource_cwd, legacy_last_update=None):
|
||||
return None
|
||||
|
||||
last_update_path = os.path.join(resource_cwd, INCREMENTAL_LAST_UPDATE_FILENAME)
|
||||
if os.path.exists(last_update_path):
|
||||
return open(last_update_path).read().strip()
|
||||
|
||||
if legacy_last_update and resource_backup_exists(resource_cwd):
|
||||
return legacy_last_update
|
||||
|
||||
return None
|
||||
try:
|
||||
with open(last_update_path) as f:
|
||||
return f.read().strip()
|
||||
except FileNotFoundError:
|
||||
if legacy_last_update and resource_backup_exists(resource_cwd):
|
||||
return legacy_last_update
|
||||
return None
|
||||
|
||||
|
||||
def write_resource_last_update(args, resource_cwd, repository):
|
||||
@@ -1995,7 +1992,8 @@ def write_resource_last_update(args, resource_cwd, repository):
|
||||
|
||||
mkdir_p(resource_cwd)
|
||||
last_update_path = os.path.join(resource_cwd, INCREMENTAL_LAST_UPDATE_FILENAME)
|
||||
open(last_update_path, "w").write(get_repository_checkpoint_time(repository))
|
||||
with open(last_update_path, "w") as f:
|
||||
f.write(get_repository_checkpoint_time(repository))
|
||||
|
||||
|
||||
def iter_incremental_resource_dirs(output_directory):
|
||||
@@ -2383,7 +2381,8 @@ def backup_discussions(args, repo_cwd, repository):
|
||||
discussions_since = None
|
||||
discussion_last_update_path = os.path.join(discussion_cwd, "last_update")
|
||||
if args.incremental and os.path.exists(discussion_last_update_path):
|
||||
discussions_since = open(discussion_last_update_path).read().strip()
|
||||
with open(discussion_last_update_path) as f:
|
||||
discussions_since = f.read().strip()
|
||||
|
||||
logger.info("Retrieving {0} discussions".format(repository["full_name"]))
|
||||
try:
|
||||
@@ -2469,7 +2468,8 @@ def backup_discussions(args, repo_cwd, repository):
|
||||
and newest_seen
|
||||
and (not discussions_since or newest_seen > discussions_since)
|
||||
):
|
||||
open(discussion_last_update_path, "w").write(newest_seen)
|
||||
with open(discussion_last_update_path, "w") as f:
|
||||
f.write(newest_seen)
|
||||
|
||||
attempted_count = len(summaries) - skipped_count
|
||||
if not summaries:
|
||||
@@ -2606,7 +2606,8 @@ def get_pull_reviews_since(args, pulls_cwd):
|
||||
# repository-level checkpoint would otherwise skip old PRs forever.
|
||||
return None, None, reviews_last_update_path
|
||||
|
||||
reviews_since = open(reviews_last_update_path).read().strip()
|
||||
with open(reviews_last_update_path) as f:
|
||||
reviews_since = f.read().strip()
|
||||
if args_since and reviews_since:
|
||||
return min(args_since, reviews_since), reviews_since, reviews_last_update_path
|
||||
|
||||
@@ -2758,7 +2759,8 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
|
||||
and not pull_review_errors
|
||||
and (not pull_reviews_since or newest_pull_update > pull_reviews_since)
|
||||
):
|
||||
open(pull_reviews_last_update_path, "w").write(newest_pull_update)
|
||||
with open(pull_reviews_last_update_path, "w") as f:
|
||||
f.write(newest_pull_update)
|
||||
|
||||
|
||||
def backup_milestones(args, repo_cwd, repository, repos_template):
|
||||
@@ -2980,7 +2982,8 @@ def fetch_repository(
|
||||
masked_remote_url = mask_password(remote_url)
|
||||
|
||||
initialized = subprocess.call(
|
||||
"git ls-remote " + remote_url, stdout=FNULL, stderr=FNULL, shell=True
|
||||
["git", "ls-remote", remote_url], stdin=subprocess.DEVNULL,
|
||||
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
|
||||
)
|
||||
if initialized == 128:
|
||||
if ".wiki.git" in remote_url:
|
||||
|
||||
Reference in New Issue
Block a user