Compare commits

...

21 Commits

Author SHA1 Message Date
GitHub Action
bd6eea02d5 Release version 0.62.1 2026-04-30 15:52:41 +00:00
Jose Diaz-Gonzalez
72fa5d3dcd Merge pull request #510 from Changaco/documentation-improvements
Documentation improvements
2026-04-30 11:52:02 -04:00
Jose Diaz-Gonzalez
c5d11c000a Merge pull request #509 from Changaco/code-cleanup
Code cleanup
2026-04-30 11:51:31 -04:00
Changaco
8e76089565 document that nothing is saved by default 2026-04-30 14:43:27 +00:00
Changaco
d30d9bfe60 eliminate trailing spaces 2026-04-30 14:43:27 +00:00
Changaco
a2391a550e remove pointless and unsafe exports in examples 2026-04-30 14:43:27 +00:00
Changaco
9340aa3aaa try to clarify what --incremental actually does 2026-04-30 14:43:27 +00:00
Changaco
543d76f24b fix a typo in the README 2026-04-30 14:43:27 +00:00
Changaco
3cda5a01fd document that --all doesn't imply --attachments 2026-04-30 14:43:27 +00:00
Changaco
17b79fcbef rename a function to match what it actually does 2026-04-30 14:43:08 +00:00
Changaco
f1fca0f9b7 don't leave files open 2026-04-30 14:43:08 +00:00
Changaco
ccc27b95f7 remove legacy code in mkdir_p function 2026-04-30 14:43:08 +00:00
Changaco
f3eabf0bfe don't pass stdin when doing so can't do any good
When the child process doesn't inherit stderr, it can't ask the user for input, so it shouldn't inherit stdin either.
2026-04-30 14:38:42 +00:00
Changaco
b92aee6f11 use subprocess.DEVNULL instead of emulating it 2026-04-30 14:38:42 +00:00
Jose Diaz-Gonzalez
4d1772319f Merge pull request #508 from Changaco/security-improvements
Security-related changes
2026-04-30 10:19:19 -04:00
Jose Diaz-Gonzalez
2c7fdab54e Merge pull request #507 from Changaco/output-cleanup
Hide output of `git lfs version`
2026-04-30 10:19:02 -04:00
Jose Diaz-Gonzalez
334c6c6546 Merge pull request #506 from Changaco/fix-crash
Fix a crash
2026-04-30 10:18:42 -04:00
Changaco
2f130ecd66 remove bad invocation of the system shell 2026-04-30 12:24:26 +00:00
Changaco
ddf7f82e65 add missing context argument to urlopen call 2026-04-30 12:24:26 +00:00
Changaco
ddf82f1115 suppress output of call to git lfs version 2026-04-30 12:22:57 +00:00
Changaco
0638666bc7 handle more network errors
```python-traceback
Traceback (most recent call last):
  File ".local/bin/github-backup", line 6, in <module>
    sys.exit(main())
             ~~~~^^
  File ".local/share/pipx/venvs/github-backup/lib/python3.14/site-packages/github_backup/cli.py", line 83, in main
    backup_repositories(args, output_directory, repositories)
    ~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File ".local/share/pipx/venvs/github-backup/lib/python3.14/site-packages/github_backup/github_backup.py", line 1845, in backup_repositories
    backup_pulls(args, repo_cwd, repository, repos_template)
    ~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File ".local/share/pipx/venvs/github-backup/lib/python3.14/site-packages/github_backup/github_backup.py", line 2019, in backup_pulls
    pulls[number]["commit_data"] = retrieve_data(args, template)
                                   ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^
  File ".local/share/pipx/venvs/github-backup/lib/python3.14/site-packages/github_backup/github_backup.py", line 766, in retrieve_data
    return list(fetch_all())
  File ".local/share/pipx/venvs/github-backup/lib/python3.14/site-packages/github_backup/github_backup.py", line 717, in fetch_all
    response = json.loads(http_response.read().decode("utf-8"))
                          ~~~~~~~~~~~~~~~~~~^^
  File "/usr/lib/python3.14/http/client.py", line 500, in read
    s = self._safe_read(self.length)
  File "/usr/lib/python3.14/http/client.py", line 648, in _safe_read
    data = self.fp.read(cursize)
  File "/usr/lib/python3.14/socket.py", line 725, in readinto
    return self._sock.recv_into(b)
           ~~~~~~~~~~~~~~~~~~~~^^^
  File "/usr/lib/python3.14/ssl.py", line 1304, in recv_into
    return self.read(nbytes, buffer)
           ~~~~~~~~~^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.14/ssl.py", line 1138, in read
    return self._sslobj.read(len, buffer)
           ~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^
ConnectionResetError: [Errno 104] Connection reset by peer
```
2026-04-29 12:39:13 +00:00
4 changed files with 122 additions and 58 deletions

View File

@@ -1,9 +1,65 @@
Changelog
=========
0.62.0 (2026-04-29)
0.62.1 (2026-04-30)
-------------------
------------------------
- Document that nothing is saved by default. [Changaco]
- Eliminate trailing spaces. [Changaco]
- Remove pointless and unsafe `export`s in examples. [Changaco]
- Try to clarify what `--incremental` actually does. [Changaco]
- Fix a typo in the README. [Changaco]
- Document that `--all` doesn't imply `--attachments`. [Changaco]
- Rename a function to match what it actually does. [Changaco]
- Don't leave files open. [Changaco]
- Remove legacy code in `mkdir_p` function. [Changaco]
- Don't pass stdin when doing so can't do any good. [Changaco]
When the child process doesn't inherit stderr, it can't ask the user for input, so it shouldn't inherit stdin either.
- Use `subprocess.DEVNULL` instead of emulating it. [Changaco]
- Remove bad invocation of the system shell. [Changaco]
- Add missing `context` argument to `urlopen` call. [Changaco]
- Suppress output of call to `git lfs version`. [Changaco]
- Handle more network errors. [Changaco]
```python-traceback
Traceback (most recent call last):
File ".local/bin/github-backup", line 6, in <module>
sys.exit(main())
~~~~^^
File ".local/share/pipx/venvs/github-backup/lib/python3.14/site-packages/github_backup/cli.py", line 83, in main
backup_repositories(args, output_directory, repositories)
~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".local/share/pipx/venvs/github-backup/lib/python3.14/site-packages/github_backup/github_backup.py", line 1845, in backup_repositories
backup_pulls(args, repo_cwd, repository, repos_template)
~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".local/share/pipx/venvs/github-backup/lib/python3.14/site-packages/github_backup/github_backup.py", line 2019, in backup_pulls
pulls[number]["commit_data"] = retrieve_data(args, template)
~~~~~~~~~~~~~^^^^^^^^^^^^^^^^
File ".local/share/pipx/venvs/github-backup/lib/python3.14/site-packages/github_backup/github_backup.py", line 766, in retrieve_data
return list(fetch_all())
File ".local/share/pipx/venvs/github-backup/lib/python3.14/site-packages/github_backup/github_backup.py", line 717, in fetch_all
response = json.loads(http_response.read().decode("utf-8"))
~~~~~~~~~~~~~~~~~~^^
File "/usr/lib/python3.14/http/client.py", line 500, in read
s = self._safe_read(self.length)
File "/usr/lib/python3.14/http/client.py", line 648, in _safe_read
data = self.fp.read(cursize)
File "/usr/lib/python3.14/socket.py", line 725, in readinto
return self._sock.recv_into(b)
~~~~~~~~~~~~~~~~~~~~^^^
File "/usr/lib/python3.14/ssl.py", line 1304, in recv_into
return self.read(nbytes, buffer)
~~~~~~~~~^^^^^^^^^^^^^^^^
File "/usr/lib/python3.14/ssl.py", line 1138, in read
return self._sslobj.read(len, buffer)
~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^
ConnectionResetError: [Errno 104] Connection reset by peer
```
0.62.0 (2026-04-29)
-------------------
- Skip checkpoint-equal incremental items. [Duncan Ogilvie]
- Avoid redundant release asset list requests. [Duncan Ogilvie]
- Reduce unnecessary pull requests with incremental fetching. [Duncan

View File

@@ -22,7 +22,7 @@ Using PIP via PyPI::
Using PIP via Github (more likely the latest version)::
pip install git+https://github.com/josegonzalez/python-github-backup.git#egg=github-backup
*Install note for python newcomers:*
Python scripts are unlikely to be included in your ``$PATH`` by default, which means github-backup cannot be run directly in a terminal with ``$ github-backup ...``. You can either add Python's install path to your environment's ``$PATH`` or call the script directly, e.g. using ``$ ~/.local/bin/github-backup``.
@@ -249,7 +249,7 @@ Note: When you run github-backup, you will be asked whether you want to allow "
Github Rate-limit and Throttling
--------------------------------
"github-backup" will automatically throttle itself based on feedback from the Github API.
"github-backup" will automatically throttle itself based on feedback from the Github API.
Their API is usually rate-limited to 5000 calls per hour. The API will ask github-backup to pause until a specific time when the limit is reset again (at the start of the next hour). This continues until the backup is complete.
@@ -325,7 +325,12 @@ Gotchas / Known-issues
All is not everything
---------------------
The ``--all`` argument does not include: cloning private repos (``-P, --private``), cloning forks (``-F, --fork``), cloning starred repositories (``--all-starred``), ``--pull-details``, cloning LFS repositories (``--lfs``), cloning gists (``--gists``) or cloning starred gist repos (``--starred-gists``). See examples for more.
The ``--all`` argument does not include: downloading attachments from issue and pull request comments (``--attachments``), cloning private repos (``-P, --private``), cloning forks (``-F, --fork``), cloning starred repositories (``--all-starred``), ``--pull-details``, cloning LFS repositories (``--lfs``), cloning gists (``--gists``) or cloning starred gist repos (``--starred-gists``). See examples for more.
Saves nothing if no arguments are passed
----------------------------------------
At least one argument like ``--all`` or ``--repositories`` is needed for github-backup to actually save data. Without relevant arguments, github-backup fetches some data from GitHub but doesn't put any of it into files.
Starred repository size
-----------------------
@@ -363,9 +368,9 @@ This means any blocking errors on previous runs can cause missing data in backup
Using ``--incremental-by-files`` will request new data from the API **based on when the file was modified on the filesystem**; e.g. if you modify the file yourself, you may miss something.
Still saver than the previous version.
Still safer than the previous version.
Specifically, issues and pull requests are handled like this.
Incremental backup only changes how issue and pull request data is fetched.
Known blocking errors
---------------------
@@ -429,12 +434,12 @@ Github Backup Examples
Backup all repositories, including private ones using a classic token::
export ACCESS_TOKEN=SOME-GITHUB-TOKEN
ACCESS_TOKEN=SOME-GITHUB-TOKEN
github-backup WhiteHouse --token $ACCESS_TOKEN --organization --output-directory /tmp/white-house --repositories --private
Use a fine-grained access token to backup a single organization repository with everything else (wiki, pull requests, comments, issues etc)::
export FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN
FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN
ORGANIZATION=docker
REPO=cli
# e.g. git@github.com:docker/cli.git
@@ -442,14 +447,14 @@ Use a fine-grained access token to backup a single organization repository with
Quietly and incrementally back up useful GitHub user data (public and private repos with SSH), including all issues, pulls, all public starred repos and gists (omitting "hooks", "releases" and therefore "assets" to prevent blocking). *Great for a cron job.* ::
export FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN
FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN
GH_USER=YOUR-GITHUB-USER
github-backup -f $FINE_ACCESS_TOKEN --prefer-ssh -o ~/github-backup/ -l error -P -i --all-starred --starred --watched --followers --following --issues --issue-comments --issue-events --pulls --pull-comments --pull-reviews --pull-commits --labels --milestones --security-advisories --discussions --repositories --wikis --releases --assets --attachments --pull-details --gists --starred-gists $GH_USER
Debug an error/block or incomplete backup into a temporary directory. Omit "incremental" to fill a previous incomplete backup. ::
export FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN
FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN
GH_USER=YOUR-GITHUB-USER
github-backup -f $FINE_ACCESS_TOKEN -o /tmp/github-backup/ -l debug -P --all-starred --starred --watched --followers --following --issues --issue-comments --issue-events --pulls --pull-comments --pull-reviews --pull-commits --labels --milestones --discussions --repositories --wikis --releases --assets --pull-details --gists --starred-gists $GH_USER

View File

@@ -1 +1 @@
__version__ = "0.62.0"
__version__ = "0.62.1"

View File

@@ -6,7 +6,6 @@ import argparse
import base64
import calendar
import codecs
import errno
import json
import logging
import os
@@ -40,7 +39,6 @@ from .graphql_queries import (
DISCUSSION_REPLIES_QUERY,
)
FNULL = open(os.devnull, "w")
FILE_URI_PREFIX = "file://"
logger = logging.getLogger(__name__)
@@ -128,13 +126,7 @@ def logging_subprocess(
def mkdir_p(*args):
for path in args:
try:
os.makedirs(path)
except OSError as exc: # Python >2.5
if exc.errno == errno.EEXIST and os.path.isdir(path):
pass
else:
raise
os.makedirs(path, exist_ok=True)
def mask_password(url, secret="*****"):
@@ -488,7 +480,7 @@ def parse_args(args=None):
"--attachments",
action="store_true",
dest="include_attachments",
help="download user-attachments from issues, pull requests, and discussions",
help="download user-attachments from issues, pull requests, and discussions [*]",
)
parser.add_argument(
"--throttle-limit",
@@ -529,19 +521,18 @@ def get_auth(args, encode=True, for_git_cli=False):
if platform.system() != "Darwin":
raise Exception("Keychain arguments are only supported on Mac OSX")
try:
with open(os.devnull, "w") as devnull:
token = subprocess.check_output(
[
"security",
"find-generic-password",
"-s",
args.osx_keychain_item_name,
"-a",
args.osx_keychain_item_account,
"-w",
],
stderr=devnull,
).strip()
token = subprocess.check_output(
[
"security",
"find-generic-password",
"-s",
args.osx_keychain_item_name,
"-a",
args.osx_keychain_item_account,
"-w",
],
stderr=subprocess.DEVNULL,
).strip()
token = token.decode("utf-8")
auth = token + ":" + "x-oauth-basic"
except subprocess.SubprocessError:
@@ -554,7 +545,7 @@ def get_auth(args, encode=True, for_git_cli=False):
)
elif args.token_fine:
if args.token_fine.startswith(FILE_URI_PREFIX):
args.token_fine = read_file_contents(args.token_fine)
args.token_fine = read_first_line(args.token_fine)
if args.token_fine.startswith("github_pat_"):
auth = args.token_fine
@@ -570,7 +561,7 @@ def get_auth(args, encode=True, for_git_cli=False):
)
args.token_classic = read_token_from_gh_cli(args)
elif args.token_classic.startswith(FILE_URI_PREFIX):
args.token_classic = read_file_contents(args.token_classic)
args.token_classic = read_first_line(args.token_classic)
if not args.as_app:
auth = args.token_classic + ":" + "x-oauth-basic"
@@ -632,8 +623,9 @@ def get_github_host(args):
return host
def read_file_contents(file_uri):
return open(file_uri[len(FILE_URI_PREFIX) :], "rt").readline().strip()
def read_first_line(file_uri):
with open(file_uri[len(FILE_URI_PREFIX) :], "rt") as f:
return f.readline().strip()
def read_token_from_gh_cli(args):
@@ -806,6 +798,7 @@ def retrieve_data(args, template, query_args=None, paginated=True, lazy=False):
response = json.loads(http_response.read().decode("utf-8"))
break # Exit retry loop and handle the data returned
except (
ConnectionError,
IncompleteRead,
json.decoder.JSONDecodeError,
TimeoutError,
@@ -1297,7 +1290,7 @@ def get_jwt_signed_url_via_markdown_api(url, token, repo_context):
request.add_header("Content-Type", "application/json")
request.add_header("Accept", "application/vnd.github+json")
html = urlopen(request, timeout=30).read().decode("utf-8")
html = urlopen(request, context=https_ctx, timeout=30).read().decode("utf-8")
# Parse JWT-signed URL from HTML response
# Format: <img src="https://private-user-images.githubusercontent.com/...?jwt=..." ...>
@@ -1781,7 +1774,10 @@ def get_authenticated_user(args):
def check_git_lfs_install():
exit_code = subprocess.call(["git", "lfs", "version"])
exit_code = subprocess.call(
["git", "lfs", "version"], stdin=subprocess.DEVNULL,
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
)
if exit_code != 0:
raise Exception(
"The argument --lfs requires you to have Git LFS installed.\nYou can get it from https://git-lfs.github.com."
@@ -1969,10 +1965,11 @@ def read_legacy_last_update(args, output_directory):
return None, None
last_update_path = os.path.join(output_directory, INCREMENTAL_LAST_UPDATE_FILENAME)
if os.path.exists(last_update_path):
return last_update_path, open(last_update_path).read().strip()
return last_update_path, None
try:
with open(last_update_path) as f:
return last_update_path, f.read().strip()
except FileNotFoundError:
return last_update_path, None
def read_resource_last_update(args, resource_cwd, legacy_last_update=None):
@@ -1980,13 +1977,13 @@ def read_resource_last_update(args, resource_cwd, legacy_last_update=None):
return None
last_update_path = os.path.join(resource_cwd, INCREMENTAL_LAST_UPDATE_FILENAME)
if os.path.exists(last_update_path):
return open(last_update_path).read().strip()
if legacy_last_update and resource_backup_exists(resource_cwd):
return legacy_last_update
return None
try:
with open(last_update_path) as f:
return f.read().strip()
except FileNotFoundError:
if legacy_last_update and resource_backup_exists(resource_cwd):
return legacy_last_update
return None
def write_resource_last_update(args, resource_cwd, repository):
@@ -1995,7 +1992,8 @@ def write_resource_last_update(args, resource_cwd, repository):
mkdir_p(resource_cwd)
last_update_path = os.path.join(resource_cwd, INCREMENTAL_LAST_UPDATE_FILENAME)
open(last_update_path, "w").write(get_repository_checkpoint_time(repository))
with open(last_update_path, "w") as f:
f.write(get_repository_checkpoint_time(repository))
def iter_incremental_resource_dirs(output_directory):
@@ -2383,7 +2381,8 @@ def backup_discussions(args, repo_cwd, repository):
discussions_since = None
discussion_last_update_path = os.path.join(discussion_cwd, "last_update")
if args.incremental and os.path.exists(discussion_last_update_path):
discussions_since = open(discussion_last_update_path).read().strip()
with open(discussion_last_update_path) as f:
discussions_since = f.read().strip()
logger.info("Retrieving {0} discussions".format(repository["full_name"]))
try:
@@ -2469,7 +2468,8 @@ def backup_discussions(args, repo_cwd, repository):
and newest_seen
and (not discussions_since or newest_seen > discussions_since)
):
open(discussion_last_update_path, "w").write(newest_seen)
with open(discussion_last_update_path, "w") as f:
f.write(newest_seen)
attempted_count = len(summaries) - skipped_count
if not summaries:
@@ -2606,7 +2606,8 @@ def get_pull_reviews_since(args, pulls_cwd):
# repository-level checkpoint would otherwise skip old PRs forever.
return None, None, reviews_last_update_path
reviews_since = open(reviews_last_update_path).read().strip()
with open(reviews_last_update_path) as f:
reviews_since = f.read().strip()
if args_since and reviews_since:
return min(args_since, reviews_since), reviews_since, reviews_last_update_path
@@ -2758,7 +2759,8 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
and not pull_review_errors
and (not pull_reviews_since or newest_pull_update > pull_reviews_since)
):
open(pull_reviews_last_update_path, "w").write(newest_pull_update)
with open(pull_reviews_last_update_path, "w") as f:
f.write(newest_pull_update)
def backup_milestones(args, repo_cwd, repository, repos_template):
@@ -2980,7 +2982,8 @@ def fetch_repository(
masked_remote_url = mask_password(remote_url)
initialized = subprocess.call(
"git ls-remote " + remote_url, stdout=FNULL, stderr=FNULL, shell=True
["git", "ls-remote", remote_url], stdin=subprocess.DEVNULL,
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
)
if initialized == 128:
if ".wiki.git" in remote_url: