mirror of
https://github.com/josegonzalez/python-github-backup.git
synced 2025-12-05 16:18:02 +01:00
Compare commits
34 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
07e32b186c | ||
|
|
dcc90b747a | ||
|
|
f414fac108 | ||
|
|
38692bc836 | ||
|
|
81362e5596 | ||
|
|
753a26d0d6 | ||
|
|
b629a865f4 | ||
|
|
75ec773a6f | ||
|
|
f8a16ee0f8 | ||
|
|
63441ebfbc | ||
|
|
7ad324225e | ||
|
|
885e94a102 | ||
|
|
9e1800f56e | ||
|
|
d057ee0d04 | ||
|
|
64562f2460 | ||
|
|
f7f9ffd017 | ||
|
|
048ef04e2a | ||
|
|
b1acfed83a | ||
|
|
18e78a4d66 | ||
|
|
1ed5427043 | ||
|
|
c2e3665ed8 | ||
|
|
0a30a92fe4 | ||
|
|
cc52587f52 | ||
|
|
853b7c46a1 | ||
|
|
e23d12d490 | ||
|
|
f8e1151111 | ||
|
|
664c2a765e | ||
|
|
fa7148d38f | ||
|
|
480ce3ce2a | ||
|
|
943e84e3d9 | ||
|
|
0c924c3158 | ||
|
|
f62c4eaf8b | ||
|
|
a53d7f6849 | ||
|
|
4e571d0735 |
55
CHANGES.rst
55
CHANGES.rst
@@ -1,14 +1,67 @@
|
||||
Changelog
|
||||
=========
|
||||
|
||||
0.37.2 (2021-01-01)
|
||||
0.39.0 (2021-03-18)
|
||||
-------------------
|
||||
-------------------
|
||||
|
||||
Fix
|
||||
~~~
|
||||
- Fix missing INFO logs. [Gallo Feliz]
|
||||
|
||||
Other
|
||||
~~~~~
|
||||
- Merge pull request #173 from gallofeliz/make-compatible-python-call.
|
||||
[Jose Diaz-Gonzalez]
|
||||
|
||||
Try to make compatible code with direct Python call ; reduce the hard link of the code with the cli
|
||||
- Try to make compatible code with direct Python call ; reduce the hard
|
||||
link of the code with the cli. [Gallo Feliz]
|
||||
- Merge pull request #174 from atorrescogollo/master. [Jose Diaz-
|
||||
Gonzalez]
|
||||
|
||||
Fixed release_name with slash bug
|
||||
- Fixed release_name with slash bug. [Álvaro Torres Cogollo]
|
||||
|
||||
|
||||
0.38.0 (2021-02-13)
|
||||
-------------------
|
||||
|
||||
Fix
|
||||
~~~
|
||||
- Always clone with OAuth token when provided. [Samantha Baldwin]
|
||||
|
||||
Github Enterprise servers with 'Anonymous Git read access' disabled
|
||||
cause `git ls-remote` to fail (128) for a repo's `clone_url`. Using the
|
||||
OAuth token when provided allows cloning private AND public repos when
|
||||
Anonymous Git read access is disabled.
|
||||
|
||||
Other
|
||||
~~~~~
|
||||
- Release version 0.38.0. [Jose Diaz-Gonzalez]
|
||||
- Merge pull request #172 from samanthaq/always-use-oauth-when-provided.
|
||||
[Jose Diaz-Gonzalez]
|
||||
|
||||
fix: Always clone with OAuth token when provided
|
||||
- Merge pull request #170 from Mindavi/bugfix/broken-url. [Jose Diaz-
|
||||
Gonzalez]
|
||||
|
||||
Fix broken and incorrect link to github repository
|
||||
- Change broken link to a fork to a working link to upstream. [Rick van
|
||||
Schijndel]
|
||||
|
||||
|
||||
0.37.2 (2021-01-02)
|
||||
-------------------
|
||||
|
||||
Fix
|
||||
~~~
|
||||
- Use distutils.core on error. [Jose Diaz-Gonzalez]
|
||||
|
||||
Other
|
||||
~~~~~
|
||||
- Release version 0.37.2. [Jose Diaz-Gonzalez]
|
||||
|
||||
|
||||
0.37.1 (2021-01-02)
|
||||
-------------------
|
||||
|
||||
@@ -41,8 +41,8 @@ CLI Usage is as follows::
|
||||
[-P] [-F] [--prefer-ssh] [-v]
|
||||
[--keychain-name OSX_KEYCHAIN_ITEM_NAME]
|
||||
[--keychain-account OSX_KEYCHAIN_ITEM_ACCOUNT]
|
||||
[--releases] [--assets] [--throttle-limit THROTTLE_LIMIT]
|
||||
[--throttle-pause THROTTLE_PAUSE]
|
||||
[--releases] [--assets] [--exclude [REPOSITORY [REPOSITORY ...]]]
|
||||
[--throttle-limit THROTTLE_LIMIT] [--throttle-pause THROTTLE_PAUSE]
|
||||
USER
|
||||
|
||||
Backup a github account
|
||||
@@ -112,6 +112,8 @@ CLI Usage is as follows::
|
||||
binaries
|
||||
--assets include assets alongside release information; only
|
||||
applies if including releases
|
||||
--exclude [REPOSITORY [REPOSITORY ...]]
|
||||
names of repositories to exclude from backup.
|
||||
--throttle-limit THROTTLE_LIMIT
|
||||
start throttling of GitHub API requests after this
|
||||
amount of API requests remain
|
||||
@@ -178,4 +180,4 @@ This project currently contains no unit tests. To run linting::
|
||||
.. |PyPI| image:: https://img.shields.io/pypi/v/github-backup.svg
|
||||
:target: https://pypi.python.org/pypi/github-backup/
|
||||
.. |Python Versions| image:: https://img.shields.io/pypi/pyversions/github-backup.svg
|
||||
:target: https://github.com/albertyw/github-backup
|
||||
:target: https://github.com/josegonzalez/python-github-backup
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import os
|
||||
import os, sys, logging
|
||||
|
||||
from github_backup.github_backup import (
|
||||
backup_account,
|
||||
@@ -9,11 +9,17 @@ from github_backup.github_backup import (
|
||||
filter_repositories,
|
||||
get_authenticated_user,
|
||||
log_info,
|
||||
log_warning,
|
||||
mkdir_p,
|
||||
parse_args,
|
||||
retrieve_repositories,
|
||||
)
|
||||
|
||||
logging.basicConfig(
|
||||
format='%(asctime)s.%(msecs)03d: %(message)s',
|
||||
datefmt='%Y-%m-%dT%H:%M:%S',
|
||||
level=logging.INFO
|
||||
)
|
||||
|
||||
def main():
|
||||
args = parse_args()
|
||||
@@ -39,4 +45,8 @@ def main():
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
try:
|
||||
main()
|
||||
except Exception as e:
|
||||
log_warning(str(e))
|
||||
sys.exit(1)
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = '0.37.2'
|
||||
__version__ = '0.42.0'
|
||||
|
||||
@@ -11,12 +11,12 @@ import datetime
|
||||
import errno
|
||||
import getpass
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import select
|
||||
import subprocess
|
||||
import sys
|
||||
import logging
|
||||
import time
|
||||
import platform
|
||||
from urllib.parse import urlparse
|
||||
@@ -27,6 +27,7 @@ from urllib.request import urlopen
|
||||
from urllib.request import Request
|
||||
from urllib.request import HTTPRedirectHandler
|
||||
from urllib.request import build_opener
|
||||
from http.client import IncompleteRead
|
||||
|
||||
try:
|
||||
from . import __version__
|
||||
@@ -41,14 +42,6 @@ def _get_log_date():
|
||||
return datetime.datetime.isoformat(datetime.datetime.now())
|
||||
|
||||
|
||||
def log_error(message):
|
||||
"""
|
||||
Log message (str) or messages (List[str]) to stderr and exit with status 1
|
||||
"""
|
||||
log_warning(message)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def log_info(message):
|
||||
"""
|
||||
Log message (str) or messages (List[str]) to stdout
|
||||
@@ -57,7 +50,7 @@ def log_info(message):
|
||||
message = [message]
|
||||
|
||||
for msg in message:
|
||||
sys.stdout.write("{0}: {1}\n".format(_get_log_date(), msg))
|
||||
logging.info(msg)
|
||||
|
||||
|
||||
def log_warning(message):
|
||||
@@ -68,7 +61,7 @@ def log_warning(message):
|
||||
message = [message]
|
||||
|
||||
for msg in message:
|
||||
sys.stderr.write("{0}: {1}\n".format(_get_log_date(), msg))
|
||||
logging.warning(msg)
|
||||
|
||||
|
||||
def logging_subprocess(popenargs,
|
||||
@@ -140,7 +133,7 @@ def mask_password(url, secret='*****'):
|
||||
return url.replace(parsed.password, secret)
|
||||
|
||||
|
||||
def parse_args():
|
||||
def parse_args(args=None):
|
||||
parser = argparse.ArgumentParser(description='Backup a github account')
|
||||
parser.add_argument('user',
|
||||
metavar='USER',
|
||||
@@ -246,6 +239,10 @@ def parse_args():
|
||||
action='store_true',
|
||||
dest='bare_clone',
|
||||
help='clone bare repositories')
|
||||
parser.add_argument('--no-prune',
|
||||
action='store_true',
|
||||
dest='no_prune',
|
||||
help='disable prune option for git fetch')
|
||||
parser.add_argument('--lfs',
|
||||
action='store_true',
|
||||
dest='lfs_clone',
|
||||
@@ -331,7 +328,11 @@ def parse_args():
|
||||
type=float,
|
||||
default=30.0,
|
||||
help='wait this amount of seconds when API request throttling is active (default: 30.0, requires --throttle-limit to be set)')
|
||||
return parser.parse_args()
|
||||
parser.add_argument('--exclude',
|
||||
dest='exclude',
|
||||
help='names of repositories to exclude',
|
||||
nargs="*")
|
||||
return parser.parse_args(args)
|
||||
|
||||
|
||||
def get_auth(args, encode=True, for_git_cli=False):
|
||||
@@ -339,10 +340,10 @@ def get_auth(args, encode=True, for_git_cli=False):
|
||||
|
||||
if args.osx_keychain_item_name:
|
||||
if not args.osx_keychain_item_account:
|
||||
log_error('You must specify both name and account fields for osx keychain password items')
|
||||
raise Exception('You must specify both name and account fields for osx keychain password items')
|
||||
else:
|
||||
if platform.system() != 'Darwin':
|
||||
log_error("Keychain arguments are only supported on Mac OSX")
|
||||
raise Exception("Keychain arguments are only supported on Mac OSX")
|
||||
try:
|
||||
with open(os.devnull, 'w') as devnull:
|
||||
token = (subprocess.check_output([
|
||||
@@ -353,9 +354,9 @@ def get_auth(args, encode=True, for_git_cli=False):
|
||||
token = token.decode('utf-8')
|
||||
auth = token + ':' + 'x-oauth-basic'
|
||||
except subprocess.SubprocessError:
|
||||
log_error('No password item matching the provided name and account could be found in the osx keychain.')
|
||||
raise Exception('No password item matching the provided name and account could be found in the osx keychain.')
|
||||
elif args.osx_keychain_item_account:
|
||||
log_error('You must specify both name and account fields for osx keychain password items')
|
||||
raise Exception('You must specify both name and account fields for osx keychain password items')
|
||||
elif args.token:
|
||||
_path_specifier = 'file://'
|
||||
if args.token.startswith(_path_specifier):
|
||||
@@ -377,7 +378,7 @@ def get_auth(args, encode=True, for_git_cli=False):
|
||||
password = urlquote(args.password)
|
||||
auth = args.username + ':' + password
|
||||
elif args.password:
|
||||
log_error('You must specify a username for basic auth')
|
||||
raise Exception('You must specify a username for basic auth')
|
||||
|
||||
if not auth:
|
||||
return None
|
||||
@@ -420,7 +421,7 @@ def get_github_repo_url(args, repository):
|
||||
return repository['ssh_url']
|
||||
|
||||
auth = get_auth(args, encode=False, for_git_cli=True)
|
||||
if auth and repository['private'] is True:
|
||||
if auth:
|
||||
repo_url = 'https://{0}@{1}/{2}/{3}.git'.format(
|
||||
auth,
|
||||
get_github_host(args),
|
||||
@@ -444,6 +445,21 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False):
|
||||
r, errors = _get_response(request, auth, template)
|
||||
|
||||
status_code = int(r.getcode())
|
||||
# Check if we got correct data
|
||||
try:
|
||||
response = json.loads(r.read().decode('utf-8'))
|
||||
except IncompleteRead:
|
||||
log_warning("Incomplete read error detected")
|
||||
read_error = True
|
||||
except json.decoder.JSONDecodeError:
|
||||
log_warning("JSON decode error detected")
|
||||
read_error = True
|
||||
except TimeoutError:
|
||||
log_warning("Tiemout error detected")
|
||||
read_error = True
|
||||
else:
|
||||
read_error = False
|
||||
|
||||
# be gentle with API request limit and throttle requests if remaining requests getting low
|
||||
limit_remaining = int(r.headers.get('x-ratelimit-remaining', 0))
|
||||
if args.throttle_limit and limit_remaining <= args.throttle_limit:
|
||||
@@ -454,21 +470,37 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False):
|
||||
time.sleep(args.throttle_pause)
|
||||
|
||||
retries = 0
|
||||
while retries < 3 and status_code == 502:
|
||||
log_warning('API request returned HTTP 502: Bad Gateway. Retrying in 5 seconds')
|
||||
while retries < 3 and (status_code == 502 or read_error):
|
||||
log_warning('API request failed. Retrying in 5 seconds')
|
||||
retries += 1
|
||||
time.sleep(5)
|
||||
request = _construct_request(per_page, page, query_args, template, auth, as_app=args.as_app) # noqa
|
||||
r, errors = _get_response(request, auth, template)
|
||||
|
||||
status_code = int(r.getcode())
|
||||
try:
|
||||
response = json.loads(r.read().decode('utf-8'))
|
||||
read_error = False
|
||||
except IncompleteRead:
|
||||
log_warning("Incomplete read error detected")
|
||||
read_error = True
|
||||
except json.decoder.JSONDecodeError:
|
||||
log_warning("JSON decode error detected")
|
||||
read_error = True
|
||||
except TimeoutError:
|
||||
log_warning("Tiemout error detected")
|
||||
read_error = True
|
||||
|
||||
if status_code != 200:
|
||||
template = 'API request returned HTTP {0}: {1}'
|
||||
errors.append(template.format(status_code, r.reason))
|
||||
log_error(errors)
|
||||
raise Exception(', '.join(errors))
|
||||
|
||||
if read_error:
|
||||
template = 'API request problem reading response for {0}'
|
||||
errors.append(template.format(request))
|
||||
raise Exception(', '.join(errors))
|
||||
|
||||
response = json.loads(r.read().decode('utf-8'))
|
||||
if len(errors) == 0:
|
||||
if type(response) == list:
|
||||
for resp in response:
|
||||
@@ -479,7 +511,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False):
|
||||
yield response
|
||||
|
||||
if len(errors) > 0:
|
||||
log_error(errors)
|
||||
raise Exception(', '.join(errors))
|
||||
|
||||
if single_request:
|
||||
break
|
||||
@@ -509,12 +541,12 @@ def _get_response(request, auth, template):
|
||||
r = exc
|
||||
except URLError as e:
|
||||
log_warning(e.reason)
|
||||
should_continue = _request_url_error(template, retry_timeout)
|
||||
should_continue, retry_timeout = _request_url_error(template, retry_timeout)
|
||||
if not should_continue:
|
||||
raise
|
||||
except socket.error as e:
|
||||
log_warning(e.strerror)
|
||||
should_continue = _request_url_error(template, retry_timeout)
|
||||
should_continue, retry_timeout = _request_url_error(template, retry_timeout)
|
||||
if not should_continue:
|
||||
raise
|
||||
|
||||
@@ -574,16 +606,15 @@ def _request_http_error(exc, auth, errors):
|
||||
|
||||
|
||||
def _request_url_error(template, retry_timeout):
    """
    Decide whether a request that hit a connection error may be retried.

    Logs that the request timed out and consumes one retry from the budget.

    :param template: value interpolated into the log and error messages
        (callers pass the request template).
    :param retry_timeout: number of retries still available before this
        failure is counted.
    :returns: ``(True, remaining)`` while the budget is not exhausted, where
        ``remaining`` is ``retry_timeout - 1`` so the caller can carry the
        decremented budget into its next attempt.
    :raises Exception: when no retries are left.
    """
    # In case of a connection timing out, we can retry a few times
    log_info('{} timed out'.format(template))
    retry_timeout -= 1

    if retry_timeout >= 0:
        return True, retry_timeout

    # Interpolate the template; previously the message was raised with a
    # literal, unformatted '{}' placeholder.
    raise Exception('{} timed out to much, skipping!'.format(template))
|
||||
|
||||
|
||||
class S3HTTPRedirectHandler(HTTPRedirectHandler):
|
||||
@@ -640,7 +671,7 @@ def get_authenticated_user(args):
|
||||
def check_git_lfs_install():
    """
    Verify that Git LFS is available by running ``git lfs version``.

    :raises Exception: when the command exits with a non-zero status.
    """
    lfs_available = subprocess.call(['git', 'lfs', 'version']) == 0
    if lfs_available:
        return
    raise Exception('The argument --lfs requires you to have Git LFS installed.\nYou can get it from https://git-lfs.github.com.')
|
||||
|
||||
|
||||
def retrieve_repositories(args, authenticated_user):
|
||||
@@ -726,6 +757,8 @@ def filter_repositories(args, unfiltered_repositories):
|
||||
repositories = [r for r in repositories if name_regex.match(r['name'])]
|
||||
if args.skip_archived:
|
||||
repositories = [r for r in repositories if not r.get('archived')]
|
||||
if args.exclude:
|
||||
repositories = [r for r in repositories if r['name'] not in args.exclude]
|
||||
|
||||
return repositories
|
||||
|
||||
@@ -766,7 +799,8 @@ def backup_repositories(args, output_directory, repositories):
|
||||
repo_dir,
|
||||
skip_existing=args.skip_existing,
|
||||
bare_clone=args.bare_clone,
|
||||
lfs_clone=args.lfs_clone)
|
||||
lfs_clone=args.lfs_clone,
|
||||
no_prune=args.no_prune)
|
||||
|
||||
if repository.get('is_gist'):
|
||||
# dump gist information to a file as well
|
||||
@@ -783,8 +817,9 @@ def backup_repositories(args, output_directory, repositories):
|
||||
os.path.join(repo_cwd, 'wiki'),
|
||||
skip_existing=args.skip_existing,
|
||||
bare_clone=args.bare_clone,
|
||||
lfs_clone=args.lfs_clone)
|
||||
|
||||
lfs_clone=args.lfs_clone,
|
||||
no_prune=args.no_prune
|
||||
)
|
||||
if args.include_issues or args.include_everything:
|
||||
backup_issues(args, repo_cwd, repository, repos_template)
|
||||
|
||||
@@ -878,6 +913,8 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
|
||||
pulls = {}
|
||||
_pulls_template = '{0}/{1}/pulls'.format(repos_template,
|
||||
repository['full_name'])
|
||||
_issue_template = '{0}/{1}/issues'.format(repos_template,
|
||||
repository['full_name'])
|
||||
query_args = {
|
||||
'filter': 'all',
|
||||
'state': 'all',
|
||||
@@ -917,10 +954,17 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
|
||||
|
||||
log_info('Saving {0} pull requests to disk'.format(
|
||||
len(list(pulls.keys()))))
|
||||
# Comments from pulls API are only _review_ comments
|
||||
# regular comments need to be fetched via issue API.
|
||||
# For backwards compatibility with versions <= 0.41.0
|
||||
# keep name "comment_data" for review comments
|
||||
comments_regular_template = _issue_template + '/{0}/comments'
|
||||
comments_template = _pulls_template + '/{0}/comments'
|
||||
commits_template = _pulls_template + '/{0}/commits'
|
||||
for number, pull in list(pulls.items()):
|
||||
if args.include_pull_comments or args.include_everything:
|
||||
template = comments_regular_template.format(number)
|
||||
pulls[number]['comment_regular_data'] = retrieve_data(args, template)
|
||||
template = comments_template.format(number)
|
||||
pulls[number]['comment_data'] = retrieve_data(args, template)
|
||||
if args.include_pull_commits or args.include_everything:
|
||||
@@ -1009,7 +1053,8 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F
|
||||
log_info('Saving {0} releases to disk'.format(len(releases)))
|
||||
for release in releases:
|
||||
release_name = release['tag_name']
|
||||
output_filepath = os.path.join(release_cwd, '{0}.json'.format(release_name))
|
||||
release_name_safe = release_name.replace('/', '__')
|
||||
output_filepath = os.path.join(release_cwd, '{0}.json'.format(release_name_safe))
|
||||
with codecs.open(output_filepath, 'w+', encoding='utf-8') as f:
|
||||
json_dump(release, f)
|
||||
|
||||
@@ -1017,7 +1062,7 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F
|
||||
assets = retrieve_data(args, release['assets_url'])
|
||||
if len(assets) > 0:
|
||||
# give release asset files somewhere to live & download them (not including source archives)
|
||||
release_assets_cwd = os.path.join(release_cwd, release_name)
|
||||
release_assets_cwd = os.path.join(release_cwd, release_name_safe)
|
||||
mkdir_p(release_assets_cwd)
|
||||
for asset in assets:
|
||||
download_file(asset['url'], os.path.join(release_assets_cwd, asset['name']), get_auth(args))
|
||||
@@ -1028,7 +1073,8 @@ def fetch_repository(name,
|
||||
local_dir,
|
||||
skip_existing=False,
|
||||
bare_clone=False,
|
||||
lfs_clone=False):
|
||||
lfs_clone=False,
|
||||
no_prune=False):
|
||||
if bare_clone:
|
||||
if os.path.exists(local_dir):
|
||||
clone_exists = subprocess.check_output(['git',
|
||||
@@ -1074,6 +1120,8 @@ def fetch_repository(name,
|
||||
git_command = ['git', 'lfs', 'fetch', '--all', '--prune']
|
||||
else:
|
||||
git_command = ['git', 'fetch', '--all', '--force', '--tags', '--prune']
|
||||
if no_prune:
|
||||
git_command.pop()
|
||||
logging_subprocess(git_command, None, cwd=local_dir)
|
||||
else:
|
||||
log_info('Cloning {0} repository from {1} to {2}'.format(
|
||||
@@ -1081,10 +1129,13 @@ def fetch_repository(name,
|
||||
masked_remote_url,
|
||||
local_dir))
|
||||
if bare_clone:
|
||||
if lfs_clone:
|
||||
git_command = ['git', 'lfs', 'clone', '--mirror', remote_url, local_dir]
|
||||
else:
|
||||
git_command = ['git', 'clone', '--mirror', remote_url, local_dir]
|
||||
logging_subprocess(git_command, None)
|
||||
if lfs_clone:
|
||||
git_command = ['git', 'lfs', 'fetch', '--all', '--prune']
|
||||
if no_prune:
|
||||
git_command.pop()
|
||||
logging_subprocess(git_command, None, cwd=local_dir)
|
||||
else:
|
||||
if lfs_clone:
|
||||
git_command = ['git', 'lfs', 'clone', remote_url, local_dir]
|
||||
|
||||
Reference in New Issue
Block a user