@@ -11,12 +11,12 @@ import datetime
 import errno
 import getpass
 import json
+import logging
 import os
 import re
 import select
 import subprocess
 import sys
-import logging
 import time
 import platform
 from urllib.parse import urlparse
@@ -27,6 +27,7 @@ from urllib.request import urlopen
 from urllib.request import Request
 from urllib.request import HTTPRedirectHandler
 from urllib.request import build_opener
+from http.client import IncompleteRead

 try:
     from . import __version__
@@ -41,14 +42,6 @@ def _get_log_date():
     return datetime.datetime.isoformat(datetime.datetime.now())


-def log_error(message):
-    """
-    Log message (str) or messages (List[str]) to stderr and exit with status 1
-    """
-    log_warning(message)
-    sys.exit(1)
-
-
 def log_info(message):
     """
     Log message (str) or messages (List[str]) to stdout
@@ -57,7 +50,7 @@ def log_info(message):
         message = [message]

     for msg in message:
-        sys.stdout.write("{0}: {1}\n".format(_get_log_date(), msg))
+        logging.info(msg)


 def log_warning(message):
@@ -68,7 +61,7 @@ def log_warning(message):
         message = [message]

     for msg in message:
-        sys.stderr.write("{0}: {1}\n".format(_get_log_date(), msg))
+        logging.warning(msg)


 def logging_subprocess(popenargs,
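
Since log_info and log_warning now delegate to the logging module instead of writing to stdout/stderr directly, the embedding application decides where output goes. A minimal sketch, using only the standard library, of how a CLI caller can restore the old timestamped console format:

```python
import logging

# Route the module's logging.info/logging.warning calls to the console
# with a timestamp prefix, roughly matching the former _get_log_date() output.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s: %(message)s",
)

logging.info("backing up repositories")
# -> 2024-01-01 12:00:00,000: backing up repositories  (illustrative output)
```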
@@ -140,7 +133,7 @@ def mask_password(url, secret='*****'):
     return url.replace(parsed.password, secret)


-def parse_args():
+def parse_args(args=None):
     parser = argparse.ArgumentParser(description='Backup a github account')
     parser.add_argument('user',
                         metavar='USER',
@@ -246,6 +239,10 @@ def parse_args():
                         action='store_true',
                         dest='bare_clone',
                         help='clone bare repositories')
+    parser.add_argument('--no-prune',
+                        action='store_true',
+                        dest='no_prune',
+                        help='disable prune option for git fetch')
     parser.add_argument('--lfs',
                         action='store_true',
                         dest='lfs_clone',
@@ -331,7 +328,11 @@ def parse_args():
                         type=float,
                         default=30.0,
                         help='wait this amount of seconds when API request throttling is active (default: 30.0, requires --throttle-limit to be set)')
-    return parser.parse_args()
+    parser.add_argument('--exclude',
+                        dest='exclude',
+                        help='names of repositories to exclude',
+                        nargs="*")
+    return parser.parse_args(args)


 def get_auth(args, encode=True, for_git_cli=False):
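
The new args parameter makes the parser callable without touching sys.argv, so github-backup can be driven from tests or other Python code. A small usage sketch (the user name is made up; --no-prune and --exclude are the flags added above):

```python
# Parse an explicit argv list instead of sys.argv[1:].
opts = parse_args(['someuser', '--no-prune', '--exclude', 'scratch', 'sandbox'])

assert opts.no_prune is True
assert opts.exclude == ['scratch', 'sandbox']   # nargs="*" collects a list

# Calling parse_args() with no argument still falls back to sys.argv as before.
```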
@@ -339,10 +340,10 @@ def get_auth(args, encode=True, for_git_cli=False):
     if args.osx_keychain_item_name:
         if not args.osx_keychain_item_account:
-            log_error('You must specify both name and account fields for osx keychain password items')
+            raise Exception('You must specify both name and account fields for osx keychain password items')
         else:
             if platform.system() != 'Darwin':
-                log_error("Keychain arguments are only supported on Mac OSX")
+                raise Exception("Keychain arguments are only supported on Mac OSX")

             try:
                 with open(os.devnull, 'w') as devnull:
                     token = (subprocess.check_output([
@@ -353,9 +354,9 @@ def get_auth(args, encode=True, for_git_cli=False):
                     token = token.decode('utf-8')
                     auth = token + ':' + 'x-oauth-basic'
             except subprocess.SubprocessError:
-                log_error('No password item matching the provided name and account could be found in the osx keychain.')
+                raise Exception('No password item matching the provided name and account could be found in the osx keychain.')
     elif args.osx_keychain_item_account:
-        log_error('You must specify both name and account fields for osx keychain password items')
+        raise Exception('You must specify both name and account fields for osx keychain password items')
     elif args.token:
         _path_specifier = 'file://'
         if args.token.startswith(_path_specifier):
@@ -377,7 +378,7 @@ def get_auth(args, encode=True, for_git_cli=False):
         password = urlquote(args.password)
         auth = args.username + ':' + password
     elif args.password:
-        log_error('You must specify a username for basic auth')
+        raise Exception('You must specify a username for basic auth')

     if not auth:
         return None
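
Replacing log_error (which called sys.exit(1)) with raised exceptions is the core behavioural change here: misconfiguration no longer kills the interpreter, it surfaces as a catchable error. A hedged sketch of what that enables for a caller embedding github-backup as a library (opts stands in for a parsed argument namespace):

```python
try:
    auth = get_auth(opts)
except Exception as exc:
    # Previously this path printed to stderr and exited the whole process.
    print('github-backup auth setup failed: {0}'.format(exc))
    auth = None
```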
@@ -444,6 +445,21 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False):
         r, errors = _get_response(request, auth, template)

         status_code = int(r.getcode())
+        # Check if we got correct data
+        try:
+            response = json.loads(r.read().decode('utf-8'))
+        except IncompleteRead:
+            log_warning("Incomplete read error detected")
+            read_error = True
+        except json.decoder.JSONDecodeError:
+            log_warning("JSON decode error detected")
+            read_error = True
+        except TimeoutError:
+            log_warning("Timeout error detected")
+            read_error = True
+        else:
+            read_error = False
+
         # be gentle with API request limit and throttle requests if remaining requests getting low
         limit_remaining = int(r.headers.get('x-ratelimit-remaining', 0))
         if args.throttle_limit and limit_remaining <= args.throttle_limit:
@@ -454,21 +470,37 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False):
             time.sleep(args.throttle_pause)

         retries = 0
-        while retries < 3 and status_code == 502:
-            log_warning('API request returned HTTP 502: Bad Gateway. Retrying in 5 seconds')
+        while retries < 3 and (status_code == 502 or read_error):
+            log_warning('API request failed. Retrying in 5 seconds')
             retries += 1
             time.sleep(5)
             request = _construct_request(per_page, page, query_args, template, auth, as_app=args.as_app)  # noqa
             r, errors = _get_response(request, auth, template)

             status_code = int(r.getcode())
+            try:
+                response = json.loads(r.read().decode('utf-8'))
+                read_error = False
+            except IncompleteRead:
+                log_warning("Incomplete read error detected")
+                read_error = True
+            except json.decoder.JSONDecodeError:
+                log_warning("JSON decode error detected")
+                read_error = True
+            except TimeoutError:
+                log_warning("Timeout error detected")
+                read_error = True

         if status_code != 200:
             template = 'API request returned HTTP {0}: {1}'
             errors.append(template.format(status_code, r.reason))
-            log_error(errors)
+            raise Exception(', '.join(errors))
+
+        if read_error:
+            template = 'API request problem reading response for {0}'
+            errors.append(template.format(request))
+            raise Exception(', '.join(errors))

-        response = json.loads(r.read().decode('utf-8'))
         if len(errors) == 0:
             if type(response) == list:
                 for resp in response:
@@ -479,7 +511,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False):
                 yield response

         if len(errors) > 0:
-            log_error(errors)
+            raise Exception(', '.join(errors))

         if single_request:
             break
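
The retry loop now treats an unreadable or undecodable body the same as an HTTP 502, re-issuing the request up to three times. A self-contained sketch of that shape (do_request is a hypothetical stand-in for the _construct_request plus _get_response pair):

```python
import json
import time
from http.client import IncompleteRead


def fetch_json_with_retries(do_request, max_retries=3, pause=5):
    """Retry on HTTP 502 or on a body that cannot be read/decoded."""
    retries = 0
    while True:
        r = do_request()
        status_code = int(r.getcode())
        try:
            response = json.loads(r.read().decode('utf-8'))
            read_error = False
        except (IncompleteRead, json.decoder.JSONDecodeError, TimeoutError):
            response = None
            read_error = True
        if (status_code != 502 and not read_error) or retries >= max_retries:
            return status_code, response, read_error
        retries += 1
        time.sleep(pause)
```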
@@ -509,12 +541,12 @@ def _get_response(request, auth, template):
             r = exc
         except URLError as e:
             log_warning(e.reason)
-            should_continue = _request_url_error(template, retry_timeout)
+            should_continue, retry_timeout = _request_url_error(template, retry_timeout)
             if not should_continue:
                 raise
         except socket.error as e:
             log_warning(e.strerror)
-            should_continue = _request_url_error(template, retry_timeout)
+            should_continue, retry_timeout = _request_url_error(template, retry_timeout)
             if not should_continue:
                 raise
@@ -574,16 +606,15 @@ def _request_http_error(exc, auth, errors):


 def _request_url_error(template, retry_timeout):
-    # Incase of a connection timing out, we can retry a few time
+    # In case of a connection timing out, we can retry a few times
     # But we won't crash and not back-up the rest now
     log_info('{} timed out'.format(template))
     retry_timeout -= 1

     if retry_timeout >= 0:
-        return True
+        return True, retry_timeout

-    log_error('{} timed out to much, skipping!')
-    return False
+    raise Exception('{} timed out too much, skipping!')


 class S3HTTPRedirectHandler(HTTPRedirectHandler):
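
_request_url_error now returns a (should_continue, retry_timeout) tuple, so the decremented counter survives between calls; with the old single boolean return, the caller kept passing its original value and the retry cap never actually counted down. A sketch of the corrected call-site contract (make_request and template are illustrative placeholders):

```python
retry_timeout = 3
while True:
    try:
        r = make_request()
        break
    except OSError:
        # Unpack both values so the next failure sees the reduced budget;
        # on exhaustion _request_url_error now raises instead of returning False.
        should_continue, retry_timeout = _request_url_error(template, retry_timeout)
        if not should_continue:
            raise
```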
@@ -640,7 +671,7 @@ def get_authenticated_user(args):
 def check_git_lfs_install():
     exit_code = subprocess.call(['git', 'lfs', 'version'])
     if exit_code != 0:
-        log_error('The argument --lfs requires you to have Git LFS installed.\nYou can get it from https://git-lfs.github.com.')
+        raise Exception('The argument --lfs requires you to have Git LFS installed.\nYou can get it from https://git-lfs.github.com.')


 def retrieve_repositories(args, authenticated_user):
@@ -726,6 +757,8 @@ def filter_repositories(args, unfiltered_repositories):
         repositories = [r for r in repositories if name_regex.match(r['name'])]
     if args.skip_archived:
         repositories = [r for r in repositories if not r.get('archived')]
+    if args.exclude:
+        repositories = [r for r in repositories if r['name'] not in args.exclude]

     return repositories
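
The exclusion is an exact-name membership test against the list collected by --exclude, applied after the regex and archived filters. A quick illustration with abbreviated repository dicts:

```python
repositories = [{'name': 'app'}, {'name': 'scratch'}, {'name': 'docs'}]
exclude = ['scratch']

repositories = [r for r in repositories if r['name'] not in exclude]
# -> [{'name': 'app'}, {'name': 'docs'}]
```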
@@ -766,7 +799,8 @@ def backup_repositories(args, output_directory, repositories):
                              repo_dir,
                              skip_existing=args.skip_existing,
                              bare_clone=args.bare_clone,
-                             lfs_clone=args.lfs_clone)
+                             lfs_clone=args.lfs_clone,
+                             no_prune=args.no_prune)

         if repository.get('is_gist'):
             # dump gist information to a file as well
@@ -783,8 +817,9 @@ def backup_repositories(args, output_directory, repositories):
                              os.path.join(repo_cwd, 'wiki'),
                              skip_existing=args.skip_existing,
                              bare_clone=args.bare_clone,
-                             lfs_clone=args.lfs_clone)
+                             lfs_clone=args.lfs_clone,
+                             no_prune=args.no_prune
+                             )

         if args.include_issues or args.include_everything:
             backup_issues(args, repo_cwd, repository, repos_template)
@@ -878,6 +913,8 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
     pulls = {}
     _pulls_template = '{0}/{1}/pulls'.format(repos_template,
                                              repository['full_name'])
+    _issue_template = '{0}/{1}/issues'.format(repos_template,
+                                              repository['full_name'])
     query_args = {
         'filter': 'all',
         'state': 'all',
@@ -917,10 +954,17 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
     log_info('Saving {0} pull requests to disk'.format(
         len(list(pulls.keys()))))
+    # Comments from pulls API are only _review_ comments
+    # regular comments need to be fetched via issue API.
+    # For backwards compatibility with versions <= 0.41.0
+    # keep name "comment_data" for review comments
+    comments_regular_template = _issue_template + '/{0}/comments'
     comments_template = _pulls_template + '/{0}/comments'
     commits_template = _pulls_template + '/{0}/commits'

     for number, pull in list(pulls.items()):
         if args.include_pull_comments or args.include_everything:
+            template = comments_regular_template.format(number)
+            pulls[number]['comment_regular_data'] = retrieve_data(args, template)
             template = comments_template.format(number)
             pulls[number]['comment_data'] = retrieve_data(args, template)
         if args.include_pull_commits or args.include_everything:
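
In GitHub's API the two endpoints return different things: /issues/{n}/comments yields a pull request's conversation comments, while /pulls/{n}/comments yields only review comments anchored to the diff. A worked expansion of the two templates (the owner/repo name and the number are made up):

```python
repos_template = 'https://api.github.com/repos'
full_name = 'someowner/somerepo'

_issue_template = '{0}/{1}/issues'.format(repos_template, full_name)
_pulls_template = '{0}/{1}/pulls'.format(repos_template, full_name)

number = 17
print((_issue_template + '/{0}/comments').format(number))
# https://api.github.com/repos/someowner/somerepo/issues/17/comments
print((_pulls_template + '/{0}/comments').format(number))
# https://api.github.com/repos/someowner/somerepo/pulls/17/comments
```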
@@ -1009,7 +1053,8 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=False):
     log_info('Saving {0} releases to disk'.format(len(releases)))
     for release in releases:
         release_name = release['tag_name']
-        output_filepath = os.path.join(release_cwd, '{0}.json'.format(release_name))
+        release_name_safe = release_name.replace('/', '__')
+        output_filepath = os.path.join(release_cwd, '{0}.json'.format(release_name_safe))
         with codecs.open(output_filepath, 'w+', encoding='utf-8') as f:
             json_dump(release, f)
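
Git tag names may contain slashes (for example release branches tagged as releases/v1.2), and a slash inside the os.path.join argument would point the JSON file and the asset directory into a non-existent subdirectory. The sanitisation in one line (paths here are illustrative):

```python
import os

release_name = 'releases/v1.2'                       # a plausible slash-containing tag
release_name_safe = release_name.replace('/', '__')

os.path.join('/backup/releases', '{0}.json'.format(release_name_safe))
# -> '/backup/releases/releases__v1.2.json'  (one flat file, no surprise subdirectory)
```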
@@ -1017,7 +1062,7 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=False):
         assets = retrieve_data(args, release['assets_url'])
         if len(assets) > 0:
             # give release asset files somewhere to live & download them (not including source archives)
-            release_assets_cwd = os.path.join(release_cwd, release_name)
+            release_assets_cwd = os.path.join(release_cwd, release_name_safe)
             mkdir_p(release_assets_cwd)
             for asset in assets:
                 download_file(asset['url'], os.path.join(release_assets_cwd, asset['name']), get_auth(args))
@@ -1028,7 +1073,8 @@ def fetch_repository(name,
                      local_dir,
                      skip_existing=False,
                      bare_clone=False,
-                     lfs_clone=False):
+                     lfs_clone=False,
+                     no_prune=False):
     if bare_clone:
         if os.path.exists(local_dir):
             clone_exists = subprocess.check_output(['git',
@@ -1074,6 +1120,8 @@ def fetch_repository(name,
             git_command = ['git', 'lfs', 'fetch', '--all', '--prune']
         else:
             git_command = ['git', 'fetch', '--all', '--force', '--tags', '--prune']
+        if no_prune:
+            git_command.pop()
         logging_subprocess(git_command, None, cwd=local_dir)
     else:
         log_info('Cloning {0} repository from {1} to {2}'.format(
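
Rather than rebuilding the command, --no-prune is implemented by popping the last list element, which relies on '--prune' being the final item of every affected command:

```python
git_command = ['git', 'fetch', '--all', '--force', '--tags', '--prune']
no_prune = True
if no_prune:
    git_command.pop()   # removes the trailing '--prune'
# -> ['git', 'fetch', '--all', '--force', '--tags']
```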
@@ -1081,10 +1129,13 @@
             masked_remote_url,
             local_dir))
         if bare_clone:
             if lfs_clone:
                 git_command = ['git', 'lfs', 'clone', '--mirror', remote_url, local_dir]
             else:
                 git_command = ['git', 'clone', '--mirror', remote_url, local_dir]
             logging_subprocess(git_command, None)
+            if lfs_clone:
+                git_command = ['git', 'lfs', 'fetch', '--all', '--prune']
+                if no_prune:
+                    git_command.pop()
+                logging_subprocess(git_command, None, cwd=local_dir)
         else:
             if lfs_clone:
                 git_command = ['git', 'lfs', 'clone', remote_url, local_dir]