#!/usr/bin/env python

from __future__ import print_function

import argparse
import base64
import calendar
import codecs
import errno
import getpass
import json
import logging
import os
import re
import select
import subprocess
import sys
import time

try:
    # python 3
    from urllib.parse import urlparse
    from urllib.parse import quote as urlquote
    from urllib.parse import urlencode
    from urllib.error import HTTPError, URLError
    from urllib.request import urlopen
    from urllib.request import Request
except ImportError:
    # python 2
    from urlparse import urlparse
    from urllib import quote as urlquote
    from urllib import urlencode
    from urllib2 import HTTPError, URLError
    from urllib2 import urlopen
    from urllib2 import Request

try:
    from github_backup import __version__
except ImportError:
    # fall back to a placeholder version when not installed as a package
    __version__ = 'unknown'

FNULL = open(os.devnull, 'w')


def log_error(message):
    if type(message) == str:
        message = [message]

    for msg in message:
        sys.stderr.write("{0}\n".format(msg))

    sys.exit(1)


def log_info(message):
    if type(message) == str:
        message = [message]

    for msg in message:
        sys.stdout.write("{0}\n".format(msg))


def logging_subprocess(popenargs,
                       logger,
                       stdout_log_level=logging.DEBUG,
                       stderr_log_level=logging.ERROR,
                       **kwargs):
    """
    Variant of subprocess.call that accepts a logger instead of stdout/stderr,
    and logs stdout messages via logger.debug and stderr messages via
    logger.error.
    """
    child = subprocess.Popen(popenargs, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE, **kwargs)

    log_level = {child.stdout: stdout_log_level,
                 child.stderr: stderr_log_level}

    def check_io():
        ready_to_read = select.select([child.stdout, child.stderr],
                                      [],
                                      [],
                                      1000)[0]
        for io in ready_to_read:
            line = io.readline()
            if not logger:
                continue
            if not (io == child.stderr and not line):
                logger.log(log_level[io], line[:-1])

    # keep checking stdout/stderr until the child exits
    while child.poll() is None:
        check_io()

    check_io()  # check again to catch anything after the process exits

    rc = child.wait()

    if rc != 0:
        print('{} returned {}:'.format(popenargs[0], rc), file=sys.stderr)
        print('\t', ' '.join(popenargs), file=sys.stderr)

    return rc


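# Example use of logging_subprocess() (a sketch; the logger name is illustrative,
# and the script itself only ever passes None as the logger):
#
#   logging.basicConfig(level=logging.DEBUG)
#   logger = logging.getLogger('github-backup')
#   rc = logging_subprocess(['git', '--version'], logger)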
def mkdir_p(*args):
    for path in args:
        try:
            os.makedirs(path)
        except OSError as exc:  # Python >2.5
            if exc.errno == errno.EEXIST and os.path.isdir(path):
                pass
            else:
                raise


def mask_password(url, secret='*****'):
    parsed = urlparse(url)

    if not parsed.password:
        return url
    elif parsed.password == 'x-oauth-basic':
        return url.replace(parsed.username, secret)

    return url.replace(parsed.password, secret)


def parse_args():
    parser = argparse.ArgumentParser(description='Backup a github account')
    parser.add_argument('user',
                        metavar='USER',
                        type=str,
                        help='github username')
    parser.add_argument('-u',
                        '--username',
                        dest='username',
                        help='username for basic auth')
    parser.add_argument('-p',
                        '--password',
                        dest='password',
                        help='password for basic auth. '
                             'If a username is given but not a password, the '
                             'password will be prompted for.')
    parser.add_argument('-t',
                        '--token',
                        dest='token',
                        help='personal access or OAuth token')
    parser.add_argument('-o',
                        '--output-directory',
                        default='.',
                        dest='output_directory',
                        help='directory at which to backup the repositories')
    parser.add_argument('-i',
                        '--incremental',
                        action='store_true',
                        dest='incremental',
                        help='incremental backup')
    parser.add_argument('--starred',
                        action='store_true',
                        dest='include_starred',
                        help='include starred repositories in backup')
    parser.add_argument('--watched',
                        action='store_true',
                        dest='include_watched',
                        help='include watched repositories in backup')
    parser.add_argument('--all',
                        action='store_true',
                        dest='include_everything',
                        help='include everything in backup')
    parser.add_argument('--issues',
                        action='store_true',
                        dest='include_issues',
                        help='include issues in backup')
    parser.add_argument('--issue-comments',
                        action='store_true',
                        dest='include_issue_comments',
                        help='include issue comments in backup')
    parser.add_argument('--issue-events',
                        action='store_true',
                        dest='include_issue_events',
                        help='include issue events in backup')
    parser.add_argument('--pulls',
                        action='store_true',
                        dest='include_pulls',
                        help='include pull requests in backup')
    parser.add_argument('--pull-comments',
                        action='store_true',
                        dest='include_pull_comments',
                        help='include pull request review comments in backup')
    parser.add_argument('--pull-commits',
                        action='store_true',
                        dest='include_pull_commits',
                        help='include pull request commits in backup')
    parser.add_argument('--labels',
                        action='store_true',
                        dest='include_labels',
                        help='include labels in backup')
    parser.add_argument('--hooks',
                        action='store_true',
                        dest='include_hooks',
                        help='include hooks in backup (works only when authenticated)')
    parser.add_argument('--milestones',
                        action='store_true',
                        dest='include_milestones',
                        help='include milestones in backup')
    parser.add_argument('--repositories',
                        action='store_true',
                        dest='include_repository',
                        help='include repository clone in backup')
    parser.add_argument('--wikis',
                        action='store_true',
                        dest='include_wiki',
                        help='include wiki clone in backup')
    parser.add_argument('--skip-existing',
                        action='store_true',
                        dest='skip_existing',
                        help='skip project if a backup directory exists')
    parser.add_argument('-L',
                        '--languages',
                        dest='languages',
                        help='only allow these languages',
                        nargs='*')
    parser.add_argument('-N',
                        '--name-regex',
                        dest='name_regex',
                        help='python regex to match names against')
    parser.add_argument('-H',
                        '--github-host',
                        dest='github_host',
                        help='GitHub Enterprise hostname')
    parser.add_argument('-O',
                        '--organization',
                        action='store_true',
                        dest='organization',
                        help='whether or not this is an organization user')
    parser.add_argument('-R',
                        '--repository',
                        dest='repository',
                        help='name of repository to limit backup to')
    parser.add_argument('-P', '--private',
                        action='store_true',
                        dest='private',
                        help='include private repositories')
    parser.add_argument('-F', '--fork',
                        action='store_true',
                        dest='fork',
                        help='include forked repositories')
    parser.add_argument('--prefer-ssh',
                        action='store_true',
                        help='Clone repositories using SSH instead of HTTPS')
    parser.add_argument('-v', '--version',
                        action='version',
                        version='%(prog)s ' + __version__)
    return parser.parse_args()


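# Typical invocation of the CLI defined in parse_args() above (a sketch; the
# script name, username, token and output path are placeholders):
#
#   github-backup octocat -t <token> -o /path/to/backup --all --private
#
# which clones every visible repository and saves issues, pull requests,
# milestones, labels and hooks alongside each clone.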
def get_auth(args, encode=True):
    auth = None

    if args.token:
        auth = args.token + ':' + 'x-oauth-basic'
    elif args.username:
        if not args.password:
            args.password = getpass.getpass()
        if encode:
            password = args.password
        else:
            # the credentials are going into a URL, so quote them
            password = urlquote(args.password)
        auth = args.username + ':' + password
    elif args.password:
        log_error('You must specify a username for basic auth')

    if not auth:
        return None

    if not encode:
        return auth

    # b64encode needs bytes on python 3, and we need a str back for the header
    return base64.b64encode(auth.encode('ascii')).decode('ascii')


def get_github_api_host(args):
    if args.github_host:
        host = args.github_host + '/api/v3'
    else:
        host = 'api.github.com'

    return host


def get_github_host(args):
    if args.github_host:
        host = args.github_host
    else:
        host = 'github.com'

    return host


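# For example (hostname is illustrative): with "--github-host ghe.example.com",
# get_github_api_host() returns "ghe.example.com/api/v3" and clone URLs built
# from get_github_host() use "ghe.example.com"; without the flag the public
# api.github.com / github.com hosts are used.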
def get_github_repo_url(args, repository):
    if args.prefer_ssh:
        return repository['ssh_url']

    auth = get_auth(args, False)
    if auth:
        repo_url = 'https://{0}@{1}/{2}/{3}.git'.format(
            auth,
            get_github_host(args),
            args.user,
            repository['name'])
    else:
        repo_url = repository['clone_url']

    return repo_url


def retrieve_data(args, template, query_args=None, single_request=False):
    auth = get_auth(args)
    query_args = get_query_args(query_args)
    per_page = 100
    page = 0
    data = []

    while True:
        page = page + 1
        request = _construct_request(per_page, page, query_args, template, auth)  # noqa
        r, errors = _get_response(request, auth, template)

        status_code = int(r.getcode())

        if status_code != 200:
            template = 'API request returned HTTP {0}: {1}'
            errors.append(template.format(status_code, r.reason))
            log_error(errors)

        response = json.loads(r.read())
        if len(errors) == 0:
            if type(response) == list:
                data.extend(response)
                if len(response) < per_page:
                    break
            elif type(response) == dict and single_request:
                data.append(response)

        if len(errors) > 0:
            log_error(errors)

        if single_request:
            break

    return data


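# Paging example for retrieve_data() (illustrative numbers): with per_page=100,
# a resource with 250 items comes back as pages of 100, 100 and 50; the loop
# stops after the 50-item page because it is shorter than per_page.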
def get_query_args(query_args=None):
    if not query_args:
        query_args = {}
    return query_args


def _get_response(request, auth, template):
    retry_timeout = 3
    errors = []
    # We'll make requests in a loop so we can
    # delay and retry in the case of rate-limiting
    while True:
        should_continue = False
        try:
            r = urlopen(request)
        except HTTPError as exc:
            errors, should_continue = _request_http_error(exc, auth, errors)  # noqa
            r = exc
        except URLError:
            should_continue = _request_url_error(template, retry_timeout)
            if not should_continue:
                raise

        if should_continue:
            continue

        break
    return r, errors


def _construct_request(per_page, page, query_args, template, auth):
    querystring = urlencode(dict(list({
        'per_page': per_page,
        'page': page
    }.items()) + list(query_args.items())))

    request = Request(template + '?' + querystring)
    if auth is not None:
        request.add_header('Authorization', 'Basic ' + auth)
    return request


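# For reference (hypothetical token value): get_auth() turns a token "abc123"
# into base64("abc123:x-oauth-basic"), so the header added above is
#   Authorization: Basic YWJjMTIzOngtb2F1dGgtYmFzaWM=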
def _request_http_error(exc, auth, errors):
    # HTTPError behaves like a Response so we can
    # check the status code and headers to see exactly
    # what failed.

    should_continue = False
    headers = exc.headers
    limit_remaining = int(headers.get('x-ratelimit-remaining', 0))

    if exc.code == 403 and limit_remaining < 1:
        # The X-RateLimit-Reset header includes a
        # timestamp telling us when the limit will reset
        # so we can calculate how long to wait rather
        # than inefficiently polling:
        gm_now = calendar.timegm(time.gmtime())
        reset = int(headers.get('x-ratelimit-reset', 0)) or gm_now
        # We'll never sleep for less than 10 seconds:
        delta = max(10, reset - gm_now)

        limit = headers.get('x-ratelimit-limit')
        print('Exceeded rate limit of {} requests; waiting {} seconds to reset'.format(limit, delta),  # noqa
              file=sys.stderr)

        if auth is None:
            print('Hint: Authenticate to raise your GitHub rate limit',
                  file=sys.stderr)

        time.sleep(delta)
        should_continue = True
    return errors, should_continue


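# Worked example (illustrative timestamps): if X-RateLimit-Reset is 1700000600
# and the current UTC time is 1700000000, delta = max(10, 600) = 600, so the
# script sleeps ten minutes before retrying the same request.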
def _request_url_error(template, retry_timeout):
    # In case of a connection timing out, we can retry a few times,
    # but we won't crash and fail to back up the rest.
    log_info('{} timed out'.format(template))
    retry_timeout -= 1

    if retry_timeout >= 0:
        return True

    log_error('{} timed out too many times, skipping!'.format(template))
    return False


def retrieve_repositories(args):
    log_info('Retrieving repositories')
    single_request = False
    template = 'https://{0}/user/repos'.format(
        get_github_api_host(args))
    if args.organization:
        template = 'https://{0}/orgs/{1}/repos'.format(
            get_github_api_host(args),
            args.user)

    if args.repository:
        single_request = True
        template = 'https://{0}/repos/{1}/{2}'.format(
            get_github_api_host(args),
            args.user,
            args.repository)

    return retrieve_data(args, template, single_request=single_request)


def filter_repositories(args, repositories):
    log_info('Filtering repositories')

    repositories = [r for r in repositories if r['owner']['login'] == args.user]

    name_regex = None
    if args.name_regex:
        name_regex = re.compile(args.name_regex)

    languages = None
    if args.languages:
        languages = [x.lower() for x in args.languages]

    if not args.fork:
        repositories = [r for r in repositories if not r['fork']]
    if not args.private:
        repositories = [r for r in repositories if not r['private']]
    if languages:
        repositories = [r for r in repositories if r['language'] and r['language'].lower() in languages]  # noqa
    if name_regex:
        repositories = [r for r in repositories if name_regex.match(r['name'])]

    return repositories


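# Example (flag values are illustrative): "-L python ruby -N '^api-'" keeps only
# repositories whose primary language is Python or Ruby and whose name starts
# with "api-"; forks and private repositories are dropped unless -F / -P is given.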
def backup_repositories(args, output_directory, repositories):
    log_info('Backing up repositories')
    repos_template = 'https://{0}/repos'.format(get_github_api_host(args))

    if args.incremental:
        last_update = max(repository['updated_at'] for repository in repositories)
        last_update_path = os.path.join(output_directory, 'last_update')
        if os.path.exists(last_update_path):
            args.since = open(last_update_path).read().strip()
        else:
            args.since = None
    else:
        args.since = None

    for repository in repositories:
        backup_cwd = os.path.join(output_directory, 'repositories')
        repo_cwd = os.path.join(backup_cwd, repository['name'])
        repo_dir = os.path.join(repo_cwd, 'repository')
        repo_url = get_github_repo_url(args, repository)

        if args.include_repository or args.include_everything:
            fetch_repository(repository['name'],
                             repo_url,
                             repo_dir,
                             skip_existing=args.skip_existing)

        download_wiki = (args.include_wiki or args.include_everything)
        if repository['has_wiki'] and download_wiki:
            fetch_repository(repository['name'],
                             repo_url.replace('.git', '.wiki.git'),
                             os.path.join(repo_cwd, 'wiki'),
                             skip_existing=args.skip_existing)

        if args.include_issues or args.include_everything:
            backup_issues(args, repo_cwd, repository, repos_template)

        if args.include_pulls or args.include_everything:
            backup_pulls(args, repo_cwd, repository, repos_template)

        if args.include_milestones or args.include_everything:
            backup_milestones(args, repo_cwd, repository, repos_template)

        if args.include_labels or args.include_everything:
            backup_labels(args, repo_cwd, repository, repos_template)

        if args.include_hooks or args.include_everything:
            backup_hooks(args, repo_cwd, repository, repos_template)

    if args.incremental:
        open(last_update_path, 'w').write(last_update)


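# Resulting on-disk layout (sketch, for a repository named "example"):
#
#   <output>/repositories/example/repository/        full git clone
#   <output>/repositories/example/wiki/              wiki clone, if one exists
#   <output>/repositories/example/issues/<n>.json    one file per issue
#   <output>/repositories/example/pulls/<n>.json     one file per pull request
#   <output>/repositories/example/milestones/<n>.json
#   <output>/repositories/example/labels/labels.json
#   <output>/repositories/example/hooks/hooks.json
#   <output>/account/starred.json, watched.json      (from backup_account)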
def backup_issues(args, repo_cwd, repository, repos_template):
    has_issues_dir = os.path.isdir('{0}/issues/.git'.format(repo_cwd))
    if args.skip_existing and has_issues_dir:
        return

    log_info('Retrieving {0} issues'.format(repository['full_name']))
    issue_cwd = os.path.join(repo_cwd, 'issues')
    mkdir_p(repo_cwd, issue_cwd)

    issues = {}
    issues_skipped = 0
    issues_skipped_message = ''
    _issue_template = '{0}/{1}/issues'.format(repos_template,
                                              repository['full_name'])

    issue_states = ['open', 'closed']
    for issue_state in issue_states:
        query_args = {
            'filter': 'all',
            'state': issue_state
        }
        if args.since:
            query_args['since'] = args.since

        _issues = retrieve_data(args,
                                _issue_template,
                                query_args=query_args)
        for issue in _issues:
            # skip pull requests which are also returned as issues
            # if retrieving pull requests is requested as well
            if 'pull_request' in issue and (args.include_pulls or args.include_everything):
                issues_skipped += 1
                continue

            issues[issue['number']] = issue

    if issues_skipped:
        issues_skipped_message = ' (skipped {0} pull requests)'.format(issues_skipped)
    log_info('Saving {0} issues to disk{1}'.format(len(list(issues.keys())), issues_skipped_message))
    comments_template = _issue_template + '/{0}/comments'
    events_template = _issue_template + '/{0}/events'
    for number, issue in list(issues.items()):
        if args.include_issue_comments or args.include_everything:
            template = comments_template.format(number)
            issues[number]['comment_data'] = retrieve_data(args, template)
        if args.include_issue_events or args.include_everything:
            template = events_template.format(number)
            issues[number]['event_data'] = retrieve_data(args, template)

        issue_file = '{0}/{1}.json'.format(issue_cwd, number)
        with codecs.open(issue_file, 'w', encoding='utf-8') as f:
            json_dump(issue, f)


def backup_pulls(args, repo_cwd, repository, repos_template):
    has_pulls_dir = os.path.isdir('{0}/pulls/.git'.format(repo_cwd))
    if args.skip_existing and has_pulls_dir:
        return

    log_info('Retrieving {0} pull requests'.format(repository['full_name']))  # noqa
    pulls_cwd = os.path.join(repo_cwd, 'pulls')
    mkdir_p(repo_cwd, pulls_cwd)

    pulls = {}
    _pulls_template = '{0}/{1}/pulls'.format(repos_template,
                                             repository['full_name'])

    pull_states = ['open', 'closed']
    for pull_state in pull_states:
        query_args = {
            'filter': 'all',
            'state': pull_state,
            'sort': 'updated',
            'direction': 'desc',
        }

        # It'd be nice to be able to apply the args.since filter here...
        _pulls = retrieve_data(args,
                               _pulls_template,
                               query_args=query_args)
        for pull in _pulls:
            if not args.since or pull['updated_at'] >= args.since:
                pulls[pull['number']] = pull

    log_info('Saving {0} pull requests to disk'.format(len(list(pulls.keys()))))
    comments_template = _pulls_template + '/{0}/comments'
    commits_template = _pulls_template + '/{0}/commits'
    for number, pull in list(pulls.items()):
        if args.include_pull_comments or args.include_everything:
            template = comments_template.format(number)
            pulls[number]['comment_data'] = retrieve_data(args, template)
        if args.include_pull_commits or args.include_everything:
            template = commits_template.format(number)
            pulls[number]['commit_data'] = retrieve_data(args, template)

        pull_file = '{0}/{1}.json'.format(pulls_cwd, number)
        with codecs.open(pull_file, 'w', encoding='utf-8') as f:
            json_dump(pull, f)


def backup_milestones(args, repo_cwd, repository, repos_template):
    milestone_cwd = os.path.join(repo_cwd, 'milestones')
    if args.skip_existing and os.path.isdir(milestone_cwd):
        return

    log_info('Retrieving {0} milestones'.format(repository['full_name']))
    mkdir_p(repo_cwd, milestone_cwd)

    template = '{0}/{1}/milestones'.format(repos_template,
                                           repository['full_name'])

    query_args = {
        'state': 'all'
    }

    _milestones = retrieve_data(args, template, query_args=query_args)

    milestones = {}
    for milestone in _milestones:
        milestones[milestone['number']] = milestone

    log_info('Saving {0} milestones to disk'.format(len(list(milestones.keys()))))
    for number, milestone in list(milestones.items()):
        milestone_file = '{0}/{1}.json'.format(milestone_cwd, number)
        with codecs.open(milestone_file, 'w', encoding='utf-8') as f:
            json_dump(milestone, f)


def backup_labels(args, repo_cwd, repository, repos_template):
    label_cwd = os.path.join(repo_cwd, 'labels')
    output_file = '{0}/labels.json'.format(label_cwd)
    template = '{0}/{1}/labels'.format(repos_template,
                                       repository['full_name'])
    _backup_data(args,
                 'labels',
                 template,
                 output_file,
                 label_cwd)


def backup_hooks(args, repo_cwd, repository, repos_template):
    auth = get_auth(args)
    if not auth:
        log_info("Skipping hooks since no authentication provided")
        return
    hook_cwd = os.path.join(repo_cwd, 'hooks')
    output_file = '{0}/hooks.json'.format(hook_cwd)
    template = '{0}/{1}/hooks'.format(repos_template,
                                      repository['full_name'])
    try:
        _backup_data(args,
                     'hooks',
                     template,
                     output_file,
                     hook_cwd)
    except SystemExit:
        log_info("Unable to read hooks, skipping")


def fetch_repository(name, remote_url, local_dir, skip_existing=False):
    clone_exists = os.path.exists(os.path.join(local_dir, '.git'))

    if clone_exists and skip_existing:
        return

    masked_remote_url = mask_password(remote_url)

    initialized = subprocess.call('git ls-remote ' + remote_url,
                                  stdout=FNULL,
                                  stderr=FNULL,
                                  shell=True)
    if initialized == 128:
        log_info("Skipping {0} ({1}) since it's not initialized".format(name, masked_remote_url))
        return

    if clone_exists:
        log_info('Updating {0} in {1}'.format(name, local_dir))
        git_command = ['git', 'remote', 'rm', 'origin']
        logging_subprocess(git_command, None, cwd=local_dir)
        git_command = ['git', 'remote', 'add', 'origin', remote_url]
        logging_subprocess(git_command, None, cwd=local_dir)
        git_command = ['git', 'fetch', '--all', '--tags', '--prune']
        logging_subprocess(git_command, None, cwd=local_dir)
    else:
        log_info('Cloning {0} repository from {1} to {2}'.format(name,
                                                                 masked_remote_url,
                                                                 local_dir))
        git_command = ['git', 'clone', remote_url, local_dir]
        logging_subprocess(git_command, None)


def backup_account(args, output_directory):
    account_cwd = os.path.join(output_directory, 'account')

    if args.include_starred or args.include_everything:
        output_file = '{0}/starred.json'.format(account_cwd)
        template = "https://{0}/users/{1}/starred"
        template = template.format(get_github_api_host(args), args.user)
        _backup_data(args,
                     'starred repositories',
                     template,
                     output_file,
                     account_cwd)

    if args.include_watched or args.include_everything:
        output_file = '{0}/watched.json'.format(account_cwd)
        template = "https://{0}/users/{1}/subscriptions"
        template = template.format(get_github_api_host(args), args.user)
        _backup_data(args,
                     'watched repositories',
                     template,
                     output_file,
                     account_cwd)


def _backup_data(args, name, template, output_file, output_directory):
    skip_existing = args.skip_existing
    if not skip_existing or not os.path.exists(output_file):
        log_info('Retrieving {0} {1}'.format(args.user, name))
        mkdir_p(output_directory)
        data = retrieve_data(args, template)

        log_info('Writing {0} {1} to disk'.format(len(data), name))
        with codecs.open(output_file, 'w', encoding='utf-8') as f:
            json_dump(data, f)


def json_dump(data, output_file):
    json.dump(data,
              output_file,
              ensure_ascii=False,
              sort_keys=True,
              indent=4,
              separators=(',', ': '))


def main():
    args = parse_args()

    output_directory = os.path.realpath(args.output_directory)
    if not os.path.isdir(output_directory):
        log_info('Creating output directory {0}'.format(output_directory))
        mkdir_p(output_directory)

    log_info('Backing up user {0} to {1}'.format(args.user, output_directory))

    repositories = retrieve_repositories(args)
    repositories = filter_repositories(args, repositories)
    backup_repositories(args, output_directory, repositories)
    backup_account(args, output_directory)


if __name__ == '__main__':
    main()