mirror of https://github.com/josegonzalez/python-github-backup.git

Refactor to both simplify the codepath and follow PEP 8 standards
@@ -22,6 +22,7 @@ from github_backup import __version__
 
 FNULL = open(os.devnull, 'w')
 
+
 def log_error(message):
     if type(message) == str:
         message = [message]
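
FNULL is a writable handle on os.devnull that later silences the git
ls-remote probe in fetch_repository. A minimal sketch of how it is
consumed (the URL is illustrative; Python 3 would reach for
subprocess.DEVNULL instead):

    import os
    import subprocess

    FNULL = open(os.devnull, 'w')
    # git prints nothing; a missing or uninitialized remote exits with 128.
    rc = subprocess.call('git ls-remote ' + 'https://example.invalid/x.git',
                         stdout=FNULL, stderr=FNULL, shell=True)
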
@@ -40,7 +41,11 @@ def log_info(message):
         sys.stdout.write("{0}\n".format(msg))
 
 
-def logging_subprocess(popenargs, logger, stdout_log_level=logging.DEBUG, stderr_log_level=logging.ERROR, **kwargs):
+def logging_subprocess(popenargs,
+                       logger,
+                       stdout_log_level=logging.DEBUG,
+                       stderr_log_level=logging.ERROR,
+                       **kwargs):
     """
     Variant of subprocess.call that accepts a logger instead of stdout/stderr,
     and logs stdout messages via logger.debug and stderr messages via
@@ -53,7 +58,10 @@ def logging_subprocess(popenargs, logger, stdout_log_level=logging.DEBUG, stderr
                   child.stderr: stderr_log_level}
 
     def check_io():
-        ready_to_read = select.select([child.stdout, child.stderr], [], [], 1000)[0]
+        ready_to_read = select.select([child.stdout, child.stderr],
+                                      [],
+                                      [],
+                                      1000)[0]
         for io in ready_to_read:
             line = io.readline()
             if not logger:
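
Beyond the rewrapped signature and select call, logging_subprocess
multiplexes the child's stdout and stderr and routes each line to the
logger at a per-stream level. A minimal Python 3 sketch of the same
pattern (run_logged and the 1-second timeout are illustrative, not the
project's API):

    import logging
    import select
    import subprocess

    def run_logged(cmd, logger):
        child = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
        levels = {child.stdout: logging.DEBUG, child.stderr: logging.ERROR}
        while child.poll() is None:
            # Block until a pipe has data, then log the line at the level
            # registered for that stream.
            ready, _, _ = select.select([child.stdout, child.stderr],
                                        [], [], 1.0)
            for pipe in ready:
                line = pipe.readline()
                if line:
                    logger.log(levels[pipe], line.decode().rstrip())
        return child.returncode
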
@@ -88,35 +96,121 @@ def mkdir_p(*args):
 
 
 def parse_args():
-    parser = argparse.ArgumentParser(description='Backup a github users account', prog='Github Backup')
-    parser.add_argument('user', metavar='USER', type=str, help='github username')
-    parser.add_argument('-u', '--username', dest='username', help='username for basic auth')
-    parser.add_argument('-p', '--password', dest='password', help='password for basic auth')
-    parser.add_argument('-t', '--token', dest='token', help='personal access or OAuth token')
-    parser.add_argument('-o', '--output-directory', default='.', dest='output_directory', help='directory at which to backup the repositories')
-    parser.add_argument('--starred', action='store_true', dest='include_starred', help='include starred repositories in backup')
-    parser.add_argument('--watched', action='store_true', dest='include_watched', help='include watched repositories in backup')
-    parser.add_argument('--all', action='store_true', dest='include_everything', help='include everything in backup')
-    parser.add_argument('--issues', action='store_true', dest='include_issues', help='include issues in backup')
-    parser.add_argument('--issue-comments', action='store_true', dest='include_issue_comments', help='include issue comments in backup')
-    parser.add_argument('--issue-events', action='store_true', dest='include_issue_events', help='include issue events in backup')
-    parser.add_argument('--pulls', action='store_true', dest='include_pulls', help='include pull requests in backup')
-    parser.add_argument('--pull-comments', action='store_true', dest='include_pull_comments', help='include pull request review comments in backup')
-    parser.add_argument('--pull-commits', action='store_true', dest='include_pull_commits', help='include pull request commits in backup')
-    parser.add_argument('--labels', action='store_true', dest='include_labels', help='include labels in backup')
-    parser.add_argument('--milestones', action='store_true', dest='include_milestones', help='include milestones in backup')
-    parser.add_argument('--repositories', action='store_true', dest='include_repository', help='include repository clone in backup')
-    parser.add_argument('--wikis', action='store_true', dest='include_wiki', help='include wiki clone in backup')
-    parser.add_argument('--skip-existing', action='store_true', dest='skip_existing', help='skip project if a backup directory exists')
-    parser.add_argument('-L', '--languages', dest='languages', help='only allow these languages', nargs='*')
-    parser.add_argument('-N', '--name-regex', dest='name_regex', help='python regex to match names against')
-    parser.add_argument('-H', '--github-host', dest='github_host', help='GitHub Enterprise hostname')
-    parser.add_argument('-O', '--organization', action='store_true', dest='organization', help='whether or not this is a query for an organization')
-    parser.add_argument('-R', '--repository', dest='repository', help='name of repository to limit backup to')
-    parser.add_argument('-P', '--private', action='store_true', dest='private', help='include private repositories')
-    parser.add_argument('-F', '--fork', action='store_true', dest='fork', help='include forked repositories')
-    parser.add_argument('--prefer-ssh', action='store_true', help='Clone repositories using SSH instead of HTTPS')
-    parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + __version__)
+    parser = argparse.ArgumentParser(description='Backup a github account',
+                                     prog='Github Backup')
+    parser.add_argument('user',
+                        metavar='USER',
+                        type=str,
+                        help='github username')
+    parser.add_argument('-u',
+                        '--username',
+                        dest='username',
+                        help='username for basic auth')
+    parser.add_argument('-p',
+                        '--password',
+                        dest='password',
+                        help='password for basic auth')
+    parser.add_argument('-t',
+                        '--token',
+                        dest='token',
+                        help='personal access or OAuth token')
+    parser.add_argument('-o',
+                        '--output-directory',
+                        default='.',
+                        dest='output_directory',
+                        help='directory at which to backup the repositories')
+    parser.add_argument('--starred',
+                        action='store_true',
+                        dest='include_starred',
+                        help='include starred repositories in backup')
+    parser.add_argument('--watched',
+                        action='store_true',
+                        dest='include_watched',
+                        help='include watched repositories in backup')
+    parser.add_argument('--all',
+                        action='store_true',
+                        dest='include_everything',
+                        help='include everything in backup')
+    parser.add_argument('--issues',
+                        action='store_true',
+                        dest='include_issues',
+                        help='include issues in backup')
+    parser.add_argument('--issue-comments',
+                        action='store_true',
+                        dest='include_issue_comments',
+                        help='include issue comments in backup')
+    parser.add_argument('--issue-events',
+                        action='store_true',
+                        dest='include_issue_events',
+                        help='include issue events in backup')
+    parser.add_argument('--pulls',
+                        action='store_true',
+                        dest='include_pulls',
+                        help='include pull requests in backup')
+    parser.add_argument('--pull-comments',
+                        action='store_true',
+                        dest='include_pull_comments',
+                        help='include pull request review comments in backup')
+    parser.add_argument('--pull-commits',
+                        action='store_true',
+                        dest='include_pull_commits',
+                        help='include pull request commits in backup')
+    parser.add_argument('--labels',
+                        action='store_true',
+                        dest='include_labels',
+                        help='include labels in backup')
+    parser.add_argument('--milestones',
+                        action='store_true',
+                        dest='include_milestones',
+                        help='include milestones in backup')
+    parser.add_argument('--repositories',
+                        action='store_true',
+                        dest='include_repository',
+                        help='include repository clone in backup')
+    parser.add_argument('--wikis',
+                        action='store_true',
+                        dest='include_wiki',
+                        help='include wiki clone in backup')
+    parser.add_argument('--skip-existing',
+                        action='store_true',
+                        dest='skip_existing',
+                        help='skip project if a backup directory exists')
+    parser.add_argument('-L',
+                        '--languages',
+                        dest='languages',
+                        help='only allow these languages',
+                        nargs='*')
+    parser.add_argument('-N',
+                        '--name-regex',
+                        dest='name_regex',
+                        help='python regex to match names against')
+    parser.add_argument('-H',
+                        '--github-host',
+                        dest='github_host',
+                        help='GitHub Enterprise hostname')
+    parser.add_argument('-O',
+                        '--organization',
+                        action='store_true',
+                        dest='organization',
+                        help='whether or not this is an organization user')
+    parser.add_argument('-R',
+                        '--repository',
+                        dest='repository',
+                        help='name of repository to limit backup to')
+    parser.add_argument('-P', '--private',
+                        action='store_true',
+                        dest='private',
+                        help='include private repositories')
+    parser.add_argument('-F', '--fork',
+                        action='store_true',
+                        dest='fork',
+                        help='include forked repositories')
+    parser.add_argument('--prefer-ssh',
+                        action='store_true',
+                        help='Clone repositories using SSH instead of HTTPS')
+    parser.add_argument('-v', '--version',
+                        action='version',
+                        version='%(prog)s ' + __version__)
     return parser.parse_args()
 
 
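
The hunk above is pure reformatting, one argument per line with no
behavioral change. A hedged smoke test showing which attributes the rest
of the script reads off the result (the argv values are illustrative):

    import sys

    sys.argv = ['github-backup', 'octocat', '--all', '--private',
                '-o', '/backups']
    args = parse_args()
    assert args.user == 'octocat'
    assert args.include_everything is True   # dest of --all
    assert args.private is True              # dest of -P/--private
    assert args.output_directory == '/backups'
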
@@ -127,12 +221,13 @@ def get_auth(args):
     elif args.username and args.password:
         auth = base64.b64encode(args.username + ':' + args.password)
     elif args.username and not args.password:
-        log_error('You must specify a password for basic auth when specifying a username')
+        log_error('You must specify a password for basic auth')
     elif args.password and not args.username:
-        log_error('You must specify a username for basic auth when specifying a password')
+        log_error('You must specify a username for basic auth')
 
     return auth
 
+
 def get_github_api_host(args):
     if args.github_host:
         host = args.github_host + '/api/v3'
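
get_auth builds an HTTP Basic credential that retrieve_data later attaches
as an Authorization header. A small sketch of the equivalent header
construction (the credentials are illustrative; Python 2's
base64.b64encode accepts a str directly, as in the diff):

    import base64

    credential = base64.b64encode('octocat:secret'.encode()).decode()
    headers = {'Authorization': 'Basic ' + credential}
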
@@ -141,6 +236,7 @@ def get_github_api_host(args):
 
     return host
 
+
 def get_github_ssh_host(args):
     if args.github_host:
         host = args.github_host
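
These two helpers differ only in the /api/v3 suffix: GitHub Enterprise
serves the REST API under that path on the instance's own hostname. A
condensed sketch (the api.github.com fallback lives in the unchanged else
branches outside these hunks, so it is an assumption here):

    def api_host(github_host=None):
        # Enterprise: the REST API lives under /api/v3 on the instance host.
        if github_host:
            return github_host + '/api/v3'
        # Public GitHub default (assumed; not shown in this diff).
        return 'api.github.com'
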
@@ -149,74 +245,24 @@ def get_github_ssh_host(args):
 
     return host
 
 
 def retrieve_data(args, template, query_args=None, single_request=False):
     auth = get_auth(args)
+    query_args = get_query_args(query_args)
     per_page = 100
     page = 0
     data = []
-    if not query_args:
-        query_args = {}
 
     while True:
         page = page + 1
-        querystring = urllib.urlencode(dict({
-            'per_page': per_page,
-            'page': page
-        }.items() + query_args.items()))
-
-        request = urllib2.Request(template + '?' + querystring)
-        if auth is not None:
-            request.add_header('Authorization', 'Basic ' + auth)
-
-        errors = []
-        retry_timeout = 3
-
-        # We'll make requests in a loop so we can delay and retry in the case of rate-limiting
-        while True:
-            try:
-                r = urllib2.urlopen(request)
-            except urllib2.HTTPError as exc:
-                # HTTPError behaves like a Response so we can check the status code and headers to see exactly
-                # what failed.
-
-                limit_remaining = int(exc.headers.get('x-ratelimit-remaining', 0))
-
-                if exc.code == 403 and limit_remaining < 1:
-                    # The X-RateLimit-Reset header includes a timestamp telling us when the limit will reset
-                    # so we can calculate how long to wait rather than inefficiently polling:
-                    gm_now = calendar.timegm(time.gmtime())
-                    reset = int(exc.headers.get('x-ratelimit-reset', 0)) or gm_now
-                    # We'll never sleep for less than 10 seconds:
-                    delta = max(10, reset - gm_now)
-
-                    limit = exc.headers.get('x-ratelimit-limit')
-                    print('Exceeded rate limit of {} requests; waiting {} seconds to reset'.format(limit, delta),
-                          file=sys.stderr)
-
-                    ratelimit_error = 'No more requests remaining'
-                    if auth is None:
-                        ratelimit_error = ratelimit_error + '; authenticate to raise your GitHub rate limit'
-                    errors.append(ratelimit_error)
-
-                    time.sleep(delta)
-                    continue
-            except urllib2.URLError:
-                # Incase of a connection timing out, we can retry a few time
-                # But we won't crash and not back-up the rest now
-                log_info('{} timed out'.format(template))
-                retry_timeout -= 1
-
-                if retry_timeout >= 0:
-                    continue
-
-                log_error('{} timed out to much, skipping!')
-
-            break
+        request = _construct_request(per_page, page, query_args, template, auth)  # noqa
+        r, errors = _get_response(request, template)
 
         status_code = int(r.getcode())
 
         if status_code != 200:
-            errors.append('API request returned HTTP {}: {}'.format(status_code, r.reason))
+            template = 'API request returned HTTP {0}: {1}'
+            errors.append(template.format(status_code, r.reason))
             log_error(errors)
 
         response = json.loads(r.read())
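
The loop body now delegates request construction and error handling to
helpers, but the pagination shape is unchanged: fetch 100 items per page
until the API returns a short page (the termination check falls outside
this hunk). A generic sketch of that shape, with fetch_page as an
illustrative callable:

    def paginate(fetch_page, per_page=100):
        page, data = 0, []
        while True:
            page += 1
            chunk = fetch_page(page=page, per_page=per_page)
            data.extend(chunk)
            if len(chunk) < per_page:  # a short page means no more results
                return data
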
@@ -237,16 +283,108 @@ def retrieve_data(args, template, query_args=None, single_request=False):
     return data
 
 
+def get_query_args(query_args=None):
+    if not query_args:
+        query_args = {}
+    return query_args
+
+
+def _get_response(request, template):
+    retry_timeout = 3
+    errors = []
+    # We'll make requests in a loop so we can
+    # delay and retry in the case of rate-limiting
+    while True:
+        should_continue = False
+        try:
+            r = urllib2.urlopen(request)
+        except urllib2.HTTPError as exc:
+            errors, should_continue = _request_http_error(exc, auth, errors)  # noqa
+        except urllib2.URLError:
+            should_continue = _request_url_error(template, retry_timeout)
+
+        if should_continue:
+            continue
+
+        break
+    return r, errors
+
+
+def _construct_request(per_page, page, query_args, template, auth):
+    querystring = urllib.urlencode(dict({
+        'per_page': per_page,
+        'page': page
+    }.items() + query_args.items()))
+
+    request = urllib2.Request(template + '?' + querystring)
+    if auth is not None:
+        request.add_header('Authorization', 'Basic ' + auth)
+    return request
+
+
+def _request_http_error(exc, auth, errors):
+    # HTTPError behaves like a Response so we can
+    # check the status code and headers to see exactly
+    # what failed.
+
+    should_continue = False
+    headers = exc.headers
+    limit_remaining = int(headers.get('x-ratelimit-remaining', 0))
+
+    if exc.code == 403 and limit_remaining < 1:
+        # The X-RateLimit-Reset header includes a
+        # timestamp telling us when the limit will reset
+        # so we can calculate how long to wait rather
+        # than inefficiently polling:
+        gm_now = calendar.timegm(time.gmtime())
+        reset = int(headers.get('x-ratelimit-reset', 0)) or gm_now
+        # We'll never sleep for less than 10 seconds:
+        delta = max(10, reset - gm_now)
+
+        limit = headers.get('x-ratelimit-limit')
+        print('Exceeded rate limit of {} requests; waiting {} seconds to reset'.format(limit, delta),  # noqa
+              file=sys.stderr)
+
+        ratelimit_error = 'No more requests remaining'
+        if auth is None:
+            ratelimit_error += '; authenticate to raise your GitHub rate limit'  # noqa
+        errors.append(ratelimit_error)
+
+        time.sleep(delta)
+        should_continue = True
+    return errors, should_continue
+
+
+def _request_url_error(template, retry_timeout):
+    # Incase of a connection timing out, we can retry a few time
+    # But we won't crash and not back-up the rest now
+    log_info('{} timed out'.format(template))
+    retry_timeout -= 1
+
+    if retry_timeout >= 0:
+        return True
+
+    log_error('{} timed out to much, skipping!')
+    return False
+
+
 def retrieve_repositories(args):
     log_info('Retrieving repositories')
     single_request = False
-    template = 'https://{0}/users/{1}/repos'.format(get_github_api_host(args), args.user)
+    template = 'https://{0}/users/{1}/repos'.format(
+        get_github_api_host(args),
+        args.user)
     if args.organization:
-        template = 'https://{0}/orgs/{1}/repos'.format(get_github_api_host(args), args.user)
+        template = 'https://{0}/orgs/{1}/repos'.format(
+            get_github_api_host(args),
+            args.user)
 
     if args.repository:
         single_request = True
-        template = 'https://{0}/repos/{1}/{2}'.format(get_github_api_host(args), args.user, args.repository)
+        template = 'https://{0}/repos/{1}/{2}'.format(
+            get_github_api_host(args),
+            args.user,
+            args.repository)
 
     return retrieve_data(args, template, single_request=single_request)
 
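
Two things are worth noticing in the extracted helpers: _get_response
references auth, which is not among its parameters (hence the # noqa), and
_request_url_error decrements its own local copy of retry_timeout, so the
caller's counter never actually shrinks between iterations. The rate-limit
arithmetic itself is self-contained; isolated as a sketch (headers is any
dict-like mapping of response headers):

    import calendar
    import time

    def seconds_until_reset(headers):
        # X-RateLimit-Reset is an epoch timestamp; wait until then,
        # but never sleep for less than 10 seconds.
        gm_now = calendar.timegm(time.gmtime())
        reset = int(headers.get('x-ratelimit-reset', 0)) or gm_now
        return max(10, reset - gm_now)
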
@@ -266,7 +404,7 @@ def filter_repositories(args, repositories):
     if not args.private:
         repositories = [r for r in repositories if not r['private']]
     if languages:
-        repositories = [r for r in repositories if r['language'] and r['language'].lower() in languages]
+        repositories = [r for r in repositories if r['language'] and r['language'].lower() in languages]  # noqa
     if name_regex:
         repositories = [r for r in repositories if name_regex.match(r['name'])]
 
@@ -277,9 +415,6 @@ def backup_repositories(args, output_directory, repositories):
     log_info('Backing up repositories')
     repos_template = 'https://{0}/repos'.format(get_github_api_host(args))
 
-    issue_states = ['open', 'closed']
-    pull_states = ['open', 'closed']
-
     for repository in repositories:
         backup_cwd = os.path.join(output_directory, 'repositories')
         repo_cwd = os.path.join(backup_cwd, repository['name'])
@@ -288,123 +423,156 @@ def backup_repositories(args, output_directory, repositories):
         if args.prefer_ssh:
             repo_url = repository['ssh_url']
         else:
-            repo_url = repository['git_url']
+            repo_url = repository['clone_url']
 
         if args.include_repository or args.include_everything:
-            fetch_repository(repository['name'], repo_url, repo_dir, skip_existing=args.skip_existing)
+            fetch_repository(repository['name'],
+                             repo_url,
+                             repo_dir,
+                             skip_existing=args.skip_existing)
 
-        if repository['has_wiki'] and (args.include_wiki or args.include_everything):
+        download_wiki = (args.include_wiki or args.include_everything)
+        if repository['has_wiki'] and download_wiki:
             fetch_repository(repository['name'],
                              repo_url.replace('.git', '.wiki.git'),
                              os.path.join(repo_cwd, 'wiki'),
                              skip_existing=args.skip_existing)
 
         if args.include_issues or args.include_everything:
-            if args.skip_existing and os.path.isdir('{0}/issues/.git'.format(repo_cwd)):
-                continue
+            backup_issues(args, repo_cwd, repository, repos_template)
+
+        if args.include_pulls or args.include_everything:
+            backup_pulls(args, repo_cwd, repository, repos_template)
+
+        if args.include_milestones or args.include_everything:
+            backup_milestones(args, repo_cwd, repository, repos_template)
+
+        if args.include_labels or args.include_everything:
+            backup_labels(args, repo_cwd, repository, repos_template)
+
+
+def backup_issues(args, repo_cwd, repository, repos_template):
+    has_issues_dir = os.path.isdir('{0}/issues/.git'.format(repo_cwd))
+    if args.skip_existing and has_issues_dir:
+        return
 
     log_info('Retrieving {0} issues'.format(repository['full_name']))
     issue_cwd = os.path.join(repo_cwd, 'issues')
-    mkdir_p(backup_cwd, repo_cwd, issue_cwd)
+    mkdir_p(repo_cwd, issue_cwd)
 
     issues = {}
-    _issue_template = '{0}/{1}/issues'.format(repos_template, repository['full_name'])
+    _issue_template = '{0}/{1}/issues'.format(repos_template,
+                                              repository['full_name'])
 
+    issue_states = ['open', 'closed']
     for issue_state in issue_states:
         query_args = {
             'filter': 'all',
             'state': issue_state
         }
 
-        _issues = retrieve_data(args, _issue_template, query_args=query_args)
+        _issues = retrieve_data(args,
+                                _issue_template,
+                                query_args=query_args)
         for issue in _issues:
            issues[issue['number']] = issue
 
     log_info('Saving {0} issues to disk'.format(len(issues.keys())))
-    for number, issue in issues.iteritems():
     comments_template = _issue_template + '/{0}/comments'
     events_template = _issue_template + '/{0}/events'
+    for number, issue in issues.iteritems():
         if args.include_issue_comments or args.include_everything:
-            issues[number]['comment_data'] = retrieve_data(args, comments_template.format(number))
+            template = comments_template.format(number)
+            issues[number]['comment_data'] = retrieve_data(args, template)
         if args.include_issue_events or args.include_everything:
-            issues[number]['event_data'] = retrieve_data(args, events_template.format(number))
+            template = events_template.format(number)
+            issues[number]['event_data'] = retrieve_data(args, template)
 
-        with codecs.open('{0}/{1}.json'.format(issue_cwd, number), 'w', encoding='utf-8') as issue_file:
-            json.dump(issue, issue_file, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ': '))
+        issue_file = '{0}/{1}.json'.format(issue_cwd, number)
+        with codecs.open(issue_file, 'w', encoding='utf-8') as f:
+            json_dump(issue, f)
 
-        if args.include_pulls or args.include_everything:
-            if args.skip_existing and os.path.isdir('{0}/pulls/.git'.format(repo_cwd)):
-                continue
-
-            log_info('Retrieving {0} pull requests'.format(repository['full_name']))
+
+def backup_pulls(args, repo_cwd, repository, repos_template):
+    has_pulls_dir = os.path.isdir('{0}/pulls/.git'.format(repo_cwd))
+    if args.skip_existing and has_pulls_dir:
+        return
+
+    log_info('Retrieving {0} pull requests'.format(repository['full_name']))  # noqa
     pulls_cwd = os.path.join(repo_cwd, 'pulls')
-    mkdir_p(backup_cwd, repo_cwd, pulls_cwd)
+    mkdir_p(repo_cwd, pulls_cwd)
 
     pulls = {}
-    _pulls_template = '{0}/{1}/pulls'.format(repos_template, repository['full_name'])
+    _pulls_template = '{0}/{1}/pulls'.format(repos_template,
+                                             repository['full_name'])
 
+    pull_states = ['open', 'closed']
     for pull_state in pull_states:
         query_args = {
             'filter': 'all',
             'state': pull_state
         }
 
-        _pulls = retrieve_data(args, _pulls_template, query_args=query_args)
+        _pulls = retrieve_data(args,
+                               _pulls_template,
+                               query_args=query_args)
         for pull in _pulls:
             pulls[pull['number']] = pull
 
     log_info('Saving {0} pull requests to disk'.format(len(pulls.keys())))
-    for number, pull in pulls.iteritems():
     comments_template = _pulls_template + '/{0}/comments'
     commits_template = _pulls_template + '/{0}/commits'
+    for number, pull in pulls.iteritems():
         if args.include_pull_comments or args.include_everything:
-            pulls[number]['comment_data'] = retrieve_data(args, comments_template.format(number))
+            template = comments_template.format(number)
+            pulls[number]['comment_data'] = retrieve_data(args, template)
         if args.include_pull_commits or args.include_everything:
-            pulls[number]['commit_data'] = retrieve_data(args, commits_template.format(number))
+            template = commits_template.format(number)
+            pulls[number]['commit_data'] = retrieve_data(args, template)
 
-        with codecs.open('{0}/{1}.json'.format(pulls_cwd, number), 'w', encoding='utf-8') as pull_file:
-            json.dump(pull, pull_file, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ': '))
+        pull_file = '{0}/{1}.json'.format(pulls_cwd, number)
+        with codecs.open(pull_file, 'w', encoding='utf-8') as f:
+            json_dump(pull, f)
 
-        if args.include_milestones or args.include_everything:
-            if args.skip_existing and os.path.isdir('{0}/milestones/.git'.format(repo_cwd)):
-                continue
-
-            log_info('Retrieving {0} milestones'.format(repository['full_name']))
-            milestone_cwd = os.path.join(repo_cwd, 'milestones')
-            mkdir_p(backup_cwd, repo_cwd, milestone_cwd)
+
+def backup_milestones(args, repo_cwd, repository, repos_template):
+    milestone_cwd = os.path.join(repo_cwd, 'milestones')
+    if args.skip_existing and os.path.isdir(milestone_cwd):
+        return
+
+    log_info('Retrieving {0} milestones'.format(repository['full_name']))
+    mkdir_p(repo_cwd, milestone_cwd)
 
-    milestones = {}
-    _milestone_template = '{0}/{1}/milestones'.format(repos_template, repository['full_name'])
+    template = '{0}/{1}/milestones'.format(repos_template,
+                                           repository['full_name'])
 
     query_args = {
         'state': 'all'
    }
 
-    _milestones = retrieve_data(args, _milestone_template, query_args=query_args)
+    _milestones = retrieve_data(args, template, query_args=query_args)
 
+    milestones = {}
     for milestone in _milestones:
         milestones[milestone['number']] = milestone
 
     log_info('Saving {0} milestones to disk'.format(len(milestones.keys())))
     for number, milestone in milestones.iteritems():
-        with codecs.open('{0}/{1}.json'.format(milestone_cwd, number), 'w', encoding='utf-8') as milestone_file:
-            json.dump(milestone, milestone_file, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ': '))
+        milestone_file = '{0}/{1}.json'.format(milestone_cwd, number)
+        with codecs.open(milestone_file, 'w', encoding='utf-8') as f:
+            json_dump(milestone, f)
 
-        if args.include_labels or args.include_everything:
-            if args.skip_existing and os.path.isdir('{0}/labels/.git'.format(repo_cwd)):
-                continue
-
-            log_info('Retrieving {0} labels'.format(repository['full_name']))
+
+def backup_labels(args, repo_cwd, repository, repos_template):
     label_cwd = os.path.join(repo_cwd, 'labels')
-    mkdir_p(backup_cwd, repo_cwd, label_cwd)
-    _label_template = '{0}/{1}/labels'.format(repos_template, repository['full_name'])
-    labels = retrieve_data(args, _label_template, query_args={})
-    log_info('Saving {0} labels to disk'.format(len(labels)))
-    with codecs.open('{0}/labels.json'.format(label_cwd), 'w', encoding='utf-8') as label_file:
-        json.dump(labels, label_file, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ': '))
+    output_file = '{0}/labels.json'.format(label_cwd)
+    template = '{0}/{1}/labels'.format(repos_template,
+                                       repository['full_name'])
+    _backup_data(args,
+                 'labels',
+                 template,
+                 output_file,
+                 label_cwd)
 
 
 def fetch_repository(name, remote_url, local_dir, skip_existing=False):
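
Each extracted backup_* function follows the same shape: fetch every
state, key items by number so later results overwrite duplicates, then
write one JSON file per item. A schematic of that shared shape, reusing
the project's retrieve_data/json_dump helpers (the function name and
states argument are illustrative):

    import codecs

    def _dump_numbered(args, cwd, template, states):
        items = {}
        for state in states:
            query_args = {'filter': 'all', 'state': state}
            for item in retrieve_data(args, template, query_args=query_args):
                items[item['number']] = item
        for number, item in items.iteritems():  # Python 2, as in the diff
            path = '{0}/{1}.json'.format(cwd, number)
            with codecs.open(path, 'w', encoding='utf-8') as f:
                json_dump(item, f)
        return items
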
@@ -413,44 +581,69 @@ def fetch_repository(name, remote_url, local_dir, skip_existing=False):
     if clone_exists and skip_existing:
         return
 
-    initalized = subprocess.call('git ls-remote ' + remote_url, stdout=FNULL, stderr=FNULL, shell=True)
+    initalized = subprocess.call('git ls-remote ' + remote_url,
+                                 stdout=FNULL,
+                                 stderr=FNULL,
+                                 shell=True)
     if initalized == 128:
-        log_info("Skipping {} since it's not initalized".format(name))
+        log_info("Skipping {0} since it's not initalized".format(name))
         return
 
     if clone_exists:
-        log_info('Updating {} in {}'.format(name, local_dir))
+        log_info('Updating {0} in {1}'.format(name, local_dir))
         git_command = ['git', 'fetch', '--all', '--tags', '--prune']
         logging_subprocess(git_command, None, cwd=local_dir)
     else:
-        log_info('Cloning {} repository from {} to {}'.format(name, remote_url, local_dir))
+        log_info('Cloning {0} repository from {1} to {2}'.format(name,
+                                                                 remote_url,
+                                                                 local_dir))
         git_command = ['git', 'clone', remote_url, local_dir]
         logging_subprocess(git_command, None)
 
 
 def backup_account(args, output_directory):
     account_cwd = os.path.join(output_directory, 'account')
-    if args.include_starred or args.include_everything:
-        if not args.skip_existing or not os.path.exists('{0}/starred.json'.format(account_cwd)):
-            log_info('Retrieving {0} starred repositories'.format(args.user))
-            mkdir_p(account_cwd)
-
-            starred_template = "https://{0}/users/{1}/starred"
-            starred = retrieve_data(args, starred_template.format(get_github_api_host(args), args.user))
-            log_info('Writing {0} starred repositories'.format(len(starred)))
-            with codecs.open('{0}/starred.json'.format(account_cwd), 'w', encoding='utf-8') as starred_file:
-                json.dump(starred, starred_file, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ': '))
+
+    if args.include_starred or args.include_everything:
+        output_file = '{0}/starred.json'.format(account_cwd)
+        template = "https://{0}/users/{1}/starred"
+        template = template.format(get_github_api_host(args), args.user)
+        _backup_data(args,
+                     'starred repositories',
+                     template,
+                     output_file,
+                     account_cwd)
 
     if args.include_watched or args.include_everything:
-        if not args.skip_existing or not os.path.exists('{0}/watched.json'.format(account_cwd)):
-            log_info('Retrieving {0} watched repositories'.format(args.user))
-            mkdir_p(account_cwd)
-
-            watched_template = "https://{0}/users/{1}/subscriptions"
-            watched = retrieve_data(args, watched_template.format(get_github_api_host(args), args.user))
-            log_info('Writing {0} watched repositories'.format(len(watched)))
-            with codecs.open('{0}/watched.json'.format(account_cwd), 'w', encoding='utf-8') as watched_file:
-                json.dump(watched, watched_file, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ': '))
+        output_file = '{0}/watched.json'.format(account_cwd)
+        template = "https://{0}/users/{1}/subscriptions"
+        template = template.format(get_github_api_host(args), args.user)
+        _backup_data(args,
+                     'watched repositories',
+                     template,
+                     output_file,
+                     account_cwd)
+
+
+def _backup_data(args, name, template, output_file, output_directory):
+    skip_existing = args.skip_existing
+    if not skip_existing or not os.path.exists(output_file):
+        log_info('Retrieving {0} {1}'.format(args.user, name))
+        mkdir_p(output_directory)
+        data = retrieve_data(args, template)
+
+        log_info('Writing {0} {1} to disk'.format(len(data), name))
+        with codecs.open(output_file, 'w', encoding='utf-8') as f:
+            json_dump(data, f)
+
+
+def json_dump(data, output_file):
+    json.dump(data,
+              output_file,
+              ensure_ascii=False,
+              sort_keys=True,
+              indent=4,
+              separators=(',', ': '))
 
 
 def main():
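
The new _backup_data helper makes the starred/watched branches symmetric,
so adding another account-level export becomes a few lines. A hedged
example in the same style (the /users/{user}/followers endpoint and the
'followers' label are illustrative, not part of this commit):

    output_file = '{0}/followers.json'.format(account_cwd)
    template = "https://{0}/users/{1}/followers".format(
        get_github_api_host(args), args.user)
    _backup_data(args, 'followers', template, output_file, account_cwd)
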
@@ -458,7 +651,8 @@ def main():
 
     output_directory = os.path.realpath(args.output_directory)
     if not os.path.isdir(output_directory):
-        log_error('Specified output directory is not a directory: {0}'.format(output_directory))
+        log_error('Specified output directory is not a directory: {0}'.format(
+            output_directory))
 
     log_info('Backing up user {0} to {1}'.format(args.user, output_directory))
 