mirror of
https://github.com/josegonzalez/python-github-backup.git
synced 2025-12-05 16:18:02 +01:00
Compare commits
19 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9e1cba9817 | ||
|
|
3859a80b7a | ||
|
|
8c12d54898 | ||
|
|
b6b6605acd | ||
|
|
ff5e0aa89c | ||
|
|
79726c360d | ||
|
|
a511bb2b49 | ||
|
|
aedf9b2c66 | ||
|
|
b9e35a50f5 | ||
|
|
d0e239b3ef | ||
|
|
29c9373d9d | ||
|
|
eb8b22c81c | ||
|
|
03739ce1be | ||
|
|
d2bb205b4b | ||
|
|
17141c1bb6 | ||
|
|
d362adbbca | ||
|
|
89df625e04 | ||
|
|
675484a215 | ||
|
|
325f77dcd9 |
28
CHANGES.rst
28
CHANGES.rst
@@ -1,6 +1,34 @@
|
||||
Changelog
|
||||
=========
|
||||
|
||||
0.9.0 (2016-03-29)
|
||||
------------------
|
||||
|
||||
- Fix cloning private repos with basic auth or token. [Kazuki Suda]
|
||||
|
||||
0.8.0 (2016-02-14)
|
||||
------------------
|
||||
|
||||
- Don't store issues which are actually pull requests. [Enrico Tröger]
|
||||
|
||||
This prevents storing pull requests twice since the GitHub API returns
|
||||
pull requests also as issues. Those issues will be skipped but only if
|
||||
retrieving pull requests is requested as well.
|
||||
Closes #23.
|
||||
|
||||
|
||||
0.7.0 (2016-02-02)
|
||||
------------------
|
||||
|
||||
- Softly fail if not able to read hooks. [Albert Wang]
|
||||
|
||||
- Add note about 2-factor auth. [Albert Wang]
|
||||
|
||||
- Make user repository search go through endpoint capable of reading
|
||||
private repositories. [Albert Wang]
|
||||
|
||||
- Prompt for password if only username given. [Alex Hall]
|
||||
|
||||
0.6.0 (2015-11-10)
|
||||
------------------
|
||||
|
||||
|
||||
29
README.rst
29
README.rst
@@ -23,15 +23,14 @@ CLI Usage is as follows::
|
||||
Github Backup [-h] [-u USERNAME] [-p PASSWORD] [-t TOKEN]
|
||||
[-o OUTPUT_DIRECTORY] [--starred] [--watched] [--all]
|
||||
[--issues] [--issue-comments] [--issue-events] [--pulls]
|
||||
[--pull-comments] [--pull-commits] [--repositories]
|
||||
[--wikis] [--labels] [--hooks] [--skip-existing]
|
||||
[-L [LANGUAGES [LANGUAGES ...]]] [-N NAME_REGEX]
|
||||
[-H GITHUB_HOST] [-O] [-R REPOSITORY] [-P] [-F]
|
||||
[--prefer-ssh] [-v]
|
||||
[--pull-comments] [--pull-commits] [--labels] [--hooks]
|
||||
[--milestones] [--repositories] [--wikis]
|
||||
[--skip-existing] [-L [LANGUAGES [LANGUAGES ...]]]
|
||||
[-N NAME_REGEX] [-H GITHUB_HOST] [-O] [-R REPOSITORY]
|
||||
[-P] [-F] [--prefer-ssh] [-v]
|
||||
USER
|
||||
|
||||
|
||||
Backup a github users account
|
||||
Backup a github account
|
||||
|
||||
positional arguments:
|
||||
USER github username
|
||||
@@ -41,7 +40,8 @@ CLI Usage is as follows::
|
||||
-u USERNAME, --username USERNAME
|
||||
username for basic auth
|
||||
-p PASSWORD, --password PASSWORD
|
||||
password for basic auth
|
||||
password for basic auth. If a username is given but
|
||||
not a password, the password will be prompted for.
|
||||
-t TOKEN, --token TOKEN
|
||||
personal access or OAuth token
|
||||
-o OUTPUT_DIRECTORY, --output-directory OUTPUT_DIRECTORY
|
||||
@@ -55,10 +55,12 @@ CLI Usage is as follows::
|
||||
--pulls include pull requests in backup
|
||||
--pull-comments include pull request review comments in backup
|
||||
--pull-commits include pull request commits in backup
|
||||
--labels include labels in backup
|
||||
--hooks include hooks in backup (works only when
|
||||
authenticated)
|
||||
--milestones include milestones in backup
|
||||
--repositories include repository clone in backup
|
||||
--wikis include wiki clone in backup
|
||||
--labels include labels in backup
|
||||
--hooks include web hooks in backup (works only when authenticated)
|
||||
--skip-existing skip project if a backup directory exists
|
||||
-L [LANGUAGES [LANGUAGES ...]], --languages [LANGUAGES [LANGUAGES ...]]
|
||||
only allow these languages
|
||||
@@ -66,7 +68,7 @@ CLI Usage is as follows::
|
||||
python regex to match names against
|
||||
-H GITHUB_HOST, --github-host GITHUB_HOST
|
||||
GitHub Enterprise hostname
|
||||
-O, --organization whether or not this is a query for an organization
|
||||
-O, --organization whether or not this is an organization user
|
||||
-R REPOSITORY, --repository REPOSITORY
|
||||
name of repository to limit backup to
|
||||
-P, --private include private repositories
|
||||
@@ -76,3 +78,8 @@ CLI Usage is as follows::
|
||||
|
||||
|
||||
The package can be used to back up an *entire* organization or repository, including issues and wikis in the most appropriate format (clones for wikis, JSON files for issues).
|
||||
|
||||
Authentication
|
||||
==============
|
||||
|
||||
Note: Password-based authentication will fail if you have two-factor authentication enabled.
|
||||
|
||||
@@ -7,6 +7,7 @@ import base64
|
||||
import calendar
|
||||
import codecs
|
||||
import errno
|
||||
import getpass
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
@@ -15,6 +16,7 @@ import select
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import urlparse
|
||||
import urllib
|
||||
import urllib2
|
||||
|
||||
@@ -94,6 +96,15 @@ def mkdir_p(*args):
|
||||
else:
|
||||
raise
|
||||
|
||||
def mask_password(url, secret='*****'):
    """Return ``url`` with the credential in its userinfo part masked.

    Intended for log output, so that clone URLs of the form
    ``https://user:password@host/...`` do not leak secrets.

    :param url: URL that may carry ``user:password@`` credentials.
    :param secret: placeholder text substituted for the credential.
    :returns: ``url`` unchanged when it has no password component;
        otherwise the same URL with the password replaced by ``secret``.
        When the password is the literal ``x-oauth-basic`` marker, the
        username is replaced instead.
    """
    parsed = urlparse.urlparse(url)

    if not parsed.password:
        return url

    # Only rewrite the userinfo portion of the netloc. A blanket
    # ``url.replace(password, secret)`` would also clobber any matching
    # text in the host or path (e.g. a password of "git" in a github URL).
    userinfo, _, host = parsed.netloc.rpartition('@')
    username, _, password = userinfo.partition(':')

    if password == 'x-oauth-basic':
        # With this marker as the password, the secret is the username.
        masked_userinfo = secret + ':' + password
    else:
        masked_userinfo = username + ':' + secret

    # Replace just the first occurrence so the rest of the URL stays
    # byte-for-byte identical.
    return url.replace(parsed.netloc, masked_userinfo + '@' + host, 1)
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(description='Backup a github account',
|
||||
@@ -109,7 +120,9 @@ def parse_args():
|
||||
parser.add_argument('-p',
|
||||
'--password',
|
||||
dest='password',
|
||||
help='password for basic auth')
|
||||
help='password for basic auth. '
|
||||
'If a username is given but not a password, the '
|
||||
'password will be prompted for.')
|
||||
parser.add_argument('-t',
|
||||
'--token',
|
||||
dest='token',
|
||||
@@ -119,6 +132,11 @@ def parse_args():
|
||||
default='.',
|
||||
dest='output_directory',
|
||||
help='directory at which to backup the repositories')
|
||||
parser.add_argument('-i',
|
||||
'--incremental',
|
||||
action='store_true',
|
||||
dest='incremental',
|
||||
help='incremental backup')
|
||||
parser.add_argument('--starred',
|
||||
action='store_true',
|
||||
dest='include_starred',
|
||||
@@ -218,19 +236,26 @@ def parse_args():
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def get_auth(args):
|
||||
def get_auth(args, encode=True):
|
||||
auth = None
|
||||
|
||||
if args.token:
|
||||
auth = base64.b64encode(args.token + ':' + 'x-oauth-basic')
|
||||
elif args.username and args.password:
|
||||
auth = base64.b64encode(args.username + ':' + args.password)
|
||||
elif args.username and not args.password:
|
||||
log_error('You must specify a password for basic auth')
|
||||
elif args.password and not args.username:
|
||||
auth = args.token + ':' + 'x-oauth-basic'
|
||||
elif args.username:
|
||||
if not args.password:
|
||||
args.password = getpass.getpass()
|
||||
auth = args.username + ':' + args.password
|
||||
elif args.password:
|
||||
log_error('You must specify a username for basic auth')
|
||||
|
||||
if not auth:
|
||||
return None
|
||||
|
||||
if encode == False:
|
||||
return auth
|
||||
|
||||
return base64.b64encode(auth)
|
||||
|
||||
|
||||
def get_github_api_host(args):
|
||||
if args.github_host:
|
||||
@@ -241,7 +266,7 @@ def get_github_api_host(args):
|
||||
return host
|
||||
|
||||
|
||||
def get_github_ssh_host(args):
|
||||
def get_github_host(args):
|
||||
if args.github_host:
|
||||
host = args.github_host
|
||||
else:
|
||||
@@ -249,6 +274,21 @@ def get_github_ssh_host(args):
|
||||
|
||||
return host
|
||||
|
||||
def get_github_repo_url(args, repository):
    """Return the URL that should be used to clone ``repository``.

    :param args: parsed command-line arguments; reads ``prefer_ssh`` and
        ``user`` directly and passes ``args`` on to ``get_auth`` and
        ``get_github_host``.
    :param repository: repository dict; reads ``ssh_url``, ``clone_url``
        and ``name``.
    :returns: the repository's ``ssh_url`` when SSH is preferred; an
        ``https://credentials@host/user/name.git`` URL when credentials
        are available; otherwise the repository's ``clone_url``.
    """
    if args.prefer_ssh:
        return repository['ssh_url']

    # Ask for the raw (not base64-encoded) "name:secret" pair.
    auth = get_auth(args, False)
    if not auth:
        return repository['clone_url']

    # Percent-encode both halves so that characters such as '@', '/',
    # ':' or '#' inside a password or token cannot break the URL.
    username, _, password = auth.partition(':')
    credentials = '{0}:{1}'.format(urllib.quote(username, safe=''),
                                   urllib.quote(password, safe=''))

    return 'https://{0}@{1}/{2}/{3}.git'.format(
        credentials,
        get_github_host(args),
        args.user,
        repository['name'])
|
||||
|
||||
def retrieve_data(args, template, query_args=None, single_request=False):
|
||||
auth = get_auth(args)
|
||||
@@ -378,9 +418,8 @@ def _request_url_error(template, retry_timeout):
|
||||
def retrieve_repositories(args):
|
||||
log_info('Retrieving repositories')
|
||||
single_request = False
|
||||
template = 'https://{0}/users/{1}/repos'.format(
|
||||
get_github_api_host(args),
|
||||
args.user)
|
||||
template = 'https://{0}/user/repos'.format(
|
||||
get_github_api_host(args))
|
||||
if args.organization:
|
||||
template = 'https://{0}/orgs/{1}/repos'.format(
|
||||
get_github_api_host(args),
|
||||
@@ -398,6 +437,9 @@ def retrieve_repositories(args):
|
||||
|
||||
def filter_repositories(args, repositories):
|
||||
log_info('Filtering repositories')
|
||||
|
||||
repositories = [r for r in repositories if r['owner']['login'] == args.user]
|
||||
|
||||
name_regex = None
|
||||
if args.name_regex:
|
||||
name_regex = re.compile(args.name_regex)
|
||||
@@ -422,15 +464,21 @@ def backup_repositories(args, output_directory, repositories):
|
||||
log_info('Backing up repositories')
|
||||
repos_template = 'https://{0}/repos'.format(get_github_api_host(args))
|
||||
|
||||
if args.incremental:
|
||||
last_update = max(repository['updated_at'] for repository in repositories)
|
||||
last_update_path = os.path.join(output_directory, 'last_update')
|
||||
if os.path.exists(last_update_path):
|
||||
args.since = open(last_update_path).read().strip()
|
||||
else:
|
||||
args.since = None
|
||||
else:
|
||||
args.since = None
|
||||
|
||||
for repository in repositories:
|
||||
backup_cwd = os.path.join(output_directory, 'repositories')
|
||||
repo_cwd = os.path.join(backup_cwd, repository['name'])
|
||||
repo_dir = os.path.join(repo_cwd, 'repository')
|
||||
|
||||
if args.prefer_ssh:
|
||||
repo_url = repository['ssh_url']
|
||||
else:
|
||||
repo_url = repository['clone_url']
|
||||
repo_url = get_github_repo_url(args, repository)
|
||||
|
||||
if args.include_repository or args.include_everything:
|
||||
fetch_repository(repository['name'],
|
||||
@@ -460,6 +508,8 @@ def backup_repositories(args, output_directory, repositories):
|
||||
if args.include_hooks or args.include_everything:
|
||||
backup_hooks(args, repo_cwd, repository, repos_template)
|
||||
|
||||
if args.incremental:
|
||||
open(last_update_path, 'w').write(last_update)
|
||||
|
||||
def backup_issues(args, repo_cwd, repository, repos_template):
|
||||
has_issues_dir = os.path.isdir('{0}/issues/.git'.format(repo_cwd))
|
||||
@@ -471,6 +521,8 @@ def backup_issues(args, repo_cwd, repository, repos_template):
|
||||
mkdir_p(repo_cwd, issue_cwd)
|
||||
|
||||
issues = {}
|
||||
issues_skipped = 0
|
||||
issues_skipped_message = ''
|
||||
_issue_template = '{0}/{1}/issues'.format(repos_template,
|
||||
repository['full_name'])
|
||||
|
||||
@@ -480,14 +532,24 @@ def backup_issues(args, repo_cwd, repository, repos_template):
|
||||
'filter': 'all',
|
||||
'state': issue_state
|
||||
}
|
||||
if args.since:
|
||||
query_args['since'] = args.since
|
||||
|
||||
_issues = retrieve_data(args,
|
||||
_issue_template,
|
||||
query_args=query_args)
|
||||
for issue in _issues:
|
||||
# skip pull requests which are also returned as issues
|
||||
# if retrieving pull requests is requested as well
|
||||
if 'pull_request' in issue and (args.include_pulls or args.include_everything):
|
||||
issues_skipped += 1
|
||||
continue
|
||||
|
||||
issues[issue['number']] = issue
|
||||
|
||||
log_info('Saving {0} issues to disk'.format(len(issues.keys())))
|
||||
if issues_skipped:
|
||||
issues_skipped_message = ' (skipped {0} pull requests)'.format(issues_skipped)
|
||||
log_info('Saving {0} issues to disk{1}'.format(len(issues.keys()), issues_skipped_message))
|
||||
comments_template = _issue_template + '/{0}/comments'
|
||||
events_template = _issue_template + '/{0}/events'
|
||||
for number, issue in issues.iteritems():
|
||||
@@ -520,13 +582,17 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
|
||||
for pull_state in pull_states:
|
||||
query_args = {
|
||||
'filter': 'all',
|
||||
'state': pull_state
|
||||
'state': pull_state,
|
||||
'sort': 'updated',
|
||||
'direction': 'desc',
|
||||
}
|
||||
|
||||
# It'd be nice to be able to apply the args.since filter here...
|
||||
_pulls = retrieve_data(args,
|
||||
_pulls_template,
|
||||
query_args=query_args)
|
||||
for pull in _pulls:
|
||||
if not args.since or pull['updated_at'] >= args.since:
|
||||
pulls[pull['number']] = pull
|
||||
|
||||
log_info('Saving {0} pull requests to disk'.format(len(pulls.keys())))
|
||||
@@ -594,11 +660,14 @@ def backup_hooks(args, repo_cwd, repository, repos_template):
|
||||
output_file = '{0}/hooks.json'.format(hook_cwd)
|
||||
template = '{0}/{1}/hooks'.format(repos_template,
|
||||
repository['full_name'])
|
||||
try:
|
||||
_backup_data(args,
|
||||
'hooks',
|
||||
template,
|
||||
output_file,
|
||||
hook_cwd)
|
||||
except SystemExit:
|
||||
log_info("Unable to read hooks, skipping")
|
||||
|
||||
|
||||
def fetch_repository(name, remote_url, local_dir, skip_existing=False):
|
||||
@@ -607,12 +676,14 @@ def fetch_repository(name, remote_url, local_dir, skip_existing=False):
|
||||
if clone_exists and skip_existing:
|
||||
return
|
||||
|
||||
masked_remote_url = mask_password(remote_url)
|
||||
|
||||
initalized = subprocess.call('git ls-remote ' + remote_url,
|
||||
stdout=FNULL,
|
||||
stderr=FNULL,
|
||||
shell=True)
|
||||
if initalized == 128:
|
||||
log_info("Skipping {0} ({1}) since it's not initalized".format(name, remote_url))
|
||||
log_info("Skipping {0} ({1}) since it's not initalized".format(name, masked_remote_url))
|
||||
return
|
||||
|
||||
if clone_exists:
|
||||
@@ -625,7 +696,7 @@ def fetch_repository(name, remote_url, local_dir, skip_existing=False):
|
||||
logging_subprocess(git_command, None, cwd=local_dir)
|
||||
else:
|
||||
log_info('Cloning {0} repository from {1} to {2}'.format(name,
|
||||
remote_url,
|
||||
masked_remote_url,
|
||||
local_dir))
|
||||
git_command = ['git', 'clone', remote_url, local_dir]
|
||||
logging_subprocess(git_command, None)
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = '0.6.0'
|
||||
__version__ = '0.10.0'
|
||||
|
||||
Reference in New Issue
Block a user