mirror of
https://github.com/josegonzalez/python-github-backup.git
synced 2025-12-05 16:18:02 +01:00
Compare commits
16 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9023052e9c | ||
|
|
874c235ba5 | ||
|
|
b7b234d8a5 | ||
|
|
ed160eb0ca | ||
|
|
1d11d62b73 | ||
|
|
9e1cba9817 | ||
|
|
3859a80b7a | ||
|
|
8c12d54898 | ||
|
|
b6b6605acd | ||
|
|
ff5e0aa89c | ||
|
|
79726c360d | ||
|
|
a511bb2b49 | ||
|
|
aedf9b2c66 | ||
|
|
b9e35a50f5 | ||
|
|
1e5a90486c | ||
|
|
9b74aff20b |
56
CHANGES.rst
56
CHANGES.rst
@@ -1,6 +1,62 @@
|
|||||||
Changelog
|
Changelog
|
||||||
=========
|
=========
|
||||||
|
|
||||||
|
0.10.3 (2016-08-20)
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
- Fixes #29. [Jonas Michel]
|
||||||
|
|
||||||
|
Reporting an error when the user's rate limit is exceeded causes
|
||||||
|
the script to terminate after resuming execution from a rate limit
|
||||||
|
sleep. Instead of generating an explicit error we just want to
|
||||||
|
inform the user that the script is going to sleep until their rate
|
||||||
|
limit count resets.
|
||||||
|
|
||||||
|
|
||||||
|
- Fixes #29. [Jonas Michel]
|
||||||
|
|
||||||
|
The errors list was not being cleared out after resuming a backup
|
||||||
|
from a rate limit sleep. When the backup was resumed, the non-empty
|
||||||
|
errors list caused the backup to quit after the next `retrieve_data`
|
||||||
|
request.
|
||||||
|
|
||||||
|
|
||||||
|
0.10.2 (2016-08-21)
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
- Add a note regarding git version requirement. [Jose Diaz-Gonzalez]
|
||||||
|
|
||||||
|
Closes #37
|
||||||
|
|
||||||
|
0.10.0 (2016-08-18)
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
- Implement incremental updates. [Robert Bradshaw]
|
||||||
|
|
||||||
|
Guarded with an --incremental flag.
|
||||||
|
|
||||||
|
Stores the time of the last update and only downloads issue and
|
||||||
|
pull request data since this time. All other data is relatively
|
||||||
|
small (likely fetched with a single request) and so is simply
|
||||||
|
re-populated from scratch as before.
|
||||||
|
|
||||||
|
|
||||||
|
0.9.0 (2016-03-29)
|
||||||
|
------------------
|
||||||
|
|
||||||
|
- Fix cloning private repos with basic auth or token. [Kazuki Suda]
|
||||||
|
|
||||||
|
0.8.0 (2016-02-14)
|
||||||
|
------------------
|
||||||
|
|
||||||
|
- Don't store issues which are actually pull requests. [Enrico Tröger]
|
||||||
|
|
||||||
|
This prevents storing pull requests twice since the GitHub API returns
|
||||||
|
pull requests also as issues. Those issues will be skipped but only if
|
||||||
|
retrieving pull requests is requested as well.
|
||||||
|
Closes #23.
|
||||||
|
|
||||||
|
|
||||||
0.7.0 (2016-02-02)
|
0.7.0 (2016-02-02)
|
||||||
------------------
|
------------------
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,11 @@ github-backup
|
|||||||
|
|
||||||
backup a github user or organization
|
backup a github user or organization
|
||||||
|
|
||||||
|
Requirements
|
||||||
|
============
|
||||||
|
|
||||||
|
- GIT 1.9+
|
||||||
|
|
||||||
Installation
|
Installation
|
||||||
============
|
============
|
||||||
|
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ import select
|
|||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
|
import urlparse
|
||||||
import urllib
|
import urllib
|
||||||
import urllib2
|
import urllib2
|
||||||
|
|
||||||
@@ -95,6 +96,15 @@ def mkdir_p(*args):
|
|||||||
else:
|
else:
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
def mask_password(url, secret='*****'):
    """Return ``url`` with the credential in its userinfo part hidden.

    Only the ``user:password@`` portion of the URL is rewritten. The scheme,
    host, path, query and fragment are returned untouched, even when they
    happen to contain the same text as the credential (e.g. a password of
    ``git`` must not mangle ``github.com`` or a trailing ``.git``).

    :param url: URL that may embed ``user:password@`` credentials.
    :param secret: placeholder written in place of the credential.
    :returns: ``url`` unchanged when it carries no password; otherwise the
        URL with the password replaced by ``secret``. When the password is
        the literal ``x-oauth-basic`` the username is replaced instead.
    """
    parsed = urlparse.urlparse(url)

    if not parsed.password:
        return url

    # A non-empty password guarantees the netloc has the form
    # "user:password@host": userinfo is the text before the last '@', and
    # the password is whatever follows the first ':' inside it.
    userinfo, _, hostinfo = parsed.netloc.rpartition('@')
    username, _, password = userinfo.partition(':')

    if password == 'x-oauth-basic':
        # With this marker password the sensitive value sits in the
        # username position, so that is the part to hide.
        username = secret
    else:
        password = secret

    masked_netloc = '{0}:{1}@{2}'.format(username, password, hostinfo)

    # The netloc contains '@', which cannot occur in the scheme preceding it,
    # so the first match in ``url`` is the netloc itself. Replacing exactly
    # one occurrence keeps every other character of the URL byte-identical.
    return url.replace(parsed.netloc, masked_netloc, 1)
|
||||||
|
|
||||||
def parse_args():
|
def parse_args():
|
||||||
parser = argparse.ArgumentParser(description='Backup a github account',
|
parser = argparse.ArgumentParser(description='Backup a github account',
|
||||||
@@ -122,6 +132,11 @@ def parse_args():
|
|||||||
default='.',
|
default='.',
|
||||||
dest='output_directory',
|
dest='output_directory',
|
||||||
help='directory at which to backup the repositories')
|
help='directory at which to backup the repositories')
|
||||||
|
parser.add_argument('-i',
|
||||||
|
'--incremental',
|
||||||
|
action='store_true',
|
||||||
|
dest='incremental',
|
||||||
|
help='incremental backup')
|
||||||
parser.add_argument('--starred',
|
parser.add_argument('--starred',
|
||||||
action='store_true',
|
action='store_true',
|
||||||
dest='include_starred',
|
dest='include_starred',
|
||||||
@@ -221,19 +236,25 @@ def parse_args():
|
|||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
def get_auth(args):
|
def get_auth(args, encode=True):
|
||||||
if args.token:
|
auth = None
|
||||||
return base64.b64encode(args.token + ':' + 'x-oauth-basic')
|
|
||||||
|
|
||||||
if args.username:
|
if args.token:
|
||||||
|
auth = args.token + ':' + 'x-oauth-basic'
|
||||||
|
elif args.username:
|
||||||
if not args.password:
|
if not args.password:
|
||||||
args.password = getpass.getpass()
|
args.password = getpass.getpass()
|
||||||
return base64.b64encode(args.username + ':' + args.password)
|
auth = args.username + ':' + args.password
|
||||||
|
elif args.password:
|
||||||
if args.password:
|
|
||||||
log_error('You must specify a username for basic auth')
|
log_error('You must specify a username for basic auth')
|
||||||
|
|
||||||
return None
|
if not auth:
|
||||||
|
return None
|
||||||
|
|
||||||
|
if encode == False:
|
||||||
|
return auth
|
||||||
|
|
||||||
|
return base64.b64encode(auth)
|
||||||
|
|
||||||
|
|
||||||
def get_github_api_host(args):
|
def get_github_api_host(args):
|
||||||
@@ -245,7 +266,7 @@ def get_github_api_host(args):
|
|||||||
return host
|
return host
|
||||||
|
|
||||||
|
|
||||||
def get_github_ssh_host(args):
|
def get_github_host(args):
|
||||||
if args.github_host:
|
if args.github_host:
|
||||||
host = args.github_host
|
host = args.github_host
|
||||||
else:
|
else:
|
||||||
@@ -253,6 +274,21 @@ def get_github_ssh_host(args):
|
|||||||
|
|
||||||
return host
|
return host
|
||||||
|
|
||||||
|
def get_github_repo_url(args, repository):
    """Return the URL git should use to clone or fetch ``repository``.

    :param args: parsed command-line arguments; ``prefer_ssh`` is read here
        and the object is passed on to ``get_auth`` / ``get_github_host``.
    :param repository: repository dict as returned by the API; the keys
        ``ssh_url``, ``clone_url`` and ``full_name`` are read.
    :returns: ``repository['ssh_url']`` when SSH is preferred; an HTTPS URL
        with the credentials embedded when ``get_auth`` yields any;
        otherwise ``repository['clone_url']``.
    """
    if args.prefer_ssh:
        return repository['ssh_url']

    # encode=False: the plain "user:secret" form is needed, not base64
    auth = get_auth(args, False)
    if not auth:
        return repository['clone_url']

    # Build the path from the repository's own "owner/name" rather than from
    # args.user: repositories owned by someone else (e.g. starred ones)
    # would otherwise resolve to a URL under the wrong account.
    # NOTE(review): credentials are embedded verbatim; characters such as
    # '@' or '/' in a password are not URL-quoted here - confirm whether
    # that needs handling.
    return 'https://{0}@{1}/{2}.git'.format(
        auth,
        get_github_host(args),
        repository['full_name'])
|
||||||
|
|
||||||
def retrieve_data(args, template, query_args=None, single_request=False):
|
def retrieve_data(args, template, query_args=None, single_request=False):
|
||||||
auth = get_auth(args)
|
auth = get_auth(args)
|
||||||
@@ -356,10 +392,9 @@ def _request_http_error(exc, auth, errors):
|
|||||||
print('Exceeded rate limit of {} requests; waiting {} seconds to reset'.format(limit, delta), # noqa
|
print('Exceeded rate limit of {} requests; waiting {} seconds to reset'.format(limit, delta), # noqa
|
||||||
file=sys.stderr)
|
file=sys.stderr)
|
||||||
|
|
||||||
ratelimit_error = 'No more requests remaining'
|
|
||||||
if auth is None:
|
if auth is None:
|
||||||
ratelimit_error += '; authenticate to raise your GitHub rate limit' # noqa
|
print('Hint: Authenticate to raise your GitHub rate limit',
|
||||||
errors.append(ratelimit_error)
|
file=sys.stderr)
|
||||||
|
|
||||||
time.sleep(delta)
|
time.sleep(delta)
|
||||||
should_continue = True
|
should_continue = True
|
||||||
@@ -428,15 +463,21 @@ def backup_repositories(args, output_directory, repositories):
|
|||||||
log_info('Backing up repositories')
|
log_info('Backing up repositories')
|
||||||
repos_template = 'https://{0}/repos'.format(get_github_api_host(args))
|
repos_template = 'https://{0}/repos'.format(get_github_api_host(args))
|
||||||
|
|
||||||
|
if args.incremental:
|
||||||
|
last_update = max(repository['updated_at'] for repository in repositories)
|
||||||
|
last_update_path = os.path.join(output_directory, 'last_update')
|
||||||
|
if os.path.exists(last_update_path):
|
||||||
|
args.since = open(last_update_path).read().strip()
|
||||||
|
else:
|
||||||
|
args.since = None
|
||||||
|
else:
|
||||||
|
args.since = None
|
||||||
|
|
||||||
for repository in repositories:
|
for repository in repositories:
|
||||||
backup_cwd = os.path.join(output_directory, 'repositories')
|
backup_cwd = os.path.join(output_directory, 'repositories')
|
||||||
repo_cwd = os.path.join(backup_cwd, repository['name'])
|
repo_cwd = os.path.join(backup_cwd, repository['name'])
|
||||||
repo_dir = os.path.join(repo_cwd, 'repository')
|
repo_dir = os.path.join(repo_cwd, 'repository')
|
||||||
|
repo_url = get_github_repo_url(args, repository)
|
||||||
if args.prefer_ssh:
|
|
||||||
repo_url = repository['ssh_url']
|
|
||||||
else:
|
|
||||||
repo_url = repository['clone_url']
|
|
||||||
|
|
||||||
if args.include_repository or args.include_everything:
|
if args.include_repository or args.include_everything:
|
||||||
fetch_repository(repository['name'],
|
fetch_repository(repository['name'],
|
||||||
@@ -466,6 +507,8 @@ def backup_repositories(args, output_directory, repositories):
|
|||||||
if args.include_hooks or args.include_everything:
|
if args.include_hooks or args.include_everything:
|
||||||
backup_hooks(args, repo_cwd, repository, repos_template)
|
backup_hooks(args, repo_cwd, repository, repos_template)
|
||||||
|
|
||||||
|
if args.incremental:
|
||||||
|
open(last_update_path, 'w').write(last_update)
|
||||||
|
|
||||||
def backup_issues(args, repo_cwd, repository, repos_template):
|
def backup_issues(args, repo_cwd, repository, repos_template):
|
||||||
has_issues_dir = os.path.isdir('{0}/issues/.git'.format(repo_cwd))
|
has_issues_dir = os.path.isdir('{0}/issues/.git'.format(repo_cwd))
|
||||||
@@ -477,6 +520,8 @@ def backup_issues(args, repo_cwd, repository, repos_template):
|
|||||||
mkdir_p(repo_cwd, issue_cwd)
|
mkdir_p(repo_cwd, issue_cwd)
|
||||||
|
|
||||||
issues = {}
|
issues = {}
|
||||||
|
issues_skipped = 0
|
||||||
|
issues_skipped_message = ''
|
||||||
_issue_template = '{0}/{1}/issues'.format(repos_template,
|
_issue_template = '{0}/{1}/issues'.format(repos_template,
|
||||||
repository['full_name'])
|
repository['full_name'])
|
||||||
|
|
||||||
@@ -486,14 +531,24 @@ def backup_issues(args, repo_cwd, repository, repos_template):
|
|||||||
'filter': 'all',
|
'filter': 'all',
|
||||||
'state': issue_state
|
'state': issue_state
|
||||||
}
|
}
|
||||||
|
if args.since:
|
||||||
|
query_args['since'] = args.since
|
||||||
|
|
||||||
_issues = retrieve_data(args,
|
_issues = retrieve_data(args,
|
||||||
_issue_template,
|
_issue_template,
|
||||||
query_args=query_args)
|
query_args=query_args)
|
||||||
for issue in _issues:
|
for issue in _issues:
|
||||||
|
# skip pull requests which are also returned as issues
|
||||||
|
# if retrieving pull requests is requested as well
|
||||||
|
if 'pull_request' in issue and (args.include_pulls or args.include_everything):
|
||||||
|
issues_skipped += 1
|
||||||
|
continue
|
||||||
|
|
||||||
issues[issue['number']] = issue
|
issues[issue['number']] = issue
|
||||||
|
|
||||||
log_info('Saving {0} issues to disk'.format(len(issues.keys())))
|
if issues_skipped:
|
||||||
|
issues_skipped_message = ' (skipped {0} pull requests)'.format(issues_skipped)
|
||||||
|
log_info('Saving {0} issues to disk{1}'.format(len(issues.keys()), issues_skipped_message))
|
||||||
comments_template = _issue_template + '/{0}/comments'
|
comments_template = _issue_template + '/{0}/comments'
|
||||||
events_template = _issue_template + '/{0}/events'
|
events_template = _issue_template + '/{0}/events'
|
||||||
for number, issue in issues.iteritems():
|
for number, issue in issues.iteritems():
|
||||||
@@ -526,14 +581,18 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
|
|||||||
for pull_state in pull_states:
|
for pull_state in pull_states:
|
||||||
query_args = {
|
query_args = {
|
||||||
'filter': 'all',
|
'filter': 'all',
|
||||||
'state': pull_state
|
'state': pull_state,
|
||||||
|
'sort': 'updated',
|
||||||
|
'direction': 'desc',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# It'd be nice to be able to apply the args.since filter here...
|
||||||
_pulls = retrieve_data(args,
|
_pulls = retrieve_data(args,
|
||||||
_pulls_template,
|
_pulls_template,
|
||||||
query_args=query_args)
|
query_args=query_args)
|
||||||
for pull in _pulls:
|
for pull in _pulls:
|
||||||
pulls[pull['number']] = pull
|
if not args.since or pull['updated_at'] >= args.since:
|
||||||
|
pulls[pull['number']] = pull
|
||||||
|
|
||||||
log_info('Saving {0} pull requests to disk'.format(len(pulls.keys())))
|
log_info('Saving {0} pull requests to disk'.format(len(pulls.keys())))
|
||||||
comments_template = _pulls_template + '/{0}/comments'
|
comments_template = _pulls_template + '/{0}/comments'
|
||||||
@@ -616,12 +675,14 @@ def fetch_repository(name, remote_url, local_dir, skip_existing=False):
|
|||||||
if clone_exists and skip_existing:
|
if clone_exists and skip_existing:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
masked_remote_url = mask_password(remote_url)
|
||||||
|
|
||||||
initalized = subprocess.call('git ls-remote ' + remote_url,
|
initalized = subprocess.call('git ls-remote ' + remote_url,
|
||||||
stdout=FNULL,
|
stdout=FNULL,
|
||||||
stderr=FNULL,
|
stderr=FNULL,
|
||||||
shell=True)
|
shell=True)
|
||||||
if initalized == 128:
|
if initalized == 128:
|
||||||
log_info("Skipping {0} ({1}) since it's not initalized".format(name, remote_url))
|
log_info("Skipping {0} ({1}) since it's not initalized".format(name, masked_remote_url))
|
||||||
return
|
return
|
||||||
|
|
||||||
if clone_exists:
|
if clone_exists:
|
||||||
@@ -634,7 +695,7 @@ def fetch_repository(name, remote_url, local_dir, skip_existing=False):
|
|||||||
logging_subprocess(git_command, None, cwd=local_dir)
|
logging_subprocess(git_command, None, cwd=local_dir)
|
||||||
else:
|
else:
|
||||||
log_info('Cloning {0} repository from {1} to {2}'.format(name,
|
log_info('Cloning {0} repository from {1} to {2}'.format(name,
|
||||||
remote_url,
|
masked_remote_url,
|
||||||
local_dir))
|
local_dir))
|
||||||
git_command = ['git', 'clone', remote_url, local_dir]
|
git_command = ['git', 'clone', remote_url, local_dir]
|
||||||
logging_subprocess(git_command, None)
|
logging_subprocess(git_command, None)
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
__version__ = '0.7.0'
|
__version__ = '0.10.3'
|
||||||
|
|||||||
Reference in New Issue
Block a user