Compare commits

...

10 Commits

Author SHA1 Message Date
Jose Diaz-Gonzalez
1d11d62b73 Release version 0.10.1 2016-08-20 20:45:27 -04:00
Jose Diaz-Gonzalez
9e1cba9817 Release version 0.10.0 2016-08-18 14:20:46 -04:00
Jose Diaz-Gonzalez
3859a80b7a Merge pull request #42 from robertwb/master
Implement incremental updates
2016-08-18 14:20:05 -04:00
Robert Bradshaw
8c12d54898 Implement incremental updates
Guarded with an --incremental flag.

Stores the time of the last update and only downloads issue and
pull request data since this time.  All other data is relatively
small (likely fetched with a single request) and so is simply
re-populated from scratch as before.
2016-08-17 21:31:59 -07:00
Jose Diaz-Gonzalez
b6b6605acd Release version 0.9.0 2016-03-29 13:23:45 -04:00
Jose Diaz-Gonzalez
ff5e0aa89c Merge pull request #36 from zlabjp/fix-cloning-private-repos
Fix cloning private repos with basic auth or token
2016-03-29 13:21:57 -04:00
Kazuki Suda
79726c360d Fix cloning private repos with basic auth or token 2016-03-29 15:23:54 +09:00
Jose Diaz-Gonzalez
a511bb2b49 Release version 0.8.0 2016-02-14 16:04:54 -05:00
Jose Diaz-Gonzalez
aedf9b2c66 Merge pull request #35 from eht16/issue23_store_pullrequests_once
Don't store issues which are actually pull requests
2016-02-14 16:02:18 -05:00
Enrico Tröger
b9e35a50f5 Don't store issues which are actually pull requests
This prevents storing pull requests twice since the Github API returns
pull requests also as issues. Those issues will be skipped but only if
retrieving pull requests is requested as well.
Closes #23.
2016-02-14 16:36:40 +01:00
3 changed files with 111 additions and 20 deletions

View File

@@ -1,6 +1,35 @@
Changelog
=========
0.10.1 (2016-08-20)
-------------------
- Implement incremental updates. [Robert Bradshaw]
Guarded with an --incremental flag.
Stores the time of the last update and only downloads issue and
pull request data since this time. All other data is relatively
small (likely fetched with a single request) and so is simply
re-populated from scratch as before.
0.9.0 (2016-03-29)
------------------
- Fix cloning private repos with basic auth or token. [Kazuki Suda]
0.8.0 (2016-02-14)
------------------
- Don't store issues which are actually pull requests. [Enrico Tröger]
This prevents storing pull requests twice since the Github API returns
pull requests also as issues. Those issues will be skipped but only if
retrieving pull requests is requested as well.
Closes #23.
0.7.0 (2016-02-02)
------------------

View File

@@ -16,6 +16,7 @@ import select
import subprocess
import sys
import time
import urlparse
import urllib
import urllib2
@@ -95,6 +96,15 @@ def mkdir_p(*args):
else:
raise
def mask_password(url, secret='*****'):
parsed = urlparse.urlparse(url)
if not parsed.password:
return url
elif parsed.password == 'x-oauth-basic':
return url.replace(parsed.username, secret)
return url.replace(parsed.password, secret)
def parse_args():
parser = argparse.ArgumentParser(description='Backup a github account',
@@ -122,6 +132,11 @@ def parse_args():
default='.',
dest='output_directory',
help='directory at which to backup the repositories')
parser.add_argument('-i',
'--incremental',
action='store_true',
dest='incremental',
help='incremental backup')
parser.add_argument('--starred',
action='store_true',
dest='include_starred',
@@ -221,20 +236,26 @@ def parse_args():
return parser.parse_args()
def get_auth(args):
if args.token:
return base64.b64encode(args.token + ':' + 'x-oauth-basic')
def get_auth(args, encode=True):
auth = None
if args.username:
if args.token:
auth = args.token + ':' + 'x-oauth-basic'
elif args.username:
if not args.password:
args.password = getpass.getpass()
return base64.b64encode(args.username + ':' + args.password)
if args.password:
auth = args.username + ':' + args.password
elif args.password:
log_error('You must specify a username for basic auth')
if not auth:
return None
if encode == False:
return auth
return base64.b64encode(auth)
def get_github_api_host(args):
if args.github_host:
@@ -245,7 +266,7 @@ def get_github_api_host(args):
return host
def get_github_ssh_host(args):
def get_github_host(args):
if args.github_host:
host = args.github_host
else:
@@ -253,6 +274,21 @@ def get_github_ssh_host(args):
return host
def get_github_repo_url(args, repository):
if args.prefer_ssh:
return repository['ssh_url']
auth = get_auth(args, False)
if auth:
repo_url = 'https://{0}@{1}/{2}/{3}.git'.format(
auth,
get_github_host(args),
args.user,
repository['name'])
else:
repo_url = repository['clone_url']
return repo_url
def retrieve_data(args, template, query_args=None, single_request=False):
auth = get_auth(args)
@@ -428,15 +464,21 @@ def backup_repositories(args, output_directory, repositories):
log_info('Backing up repositories')
repos_template = 'https://{0}/repos'.format(get_github_api_host(args))
if args.incremental:
last_update = max(repository['updated_at'] for repository in repositories)
last_update_path = os.path.join(output_directory, 'last_update')
if os.path.exists(last_update_path):
args.since = open(last_update_path).read().strip()
else:
args.since = None
else:
args.since = None
for repository in repositories:
backup_cwd = os.path.join(output_directory, 'repositories')
repo_cwd = os.path.join(backup_cwd, repository['name'])
repo_dir = os.path.join(repo_cwd, 'repository')
if args.prefer_ssh:
repo_url = repository['ssh_url']
else:
repo_url = repository['clone_url']
repo_url = get_github_repo_url(args, repository)
if args.include_repository or args.include_everything:
fetch_repository(repository['name'],
@@ -466,6 +508,8 @@ def backup_repositories(args, output_directory, repositories):
if args.include_hooks or args.include_everything:
backup_hooks(args, repo_cwd, repository, repos_template)
if args.incremental:
open(last_update_path, 'w').write(last_update)
def backup_issues(args, repo_cwd, repository, repos_template):
has_issues_dir = os.path.isdir('{0}/issues/.git'.format(repo_cwd))
@@ -477,6 +521,8 @@ def backup_issues(args, repo_cwd, repository, repos_template):
mkdir_p(repo_cwd, issue_cwd)
issues = {}
issues_skipped = 0
issues_skipped_message = ''
_issue_template = '{0}/{1}/issues'.format(repos_template,
repository['full_name'])
@@ -486,14 +532,24 @@ def backup_issues(args, repo_cwd, repository, repos_template):
'filter': 'all',
'state': issue_state
}
if args.since:
query_args['since'] = args.since
_issues = retrieve_data(args,
_issue_template,
query_args=query_args)
for issue in _issues:
# skip pull requests which are also returned as issues
# if retrieving pull requests is requested as well
if 'pull_request' in issue and (args.include_pulls or args.include_everything):
issues_skipped += 1
continue
issues[issue['number']] = issue
log_info('Saving {0} issues to disk'.format(len(issues.keys())))
if issues_skipped:
issues_skipped_message = ' (skipped {0} pull requests)'.format(issues_skipped)
log_info('Saving {0} issues to disk{1}'.format(len(issues.keys()), issues_skipped_message))
comments_template = _issue_template + '/{0}/comments'
events_template = _issue_template + '/{0}/events'
for number, issue in issues.iteritems():
@@ -526,13 +582,17 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
for pull_state in pull_states:
query_args = {
'filter': 'all',
'state': pull_state
'state': pull_state,
'sort': 'updated',
'direction': 'desc',
}
# It'd be nice to be able to apply the args.since filter here...
_pulls = retrieve_data(args,
_pulls_template,
query_args=query_args)
for pull in _pulls:
if not args.since or pull['updated_at'] >= args.since:
pulls[pull['number']] = pull
log_info('Saving {0} pull requests to disk'.format(len(pulls.keys())))
@@ -616,12 +676,14 @@ def fetch_repository(name, remote_url, local_dir, skip_existing=False):
if clone_exists and skip_existing:
return
masked_remote_url = mask_password(remote_url)
initalized = subprocess.call('git ls-remote ' + remote_url,
stdout=FNULL,
stderr=FNULL,
shell=True)
if initalized == 128:
log_info("Skipping {0} ({1}) since it's not initalized".format(name, remote_url))
log_info("Skipping {0} ({1}) since it's not initalized".format(name, masked_remote_url))
return
if clone_exists:
@@ -634,7 +696,7 @@ def fetch_repository(name, remote_url, local_dir, skip_existing=False):
logging_subprocess(git_command, None, cwd=local_dir)
else:
log_info('Cloning {0} repository from {1} to {2}'.format(name,
remote_url,
masked_remote_url,
local_dir))
git_command = ['git', 'clone', remote_url, local_dir]
logging_subprocess(git_command, None)

View File

@@ -1 +1 @@
__version__ = '0.7.0'
__version__ = '0.10.1'