mirror of
https://github.com/josegonzalez/python-github-backup.git
synced 2025-12-05 16:18:02 +01:00
Compare commits
27 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b6b6605acd | ||
|
|
ff5e0aa89c | ||
|
|
79726c360d | ||
|
|
a511bb2b49 | ||
|
|
aedf9b2c66 | ||
|
|
b9e35a50f5 | ||
|
|
d0e239b3ef | ||
|
|
29c9373d9d | ||
|
|
eb8b22c81c | ||
|
|
03739ce1be | ||
|
|
d2bb205b4b | ||
|
|
17141c1bb6 | ||
|
|
d362adbbca | ||
|
|
89df625e04 | ||
|
|
675484a215 | ||
|
|
325f77dcd9 | ||
|
|
f12e9167aa | ||
|
|
816447af19 | ||
|
|
d9e15e2be2 | ||
|
|
534145d178 | ||
|
|
fe162eedd5 | ||
|
|
53a9a22afb | ||
|
|
2aa7d4cf1e | ||
|
|
804843c128 | ||
|
|
5fc27a4d42 | ||
|
|
c8b3f048f5 | ||
|
|
2d98251992 |
63
CHANGES.rst
63
CHANGES.rst
@@ -1,6 +1,69 @@
|
||||
Changelog
|
||||
=========
|
||||
|
||||
0.9.0 (2016-03-29)
|
||||
------------------
|
||||
|
||||
- Fix cloning private repos with basic auth or token. [Kazuki Suda]
|
||||
|
||||
0.8.0 (2016-02-14)
|
||||
------------------
|
||||
|
||||
- Don't store issues which are actually pull requests. [Enrico Tröger]
|
||||
|
||||
This prevents storing pull requests twice since the GitHub API returns
|
||||
pull requests also as issues. Those issues will be skipped but only if
|
||||
retrieving pull requests is requested as well.
|
||||
Closes #23.
|
||||
|
||||
|
||||
0.7.0 (2016-02-02)
|
||||
------------------
|
||||
|
||||
- Softly fail if not able to read hooks. [Albert Wang]
|
||||
|
||||
- Add note about 2-factor auth. [Albert Wang]
|
||||
|
||||
- Make user repository search go through endpoint capable of reading
|
||||
private repositories. [Albert Wang]
|
||||
|
||||
- Prompt for password if only username given. [Alex Hall]
|
||||
|
||||
0.6.0 (2015-11-10)
|
||||
------------------
|
||||
|
||||
- Force proper remote url. [Jose Diaz-Gonzalez]
|
||||
|
||||
- Improve error handling in case of HTTP errors. [Enrico Tröger]
|
||||
|
||||
In case of an HTTP status code 404, the returned 'r' was never assigned.
|
||||
In case of URL errors which are not timeouts, we probably should bail
|
||||
out.
|
||||
|
||||
|
||||
- Add --hooks to also include web hooks into the backup. [Enrico Tröger]
|
||||
|
||||
- Create the user specified output directory if it does not exist.
|
||||
[Enrico Tröger]
|
||||
|
||||
Fixes #17.
|
||||
|
||||
|
||||
- Add missing auth argument to _get_response() [Enrico Tröger]
|
||||
|
||||
When running unauthenticated and GitHub starts rate-limiting the client,
|
||||
github-backup crashes because the used auth variable in _get_response()
|
||||
was not available. This change should fix it.
|
||||
|
||||
|
||||
- Add repository URL to error message for non-existing repositories.
|
||||
[Enrico Tröger]
|
||||
|
||||
This makes it easier for the user to identify which repository does not
|
||||
exist or is not initialised, i.e. whether it is the main repository or
|
||||
the wiki repository and which clone URL was used to check.
|
||||
|
||||
|
||||
0.5.0 (2015-10-10)
|
||||
------------------
|
||||
|
||||
|
||||
27
README.rst
27
README.rst
@@ -23,15 +23,14 @@ CLI Usage is as follows::
|
||||
Github Backup [-h] [-u USERNAME] [-p PASSWORD] [-t TOKEN]
|
||||
[-o OUTPUT_DIRECTORY] [--starred] [--watched] [--all]
|
||||
[--issues] [--issue-comments] [--issue-events] [--pulls]
|
||||
[--pull-comments] [--pull-commits] [--repositories]
|
||||
[--wikis] [--skip-existing]
|
||||
[-L [LANGUAGES [LANGUAGES ...]]] [-N NAME_REGEX]
|
||||
[-H GITHUB_HOST] [-O] [-R REPOSITORY] [-P] [-F]
|
||||
[--prefer-ssh] [-v]
|
||||
[--pull-comments] [--pull-commits] [--labels] [--hooks]
|
||||
[--milestones] [--repositories] [--wikis]
|
||||
[--skip-existing] [-L [LANGUAGES [LANGUAGES ...]]]
|
||||
[-N NAME_REGEX] [-H GITHUB_HOST] [-O] [-R REPOSITORY]
|
||||
[-P] [-F] [--prefer-ssh] [-v]
|
||||
USER
|
||||
|
||||
|
||||
Backup a github users account
|
||||
Backup a github account
|
||||
|
||||
positional arguments:
|
||||
USER github username
|
||||
@@ -41,7 +40,8 @@ CLI Usage is as follows::
|
||||
-u USERNAME, --username USERNAME
|
||||
username for basic auth
|
||||
-p PASSWORD, --password PASSWORD
|
||||
password for basic auth
|
||||
password for basic auth. If a username is given but
|
||||
not a password, the password will be prompted for.
|
||||
-t TOKEN, --token TOKEN
|
||||
personal access or OAuth token
|
||||
-o OUTPUT_DIRECTORY, --output-directory OUTPUT_DIRECTORY
|
||||
@@ -55,6 +55,10 @@ CLI Usage is as follows::
|
||||
--pulls include pull requests in backup
|
||||
--pull-comments include pull request review comments in backup
|
||||
--pull-commits include pull request commits in backup
|
||||
--labels include labels in backup
|
||||
--hooks include hooks in backup (works only when
|
||||
authenticated)
|
||||
--milestones include milestones in backup
|
||||
--repositories include repository clone in backup
|
||||
--wikis include wiki clone in backup
|
||||
--skip-existing skip project if a backup directory exists
|
||||
@@ -64,7 +68,7 @@ CLI Usage is as follows::
|
||||
python regex to match names against
|
||||
-H GITHUB_HOST, --github-host GITHUB_HOST
|
||||
GitHub Enterprise hostname
|
||||
-O, --organization whether or not this is a query for an organization
|
||||
-O, --organization whether or not this is an organization user
|
||||
-R REPOSITORY, --repository REPOSITORY
|
||||
name of repository to limit backup to
|
||||
-P, --private include private repositories
|
||||
@@ -74,3 +78,8 @@ CLI Usage is as follows::
|
||||
|
||||
|
||||
The package can be used to back up an *entire* organization or repository, including issues and wikis in the most appropriate format (clones for wikis, JSON files for issues).
|
||||
|
||||
Authentication
|
||||
==============
|
||||
|
||||
Note: Password-based authentication will fail if you have two-factor authentication enabled.
|
||||
|
||||
@@ -7,6 +7,7 @@ import base64
|
||||
import calendar
|
||||
import codecs
|
||||
import errno
|
||||
import getpass
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
@@ -15,6 +16,7 @@ import select
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import urlparse
|
||||
import urllib
|
||||
import urllib2
|
||||
|
||||
@@ -94,6 +96,15 @@ def mkdir_p(*args):
|
||||
else:
|
||||
raise
|
||||
|
||||
def mask_password(url, secret='*****'):
|
||||
parsed = urlparse.urlparse(url)
|
||||
|
||||
if not parsed.password:
|
||||
return url
|
||||
elif parsed.password == 'x-oauth-basic':
|
||||
return url.replace(parsed.username, secret)
|
||||
|
||||
return url.replace(parsed.password, secret)
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(description='Backup a github account',
|
||||
@@ -109,7 +120,9 @@ def parse_args():
|
||||
parser.add_argument('-p',
|
||||
'--password',
|
||||
dest='password',
|
||||
help='password for basic auth')
|
||||
help='password for basic auth. '
|
||||
'If a username is given but not a password, the '
|
||||
'password will be prompted for.')
|
||||
parser.add_argument('-t',
|
||||
'--token',
|
||||
dest='token',
|
||||
@@ -159,6 +172,10 @@ def parse_args():
|
||||
action='store_true',
|
||||
dest='include_labels',
|
||||
help='include labels in backup')
|
||||
parser.add_argument('--hooks',
|
||||
action='store_true',
|
||||
dest='include_hooks',
|
||||
help='include hooks in backup (works only when authenticated)')
|
||||
parser.add_argument('--milestones',
|
||||
action='store_true',
|
||||
dest='include_milestones',
|
||||
@@ -214,18 +231,25 @@ def parse_args():
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def get_auth(args):
|
||||
def get_auth(args, encode=True):
|
||||
auth = None
|
||||
|
||||
if args.token:
|
||||
auth = base64.b64encode(args.token + ':' + 'x-oauth-basic')
|
||||
elif args.username and args.password:
|
||||
auth = base64.b64encode(args.username + ':' + args.password)
|
||||
elif args.username and not args.password:
|
||||
log_error('You must specify a password for basic auth')
|
||||
elif args.password and not args.username:
|
||||
auth = args.token + ':' + 'x-oauth-basic'
|
||||
elif args.username:
|
||||
if not args.password:
|
||||
args.password = getpass.getpass()
|
||||
auth = args.username + ':' + args.password
|
||||
elif args.password:
|
||||
log_error('You must specify a username for basic auth')
|
||||
|
||||
return auth
|
||||
if not auth:
|
||||
return None
|
||||
|
||||
if encode == False:
|
||||
return auth
|
||||
|
||||
return base64.b64encode(auth)
|
||||
|
||||
|
||||
def get_github_api_host(args):
|
||||
@@ -237,7 +261,7 @@ def get_github_api_host(args):
|
||||
return host
|
||||
|
||||
|
||||
def get_github_ssh_host(args):
|
||||
def get_github_host(args):
|
||||
if args.github_host:
|
||||
host = args.github_host
|
||||
else:
|
||||
@@ -245,6 +269,21 @@ def get_github_ssh_host(args):
|
||||
|
||||
return host
|
||||
|
||||
def get_github_repo_url(args, repository):
|
||||
if args.prefer_ssh:
|
||||
return repository['ssh_url']
|
||||
|
||||
auth = get_auth(args, False)
|
||||
if auth:
|
||||
repo_url = 'https://{0}@{1}/{2}/{3}.git'.format(
|
||||
auth,
|
||||
get_github_host(args),
|
||||
args.user,
|
||||
repository['name'])
|
||||
else:
|
||||
repo_url = repository['clone_url']
|
||||
|
||||
return repo_url
|
||||
|
||||
def retrieve_data(args, template, query_args=None, single_request=False):
|
||||
auth = get_auth(args)
|
||||
@@ -256,7 +295,7 @@ def retrieve_data(args, template, query_args=None, single_request=False):
|
||||
while True:
|
||||
page = page + 1
|
||||
request = _construct_request(per_page, page, query_args, template, auth) # noqa
|
||||
r, errors = _get_response(request, template)
|
||||
r, errors = _get_response(request, auth, template)
|
||||
|
||||
status_code = int(r.getcode())
|
||||
|
||||
@@ -289,7 +328,7 @@ def get_query_args(query_args=None):
|
||||
return query_args
|
||||
|
||||
|
||||
def _get_response(request, template):
|
||||
def _get_response(request, auth, template):
|
||||
retry_timeout = 3
|
||||
errors = []
|
||||
# We'll make requests in a loop so we can
|
||||
@@ -300,8 +339,11 @@ def _get_response(request, template):
|
||||
r = urllib2.urlopen(request)
|
||||
except urllib2.HTTPError as exc:
|
||||
errors, should_continue = _request_http_error(exc, auth, errors) # noqa
|
||||
r = exc
|
||||
except urllib2.URLError:
|
||||
should_continue = _request_url_error(template, retry_timeout)
|
||||
if not should_continue:
|
||||
raise
|
||||
|
||||
if should_continue:
|
||||
continue
|
||||
@@ -371,9 +413,8 @@ def _request_url_error(template, retry_timeout):
|
||||
def retrieve_repositories(args):
|
||||
log_info('Retrieving repositories')
|
||||
single_request = False
|
||||
template = 'https://{0}/users/{1}/repos'.format(
|
||||
get_github_api_host(args),
|
||||
args.user)
|
||||
template = 'https://{0}/user/repos'.format(
|
||||
get_github_api_host(args))
|
||||
if args.organization:
|
||||
template = 'https://{0}/orgs/{1}/repos'.format(
|
||||
get_github_api_host(args),
|
||||
@@ -391,6 +432,9 @@ def retrieve_repositories(args):
|
||||
|
||||
def filter_repositories(args, repositories):
|
||||
log_info('Filtering repositories')
|
||||
|
||||
repositories = [r for r in repositories if r['owner']['login'] == args.user]
|
||||
|
||||
name_regex = None
|
||||
if args.name_regex:
|
||||
name_regex = re.compile(args.name_regex)
|
||||
@@ -419,11 +463,7 @@ def backup_repositories(args, output_directory, repositories):
|
||||
backup_cwd = os.path.join(output_directory, 'repositories')
|
||||
repo_cwd = os.path.join(backup_cwd, repository['name'])
|
||||
repo_dir = os.path.join(repo_cwd, 'repository')
|
||||
|
||||
if args.prefer_ssh:
|
||||
repo_url = repository['ssh_url']
|
||||
else:
|
||||
repo_url = repository['clone_url']
|
||||
repo_url = get_github_repo_url(args, repository)
|
||||
|
||||
if args.include_repository or args.include_everything:
|
||||
fetch_repository(repository['name'],
|
||||
@@ -450,6 +490,9 @@ def backup_repositories(args, output_directory, repositories):
|
||||
if args.include_labels or args.include_everything:
|
||||
backup_labels(args, repo_cwd, repository, repos_template)
|
||||
|
||||
if args.include_hooks or args.include_everything:
|
||||
backup_hooks(args, repo_cwd, repository, repos_template)
|
||||
|
||||
|
||||
def backup_issues(args, repo_cwd, repository, repos_template):
|
||||
has_issues_dir = os.path.isdir('{0}/issues/.git'.format(repo_cwd))
|
||||
@@ -461,6 +504,8 @@ def backup_issues(args, repo_cwd, repository, repos_template):
|
||||
mkdir_p(repo_cwd, issue_cwd)
|
||||
|
||||
issues = {}
|
||||
issues_skipped = 0
|
||||
issues_skipped_message = ''
|
||||
_issue_template = '{0}/{1}/issues'.format(repos_template,
|
||||
repository['full_name'])
|
||||
|
||||
@@ -475,9 +520,17 @@ def backup_issues(args, repo_cwd, repository, repos_template):
|
||||
_issue_template,
|
||||
query_args=query_args)
|
||||
for issue in _issues:
|
||||
# skip pull requests which are also returned as issues
|
||||
# if retrieving pull requests is requested as well
|
||||
if 'pull_request' in issue and (args.include_pulls or args.include_everything):
|
||||
issues_skipped += 1
|
||||
continue
|
||||
|
||||
issues[issue['number']] = issue
|
||||
|
||||
log_info('Saving {0} issues to disk'.format(len(issues.keys())))
|
||||
if issues_skipped:
|
||||
issues_skipped_message = ' (skipped {0} pull requests)'.format(issues_skipped)
|
||||
log_info('Saving {0} issues to disk{1}'.format(len(issues.keys()), issues_skipped_message))
|
||||
comments_template = _issue_template + '/{0}/comments'
|
||||
events_template = _issue_template + '/{0}/events'
|
||||
for number, issue in issues.iteritems():
|
||||
@@ -575,27 +628,52 @@ def backup_labels(args, repo_cwd, repository, repos_template):
|
||||
label_cwd)
|
||||
|
||||
|
||||
def backup_hooks(args, repo_cwd, repository, repos_template):
|
||||
auth = get_auth(args)
|
||||
if not auth:
|
||||
log_info("Skipping hooks since no authentication provided")
|
||||
return
|
||||
hook_cwd = os.path.join(repo_cwd, 'hooks')
|
||||
output_file = '{0}/hooks.json'.format(hook_cwd)
|
||||
template = '{0}/{1}/hooks'.format(repos_template,
|
||||
repository['full_name'])
|
||||
try:
|
||||
_backup_data(args,
|
||||
'hooks',
|
||||
template,
|
||||
output_file,
|
||||
hook_cwd)
|
||||
except SystemExit:
|
||||
log_info("Unable to read hooks, skipping")
|
||||
|
||||
|
||||
def fetch_repository(name, remote_url, local_dir, skip_existing=False):
|
||||
clone_exists = os.path.exists(os.path.join(local_dir, '.git'))
|
||||
|
||||
if clone_exists and skip_existing:
|
||||
return
|
||||
|
||||
masked_remote_url = mask_password(remote_url)
|
||||
|
||||
initalized = subprocess.call('git ls-remote ' + remote_url,
|
||||
stdout=FNULL,
|
||||
stderr=FNULL,
|
||||
shell=True)
|
||||
if initalized == 128:
|
||||
log_info("Skipping {0} since it's not initalized".format(name))
|
||||
log_info("Skipping {0} ({1}) since it's not initalized".format(name, masked_remote_url))
|
||||
return
|
||||
|
||||
if clone_exists:
|
||||
log_info('Updating {0} in {1}'.format(name, local_dir))
|
||||
git_command = ['git', 'remote', 'rm', 'origin']
|
||||
logging_subprocess(git_command, None, cwd=local_dir)
|
||||
git_command = ['git', 'remote', 'add', 'origin', remote_url]
|
||||
logging_subprocess(git_command, None, cwd=local_dir)
|
||||
git_command = ['git', 'fetch', '--all', '--tags', '--prune']
|
||||
logging_subprocess(git_command, None, cwd=local_dir)
|
||||
else:
|
||||
log_info('Cloning {0} repository from {1} to {2}'.format(name,
|
||||
remote_url,
|
||||
masked_remote_url,
|
||||
local_dir))
|
||||
git_command = ['git', 'clone', remote_url, local_dir]
|
||||
logging_subprocess(git_command, None)
|
||||
@@ -651,8 +729,8 @@ def main():
|
||||
|
||||
output_directory = os.path.realpath(args.output_directory)
|
||||
if not os.path.isdir(output_directory):
|
||||
log_error('Specified output directory is not a directory: {0}'.format(
|
||||
output_directory))
|
||||
log_info('Create output directory {0}'.format(output_directory))
|
||||
mkdir_p(output_directory)
|
||||
|
||||
log_info('Backing up user {0} to {1}'.format(args.user, output_directory))
|
||||
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = '0.5.0'
|
||||
__version__ = '0.9.0'
|
||||
|
||||
Reference in New Issue
Block a user