mirror of
https://github.com/josegonzalez/python-github-backup.git
synced 2025-12-05 16:18:02 +01:00
Compare commits
19 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9e1cba9817 | ||
|
|
3859a80b7a | ||
|
|
8c12d54898 | ||
|
|
b6b6605acd | ||
|
|
ff5e0aa89c | ||
|
|
79726c360d | ||
|
|
a511bb2b49 | ||
|
|
aedf9b2c66 | ||
|
|
b9e35a50f5 | ||
|
|
d0e239b3ef | ||
|
|
29c9373d9d | ||
|
|
eb8b22c81c | ||
|
|
03739ce1be | ||
|
|
d2bb205b4b | ||
|
|
17141c1bb6 | ||
|
|
d362adbbca | ||
|
|
89df625e04 | ||
|
|
675484a215 | ||
|
|
325f77dcd9 |
28
CHANGES.rst
28
CHANGES.rst
@@ -1,6 +1,34 @@
|
|||||||
Changelog
|
Changelog
|
||||||
=========
|
=========
|
||||||
|
|
||||||
|
0.9.0 (2016-03-29)
|
||||||
|
------------------
|
||||||
|
|
||||||
|
- Fix cloning private repos with basic auth or token. [Kazuki Suda]
|
||||||
|
|
||||||
|
0.8.0 (2016-02-14)
|
||||||
|
------------------
|
||||||
|
|
||||||
|
- Don't store issues which are actually pull requests. [Enrico Tröger]
|
||||||
|
|
||||||
|
This prevents storing pull requests twice since the Github API returns
|
||||||
|
pull requests also as issues. Those issues will be skipped but only if
|
||||||
|
retrieving pull requests is requested as well.
|
||||||
|
Closes #23.
|
||||||
|
|
||||||
|
|
||||||
|
0.7.0 (2016-02-02)
|
||||||
|
------------------
|
||||||
|
|
||||||
|
- Softly fail if not able to read hooks. [Albert Wang]
|
||||||
|
|
||||||
|
- Add note about 2-factor auth. [Albert Wang]
|
||||||
|
|
||||||
|
- Make user repository search go through endpoint capable of reading
|
||||||
|
private repositories. [Albert Wang]
|
||||||
|
|
||||||
|
- Prompt for password if only username given. [Alex Hall]
|
||||||
|
|
||||||
0.6.0 (2015-11-10)
|
0.6.0 (2015-11-10)
|
||||||
------------------
|
------------------
|
||||||
|
|
||||||
|
|||||||
29
README.rst
29
README.rst
@@ -23,15 +23,14 @@ CLI Usage is as follows::
|
|||||||
Github Backup [-h] [-u USERNAME] [-p PASSWORD] [-t TOKEN]
|
Github Backup [-h] [-u USERNAME] [-p PASSWORD] [-t TOKEN]
|
||||||
[-o OUTPUT_DIRECTORY] [--starred] [--watched] [--all]
|
[-o OUTPUT_DIRECTORY] [--starred] [--watched] [--all]
|
||||||
[--issues] [--issue-comments] [--issue-events] [--pulls]
|
[--issues] [--issue-comments] [--issue-events] [--pulls]
|
||||||
[--pull-comments] [--pull-commits] [--repositories]
|
[--pull-comments] [--pull-commits] [--labels] [--hooks]
|
||||||
[--wikis] [--labels] [--hooks] [--skip-existing]
|
[--milestones] [--repositories] [--wikis]
|
||||||
[-L [LANGUAGES [LANGUAGES ...]]] [-N NAME_REGEX]
|
[--skip-existing] [-L [LANGUAGES [LANGUAGES ...]]]
|
||||||
[-H GITHUB_HOST] [-O] [-R REPOSITORY] [-P] [-F]
|
[-N NAME_REGEX] [-H GITHUB_HOST] [-O] [-R REPOSITORY]
|
||||||
[--prefer-ssh] [-v]
|
[-P] [-F] [--prefer-ssh] [-v]
|
||||||
USER
|
USER
|
||||||
|
|
||||||
|
Backup a github account
|
||||||
Backup a github users account
|
|
||||||
|
|
||||||
positional arguments:
|
positional arguments:
|
||||||
USER github username
|
USER github username
|
||||||
@@ -41,7 +40,8 @@ CLI Usage is as follows::
|
|||||||
-u USERNAME, --username USERNAME
|
-u USERNAME, --username USERNAME
|
||||||
username for basic auth
|
username for basic auth
|
||||||
-p PASSWORD, --password PASSWORD
|
-p PASSWORD, --password PASSWORD
|
||||||
password for basic auth
|
password for basic auth. If a username is given but
|
||||||
|
not a password, the password will be prompted for.
|
||||||
-t TOKEN, --token TOKEN
|
-t TOKEN, --token TOKEN
|
||||||
personal access or OAuth token
|
personal access or OAuth token
|
||||||
-o OUTPUT_DIRECTORY, --output-directory OUTPUT_DIRECTORY
|
-o OUTPUT_DIRECTORY, --output-directory OUTPUT_DIRECTORY
|
||||||
@@ -55,10 +55,12 @@ CLI Usage is as follows::
|
|||||||
--pulls include pull requests in backup
|
--pulls include pull requests in backup
|
||||||
--pull-comments include pull request review comments in backup
|
--pull-comments include pull request review comments in backup
|
||||||
--pull-commits include pull request commits in backup
|
--pull-commits include pull request commits in backup
|
||||||
|
--labels include labels in backup
|
||||||
|
--hooks include hooks in backup (works only when
|
||||||
|
authenticated)
|
||||||
|
--milestones include milestones in backup
|
||||||
--repositories include repository clone in backup
|
--repositories include repository clone in backup
|
||||||
--wikis include wiki clone in backup
|
--wikis include wiki clone in backup
|
||||||
--labels include labels in backup
|
|
||||||
--hooks include web hooks in backup (works only when authenticated)
|
|
||||||
--skip-existing skip project if a backup directory exists
|
--skip-existing skip project if a backup directory exists
|
||||||
-L [LANGUAGES [LANGUAGES ...]], --languages [LANGUAGES [LANGUAGES ...]]
|
-L [LANGUAGES [LANGUAGES ...]], --languages [LANGUAGES [LANGUAGES ...]]
|
||||||
only allow these languages
|
only allow these languages
|
||||||
@@ -66,7 +68,7 @@ CLI Usage is as follows::
|
|||||||
python regex to match names against
|
python regex to match names against
|
||||||
-H GITHUB_HOST, --github-host GITHUB_HOST
|
-H GITHUB_HOST, --github-host GITHUB_HOST
|
||||||
GitHub Enterprise hostname
|
GitHub Enterprise hostname
|
||||||
-O, --organization whether or not this is a query for an organization
|
-O, --organization whether or not this is an organization user
|
||||||
-R REPOSITORY, --repository REPOSITORY
|
-R REPOSITORY, --repository REPOSITORY
|
||||||
name of repository to limit backup to
|
name of repository to limit backup to
|
||||||
-P, --private include private repositories
|
-P, --private include private repositories
|
||||||
@@ -76,3 +78,8 @@ CLI Usage is as follows::
|
|||||||
|
|
||||||
|
|
||||||
The package can be used to backup an *entire* organization or repository, including issues and wikis in the most appropriate format (clones for wikis, json files for issues).
|
The package can be used to backup an *entire* organization or repository, including issues and wikis in the most appropriate format (clones for wikis, json files for issues).
|
||||||
|
|
||||||
|
Authentication
|
||||||
|
==============
|
||||||
|
|
||||||
|
Note: Password-based authentication will fail if you have two-factor authentication enabled.
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import base64
|
|||||||
import calendar
|
import calendar
|
||||||
import codecs
|
import codecs
|
||||||
import errno
|
import errno
|
||||||
|
import getpass
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
@@ -15,6 +16,7 @@ import select
|
|||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
|
import urlparse
|
||||||
import urllib
|
import urllib
|
||||||
import urllib2
|
import urllib2
|
||||||
|
|
||||||
@@ -94,6 +96,15 @@ def mkdir_p(*args):
|
|||||||
else:
|
else:
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
def mask_password(url, secret='*****'):
    """Return ``url`` with the credential in its userinfo part masked.

    Intended for log output so that passwords and tokens embedded in a
    remote URL are not printed.

    :param url: URL that may contain ``user:password@`` userinfo.
    :param secret: placeholder text substituted for the credential.
    :returns: ``url`` unchanged when it carries no password; otherwise the
        URL with the password replaced by ``secret``. When the password is
        the literal ``x-oauth-basic`` the username slot is masked instead,
        since in that form the username position holds the token.
    """
    parsed = urlparse.urlparse(url)

    if not parsed.password:
        return url

    # Split the netloc the same way urlparse derives username/password:
    # userinfo is everything before the last '@', and the password is
    # everything after the first ':' inside it.
    userinfo, _, hostinfo = parsed.netloc.rpartition('@')
    username, _, password = userinfo.partition(':')

    if password == 'x-oauth-basic':
        masked_userinfo = secret + ':' + password
    else:
        masked_userinfo = username + ':' + secret

    # Rewrite only the netloc (first occurrence). A plain
    # url.replace(password, secret) would also clobber any other part of
    # the URL that happens to contain the same text (e.g. a password of
    # "git" would mangle "github.com" and the ".git" suffix).
    masked_netloc = masked_userinfo + '@' + hostinfo
    return url.replace(parsed.netloc, masked_netloc, 1)
|
||||||
|
|
||||||
def parse_args():
|
def parse_args():
|
||||||
parser = argparse.ArgumentParser(description='Backup a github account',
|
parser = argparse.ArgumentParser(description='Backup a github account',
|
||||||
@@ -109,7 +120,9 @@ def parse_args():
|
|||||||
parser.add_argument('-p',
|
parser.add_argument('-p',
|
||||||
'--password',
|
'--password',
|
||||||
dest='password',
|
dest='password',
|
||||||
help='password for basic auth')
|
help='password for basic auth. '
|
||||||
|
'If a username is given but not a password, the '
|
||||||
|
'password will be prompted for.')
|
||||||
parser.add_argument('-t',
|
parser.add_argument('-t',
|
||||||
'--token',
|
'--token',
|
||||||
dest='token',
|
dest='token',
|
||||||
@@ -119,6 +132,11 @@ def parse_args():
|
|||||||
default='.',
|
default='.',
|
||||||
dest='output_directory',
|
dest='output_directory',
|
||||||
help='directory at which to backup the repositories')
|
help='directory at which to backup the repositories')
|
||||||
|
parser.add_argument('-i',
|
||||||
|
'--incremental',
|
||||||
|
action='store_true',
|
||||||
|
dest='incremental',
|
||||||
|
help='incremental backup')
|
||||||
parser.add_argument('--starred',
|
parser.add_argument('--starred',
|
||||||
action='store_true',
|
action='store_true',
|
||||||
dest='include_starred',
|
dest='include_starred',
|
||||||
@@ -218,18 +236,25 @@ def parse_args():
|
|||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
def get_auth(args):
|
def get_auth(args, encode=True):
|
||||||
auth = None
|
auth = None
|
||||||
|
|
||||||
if args.token:
|
if args.token:
|
||||||
auth = base64.b64encode(args.token + ':' + 'x-oauth-basic')
|
auth = args.token + ':' + 'x-oauth-basic'
|
||||||
elif args.username and args.password:
|
elif args.username:
|
||||||
auth = base64.b64encode(args.username + ':' + args.password)
|
if not args.password:
|
||||||
elif args.username and not args.password:
|
args.password = getpass.getpass()
|
||||||
log_error('You must specify a password for basic auth')
|
auth = args.username + ':' + args.password
|
||||||
elif args.password and not args.username:
|
elif args.password:
|
||||||
log_error('You must specify a username for basic auth')
|
log_error('You must specify a username for basic auth')
|
||||||
|
|
||||||
return auth
|
if not auth:
|
||||||
|
return None
|
||||||
|
|
||||||
|
if encode == False:
|
||||||
|
return auth
|
||||||
|
|
||||||
|
return base64.b64encode(auth)
|
||||||
|
|
||||||
|
|
||||||
def get_github_api_host(args):
|
def get_github_api_host(args):
|
||||||
@@ -241,7 +266,7 @@ def get_github_api_host(args):
|
|||||||
return host
|
return host
|
||||||
|
|
||||||
|
|
||||||
def get_github_ssh_host(args):
|
def get_github_host(args):
|
||||||
if args.github_host:
|
if args.github_host:
|
||||||
host = args.github_host
|
host = args.github_host
|
||||||
else:
|
else:
|
||||||
@@ -249,6 +274,21 @@ def get_github_ssh_host(args):
|
|||||||
|
|
||||||
return host
|
return host
|
||||||
|
|
||||||
|
def get_github_repo_url(args, repository):
    """Return the URL that git should use to clone/fetch ``repository``.

    :param args: parsed command-line arguments; reads ``prefer_ssh`` and
        ``user`` here and is passed on to ``get_auth`` and
        ``get_github_host``.
    :param repository: repository dict; reads ``ssh_url``, ``clone_url``
        and ``name``.
    :returns: ``repository['ssh_url']`` when SSH is preferred; otherwise an
        ``https://`` URL with credentials embedded when ``get_auth``
        yields any, falling back to ``repository['clone_url']``.
    """
    if args.prefer_ssh:
        return repository['ssh_url']

    # Ask for the raw (not base64-encoded) credential string; it is
    # expected to have the form "<user-or-token>:<secret>".
    auth = get_auth(args, False)
    if not auth:
        return repository['clone_url']

    # Local import with a fallback so this works on Python 2 and 3.
    try:
        from urllib import quote
    except ImportError:
        from urllib.parse import quote

    # Percent-encode each half of the userinfo separately. Interpolating
    # the raw string would produce a broken URL whenever the password or
    # token contains characters such as '@', '/', ':' or '#'.
    username, _, secret = auth.partition(':')
    return 'https://{0}:{1}@{2}/{3}/{4}.git'.format(
        quote(username, safe=''),
        quote(secret, safe=''),
        get_github_host(args),
        args.user,
        repository['name'])
|
||||||
|
|
||||||
def retrieve_data(args, template, query_args=None, single_request=False):
|
def retrieve_data(args, template, query_args=None, single_request=False):
|
||||||
auth = get_auth(args)
|
auth = get_auth(args)
|
||||||
@@ -378,9 +418,8 @@ def _request_url_error(template, retry_timeout):
|
|||||||
def retrieve_repositories(args):
|
def retrieve_repositories(args):
|
||||||
log_info('Retrieving repositories')
|
log_info('Retrieving repositories')
|
||||||
single_request = False
|
single_request = False
|
||||||
template = 'https://{0}/users/{1}/repos'.format(
|
template = 'https://{0}/user/repos'.format(
|
||||||
get_github_api_host(args),
|
get_github_api_host(args))
|
||||||
args.user)
|
|
||||||
if args.organization:
|
if args.organization:
|
||||||
template = 'https://{0}/orgs/{1}/repos'.format(
|
template = 'https://{0}/orgs/{1}/repos'.format(
|
||||||
get_github_api_host(args),
|
get_github_api_host(args),
|
||||||
@@ -398,6 +437,9 @@ def retrieve_repositories(args):
|
|||||||
|
|
||||||
def filter_repositories(args, repositories):
|
def filter_repositories(args, repositories):
|
||||||
log_info('Filtering repositories')
|
log_info('Filtering repositories')
|
||||||
|
|
||||||
|
repositories = [r for r in repositories if r['owner']['login'] == args.user]
|
||||||
|
|
||||||
name_regex = None
|
name_regex = None
|
||||||
if args.name_regex:
|
if args.name_regex:
|
||||||
name_regex = re.compile(args.name_regex)
|
name_regex = re.compile(args.name_regex)
|
||||||
@@ -422,15 +464,21 @@ def backup_repositories(args, output_directory, repositories):
|
|||||||
log_info('Backing up repositories')
|
log_info('Backing up repositories')
|
||||||
repos_template = 'https://{0}/repos'.format(get_github_api_host(args))
|
repos_template = 'https://{0}/repos'.format(get_github_api_host(args))
|
||||||
|
|
||||||
|
if args.incremental:
|
||||||
|
last_update = max(repository['updated_at'] for repository in repositories)
|
||||||
|
last_update_path = os.path.join(output_directory, 'last_update')
|
||||||
|
if os.path.exists(last_update_path):
|
||||||
|
args.since = open(last_update_path).read().strip()
|
||||||
|
else:
|
||||||
|
args.since = None
|
||||||
|
else:
|
||||||
|
args.since = None
|
||||||
|
|
||||||
for repository in repositories:
|
for repository in repositories:
|
||||||
backup_cwd = os.path.join(output_directory, 'repositories')
|
backup_cwd = os.path.join(output_directory, 'repositories')
|
||||||
repo_cwd = os.path.join(backup_cwd, repository['name'])
|
repo_cwd = os.path.join(backup_cwd, repository['name'])
|
||||||
repo_dir = os.path.join(repo_cwd, 'repository')
|
repo_dir = os.path.join(repo_cwd, 'repository')
|
||||||
|
repo_url = get_github_repo_url(args, repository)
|
||||||
if args.prefer_ssh:
|
|
||||||
repo_url = repository['ssh_url']
|
|
||||||
else:
|
|
||||||
repo_url = repository['clone_url']
|
|
||||||
|
|
||||||
if args.include_repository or args.include_everything:
|
if args.include_repository or args.include_everything:
|
||||||
fetch_repository(repository['name'],
|
fetch_repository(repository['name'],
|
||||||
@@ -460,6 +508,8 @@ def backup_repositories(args, output_directory, repositories):
|
|||||||
if args.include_hooks or args.include_everything:
|
if args.include_hooks or args.include_everything:
|
||||||
backup_hooks(args, repo_cwd, repository, repos_template)
|
backup_hooks(args, repo_cwd, repository, repos_template)
|
||||||
|
|
||||||
|
if args.incremental:
|
||||||
|
open(last_update_path, 'w').write(last_update)
|
||||||
|
|
||||||
def backup_issues(args, repo_cwd, repository, repos_template):
|
def backup_issues(args, repo_cwd, repository, repos_template):
|
||||||
has_issues_dir = os.path.isdir('{0}/issues/.git'.format(repo_cwd))
|
has_issues_dir = os.path.isdir('{0}/issues/.git'.format(repo_cwd))
|
||||||
@@ -471,6 +521,8 @@ def backup_issues(args, repo_cwd, repository, repos_template):
|
|||||||
mkdir_p(repo_cwd, issue_cwd)
|
mkdir_p(repo_cwd, issue_cwd)
|
||||||
|
|
||||||
issues = {}
|
issues = {}
|
||||||
|
issues_skipped = 0
|
||||||
|
issues_skipped_message = ''
|
||||||
_issue_template = '{0}/{1}/issues'.format(repos_template,
|
_issue_template = '{0}/{1}/issues'.format(repos_template,
|
||||||
repository['full_name'])
|
repository['full_name'])
|
||||||
|
|
||||||
@@ -480,14 +532,24 @@ def backup_issues(args, repo_cwd, repository, repos_template):
|
|||||||
'filter': 'all',
|
'filter': 'all',
|
||||||
'state': issue_state
|
'state': issue_state
|
||||||
}
|
}
|
||||||
|
if args.since:
|
||||||
|
query_args['since'] = args.since
|
||||||
|
|
||||||
_issues = retrieve_data(args,
|
_issues = retrieve_data(args,
|
||||||
_issue_template,
|
_issue_template,
|
||||||
query_args=query_args)
|
query_args=query_args)
|
||||||
for issue in _issues:
|
for issue in _issues:
|
||||||
|
# skip pull requests which are also returned as issues
|
||||||
|
# if retrieving pull requests is requested as well
|
||||||
|
if 'pull_request' in issue and (args.include_pulls or args.include_everything):
|
||||||
|
issues_skipped += 1
|
||||||
|
continue
|
||||||
|
|
||||||
issues[issue['number']] = issue
|
issues[issue['number']] = issue
|
||||||
|
|
||||||
log_info('Saving {0} issues to disk'.format(len(issues.keys())))
|
if issues_skipped:
|
||||||
|
issues_skipped_message = ' (skipped {0} pull requests)'.format(issues_skipped)
|
||||||
|
log_info('Saving {0} issues to disk{1}'.format(len(issues.keys()), issues_skipped_message))
|
||||||
comments_template = _issue_template + '/{0}/comments'
|
comments_template = _issue_template + '/{0}/comments'
|
||||||
events_template = _issue_template + '/{0}/events'
|
events_template = _issue_template + '/{0}/events'
|
||||||
for number, issue in issues.iteritems():
|
for number, issue in issues.iteritems():
|
||||||
@@ -520,14 +582,18 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
|
|||||||
for pull_state in pull_states:
|
for pull_state in pull_states:
|
||||||
query_args = {
|
query_args = {
|
||||||
'filter': 'all',
|
'filter': 'all',
|
||||||
'state': pull_state
|
'state': pull_state,
|
||||||
|
'sort': 'updated',
|
||||||
|
'direction': 'desc',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# It'd be nice to be able to apply the args.since filter here...
|
||||||
_pulls = retrieve_data(args,
|
_pulls = retrieve_data(args,
|
||||||
_pulls_template,
|
_pulls_template,
|
||||||
query_args=query_args)
|
query_args=query_args)
|
||||||
for pull in _pulls:
|
for pull in _pulls:
|
||||||
pulls[pull['number']] = pull
|
if not args.since or pull['updated_at'] >= args.since:
|
||||||
|
pulls[pull['number']] = pull
|
||||||
|
|
||||||
log_info('Saving {0} pull requests to disk'.format(len(pulls.keys())))
|
log_info('Saving {0} pull requests to disk'.format(len(pulls.keys())))
|
||||||
comments_template = _pulls_template + '/{0}/comments'
|
comments_template = _pulls_template + '/{0}/comments'
|
||||||
@@ -594,11 +660,14 @@ def backup_hooks(args, repo_cwd, repository, repos_template):
|
|||||||
output_file = '{0}/hooks.json'.format(hook_cwd)
|
output_file = '{0}/hooks.json'.format(hook_cwd)
|
||||||
template = '{0}/{1}/hooks'.format(repos_template,
|
template = '{0}/{1}/hooks'.format(repos_template,
|
||||||
repository['full_name'])
|
repository['full_name'])
|
||||||
_backup_data(args,
|
try:
|
||||||
'hooks',
|
_backup_data(args,
|
||||||
template,
|
'hooks',
|
||||||
output_file,
|
template,
|
||||||
hook_cwd)
|
output_file,
|
||||||
|
hook_cwd)
|
||||||
|
except SystemExit:
|
||||||
|
log_info("Unable to read hooks, skipping")
|
||||||
|
|
||||||
|
|
||||||
def fetch_repository(name, remote_url, local_dir, skip_existing=False):
|
def fetch_repository(name, remote_url, local_dir, skip_existing=False):
|
||||||
@@ -607,12 +676,14 @@ def fetch_repository(name, remote_url, local_dir, skip_existing=False):
|
|||||||
if clone_exists and skip_existing:
|
if clone_exists and skip_existing:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
masked_remote_url = mask_password(remote_url)
|
||||||
|
|
||||||
initalized = subprocess.call('git ls-remote ' + remote_url,
|
initalized = subprocess.call('git ls-remote ' + remote_url,
|
||||||
stdout=FNULL,
|
stdout=FNULL,
|
||||||
stderr=FNULL,
|
stderr=FNULL,
|
||||||
shell=True)
|
shell=True)
|
||||||
if initalized == 128:
|
if initalized == 128:
|
||||||
log_info("Skipping {0} ({1}) since it's not initalized".format(name, remote_url))
|
log_info("Skipping {0} ({1}) since it's not initalized".format(name, masked_remote_url))
|
||||||
return
|
return
|
||||||
|
|
||||||
if clone_exists:
|
if clone_exists:
|
||||||
@@ -625,7 +696,7 @@ def fetch_repository(name, remote_url, local_dir, skip_existing=False):
|
|||||||
logging_subprocess(git_command, None, cwd=local_dir)
|
logging_subprocess(git_command, None, cwd=local_dir)
|
||||||
else:
|
else:
|
||||||
log_info('Cloning {0} repository from {1} to {2}'.format(name,
|
log_info('Cloning {0} repository from {1} to {2}'.format(name,
|
||||||
remote_url,
|
masked_remote_url,
|
||||||
local_dir))
|
local_dir))
|
||||||
git_command = ['git', 'clone', remote_url, local_dir]
|
git_command = ['git', 'clone', remote_url, local_dir]
|
||||||
logging_subprocess(git_command, None)
|
logging_subprocess(git_command, None)
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
__version__ = '0.6.0'
|
__version__ = '0.10.0'
|
||||||
|
|||||||
Reference in New Issue
Block a user