mirror of
https://github.com/josegonzalez/python-github-backup.git
synced 2025-12-05 08:08:02 +01:00
Compare commits
33 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
031a984434 | ||
|
|
9e16f39e3e | ||
|
|
2de96390be | ||
|
|
78cff47a91 | ||
|
|
fa27988c1c | ||
|
|
bb2e2b8c6f | ||
|
|
8fd0f2b64f | ||
|
|
753a551961 | ||
|
|
607b6ca69b | ||
|
|
ef71655b01 | ||
|
|
d8bcbfa644 | ||
|
|
751b0d6e82 | ||
|
|
ea633ca2bb | ||
|
|
a2115ce3e5 | ||
|
|
8a00bb1903 | ||
|
|
e53f8d4724 | ||
|
|
356f5f674b | ||
|
|
13128635cb | ||
|
|
6e6842b025 | ||
|
|
272177c395 | ||
|
|
70f711ea68 | ||
|
|
3fc9957aac | ||
|
|
78098aae23 | ||
|
|
fb7cc5ed53 | ||
|
|
c0679b9cc3 | ||
|
|
03b9d1b2d8 | ||
|
|
5025f69878 | ||
|
|
a351cdc103 | ||
|
|
85e4399408 | ||
|
|
c8171b692a | ||
|
|
523c811cc6 | ||
|
|
857ad0afab | ||
|
|
3f65eadee1 |
23
.circleci/config.yml
Normal file
23
.circleci/config.yml
Normal file
@@ -0,0 +1,23 @@
|
||||
version: 2.1
|
||||
|
||||
orbs:
|
||||
python: circleci/python@0.3.2
|
||||
|
||||
jobs:
|
||||
build-and-test:
|
||||
executor: python/default
|
||||
steps:
|
||||
- checkout
|
||||
- python/load-cache
|
||||
- run:
|
||||
command: pip install flake8
|
||||
name: Install dependencies
|
||||
- python/save-cache
|
||||
- run:
|
||||
command: flake8 --ignore=E501
|
||||
name: Lint
|
||||
|
||||
workflows:
|
||||
main:
|
||||
jobs:
|
||||
- build-and-test
|
||||
54
CHANGES.rst
54
CHANGES.rst
@@ -1,9 +1,61 @@
|
||||
Changelog
|
||||
=========
|
||||
|
||||
0.28.0 (2020-02-03)
|
||||
0.36.0 (2020-08-29)
|
||||
-------------------
|
||||
------------------------
|
||||
- Add flake8 instructions to readme. [Albert Wang]
|
||||
- Fix regex string. [Albert Wang]
|
||||
- Fix whitespace issues. [Albert Wang]
|
||||
- Do not use bare excepts. [Albert Wang]
|
||||
- Add .circleci/config.yml. [Albert Wang]
|
||||
- Include --private flag in example. [wouter bolsterlee]
|
||||
|
||||
By default, private repositories are not included. This is surprising.
|
||||
It took me a while to figure this out, and making that clear in the
|
||||
example can help others to be aware of that.
|
||||
|
||||
|
||||
0.35.0 (2020-08-05)
|
||||
-------------------
|
||||
- Make API request throttling optional. [Samantha Baldwin]
|
||||
|
||||
|
||||
0.34.0 (2020-07-24)
|
||||
-------------------
|
||||
- Add logic for transforming gist repository urls to ssh. [Matt Fields]
|
||||
|
||||
|
||||
0.33.0 (2020-04-13)
|
||||
-------------------
|
||||
- Add basic API request throttling. [Enrico Tröger]
|
||||
|
||||
A simple approach to throttle API requests and so keep within the rate
|
||||
limits of the API. Can be enabled with "--throttle-limit" to specify
|
||||
when throttling should start.
|
||||
"--throttle-pause" defines the time to sleep between further API
|
||||
requests.
|
||||
|
||||
|
||||
0.32.0 (2020-04-13)
|
||||
-------------------
|
||||
- Add timestamp to log messages. [Enrico Tröger]
|
||||
|
||||
|
||||
0.31.0 (2020-02-25)
|
||||
-------------------
|
||||
- #123 update: changed --as-app 'help' description. [ethan]
|
||||
- #123: Support Authenticating As Github Application. [ethan]
|
||||
|
||||
|
||||
0.29.0 (2020-02-14)
|
||||
-------------------
|
||||
- #50 update: keep main() in bin. [ethan]
|
||||
- #50 - refactor for friendlier import. [ethan]
|
||||
|
||||
|
||||
0.28.0 (2020-02-03)
|
||||
-------------------
|
||||
- Remove deprecated (and removed) git lfs flags. [smiley]
|
||||
|
||||
"--tags" and "--force" were removed at some point from "git lfs fetch". This broke our backup script.
|
||||
|
||||
63
README.rst
63
README.rst
@@ -29,19 +29,20 @@ Usage
|
||||
|
||||
CLI Usage is as follows::
|
||||
|
||||
github-backup [-h] [-u USERNAME] [-p PASSWORD] [-t TOKEN]
|
||||
github-backup [-h] [-u USERNAME] [-p PASSWORD] [-t TOKEN] [--as-app]
|
||||
[-o OUTPUT_DIRECTORY] [-i] [--starred] [--all-starred]
|
||||
[--watched] [--followers] [--following] [--all]
|
||||
[--issues] [--issue-comments] [--issue-events] [--pulls]
|
||||
[--pull-comments] [--pull-commits] [--labels] [--hooks]
|
||||
[--milestones] [--repositories] [--releases] [--assets]
|
||||
[--pull-comments] [--pull-commits] [--pull-details]
|
||||
[--labels] [--hooks] [--milestones] [--repositories]
|
||||
[--bare] [--lfs] [--wikis] [--gists] [--starred-gists]
|
||||
[--skip-existing]
|
||||
[-L [LANGUAGES [LANGUAGES ...]]] [-N NAME_REGEX]
|
||||
[-H GITHUB_HOST] [-O] [-R REPOSITORY] [-P] [-F]
|
||||
[--prefer-ssh] [-v]
|
||||
[--skip-existing] [-L [LANGUAGES [LANGUAGES ...]]]
|
||||
[-N NAME_REGEX] [-H GITHUB_HOST] [-O] [-R REPOSITORY]
|
||||
[-P] [-F] [--prefer-ssh] [-v]
|
||||
[--keychain-name OSX_KEYCHAIN_ITEM_NAME]
|
||||
[--keychain-account OSX_KEYCHAIN_ITEM_ACCOUNT]
|
||||
[--releases] [--assets] [--throttle-limit THROTTLE_LIMIT]
|
||||
[--throttle-pause THROTTLE_PAUSE]
|
||||
USER
|
||||
|
||||
Backup a github account
|
||||
@@ -57,36 +58,36 @@ CLI Usage is as follows::
|
||||
password for basic auth. If a username is given but
|
||||
not a password, the password will be prompted for.
|
||||
-t TOKEN, --token TOKEN
|
||||
personal access or OAuth token, or path to token
|
||||
(file://...)
|
||||
personal access, OAuth, or JSON Web token, or path to
|
||||
token (file://...)
|
||||
--as-app authenticate as github app instead of as a user.
|
||||
-o OUTPUT_DIRECTORY, --output-directory OUTPUT_DIRECTORY
|
||||
directory at which to backup the repositories
|
||||
-i, --incremental incremental backup
|
||||
--starred include JSON output of starred repositories in backup
|
||||
--all-starred include starred repositories in backup
|
||||
--watched include watched repositories in backup
|
||||
--all-starred include starred repositories in backup [*]
|
||||
--watched include JSON output of watched repositories in backup
|
||||
--followers include JSON output of followers in backup
|
||||
--following include JSON output of following users in backup
|
||||
--all include everything in backup
|
||||
--all include everything in backup (not including [*])
|
||||
--issues include issues in backup
|
||||
--issue-comments include issue comments in backup
|
||||
--issue-events include issue events in backup
|
||||
--pulls include pull requests in backup
|
||||
--pull-comments include pull request review comments in backup
|
||||
--pull-commits include pull request commits in backup
|
||||
--pull-details include more pull request details in backup [*]
|
||||
--labels include labels in backup
|
||||
--hooks include hooks in backup (works only when
|
||||
authenticated)
|
||||
--milestones include milestones in backup
|
||||
--repositories include repository clone in backup
|
||||
--releases include repository releases' information without assets or binaries
|
||||
--assets include assets alongside release information; only applies if including releases
|
||||
--bare clone bare repositories
|
||||
--lfs clone LFS repositories (requires Git LFS to be
|
||||
installed, https://git-lfs.github.com)
|
||||
installed, https://git-lfs.github.com) [*]
|
||||
--wikis include wiki clone in backup
|
||||
--gists include gists in backup
|
||||
--starred-gists include starred gists in backup
|
||||
--gists include gists in backup [*]
|
||||
--starred-gists include starred gists in backup [*]
|
||||
--skip-existing skip project if a backup directory exists
|
||||
-L [LANGUAGES [LANGUAGES ...]], --languages [LANGUAGES [LANGUAGES ...]]
|
||||
only allow these languages
|
||||
@@ -97,8 +98,8 @@ CLI Usage is as follows::
|
||||
-O, --organization whether or not this is an organization user
|
||||
-R REPOSITORY, --repository REPOSITORY
|
||||
name of repository to limit backup to
|
||||
-P, --private include private repositories
|
||||
-F, --fork include forked repositories
|
||||
-P, --private include private repositories [*]
|
||||
-F, --fork include forked repositories [*]
|
||||
--prefer-ssh Clone repositories using SSH instead of HTTPS
|
||||
-v, --version show program's version number and exit
|
||||
--keychain-name OSX_KEYCHAIN_ITEM_NAME
|
||||
@@ -107,6 +108,17 @@ CLI Usage is as follows::
|
||||
--keychain-account OSX_KEYCHAIN_ITEM_ACCOUNT
|
||||
OSX ONLY: account field of password item in OSX
|
||||
keychain that holds the personal access or OAuth token
|
||||
--releases include release information, not including assets or
|
||||
binaries
|
||||
--assets include assets alongside release information; only
|
||||
applies if including releases
|
||||
--throttle-limit THROTTLE_LIMIT
|
||||
start throttling of GitHub API requests after this
|
||||
amount of API requests remain
|
||||
--throttle-pause THROTTLE_PAUSE
|
||||
wait this amount of seconds when API request
|
||||
throttling is active (default: 30.0, requires
|
||||
--throttle-limit to be set)
|
||||
|
||||
|
||||
The package can be used to back up an *entire* organization or repository, including issues and wikis in the most appropriate format (clones for wikis, JSON files for issues).
|
||||
@@ -141,10 +153,10 @@ Instructions on how to do this can be found on https://git-lfs.github.com.
|
||||
Examples
|
||||
========
|
||||
|
||||
Backup all repositories::
|
||||
Back up all repositories, including private ones::
|
||||
|
||||
export ACCESS_TOKEN=SOME-GITHUB-TOKEN
|
||||
github-backup WhiteHouse --token $ACCESS_TOKEN --organization --output-directory /tmp/white-house --repositories
|
||||
github-backup WhiteHouse --token $ACCESS_TOKEN --organization --output-directory /tmp/white-house --repositories --private
|
||||
|
||||
Back up a single organization repository with everything else (wiki, pull requests, comments, issues, etc.)::
|
||||
|
||||
@@ -154,6 +166,15 @@ Backup a single organization repository with everything else (wiki, pull request
|
||||
# e.g. git@github.com:docker/cli.git
|
||||
github-backup $ORGANIZATION -P -t $ACCESS_TOKEN -o . --all -O -R $REPO
|
||||
|
||||
Testing
|
||||
=======
|
||||
|
||||
This project currently contains no unit tests. To run linting::
|
||||
|
||||
pip install flake8
|
||||
flake8 --ignore=E501
|
||||
|
||||
|
||||
.. |PyPI| image:: https://img.shields.io/pypi/v/github-backup.svg
|
||||
:target: https://pypi.python.org/pypi/github-backup/
|
||||
.. |Python Versions| image:: https://img.shields.io/pypi/pyversions/github-backup.svg
|
||||
|
||||
@@ -26,9 +26,12 @@ def main():
|
||||
if args.lfs_clone:
|
||||
check_git_lfs_install()
|
||||
|
||||
log_info('Backing up user {0} to {1}'.format(args.user, output_directory))
|
||||
if not args.as_app:
|
||||
log_info('Backing up user {0} to {1}'.format(args.user, output_directory))
|
||||
authenticated_user = get_authenticated_user(args)
|
||||
else:
|
||||
authenticated_user = {'login': None}
|
||||
|
||||
authenticated_user = get_authenticated_user(args)
|
||||
repositories = retrieve_repositories(args, authenticated_user)
|
||||
repositories = filter_repositories(args, repositories)
|
||||
backup_repositories(args, output_directory, repositories)
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = '0.29.0'
|
||||
__version__ = '0.36.0'
|
||||
|
||||
@@ -7,6 +7,7 @@ import argparse
|
||||
import base64
|
||||
import calendar
|
||||
import codecs
|
||||
import datetime
|
||||
import errno
|
||||
import getpass
|
||||
import json
|
||||
@@ -29,9 +30,11 @@ try:
|
||||
from urllib.request import Request
|
||||
from urllib.request import HTTPRedirectHandler
|
||||
from urllib.request import build_opener
|
||||
from subprocess import SubprocessError
|
||||
except ImportError:
|
||||
# python 2
|
||||
PY2 = True
|
||||
from subprocess import CalledProcessError as SubprocessError
|
||||
from urlparse import urlparse
|
||||
from urllib import quote as urlquote
|
||||
from urllib import urlencode
|
||||
@@ -50,6 +53,10 @@ except ImportError:
|
||||
FNULL = open(os.devnull, 'w')
|
||||
|
||||
|
||||
def _get_log_date():
|
||||
return datetime.datetime.isoformat(datetime.datetime.now())
|
||||
|
||||
|
||||
def log_error(message):
|
||||
"""
|
||||
Log message (str) or messages (List[str]) to stderr and exit with status 1
|
||||
@@ -66,7 +73,7 @@ def log_info(message):
|
||||
message = [message]
|
||||
|
||||
for msg in message:
|
||||
sys.stdout.write("{0}\n".format(msg))
|
||||
sys.stdout.write("{0}: {1}\n".format(_get_log_date(), msg))
|
||||
|
||||
|
||||
def log_warning(message):
|
||||
@@ -77,7 +84,7 @@ def log_warning(message):
|
||||
message = [message]
|
||||
|
||||
for msg in message:
|
||||
sys.stderr.write("{0}\n".format(msg))
|
||||
sys.stderr.write("{0}: {1}\n".format(_get_log_date(), msg))
|
||||
|
||||
|
||||
def logging_subprocess(popenargs,
|
||||
@@ -168,7 +175,11 @@ def parse_args():
|
||||
parser.add_argument('-t',
|
||||
'--token',
|
||||
dest='token',
|
||||
help='personal access or OAuth token, or path to token (file://...)') # noqa
|
||||
help='personal access, OAuth, or JSON Web token, or path to token (file://...)') # noqa
|
||||
parser.add_argument('--as-app',
|
||||
action='store_true',
|
||||
dest='as_app',
|
||||
help='authenticate as github app instead of as a user.')
|
||||
parser.add_argument('-o',
|
||||
'--output-directory',
|
||||
default='.',
|
||||
@@ -322,10 +333,20 @@ def parse_args():
|
||||
action='store_true',
|
||||
dest='include_assets',
|
||||
help='include assets alongside release information; only applies if including releases')
|
||||
parser.add_argument('--throttle-limit',
|
||||
dest='throttle_limit',
|
||||
type=int,
|
||||
default=0,
|
||||
help='start throttling of GitHub API requests after this amount of API requests remain')
|
||||
parser.add_argument('--throttle-pause',
|
||||
dest='throttle_pause',
|
||||
type=float,
|
||||
default=30.0,
|
||||
help='wait this amount of seconds when API request throttling is active (default: 30.0, requires --throttle-limit to be set)')
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def get_auth(args, encode=True):
|
||||
def get_auth(args, encode=True, for_git_cli=False):
|
||||
auth = None
|
||||
|
||||
if args.osx_keychain_item_name:
|
||||
@@ -344,7 +365,7 @@ def get_auth(args, encode=True):
|
||||
if not PY2:
|
||||
token = token.decode('utf-8')
|
||||
auth = token + ':' + 'x-oauth-basic'
|
||||
except:
|
||||
except SubprocessError:
|
||||
log_error('No password item matching the provided name and account could be found in the osx keychain.')
|
||||
elif args.osx_keychain_item_account:
|
||||
log_error('You must specify both name and account fields for osx keychain password items')
|
||||
@@ -353,7 +374,13 @@ def get_auth(args, encode=True):
|
||||
if args.token.startswith(_path_specifier):
|
||||
args.token = open(args.token[len(_path_specifier):],
|
||||
'rt').readline().strip()
|
||||
auth = args.token + ':' + 'x-oauth-basic'
|
||||
if not args.as_app:
|
||||
auth = args.token + ':' + 'x-oauth-basic'
|
||||
else:
|
||||
if not for_git_cli:
|
||||
auth = args.token
|
||||
else:
|
||||
auth = 'x-access-token:' + args.token
|
||||
elif args.username:
|
||||
if not args.password:
|
||||
args.password = getpass.getpass()
|
||||
@@ -394,13 +421,19 @@ def get_github_host(args):
|
||||
|
||||
def get_github_repo_url(args, repository):
|
||||
if repository.get('is_gist'):
|
||||
return repository['git_pull_url']
|
||||
if args.prefer_ssh:
|
||||
# The git_pull_url value is always https for gists, so we need to transform it to ssh form
|
||||
repo_url = re.sub(r'^https?:\/\/(.+)\/(.+)\.git$', r'git@\1:\2.git', repository['git_pull_url'])
|
||||
repo_url = re.sub(r'^git@gist\.', 'git@', repo_url) # strip gist subdomain for better hostkey compatibility
|
||||
else:
|
||||
repo_url = repository['git_pull_url']
|
||||
return repo_url
|
||||
|
||||
if args.prefer_ssh:
|
||||
return repository['ssh_url']
|
||||
|
||||
auth = get_auth(args, False)
|
||||
if auth and repository['private'] == True:
|
||||
auth = get_auth(args, encode=False, for_git_cli=True)
|
||||
if auth and repository['private'] is True:
|
||||
repo_url = 'https://{0}@{1}/{2}/{3}.git'.format(
|
||||
auth,
|
||||
get_github_host(args),
|
||||
@@ -413,24 +446,32 @@ def get_github_repo_url(args, repository):
|
||||
|
||||
|
||||
def retrieve_data_gen(args, template, query_args=None, single_request=False):
|
||||
auth = get_auth(args)
|
||||
auth = get_auth(args, encode=not args.as_app)
|
||||
query_args = get_query_args(query_args)
|
||||
per_page = 100
|
||||
page = 0
|
||||
|
||||
while True:
|
||||
page = page + 1
|
||||
request = _construct_request(per_page, page, query_args, template, auth) # noqa
|
||||
request = _construct_request(per_page, page, query_args, template, auth, as_app=args.as_app) # noqa
|
||||
r, errors = _get_response(request, auth, template)
|
||||
|
||||
status_code = int(r.getcode())
|
||||
# be gentle with the API request limit: throttle requests when the number of remaining requests is getting low
|
||||
limit_remaining = int(r.headers.get('x-ratelimit-remaining', 0))
|
||||
if args.throttle_limit and limit_remaining <= args.throttle_limit:
|
||||
log_info(
|
||||
'API request limit hit: {} requests left, pausing further requests for {}s'.format(
|
||||
limit_remaining,
|
||||
args.throttle_pause))
|
||||
time.sleep(args.throttle_pause)
|
||||
|
||||
retries = 0
|
||||
while retries < 3 and status_code == 502:
|
||||
print('API request returned HTTP 502: Bad Gateway. Retrying in 5 seconds')
|
||||
log_warning('API request returned HTTP 502: Bad Gateway. Retrying in 5 seconds')
|
||||
retries += 1
|
||||
time.sleep(5)
|
||||
request = _construct_request(per_page, page, query_args, template, auth) # noqa
|
||||
request = _construct_request(per_page, page, query_args, template, auth, as_app=args.as_app) # noqa
|
||||
r, errors = _get_response(request, auth, template)
|
||||
|
||||
status_code = int(r.getcode())
|
||||
@@ -456,9 +497,11 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False):
|
||||
if single_request:
|
||||
break
|
||||
|
||||
|
||||
def retrieve_data(args, template, query_args=None, single_request=False):
|
||||
return list(retrieve_data_gen(args, template, query_args, single_request))
|
||||
|
||||
|
||||
def get_query_args(query_args=None):
|
||||
if not query_args:
|
||||
query_args = {}
|
||||
@@ -495,7 +538,7 @@ def _get_response(request, auth, template):
|
||||
return r, errors
|
||||
|
||||
|
||||
def _construct_request(per_page, page, query_args, template, auth):
|
||||
def _construct_request(per_page, page, query_args, template, auth, as_app=None):
|
||||
querystring = urlencode(dict(list({
|
||||
'per_page': per_page,
|
||||
'page': page
|
||||
@@ -503,7 +546,13 @@ def _construct_request(per_page, page, query_args, template, auth):
|
||||
|
||||
request = Request(template + '?' + querystring)
|
||||
if auth is not None:
|
||||
request.add_header('Authorization', 'Basic '.encode('ascii') + auth)
|
||||
if not as_app:
|
||||
request.add_header('Authorization', 'Basic '.encode('ascii') + auth)
|
||||
else:
|
||||
if not PY2:
|
||||
auth = auth.encode('ascii')
|
||||
request.add_header('Authorization', 'token '.encode('ascii') + auth)
|
||||
request.add_header('Accept', 'application/vnd.github.machine-man-preview+json')
|
||||
log_info('Requesting {}?{}'.format(template, querystring))
|
||||
return request
|
||||
|
||||
@@ -528,12 +577,10 @@ def _request_http_error(exc, auth, errors):
|
||||
delta = max(10, reset - gm_now)
|
||||
|
||||
limit = headers.get('x-ratelimit-limit')
|
||||
print('Exceeded rate limit of {} requests; waiting {} seconds to reset'.format(limit, delta), # noqa
|
||||
file=sys.stderr)
|
||||
log_warning('Exceeded rate limit of {} requests; waiting {} seconds to reset'.format(limit, delta)) # noqa
|
||||
|
||||
if auth is None:
|
||||
print('Hint: Authenticate to raise your GitHub rate limit',
|
||||
file=sys.stderr)
|
||||
log_info('Hint: Authenticate to raise your GitHub rate limit')
|
||||
|
||||
time.sleep(delta)
|
||||
should_continue = True
|
||||
@@ -858,18 +905,22 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
|
||||
pull_states = ['open', 'closed']
|
||||
for pull_state in pull_states:
|
||||
query_args['state'] = pull_state
|
||||
_pulls = retrieve_data_gen(args,
|
||||
_pulls_template,
|
||||
query_args=query_args)
|
||||
_pulls = retrieve_data_gen(
|
||||
args,
|
||||
_pulls_template,
|
||||
query_args=query_args
|
||||
)
|
||||
for pull in _pulls:
|
||||
if args.since and pull['updated_at'] < args.since:
|
||||
break
|
||||
if not args.since or pull['updated_at'] >= args.since:
|
||||
pulls[pull['number']] = pull
|
||||
else:
|
||||
_pulls = retrieve_data_gen(args,
|
||||
_pulls_template,
|
||||
query_args=query_args)
|
||||
_pulls = retrieve_data_gen(
|
||||
args,
|
||||
_pulls_template,
|
||||
query_args=query_args
|
||||
)
|
||||
for pull in _pulls:
|
||||
if args.since and pull['updated_at'] < args.since:
|
||||
break
|
||||
|
||||
Reference in New Issue
Block a user