mirror of
https://github.com/josegonzalez/python-github-backup.git
synced 2025-12-05 16:18:02 +01:00
Compare commits
22 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
751b0d6e82 | ||
|
|
ea633ca2bb | ||
|
|
a2115ce3e5 | ||
|
|
8a00bb1903 | ||
|
|
e53f8d4724 | ||
|
|
356f5f674b | ||
|
|
13128635cb | ||
|
|
6e6842b025 | ||
|
|
272177c395 | ||
|
|
70f711ea68 | ||
|
|
3fc9957aac | ||
|
|
78098aae23 | ||
|
|
fb7cc5ed53 | ||
|
|
c0679b9cc3 | ||
|
|
03b9d1b2d8 | ||
|
|
5025f69878 | ||
|
|
a351cdc103 | ||
|
|
85e4399408 | ||
|
|
c8171b692a | ||
|
|
523c811cc6 | ||
|
|
857ad0afab | ||
|
|
3f65eadee1 |
40
CHANGES.rst
40
CHANGES.rst
@@ -1,9 +1,47 @@
|
|||||||
Changelog
|
Changelog
|
||||||
=========
|
=========
|
||||||
|
|
||||||
0.28.0 (2020-02-03)
|
0.35.0 (2020-08-05)
|
||||||
-------------------
|
-------------------
|
||||||
------------------------
|
------------------------
|
||||||
|
- Make API request throttling optional. [Samantha Baldwin]
|
||||||
|
|
||||||
|
|
||||||
|
0.34.0 (2020-07-24)
|
||||||
|
-------------------
|
||||||
|
- Add logic for transforming gist repository urls to ssh. [Matt Fields]
|
||||||
|
|
||||||
|
|
||||||
|
0.33.0 (2020-04-13)
|
||||||
|
-------------------
|
||||||
|
- Add basic API request throttling. [Enrico Tröger]
|
||||||
|
|
||||||
|
A simple approach to throttle API requests and so keep within the rate
|
||||||
|
limits of the API. Can be enabled with "--throttle-limit" to specify
|
||||||
|
when throttling should start.
|
||||||
|
"--throttle-pause" defines the time to sleep between further API
|
||||||
|
requests.
|
||||||
|
|
||||||
|
|
||||||
|
0.32.0 (2020-04-13)
|
||||||
|
-------------------
|
||||||
|
- Add timestamp to log messages. [Enrico Tröger]
|
||||||
|
|
||||||
|
|
||||||
|
0.31.0 (2020-02-25)
|
||||||
|
-------------------
|
||||||
|
- #123 update: changed --as-app 'help' description. [ethan]
|
||||||
|
- #123: Support Authenticating As Github Application. [ethan]
|
||||||
|
|
||||||
|
|
||||||
|
0.29.0 (2020-02-14)
|
||||||
|
-------------------
|
||||||
|
- #50 update: keep main() in bin. [ethan]
|
||||||
|
- #50 - refactor for friendlier import. [ethan]
|
||||||
|
|
||||||
|
|
||||||
|
0.28.0 (2020-02-03)
|
||||||
|
-------------------
|
||||||
- Remove deprecated (and removed) git lfs flags. [smiley]
|
- Remove deprecated (and removed) git lfs flags. [smiley]
|
||||||
|
|
||||||
"--tags" and "--force" were removed at some point from "git lfs fetch". This broke our backup script.
|
"--tags" and "--force" were removed at some point from "git lfs fetch". This broke our backup script.
|
||||||
|
|||||||
50
README.rst
50
README.rst
@@ -29,19 +29,20 @@ Usage
|
|||||||
|
|
||||||
CLI Usage is as follows::
|
CLI Usage is as follows::
|
||||||
|
|
||||||
github-backup [-h] [-u USERNAME] [-p PASSWORD] [-t TOKEN]
|
github-backup [-h] [-u USERNAME] [-p PASSWORD] [-t TOKEN] [--as-app]
|
||||||
[-o OUTPUT_DIRECTORY] [-i] [--starred] [--all-starred]
|
[-o OUTPUT_DIRECTORY] [-i] [--starred] [--all-starred]
|
||||||
[--watched] [--followers] [--following] [--all]
|
[--watched] [--followers] [--following] [--all]
|
||||||
[--issues] [--issue-comments] [--issue-events] [--pulls]
|
[--issues] [--issue-comments] [--issue-events] [--pulls]
|
||||||
[--pull-comments] [--pull-commits] [--labels] [--hooks]
|
[--pull-comments] [--pull-commits] [--pull-details]
|
||||||
[--milestones] [--repositories] [--releases] [--assets]
|
[--labels] [--hooks] [--milestones] [--repositories]
|
||||||
[--bare] [--lfs] [--wikis] [--gists] [--starred-gists]
|
[--bare] [--lfs] [--wikis] [--gists] [--starred-gists]
|
||||||
[--skip-existing]
|
[--skip-existing] [-L [LANGUAGES [LANGUAGES ...]]]
|
||||||
[-L [LANGUAGES [LANGUAGES ...]]] [-N NAME_REGEX]
|
[-N NAME_REGEX] [-H GITHUB_HOST] [-O] [-R REPOSITORY]
|
||||||
[-H GITHUB_HOST] [-O] [-R REPOSITORY] [-P] [-F]
|
[-P] [-F] [--prefer-ssh] [-v]
|
||||||
[--prefer-ssh] [-v]
|
|
||||||
[--keychain-name OSX_KEYCHAIN_ITEM_NAME]
|
[--keychain-name OSX_KEYCHAIN_ITEM_NAME]
|
||||||
[--keychain-account OSX_KEYCHAIN_ITEM_ACCOUNT]
|
[--keychain-account OSX_KEYCHAIN_ITEM_ACCOUNT]
|
||||||
|
[--releases] [--assets] [--throttle-limit THROTTLE_LIMIT]
|
||||||
|
[--throttle-pause THROTTLE_PAUSE]
|
||||||
USER
|
USER
|
||||||
|
|
||||||
Backup a github account
|
Backup a github account
|
||||||
@@ -57,36 +58,36 @@ CLI Usage is as follows::
|
|||||||
password for basic auth. If a username is given but
|
password for basic auth. If a username is given but
|
||||||
not a password, the password will be prompted for.
|
not a password, the password will be prompted for.
|
||||||
-t TOKEN, --token TOKEN
|
-t TOKEN, --token TOKEN
|
||||||
personal access or OAuth token, or path to token
|
personal access, OAuth, or JSON Web token, or path to
|
||||||
(file://...)
|
token (file://...)
|
||||||
|
--as-app authenticate as github app instead of as a user.
|
||||||
-o OUTPUT_DIRECTORY, --output-directory OUTPUT_DIRECTORY
|
-o OUTPUT_DIRECTORY, --output-directory OUTPUT_DIRECTORY
|
||||||
directory at which to backup the repositories
|
directory at which to backup the repositories
|
||||||
-i, --incremental incremental backup
|
-i, --incremental incremental backup
|
||||||
--starred include JSON output of starred repositories in backup
|
--starred include JSON output of starred repositories in backup
|
||||||
--all-starred include starred repositories in backup
|
--all-starred include starred repositories in backup [*]
|
||||||
--watched include watched repositories in backup
|
--watched include JSON output of watched repositories in backup
|
||||||
--followers include JSON output of followers in backup
|
--followers include JSON output of followers in backup
|
||||||
--following include JSON output of following users in backup
|
--following include JSON output of following users in backup
|
||||||
--all include everything in backup
|
--all include everything in backup (not including [*])
|
||||||
--issues include issues in backup
|
--issues include issues in backup
|
||||||
--issue-comments include issue comments in backup
|
--issue-comments include issue comments in backup
|
||||||
--issue-events include issue events in backup
|
--issue-events include issue events in backup
|
||||||
--pulls include pull requests in backup
|
--pulls include pull requests in backup
|
||||||
--pull-comments include pull request review comments in backup
|
--pull-comments include pull request review comments in backup
|
||||||
--pull-commits include pull request commits in backup
|
--pull-commits include pull request commits in backup
|
||||||
|
--pull-details include more pull request details in backup [*]
|
||||||
--labels include labels in backup
|
--labels include labels in backup
|
||||||
--hooks include hooks in backup (works only when
|
--hooks include hooks in backup (works only when
|
||||||
authenticated)
|
authenticated)
|
||||||
--milestones include milestones in backup
|
--milestones include milestones in backup
|
||||||
--repositories include repository clone in backup
|
--repositories include repository clone in backup
|
||||||
--releases include repository releases' information without assets or binaries
|
|
||||||
--assets include assets alongside release information; only applies if including releases
|
|
||||||
--bare clone bare repositories
|
--bare clone bare repositories
|
||||||
--lfs clone LFS repositories (requires Git LFS to be
|
--lfs clone LFS repositories (requires Git LFS to be
|
||||||
installed, https://git-lfs.github.com)
|
installed, https://git-lfs.github.com) [*]
|
||||||
--wikis include wiki clone in backup
|
--wikis include wiki clone in backup
|
||||||
--gists include gists in backup
|
--gists include gists in backup [*]
|
||||||
--starred-gists include starred gists in backup
|
--starred-gists include starred gists in backup [*]
|
||||||
--skip-existing skip project if a backup directory exists
|
--skip-existing skip project if a backup directory exists
|
||||||
-L [LANGUAGES [LANGUAGES ...]], --languages [LANGUAGES [LANGUAGES ...]]
|
-L [LANGUAGES [LANGUAGES ...]], --languages [LANGUAGES [LANGUAGES ...]]
|
||||||
only allow these languages
|
only allow these languages
|
||||||
@@ -97,8 +98,8 @@ CLI Usage is as follows::
|
|||||||
-O, --organization whether or not this is an organization user
|
-O, --organization whether or not this is an organization user
|
||||||
-R REPOSITORY, --repository REPOSITORY
|
-R REPOSITORY, --repository REPOSITORY
|
||||||
name of repository to limit backup to
|
name of repository to limit backup to
|
||||||
-P, --private include private repositories
|
-P, --private include private repositories [*]
|
||||||
-F, --fork include forked repositories
|
-F, --fork include forked repositories [*]
|
||||||
--prefer-ssh Clone repositories using SSH instead of HTTPS
|
--prefer-ssh Clone repositories using SSH instead of HTTPS
|
||||||
-v, --version show program's version number and exit
|
-v, --version show program's version number and exit
|
||||||
--keychain-name OSX_KEYCHAIN_ITEM_NAME
|
--keychain-name OSX_KEYCHAIN_ITEM_NAME
|
||||||
@@ -107,6 +108,17 @@ CLI Usage is as follows::
|
|||||||
--keychain-account OSX_KEYCHAIN_ITEM_ACCOUNT
|
--keychain-account OSX_KEYCHAIN_ITEM_ACCOUNT
|
||||||
OSX ONLY: account field of password item in OSX
|
OSX ONLY: account field of password item in OSX
|
||||||
keychain that holds the personal access or OAuth token
|
keychain that holds the personal access or OAuth token
|
||||||
|
--releases include release information, not including assets or
|
||||||
|
binaries
|
||||||
|
--assets include assets alongside release information; only
|
||||||
|
applies if including releases
|
||||||
|
--throttle-limit THROTTLE_LIMIT
|
||||||
|
start throttling of GitHub API requests after this
|
||||||
|
amount of API requests remain
|
||||||
|
--throttle-pause THROTTLE_PAUSE
|
||||||
|
wait this amount of seconds when API request
|
||||||
|
throttling is active (default: 30.0, requires
|
||||||
|
--throttle-limit to be set)
|
||||||
|
|
||||||
|
|
||||||
The package can be used to backup an *entire* organization or repository, including issues and wikis in the most appropriate format (clones for wikis, json files for issues).
|
The package can be used to backup an *entire* organization or repository, including issues and wikis in the most appropriate format (clones for wikis, json files for issues).
|
||||||
|
|||||||
@@ -26,9 +26,12 @@ def main():
|
|||||||
if args.lfs_clone:
|
if args.lfs_clone:
|
||||||
check_git_lfs_install()
|
check_git_lfs_install()
|
||||||
|
|
||||||
log_info('Backing up user {0} to {1}'.format(args.user, output_directory))
|
if not args.as_app:
|
||||||
|
log_info('Backing up user {0} to {1}'.format(args.user, output_directory))
|
||||||
|
authenticated_user = get_authenticated_user(args)
|
||||||
|
else:
|
||||||
|
authenticated_user = {'login': None}
|
||||||
|
|
||||||
authenticated_user = get_authenticated_user(args)
|
|
||||||
repositories = retrieve_repositories(args, authenticated_user)
|
repositories = retrieve_repositories(args, authenticated_user)
|
||||||
repositories = filter_repositories(args, repositories)
|
repositories = filter_repositories(args, repositories)
|
||||||
backup_repositories(args, output_directory, repositories)
|
backup_repositories(args, output_directory, repositories)
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
__version__ = '0.29.0'
|
__version__ = '0.35.0'
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import argparse
|
|||||||
import base64
|
import base64
|
||||||
import calendar
|
import calendar
|
||||||
import codecs
|
import codecs
|
||||||
|
import datetime
|
||||||
import errno
|
import errno
|
||||||
import getpass
|
import getpass
|
||||||
import json
|
import json
|
||||||
@@ -50,6 +51,10 @@ except ImportError:
|
|||||||
FNULL = open(os.devnull, 'w')
|
FNULL = open(os.devnull, 'w')
|
||||||
|
|
||||||
|
|
||||||
|
def _get_log_date():
|
||||||
|
return datetime.datetime.isoformat(datetime.datetime.now())
|
||||||
|
|
||||||
|
|
||||||
def log_error(message):
|
def log_error(message):
|
||||||
"""
|
"""
|
||||||
Log message (str) or messages (List[str]) to stderr and exit with status 1
|
Log message (str) or messages (List[str]) to stderr and exit with status 1
|
||||||
@@ -66,7 +71,7 @@ def log_info(message):
|
|||||||
message = [message]
|
message = [message]
|
||||||
|
|
||||||
for msg in message:
|
for msg in message:
|
||||||
sys.stdout.write("{0}\n".format(msg))
|
sys.stdout.write("{0}: {1}\n".format(_get_log_date(), msg))
|
||||||
|
|
||||||
|
|
||||||
def log_warning(message):
|
def log_warning(message):
|
||||||
@@ -77,7 +82,7 @@ def log_warning(message):
|
|||||||
message = [message]
|
message = [message]
|
||||||
|
|
||||||
for msg in message:
|
for msg in message:
|
||||||
sys.stderr.write("{0}\n".format(msg))
|
sys.stderr.write("{0}: {1}\n".format(_get_log_date(), msg))
|
||||||
|
|
||||||
|
|
||||||
def logging_subprocess(popenargs,
|
def logging_subprocess(popenargs,
|
||||||
@@ -168,7 +173,11 @@ def parse_args():
|
|||||||
parser.add_argument('-t',
|
parser.add_argument('-t',
|
||||||
'--token',
|
'--token',
|
||||||
dest='token',
|
dest='token',
|
||||||
help='personal access or OAuth token, or path to token (file://...)') # noqa
|
help='personal access, OAuth, or JSON Web token, or path to token (file://...)') # noqa
|
||||||
|
parser.add_argument('--as-app',
|
||||||
|
action='store_true',
|
||||||
|
dest='as_app',
|
||||||
|
help='authenticate as github app instead of as a user.')
|
||||||
parser.add_argument('-o',
|
parser.add_argument('-o',
|
||||||
'--output-directory',
|
'--output-directory',
|
||||||
default='.',
|
default='.',
|
||||||
@@ -322,10 +331,20 @@ def parse_args():
|
|||||||
action='store_true',
|
action='store_true',
|
||||||
dest='include_assets',
|
dest='include_assets',
|
||||||
help='include assets alongside release information; only applies if including releases')
|
help='include assets alongside release information; only applies if including releases')
|
||||||
|
parser.add_argument('--throttle-limit',
|
||||||
|
dest='throttle_limit',
|
||||||
|
type=int,
|
||||||
|
default=0,
|
||||||
|
help='start throttling of GitHub API requests after this amount of API requests remain')
|
||||||
|
parser.add_argument('--throttle-pause',
|
||||||
|
dest='throttle_pause',
|
||||||
|
type=float,
|
||||||
|
default=30.0,
|
||||||
|
help='wait this amount of seconds when API request throttling is active (default: 30.0, requires --throttle-limit to be set)')
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
def get_auth(args, encode=True):
|
def get_auth(args, encode=True, for_git_cli=False):
|
||||||
auth = None
|
auth = None
|
||||||
|
|
||||||
if args.osx_keychain_item_name:
|
if args.osx_keychain_item_name:
|
||||||
@@ -353,7 +372,13 @@ def get_auth(args, encode=True):
|
|||||||
if args.token.startswith(_path_specifier):
|
if args.token.startswith(_path_specifier):
|
||||||
args.token = open(args.token[len(_path_specifier):],
|
args.token = open(args.token[len(_path_specifier):],
|
||||||
'rt').readline().strip()
|
'rt').readline().strip()
|
||||||
auth = args.token + ':' + 'x-oauth-basic'
|
if not args.as_app:
|
||||||
|
auth = args.token + ':' + 'x-oauth-basic'
|
||||||
|
else:
|
||||||
|
if not for_git_cli:
|
||||||
|
auth = args.token
|
||||||
|
else:
|
||||||
|
auth = 'x-access-token:' + args.token
|
||||||
elif args.username:
|
elif args.username:
|
||||||
if not args.password:
|
if not args.password:
|
||||||
args.password = getpass.getpass()
|
args.password = getpass.getpass()
|
||||||
@@ -394,12 +419,18 @@ def get_github_host(args):
|
|||||||
|
|
||||||
def get_github_repo_url(args, repository):
|
def get_github_repo_url(args, repository):
|
||||||
if repository.get('is_gist'):
|
if repository.get('is_gist'):
|
||||||
return repository['git_pull_url']
|
if args.prefer_ssh:
|
||||||
|
# The git_pull_url value is always https for gists, so we need to transform it to ssh form
|
||||||
|
repo_url = re.sub('^https?:\/\/(.+)\/(.+)\.git$', r'git@\1:\2.git', repository['git_pull_url'])
|
||||||
|
repo_url = re.sub('^git@gist\.', 'git@', repo_url) # strip gist subdomain for better hostkey compatibility
|
||||||
|
else:
|
||||||
|
repo_url = repository['git_pull_url']
|
||||||
|
return repo_url
|
||||||
|
|
||||||
if args.prefer_ssh:
|
if args.prefer_ssh:
|
||||||
return repository['ssh_url']
|
return repository['ssh_url']
|
||||||
|
|
||||||
auth = get_auth(args, False)
|
auth = get_auth(args, encode=False, for_git_cli=True)
|
||||||
if auth and repository['private'] == True:
|
if auth and repository['private'] == True:
|
||||||
repo_url = 'https://{0}@{1}/{2}/{3}.git'.format(
|
repo_url = 'https://{0}@{1}/{2}/{3}.git'.format(
|
||||||
auth,
|
auth,
|
||||||
@@ -413,24 +444,32 @@ def get_github_repo_url(args, repository):
|
|||||||
|
|
||||||
|
|
||||||
def retrieve_data_gen(args, template, query_args=None, single_request=False):
|
def retrieve_data_gen(args, template, query_args=None, single_request=False):
|
||||||
auth = get_auth(args)
|
auth = get_auth(args, encode=not args.as_app)
|
||||||
query_args = get_query_args(query_args)
|
query_args = get_query_args(query_args)
|
||||||
per_page = 100
|
per_page = 100
|
||||||
page = 0
|
page = 0
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
page = page + 1
|
page = page + 1
|
||||||
request = _construct_request(per_page, page, query_args, template, auth) # noqa
|
request = _construct_request(per_page, page, query_args, template, auth, as_app=args.as_app) # noqa
|
||||||
r, errors = _get_response(request, auth, template)
|
r, errors = _get_response(request, auth, template)
|
||||||
|
|
||||||
status_code = int(r.getcode())
|
status_code = int(r.getcode())
|
||||||
|
# be gentle with API request limit and throttle requests if remaining requests getting low
|
||||||
|
limit_remaining = int(r.headers.get('x-ratelimit-remaining', 0))
|
||||||
|
if args.throttle_limit and limit_remaining <= args.throttle_limit:
|
||||||
|
log_info(
|
||||||
|
'API request limit hit: {} requests left, pausing further requests for {}s'.format(
|
||||||
|
limit_remaining,
|
||||||
|
args.throttle_pause))
|
||||||
|
time.sleep(args.throttle_pause)
|
||||||
|
|
||||||
retries = 0
|
retries = 0
|
||||||
while retries < 3 and status_code == 502:
|
while retries < 3 and status_code == 502:
|
||||||
print('API request returned HTTP 502: Bad Gateway. Retrying in 5 seconds')
|
log_warning('API request returned HTTP 502: Bad Gateway. Retrying in 5 seconds')
|
||||||
retries += 1
|
retries += 1
|
||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
request = _construct_request(per_page, page, query_args, template, auth) # noqa
|
request = _construct_request(per_page, page, query_args, template, auth, as_app=args.as_app) # noqa
|
||||||
r, errors = _get_response(request, auth, template)
|
r, errors = _get_response(request, auth, template)
|
||||||
|
|
||||||
status_code = int(r.getcode())
|
status_code = int(r.getcode())
|
||||||
@@ -495,7 +534,7 @@ def _get_response(request, auth, template):
|
|||||||
return r, errors
|
return r, errors
|
||||||
|
|
||||||
|
|
||||||
def _construct_request(per_page, page, query_args, template, auth):
|
def _construct_request(per_page, page, query_args, template, auth, as_app=None):
|
||||||
querystring = urlencode(dict(list({
|
querystring = urlencode(dict(list({
|
||||||
'per_page': per_page,
|
'per_page': per_page,
|
||||||
'page': page
|
'page': page
|
||||||
@@ -503,7 +542,13 @@ def _construct_request(per_page, page, query_args, template, auth):
|
|||||||
|
|
||||||
request = Request(template + '?' + querystring)
|
request = Request(template + '?' + querystring)
|
||||||
if auth is not None:
|
if auth is not None:
|
||||||
request.add_header('Authorization', 'Basic '.encode('ascii') + auth)
|
if not as_app:
|
||||||
|
request.add_header('Authorization', 'Basic '.encode('ascii') + auth)
|
||||||
|
else:
|
||||||
|
if not PY2:
|
||||||
|
auth = auth.encode('ascii')
|
||||||
|
request.add_header('Authorization', 'token '.encode('ascii') + auth)
|
||||||
|
request.add_header('Accept', 'application/vnd.github.machine-man-preview+json')
|
||||||
log_info('Requesting {}?{}'.format(template, querystring))
|
log_info('Requesting {}?{}'.format(template, querystring))
|
||||||
return request
|
return request
|
||||||
|
|
||||||
@@ -528,12 +573,10 @@ def _request_http_error(exc, auth, errors):
|
|||||||
delta = max(10, reset - gm_now)
|
delta = max(10, reset - gm_now)
|
||||||
|
|
||||||
limit = headers.get('x-ratelimit-limit')
|
limit = headers.get('x-ratelimit-limit')
|
||||||
print('Exceeded rate limit of {} requests; waiting {} seconds to reset'.format(limit, delta), # noqa
|
log_warning('Exceeded rate limit of {} requests; waiting {} seconds to reset'.format(limit, delta)) # noqa
|
||||||
file=sys.stderr)
|
|
||||||
|
|
||||||
if auth is None:
|
if auth is None:
|
||||||
print('Hint: Authenticate to raise your GitHub rate limit',
|
log_info('Hint: Authenticate to raise your GitHub rate limit')
|
||||||
file=sys.stderr)
|
|
||||||
|
|
||||||
time.sleep(delta)
|
time.sleep(delta)
|
||||||
should_continue = True
|
should_continue = True
|
||||||
|
|||||||
Reference in New Issue
Block a user