mirror of
https://github.com/josegonzalez/python-github-backup.git
synced 2025-12-05 16:18:02 +01:00
Compare commits
27 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fac8e4274f | ||
|
|
17fee66f31 | ||
|
|
a56d27dd8b | ||
|
|
e57873b6dd | ||
|
|
2658b039a1 | ||
|
|
fd684a71fb | ||
|
|
bacd77030b | ||
|
|
b73079daf2 | ||
|
|
eca8a70666 | ||
|
|
e74765ba7f | ||
|
|
6db5bd731b | ||
|
|
7305871c20 | ||
|
|
baf7b1a9b4 | ||
|
|
121fa68294 | ||
|
|
44dfc79edc | ||
|
|
89f59cc7a2 | ||
|
|
ad8c5b8768 | ||
|
|
921aab3729 | ||
|
|
ea4c3d0f6f | ||
|
|
9b6400932d | ||
|
|
de0c3f46c6 | ||
|
|
73b069f872 | ||
|
|
3d3f512074 | ||
|
|
1c3078992d | ||
|
|
4b40ae94d7 | ||
|
|
a18fda9faf | ||
|
|
41130fc8b0 |
31
CHANGES.rst
31
CHANGES.rst
@@ -1,9 +1,38 @@
|
||||
Changelog
|
||||
=========
|
||||
|
||||
0.23.0 (2019-06-04)
|
||||
0.26.0 (2019-09-23)
|
||||
-------------------
|
||||
------------------------
|
||||
- Workaround gist clone in `--prefer-ssh` mode. [Vladislav Yarmak]
|
||||
- Create PULL_REQUEST.md. [Jose Diaz-Gonzalez]
|
||||
- Create ISSUE_TEMPLATE.md. [Jose Diaz-Gonzalez]
|
||||
|
||||
|
||||
0.25.0 (2019-07-03)
|
||||
-------------------
|
||||
- Issue 119: Change retrieve_data to be a generator. [2a]
|
||||
|
||||
See issue #119.
|
||||
|
||||
|
||||
0.24.0 (2019-06-27)
|
||||
-------------------
|
||||
- QKT-45: include assets - update readme. [Ethan Timm]
|
||||
|
||||
update readme with flag information for including assets alongside their respective releases
|
||||
- Make assets its own flag. [Harrison Wright]
|
||||
- Fix super call for python2. [Harrison Wright]
|
||||
- Fix redirect to s3. [Harrison Wright]
|
||||
- WIP: download assets. [Harrison Wright]
|
||||
- QKT-42: releases - add readme info. [ethan]
|
||||
- QKT-42 update: shorter command flag. [ethan]
|
||||
- QKT-42: support saving release information. [ethan]
|
||||
- Fix pull details. [Harrison Wright]
|
||||
|
||||
|
||||
0.23.0 (2019-06-04)
|
||||
-------------------
|
||||
- Avoid crashing in case of HTTP 502 error. [Gael de Chalendar]
|
||||
|
||||
Survive also on socket.error connections like on HTTPError or URLError.
|
||||
|
||||
13
ISSUE_TEMPLATE.md
Normal file
13
ISSUE_TEMPLATE.md
Normal file
@@ -0,0 +1,13 @@
|
||||
# Important notice regarding filed issues
|
||||
|
||||
This project already fills my needs, and as such I have no real reason to continue its development. This project is otherwise provided as is, and no support is given.
|
||||
|
||||
If pull requests implementing bug fixes or enhancements are pushed, I am happy to review and merge them (time permitting).
|
||||
|
||||
If you wish to have a bug fixed, you have a few options:
|
||||
|
||||
- Fix it yourself.
|
||||
- File a bug and hope someone else fixes it for you.
|
||||
- Pay me to fix it (my rate is $200 an hour, minimum 1 hour, contact me via my [github email address](https://github.com/josegonzalez) if you want to go this route).
|
||||
|
||||
In all cases, feel free to file an issue; they may be of help to others in the future.
|
||||
7
PULL_REQUEST.md
Normal file
7
PULL_REQUEST.md
Normal file
@@ -0,0 +1,7 @@
|
||||
# Important notice regarding filed pull requests
|
||||
|
||||
This project already fills my needs, and as such I have no real reason to continue its development. This project is otherwise provided as is, and no support is given.
|
||||
|
||||
I will attempt to review pull requests at _my_ earliest convenience. If I am unable to get to your pull request in a timely fashion, it is what it is. This repository does not pay any bills, and I am not required to merge any pull request from any individual.
|
||||
|
||||
If you wish to jump my personal priority queue, you may pay me for my time to review. My rate is $200 an hour - minimum 1 hour - feel free to contact me via my github email address if you want to go this route.
|
||||
@@ -4,6 +4,8 @@ github-backup
|
||||
|
||||
|PyPI| |Python Versions|
|
||||
|
||||
This project is considered feature complete for the primary maintainer. If you would like a bugfix or enhancement and cannot sponsor the work, pull requests are welcome. Feel free to contact the maintainer for consulting estimates if desired.
|
||||
|
||||
backup a github user or organization
|
||||
|
||||
Requirements
|
||||
@@ -32,8 +34,9 @@ CLI Usage is as follows::
|
||||
[--watched] [--followers] [--following] [--all]
|
||||
[--issues] [--issue-comments] [--issue-events] [--pulls]
|
||||
[--pull-comments] [--pull-commits] [--labels] [--hooks]
|
||||
[--milestones] [--repositories] [--bare] [--lfs]
|
||||
[--wikis] [--gists] [--starred-gists] [--skip-existing]
|
||||
[--milestones] [--repositories] [--releases] [--assets]
|
||||
[--bare] [--lfs] [--wikis] [--gists] [--starred-gists]
|
||||
[--skip-existing]
|
||||
[-L [LANGUAGES [LANGUAGES ...]]] [-N NAME_REGEX]
|
||||
[-H GITHUB_HOST] [-O] [-R REPOSITORY] [-P] [-F]
|
||||
[--prefer-ssh] [-v]
|
||||
@@ -76,6 +79,8 @@ CLI Usage is as follows::
|
||||
authenticated)
|
||||
--milestones include milestones in backup
|
||||
--repositories include repository clone in backup
|
||||
--releases include repository releases' information without assets or binaries
|
||||
--assets include assets alongside release information; only applies if including releases
|
||||
--bare clone bare repositories
|
||||
--lfs clone LFS repositories (requires Git LFS to be
|
||||
installed, https://git-lfs.github.com)
|
||||
|
||||
@@ -18,6 +18,7 @@ import subprocess
|
||||
import sys
|
||||
import time
|
||||
import platform
|
||||
PY2 = False
|
||||
try:
|
||||
# python 3
|
||||
from urllib.parse import urlparse
|
||||
@@ -26,14 +27,19 @@ try:
|
||||
from urllib.error import HTTPError, URLError
|
||||
from urllib.request import urlopen
|
||||
from urllib.request import Request
|
||||
from urllib.request import HTTPRedirectHandler
|
||||
from urllib.request import build_opener
|
||||
except ImportError:
|
||||
# python 2
|
||||
PY2 = True
|
||||
from urlparse import urlparse
|
||||
from urllib import quote as urlquote
|
||||
from urllib import urlencode
|
||||
from urllib2 import HTTPError, URLError
|
||||
from urllib2 import urlopen
|
||||
from urllib2 import Request
|
||||
from urllib2 import HTTPRedirectHandler
|
||||
from urllib2 import build_opener
|
||||
|
||||
from github_backup import __version__
|
||||
|
||||
@@ -303,6 +309,15 @@ def parse_args():
|
||||
parser.add_argument('--keychain-account',
|
||||
dest='osx_keychain_item_account',
|
||||
help='OSX ONLY: account field of password item in OSX keychain that holds the personal access or OAuth token')
|
||||
parser.add_argument('--releases',
|
||||
action='store_true',
|
||||
dest='include_releases',
|
||||
help='include release information, not including assets or binaries'
|
||||
)
|
||||
parser.add_argument('--assets',
|
||||
action='store_true',
|
||||
dest='include_assets',
|
||||
help='include assets alongside release information; only applies if including releases')
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
@@ -372,12 +387,12 @@ def get_github_host(args):
|
||||
|
||||
|
||||
def get_github_repo_url(args, repository):
|
||||
if args.prefer_ssh:
|
||||
return repository['ssh_url']
|
||||
|
||||
if repository.get('is_gist'):
|
||||
return repository['git_pull_url']
|
||||
|
||||
if args.prefer_ssh:
|
||||
return repository['ssh_url']
|
||||
|
||||
auth = get_auth(args, False)
|
||||
if auth:
|
||||
repo_url = 'https://{0}@{1}/{2}/{3}.git'.format(
|
||||
@@ -391,12 +406,11 @@ def get_github_repo_url(args, repository):
|
||||
return repo_url
|
||||
|
||||
|
||||
def retrieve_data(args, template, query_args=None, single_request=False):
|
||||
def retrieve_data_gen(args, template, query_args=None, single_request=False):
|
||||
auth = get_auth(args)
|
||||
query_args = get_query_args(query_args)
|
||||
per_page = 100
|
||||
page = 0
|
||||
data = []
|
||||
|
||||
while True:
|
||||
page = page + 1
|
||||
@@ -423,11 +437,12 @@ def retrieve_data(args, template, query_args=None, single_request=False):
|
||||
response = json.loads(r.read().decode('utf-8'))
|
||||
if len(errors) == 0:
|
||||
if type(response) == list:
|
||||
data.extend(response)
|
||||
for resp in response:
|
||||
yield resp
|
||||
if len(response) < per_page:
|
||||
break
|
||||
elif type(response) == dict and single_request:
|
||||
data.append(response)
|
||||
yield response
|
||||
|
||||
if len(errors) > 0:
|
||||
log_error(errors)
|
||||
@@ -435,8 +450,8 @@ def retrieve_data(args, template, query_args=None, single_request=False):
|
||||
if single_request:
|
||||
break
|
||||
|
||||
return data
|
||||
|
||||
def retrieve_data(args, template, query_args=None, single_request=False):
    """Eagerly collect everything ``retrieve_data_gen`` yields into a list.

    All four arguments are forwarded positionally, unchanged, to
    ``retrieve_data_gen``; use that generator directly when the results
    should be consumed lazily.
    """
    items = retrieve_data_gen(args, template, query_args, single_request)
    return list(items)
|
||||
|
||||
def get_query_args(query_args=None):
|
||||
if not query_args:
|
||||
@@ -532,6 +547,39 @@ def _request_url_error(template, retry_timeout):
|
||||
return False
|
||||
|
||||
|
||||
class S3HTTPRedirectHandler(HTTPRedirectHandler):
    """
    A subclassed redirect handler for downloading Github assets from S3.

    urllib will add the Authorization header to the redirected request to S3,
    which will result in a 400, so we should remove said header on redirect.
    """

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Build the redirected request without an Authorization header.

        All arguments are passed through unchanged to
        ``HTTPRedirectHandler.redirect_request``. Returns whatever the base
        handler returns; when that is a request object, its
        ``Authorization`` header (if any) has been removed.
        """
        # An explicit base-class call works on Python 2 (where
        # HTTPRedirectHandler is an old-style class, so super() fails) as
        # well as on Python 3, so no interpreter check is needed here.
        request = HTTPRedirectHandler.redirect_request(
            self, req, fp, code, msg, headers, newurl)
        if request is not None:
            # pop() instead of del: the request being redirected may not
            # carry an Authorization header at all.
            request.headers.pop('Authorization', None)
        return request
|
||||
|
||||
|
||||
def download_file(url, path, auth):
    """Stream the resource at ``url`` into a local file at ``path``.

    The request asks for ``application/octet-stream`` and is opened through
    an opener built with ``S3HTTPRedirectHandler``, so the Authorization
    header is dropped when the server redirects.

    :param url: URL of the asset to download.
    :param path: destination file path; opened in binary write mode.
    :param auth: encoded Basic-auth credentials appended to ``b'Basic '``
        (presumably a byte string, as produced by ``get_auth`` -- confirm
        against the caller). When falsy, no Authorization header is sent.
    """
    request = Request(url)
    request.add_header('Accept', 'application/octet-stream')
    if auth:
        # Skip the header for anonymous access rather than failing on
        # bytes + None.
        request.add_header('Authorization', 'Basic '.encode('ascii') + auth)
    opener = build_opener(S3HTTPRedirectHandler)
    response = opener.open(request)

    chunk_size = 16 * 1024
    try:
        with open(path, 'wb') as f:
            # Copy in fixed-size chunks so large assets are never held in
            # memory all at once.
            while True:
                chunk = response.read(chunk_size)
                if not chunk:
                    break
                f.write(chunk)
    finally:
        # Always release the connection, even if writing to disk fails.
        response.close()
|
||||
|
||||
|
||||
def get_authenticated_user(args):
|
||||
template = 'https://{0}/user'.format(get_github_api_host(args))
|
||||
data = retrieve_data(args, template, single_request=True)
|
||||
@@ -699,6 +747,10 @@ def backup_repositories(args, output_directory, repositories):
|
||||
if args.include_hooks or args.include_everything:
|
||||
backup_hooks(args, repo_cwd, repository, repos_template)
|
||||
|
||||
if args.include_releases or args.include_everything:
|
||||
backup_releases(args, repo_cwd, repository, repos_template,
|
||||
include_assets=args.include_assets or args.include_everything)
|
||||
|
||||
if args.incremental:
|
||||
open(last_update_path, 'w').write(last_update)
|
||||
|
||||
@@ -784,24 +836,27 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
|
||||
pull_states = ['open', 'closed']
|
||||
for pull_state in pull_states:
|
||||
query_args['state'] = pull_state
|
||||
# It'd be nice to be able to apply the args.since filter here...
|
||||
_pulls = retrieve_data(args,
|
||||
_pulls = retrieve_data_gen(args,
|
||||
_pulls_template,
|
||||
query_args=query_args)
|
||||
for pull in _pulls:
|
||||
if args.since and pull['updated_at'] < args.since:
|
||||
break
|
||||
if not args.since or pull['updated_at'] >= args.since:
|
||||
pulls[pull['number']] = pull
|
||||
else:
|
||||
_pulls = retrieve_data(args,
|
||||
_pulls = retrieve_data_gen(args,
|
||||
_pulls_template,
|
||||
query_args=query_args)
|
||||
for pull in _pulls:
|
||||
if args.since and pull['updated_at'] < args.since:
|
||||
break
|
||||
if not args.since or pull['updated_at'] >= args.since:
|
||||
pulls[pull['number']] = retrieve_data(
|
||||
args,
|
||||
_pulls_template + '/{}'.format(pull['number']),
|
||||
single_request=True
|
||||
)
|
||||
)[0]
|
||||
|
||||
log_info('Saving {0} pull requests to disk'.format(
|
||||
len(list(pulls.keys()))))
|
||||
@@ -880,6 +935,33 @@ def backup_hooks(args, repo_cwd, repository, repos_template):
|
||||
log_info("Unable to read hooks, skipping")
|
||||
|
||||
|
||||
def backup_releases(args, repo_cwd, repository, repos_template, include_assets=False):
    """Save each release of ``repository`` as JSON, optionally with assets.

    Release metadata is written to ``<repo_cwd>/releases/<tag>.json``. When
    ``include_assets`` is true, every asset listed at the release's
    ``assets_url`` is downloaded into the same ``releases`` directory.

    :param args: parsed command-line arguments, forwarded to
        ``retrieve_data`` and ``get_auth``.
    :param repo_cwd: backup directory of this repository.
    :param repository: repository dict; only ``full_name`` is read here.
    :param repos_template: base API URL that ``full_name`` is appended to.
    :param include_assets: also download the binary assets of each release.
    """
    repository_fullname = repository['full_name']

    # give release files somewhere to live & log intent
    release_cwd = os.path.join(repo_cwd, 'releases')
    log_info('Retrieving {0} releases'.format(repository_fullname))
    mkdir_p(repo_cwd, release_cwd)

    query_args = {}

    release_template = '{0}/{1}/releases'.format(repos_template, repository_fullname)
    releases = retrieve_data(args, release_template, query_args=query_args)

    # Credentials do not change between assets, so resolve them once and
    # only when assets are actually requested.
    auth = get_auth(args) if include_assets else None

    # for each release, store it
    log_info('Saving {0} releases to disk'.format(len(releases)))
    for release in releases:
        # Tag names may contain '/', which os.path.join would treat as a
        # directory separator pointing into a directory that does not exist.
        release_name = release['tag_name'].replace('/', '__')
        output_filepath = os.path.join(release_cwd, '{0}.json'.format(release_name))
        with codecs.open(output_filepath, 'w+', encoding='utf-8') as f:
            json_dump(release, f)

        if include_assets:
            assets = retrieve_data(args, release['assets_url'])
            # NOTE(review): assets of every release share release_cwd, so
            # identically named assets from different releases overwrite
            # one another.
            for asset in assets:
                download_file(asset['url'], os.path.join(release_cwd, asset['name']), auth)
|
||||
|
||||
|
||||
def fetch_repository(name,
|
||||
remote_url,
|
||||
local_dir,
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = '0.23.0'
|
||||
__version__ = '0.26.0'
|
||||
|
||||
Reference in New Issue
Block a user