Merge pull request #4 from whwright/wip-releases

Download github assets
Ethan Timm
2019-06-25 15:35:39 -05:00
committed by GitHub


@@ -18,6 +18,7 @@ import subprocess
 import sys
 import time
 import platform
+PY2 = False
 try:
     # python 3
     from urllib.parse import urlparse
@@ -26,14 +27,19 @@ try:
     from urllib.error import HTTPError, URLError
     from urllib.request import urlopen
     from urllib.request import Request
+    from urllib.request import HTTPRedirectHandler
+    from urllib.request import build_opener
 except ImportError:
     # python 2
+    PY2 = True
     from urlparse import urlparse
     from urllib import quote as urlquote
     from urllib import urlencode
     from urllib2 import HTTPError, URLError
     from urllib2 import urlopen
     from urllib2 import Request
+    from urllib2 import HTTPRedirectHandler
+    from urllib2 import build_opener


 from github_backup import __version__
@@ -308,6 +314,10 @@ def parse_args():
         dest='include_releases',
         help='include release information, not including assets or binaries'
     )
+    parser.add_argument('--assets',
+                        action='store_true',
+                        dest='include_assets',
+                        help='include assets alongside release information; only applies if including releases')

     return parser.parse_args()
@@ -537,6 +547,39 @@ def _request_url_error(template, retry_timeout):
     return False


+class S3HTTPRedirectHandler(HTTPRedirectHandler):
+    """
+    A subclassed redirect handler for downloading Github assets from S3.
+    urllib will add the Authorization header to the redirected request to S3, which will result in a 400,
+    so we should remove said header on redirect.
+    """
+    def redirect_request(self, req, fp, code, msg, headers, newurl):
+        if PY2:
+            # HTTPRedirectHandler is an old style class
+            request = HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
+        else:
+            request = super(S3HTTPRedirectHandler, self).redirect_request(req, fp, code, msg, headers, newurl)
+
+        del request.headers['Authorization']
+        return request
+
+
+def download_file(url, path, auth):
+    request = Request(url)
+    request.add_header('Accept', 'application/octet-stream')
+    request.add_header('Authorization', 'Basic '.encode('ascii') + auth)
+    opener = build_opener(S3HTTPRedirectHandler)
+    response = opener.open(request)
+
+    chunk_size = 16 * 1024
+    with open(path, 'wb') as f:
+        while True:
+            chunk = response.read(chunk_size)
+            if not chunk:
+                break
+            f.write(chunk)
+
+
 def get_authenticated_user(args):
     template = 'https://{0}/user'.format(get_github_api_host(args))
     data = retrieve_data(args, template, single_request=True)
@@ -705,7 +748,8 @@ def backup_repositories(args, output_directory, repositories):
             backup_hooks(args, repo_cwd, repository, repos_template)

         if args.include_releases or args.include_everything:
-            backup_releases(args, repo_cwd, repository, repos_template)
+            backup_releases(args, repo_cwd, repository, repos_template,
+                            include_assets=args.include_assets or args.include_everything)

     if args.incremental:
         open(last_update_path, 'w').write(last_update)
@@ -888,7 +932,7 @@ def backup_hooks(args, repo_cwd, repository, repos_template):
         log_info("Unable to read hooks, skipping")


-def backup_releases(args, repo_cwd, repository, repos_template):
+def backup_releases(args, repo_cwd, repository, repos_template, include_assets=False):
     repository_fullname = repository['full_name']

     # give release files somewhere to live & log intent
@@ -898,17 +942,22 @@ def backup_releases(args, repo_cwd, repository, repos_template):
     query_args = {}

-    _release_template = '{0}/{1}/releases'.format(repos_template, repository_fullname)
-    _releases = retrieve_data(args, _release_template, query_args=query_args)
+    release_template = '{0}/{1}/releases'.format(repos_template, repository_fullname)
+    releases = retrieve_data(args, release_template, query_args=query_args)

     # for each release, store it
-    log_info('Saving {0} releases to disk'.format(len(_releases)))
-    for release in _releases:
+    log_info('Saving {0} releases to disk'.format(len(releases)))
+    for release in releases:
         release_name = release['tag_name']
         output_filepath = os.path.join(release_cwd, '{0}.json'.format(release_name))
         with codecs.open(output_filepath, 'w+', encoding='utf-8') as f:
             json_dump(release, f)

+        if include_assets:
+            assets = retrieve_data(args, release['assets_url'])
+            for asset in assets:
+                download_file(asset['url'], os.path.join(release_cwd, asset['name']), get_auth(args))


 def fetch_repository(name,
                      remote_url,
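
For reference, the redirect-stripping pattern this change introduces can be sketched on its own (Python 3 only; StripAuthOnRedirect, fetch_asset and the token-style Authorization header are illustrative assumptions, not part of the diff). GitHub answers an asset request carrying Accept: application/octet-stream by redirecting to S3, and if urllib forwards the Authorization header to S3 the download fails with a 400, so the handler drops it when following the redirect:

from urllib.request import HTTPRedirectHandler, Request, build_opener


class StripAuthOnRedirect(HTTPRedirectHandler):
    # Drop the Authorization header when urllib follows the redirect to S3.
    def redirect_request(self, req, fp, code, msg, headers, newurl):
        request = HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
        if request is not None and 'Authorization' in request.headers:
            del request.headers['Authorization']
        return request


def fetch_asset(asset_url, token, path):
    # Ask the API for the binary payload and stream it to disk in 16 KiB chunks.
    request = Request(asset_url)
    request.add_header('Accept', 'application/octet-stream')
    request.add_header('Authorization', 'token {0}'.format(token))
    opener = build_opener(StripAuthOnRedirect)
    with opener.open(request) as response, open(path, 'wb') as f:
        while True:
            chunk = response.read(16 * 1024)
            if not chunk:
                break
            f.write(chunk)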