From 41130fc8b0b694e68eb6754d005bb2f3295578fb Mon Sep 17 00:00:00 2001 From: ethan Date: Fri, 21 Jun 2019 11:20:26 -0500 Subject: [PATCH 1/8] QKT-42: support saving release information --- bin/github-backup | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/bin/github-backup b/bin/github-backup index d862641..427f14c 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -303,6 +303,11 @@ def parse_args(): parser.add_argument('--keychain-account', dest='osx_keychain_item_account', help='OSX ONLY: account field of password item in OSX keychain that holds the personal access or OAuth token') + parser.add_argument('--include-releases', + action='store_true', + dest='include_releases', + help='include release information, not including assets or binaries' + ) return parser.parse_args() @@ -699,6 +704,9 @@ def backup_repositories(args, output_directory, repositories): if args.include_hooks or args.include_everything: backup_hooks(args, repo_cwd, repository, repos_template) + if args.include_releases or args.include_everything: + backup_releases(args, repo_cwd, repository, repos_template) + if args.incremental: open(last_update_path, 'w').write(last_update) @@ -880,6 +888,28 @@ def backup_hooks(args, repo_cwd, repository, repos_template): log_info("Unable to read hooks, skipping") +def backup_releases(args, repo_cwd, repository, repos_template): + repository_fullname = repository['full_name'] + + # give release files somewhere to live & log intent + release_cwd = os.path.join(repo_cwd, 'releases') + log_info('Retrieving {0} releases'.format(repository_fullname)) + mkdir_p(repo_cwd, release_cwd) + + query_args = {} + + _release_template = '{0}/{1}/releases'.format(repos_template, repository_fullname) + _releases = retrieve_data(args, _release_template, query_args=query_args) + + # for each release, store it + log_info('Saving {0} releases to disk'.format(len(_releases))) + for release in _releases: + release_name = release['tag_name'] + output_filepath = os.path.join(release_cwd, '{0}.json'.format(release_name)) + with codecs.open(output_filepath, 'w+', encoding='utf-8') as f: + json_dump(release, f) + + def fetch_repository(name, remote_url, local_dir, From 4b40ae94d7ffdc75eaac187a2273f3a3d856d0ab Mon Sep 17 00:00:00 2001 From: ethan Date: Fri, 21 Jun 2019 16:48:25 -0500 Subject: [PATCH 2/8] QKT-42 update: shorter command flag --- bin/github-backup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/github-backup b/bin/github-backup index 427f14c..6c88dd2 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -303,7 +303,7 @@ def parse_args(): parser.add_argument('--keychain-account', dest='osx_keychain_item_account', help='OSX ONLY: account field of password item in OSX keychain that holds the personal access or OAuth token') - parser.add_argument('--include-releases', + parser.add_argument('--releases', action='store_true', dest='include_releases', help='include release information, not including assets or binaries' From 3d3f5120743a2e141c853712fea78c2a3351fc11 Mon Sep 17 00:00:00 2001 From: ethan Date: Fri, 21 Jun 2019 16:53:40 -0500 Subject: [PATCH 3/8] QKT-42: releases - add readme info --- README.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 861e616..b8d8c5e 100644 --- a/README.rst +++ b/README.rst @@ -32,7 +32,7 @@ CLI Usage is as follows:: [--watched] [--followers] [--following] [--all] [--issues] [--issue-comments] [--issue-events] [--pulls] [--pull-comments] [--pull-commits] [--labels] [--hooks] - [--milestones] [--repositories] [--bare] [--lfs] + [--milestones] [--repositories] [--releases] [--bare] [--lfs] [--wikis] [--gists] [--starred-gists] [--skip-existing] [-L [LANGUAGES [LANGUAGES ...]]] [-N NAME_REGEX] [-H GITHUB_HOST] [-O] [-R REPOSITORY] [-P] [-F] @@ -76,6 +76,7 @@ CLI Usage is as follows:: authenticated) --milestones include milestones in backup --repositories include repository clone in backup + --releases include repository releases' information without assets or binaries --bare clone bare repositories --lfs clone LFS repositories (requires Git LFS to be installed, https://git-lfs.github.com) From de0c3f46c616fe8e1f2d3a80b747a69d4bf7da14 Mon Sep 17 00:00:00 2001 From: Harrison Wright Date: Fri, 21 Jun 2019 20:03:14 -0500 Subject: [PATCH 4/8] WIP: download assets --- bin/github-backup | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index 6c88dd2..e349eaa 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -537,6 +537,24 @@ def _request_url_error(template, retry_timeout): return False +def download_file(url, path, auth): + request = Request(url) + request.add_header('Accept', 'application/octet-stream') + request.add_header('Authorization', 'Basic '.encode('ascii') + auth) + data = urlopen(request) + with open(path, 'wb') as f: + f.write(data.read()) + + # import requests + # r = requests.get(url, stream=True, headers={ + # 'Accept': 'application/octet-stream', + # 'Authorization': 'Basic '.encode('ascii') + auth + # }) + # with open(path, 'wb') as f: + # for chunk in r.iter_content(1024): + # f.write(chunk) + + def get_authenticated_user(args): template = 'https://{0}/user'.format(get_github_api_host(args)) data = retrieve_data(args, template, single_request=True) @@ -898,17 +916,21 @@ def backup_releases(args, repo_cwd, repository, repos_template): query_args = {} - _release_template = '{0}/{1}/releases'.format(repos_template, repository_fullname) - _releases = retrieve_data(args, _release_template, query_args=query_args) + release_template = '{0}/{1}/releases'.format(repos_template, repository_fullname) + releases = retrieve_data(args, release_template, query_args=query_args) # for each release, store it - log_info('Saving {0} releases to disk'.format(len(_releases))) - for release in _releases: + log_info('Saving {0} releases to disk'.format(len(releases))) + for release in releases: release_name = release['tag_name'] output_filepath = os.path.join(release_cwd, '{0}.json'.format(release_name)) with codecs.open(output_filepath, 'w+', encoding='utf-8') as f: json_dump(release, f) + assets = retrieve_data(args, release['assets_url']) + for asset in assets: + download_file(asset['url'], os.path.join(release_cwd, asset['name']), get_auth(args)) + def fetch_repository(name, remote_url, From 9b6400932d9ba7ebefcaa180ffe2efcd2be36c68 Mon Sep 17 00:00:00 2001 From: Harrison Wright Date: Sat, 22 Jun 2019 13:00:42 -0500 Subject: [PATCH 5/8] Fix redirect to s3 --- bin/github-backup | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index e349eaa..583d3ee 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -26,6 +26,8 @@ try: from urllib.error import HTTPError, URLError from urllib.request import urlopen from urllib.request import Request + from urllib.request import HTTPRedirectHandler + from urllib.request import build_opener except ImportError: # python 2 from urlparse import urlparse @@ -34,6 +36,8 @@ except ImportError: from urllib2 import HTTPError, URLError from urllib2 import urlopen from urllib2 import Request + from urllib2 import HTTPRedirectHandler + from urllib2 import build_opener from github_backup import __version__ @@ -537,22 +541,33 @@ def _request_url_error(template, retry_timeout): return False +class S3HTTPRedirectHandler(HTTPRedirectHandler): + """ + A subclassed redirect handler for downloading Github assets from S3. + + urllib will add the Authorization header to the redirected request to S3, which will result in a 400, + so we should remove said header on redirect. + """ + def redirect_request(self, req, fp, code, msg, headers, newurl): + request = super(S3HTTPRedirectHandler, self).redirect_request(req, fp, code, msg, headers, newurl) + del request.headers['Authorization'] + return request + + def download_file(url, path, auth): request = Request(url) request.add_header('Accept', 'application/octet-stream') request.add_header('Authorization', 'Basic '.encode('ascii') + auth) - data = urlopen(request) - with open(path, 'wb') as f: - f.write(data.read()) + opener = build_opener(S3HTTPRedirectHandler) + response = opener.open(request) - # import requests - # r = requests.get(url, stream=True, headers={ - # 'Accept': 'application/octet-stream', - # 'Authorization': 'Basic '.encode('ascii') + auth - # }) - # with open(path, 'wb') as f: - # for chunk in r.iter_content(1024): - # f.write(chunk) + chunk_size = 16 * 1024 + with open(path, 'wb') as f: + while True: + chunk = response.read(chunk_size) + if not chunk: + break + f.write(chunk) def get_authenticated_user(args): From ea4c3d0f6f79aec742f4497b502a757e185d6e4e Mon Sep 17 00:00:00 2001 From: Harrison Wright Date: Sat, 22 Jun 2019 13:05:54 -0500 Subject: [PATCH 6/8] Fix super call for python2 --- bin/github-backup | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/bin/github-backup b/bin/github-backup index 583d3ee..fe3f8e9 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -18,6 +18,7 @@ import subprocess import sys import time import platform +PY2 = False try: # python 3 from urllib.parse import urlparse @@ -30,6 +31,7 @@ try: from urllib.request import build_opener except ImportError: # python 2 + PY2 = True from urlparse import urlparse from urllib import quote as urlquote from urllib import urlencode @@ -549,7 +551,11 @@ class S3HTTPRedirectHandler(HTTPRedirectHandler): so we should remove said header on redirect. """ def redirect_request(self, req, fp, code, msg, headers, newurl): - request = super(S3HTTPRedirectHandler, self).redirect_request(req, fp, code, msg, headers, newurl) + if PY2: + # HTTPRedirectHandler is an old style class + request = HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl) + else: + request = super(S3HTTPRedirectHandler, self).redirect_request(req, fp, code, msg, headers, newurl) del request.headers['Authorization'] return request From 89f59cc7a2e10f6e2878821e7ee8d7f8d1f64d76 Mon Sep 17 00:00:00 2001 From: Harrison Wright Date: Mon, 24 Jun 2019 15:49:19 -0500 Subject: [PATCH 7/8] Make assets it's own flag --- bin/github-backup | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index fe3f8e9..5a09483 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -314,6 +314,10 @@ def parse_args(): dest='include_releases', help='include release information, not including assets or binaries' ) + parser.add_argument('--assets', + action='store_true', + dest='include_assets', + help='include assets alongside release information; only applies if including releases') return parser.parse_args() @@ -744,7 +748,8 @@ def backup_repositories(args, output_directory, repositories): backup_hooks(args, repo_cwd, repository, repos_template) if args.include_releases or args.include_everything: - backup_releases(args, repo_cwd, repository, repos_template) + backup_releases(args, repo_cwd, repository, repos_template, + include_assets=args.include_assets or args.include_everything) if args.incremental: open(last_update_path, 'w').write(last_update) @@ -927,7 +932,7 @@ def backup_hooks(args, repo_cwd, repository, repos_template): log_info("Unable to read hooks, skipping") -def backup_releases(args, repo_cwd, repository, repos_template): +def backup_releases(args, repo_cwd, repository, repos_template, include_assets=False): repository_fullname = repository['full_name'] # give release files somewhere to live & log intent @@ -948,9 +953,10 @@ def backup_releases(args, repo_cwd, repository, repos_template): with codecs.open(output_filepath, 'w+', encoding='utf-8') as f: json_dump(release, f) - assets = retrieve_data(args, release['assets_url']) - for asset in assets: - download_file(asset['url'], os.path.join(release_cwd, asset['name']), get_auth(args)) + if include_assets: + assets = retrieve_data(args, release['assets_url']) + for asset in assets: + download_file(asset['url'], os.path.join(release_cwd, asset['name']), get_auth(args)) def fetch_repository(name, From 121fa6829415fd25d7a79f588b4b79a806669db6 Mon Sep 17 00:00:00 2001 From: Ethan Timm Date: Tue, 25 Jun 2019 15:41:02 -0500 Subject: [PATCH 8/8] QKT-45: include assets - update readme update readme with flag information for including assets alongside their respective releases --- README.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index b8d8c5e..8b5f6f1 100644 --- a/README.rst +++ b/README.rst @@ -32,8 +32,9 @@ CLI Usage is as follows:: [--watched] [--followers] [--following] [--all] [--issues] [--issue-comments] [--issue-events] [--pulls] [--pull-comments] [--pull-commits] [--labels] [--hooks] - [--milestones] [--repositories] [--releases] [--bare] [--lfs] - [--wikis] [--gists] [--starred-gists] [--skip-existing] + [--milestones] [--repositories] [--releases] [--assets] + [--bare] [--lfs] [--wikis] [--gists] [--starred-gists] + [--skip-existing] [-L [LANGUAGES [LANGUAGES ...]]] [-N NAME_REGEX] [-H GITHUB_HOST] [-O] [-R REPOSITORY] [-P] [-F] [--prefer-ssh] [-v] @@ -77,6 +78,7 @@ CLI Usage is as follows:: --milestones include milestones in backup --repositories include repository clone in backup --releases include repository releases' information without assets or binaries + --assets include assets alongside release information; only applies if including releases --bare clone bare repositories --lfs clone LFS repositories (requires Git LFS to be installed, https://git-lfs.github.com)