From 17e4f9a12545d5033c1847ddbe00c4cf927a7680 Mon Sep 17 00:00:00 2001 From: "W. Harrison Wright" Date: Wed, 7 Feb 2018 21:29:49 -0600 Subject: [PATCH 1/3] Add ability to backup gists --- README.rst | 3 ++ bin/github-backup | 74 +++++++++++++++++++++++++++++++++++------------ 2 files changed, 59 insertions(+), 18 deletions(-) diff --git a/README.rst b/README.rst index 29ac8aa..69dd9a6 100644 --- a/README.rst +++ b/README.rst @@ -33,6 +33,7 @@ CLI Usage is as follows:: [--pulls] [--pull-comments] [--pull-commits] [--labels] [--hooks] [--milestones] [--repositories] [--bare] [--lfs] [--wikis] [--skip-existing] [--all-starred] + [--gists] [--starred-gists] [-L [LANGUAGES [LANGUAGES ...]]] [-N NAME_REGEX] [-H GITHUB_HOST] [-O] [-R REPOSITORY] [-P] [-F] [--prefer-ssh] [-v] @@ -76,6 +77,8 @@ CLI Usage is as follows:: --wikis include wiki clone in backup --skip-existing skip project if a backup directory exists --all-starred include starred repositories in backup + --gists include gists in backup + --starred-gists include starred gists in backup -L [LANGUAGES [LANGUAGES ...]], --languages [LANGUAGES [LANGUAGES ...]] only allow these languages -N NAME_REGEX, --name-regex NAME_REGEX diff --git a/bin/github-backup b/bin/github-backup index 4d4b485..5711b14 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -224,6 +224,14 @@ def parse_args(): action='store_true', dest='include_wiki', help='include wiki clone in backup') + parser.add_argument('--gists', + action='store_true', + dest='include_gists', + help='include gists in backup') + parser.add_argument('--starred-gists', + action='store_true', + dest='include_starred_gists', + help='include starred gists in backup') parser.add_argument('--skip-existing', action='store_true', dest='skip_existing', @@ -342,6 +350,9 @@ def get_github_repo_url(args, repository): if args.prefer_ssh: return repository['ssh_url'] + if repository.get('is_gist'): + return repository['git_pull_url'] + auth = get_auth(args, False) if auth: 
repo_url = 'https://{0}@{1}/{2}/{3}.git'.format( @@ -509,12 +520,30 @@ def retrieve_repositories(args): starred_template = 'https://{0}/user/starred'.format( get_github_api_host(args)) starred_repos = retrieve_data(args, starred_template, single_request=False) - # we need to be able to determine this repo was retrieved as a starred repo - # later, so add a flag to each item + # flag each repo as starred for downstream processing for item in starred_repos: item.update({'is_starred': True}) repos.extend(starred_repos) + if args.include_gists: + gists_template = 'https://{0}/gists'.format( + get_github_api_host(args)) + gists = retrieve_data(args, gists_template, single_request=False) + # flag each repo as a gist for downstream processing + for item in gists: + item.update({'is_gist': True}) + repos.extend(gists) + + if args.include_starred_gists: + starred_gists_template = 'https://{0}/gists/starred'.format( + get_github_api_host(args)) + starred_gists = retrieve_data(args, starred_gists_template, single_request=False) + # flag each repo as a starred gist for downstream processing + for item in starred_gists: + item.update({'is_gist': True, + 'is_starred': True}) + repos.extend(starred_gists) + return repos @@ -523,7 +552,8 @@ def filter_repositories(args, unfiltered_repositories): repositories = [] for r in unfiltered_repositories: - if r['owner']['login'] == args.user or r.get('is_starred'): + # gists can be anonymous, so need to safely check owner + if r.get('owner', {}).get('login') == args.user or r.get('is_starred'): repositories.append(r) name_regex = None @@ -535,11 +565,11 @@ def filter_repositories(args, unfiltered_repositories): languages = [x.lower() for x in args.languages] if not args.fork: - repositories = [r for r in repositories if not r['fork']] + repositories = [r for r in repositories if not r.get('fork')] if not args.private: - repositories = [r for r in repositories if not r['private']] + repositories = [r for r in repositories if not 
r.get('private') or r.get('public')] if languages: - repositories = [r for r in repositories if r['language'] and r['language'].lower() in languages] # noqa + repositories = [r for r in repositories if r.get('language') and r.get('language').lower() in languages] # noqa if name_regex: repositories = [r for r in repositories if name_regex.match(r['name'])] @@ -561,29 +591,37 @@ def backup_repositories(args, output_directory, repositories): args.since = None for repository in repositories: - backup_cwd = os.path.join(output_directory, 'repositories') - repo_cwd = os.path.join(backup_cwd, repository['name']) - - # put starred repos in -o/starred/${owner}/${repo} to prevent collision of - # any repositories with the same name - if repository.get('is_starred'): - backup_cwd = os.path.join(output_directory, 'starred') - repo_cwd = os.path.join(backup_cwd, repository['owner']['login'], - repository['name']) + if repository.get('is_gist'): + repo_cwd = os.path.join(output_directory, 'gists', repository['id']) + elif repository.get('is_starred'): + # put starred repos in -o/starred/${owner}/${repo} to prevent collision of + # any repositories with the same name + repo_cwd = os.path.join(output_directory, 'starred', repository['owner']['login'], repository['name']) + else: + repo_cwd = os.path.join(output_directory, 'repositories', repository['name']) repo_dir = os.path.join(repo_cwd, 'repository') repo_url = get_github_repo_url(args, repository) - if args.include_repository or args.include_everything: - fetch_repository(repository['name'], + include_gists = (args.include_gists or args.include_starred_gists) + if (args.include_repository or args.include_everything) \ + or (include_gists and repository.get('is_gist')): + repo_name = repository.get('name') if not repository.get('is_gist') else repository.get('id') + fetch_repository(repo_name, repo_url, repo_dir, skip_existing=args.skip_existing, bare_clone=args.bare_clone, lfs_clone=args.lfs_clone) + # dump gist information 
to a file as well + if repository.get('is_gist'): + output_file = '{0}/gist.json'.format(repo_cwd) + with codecs.open(output_file, 'w', encoding='utf-8') as f: + json_dump(repository, f) + download_wiki = (args.include_wiki or args.include_everything) - if repository['has_wiki'] and download_wiki: + if repository.get('has_wiki') and download_wiki: fetch_repository(repository['name'], repo_url.replace('.git', '.wiki.git'), os.path.join(repo_cwd, 'wiki'), From 83128e986a43eb1bcab34b74f9f245197592a88c Mon Sep 17 00:00:00 2001 From: "W. Harrison Wright" Date: Wed, 7 Feb 2018 21:30:55 -0600 Subject: [PATCH 2/3] Formatting --- bin/github-backup | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index 5711b14..80195c6 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -517,8 +517,7 @@ def retrieve_repositories(args): repos = retrieve_data(args, template, single_request=single_request) if args.all_starred: - starred_template = 'https://{0}/user/starred'.format( - get_github_api_host(args)) + starred_template = 'https://{0}/user/starred'.format(get_github_api_host(args)) starred_repos = retrieve_data(args, starred_template, single_request=False) # flag each repo as starred for downstream processing for item in starred_repos: @@ -526,8 +525,7 @@ def retrieve_repositories(args): repos.extend(starred_repos) if args.include_gists: - gists_template = 'https://{0}/gists'.format( - get_github_api_host(args)) + gists_template = 'https://{0}/gists'.format(get_github_api_host(args)) gists = retrieve_data(args, gists_template, single_request=False) # flag each repo as a gist for downstream processing for item in gists: @@ -535,8 +533,7 @@ def retrieve_repositories(args): repos.extend(gists) if args.include_starred_gists: - starred_gists_template = 'https://{0}/gists/starred'.format( - get_github_api_host(args)) + starred_gists_template = 'https://{0}/gists/starred'.format(get_github_api_host(args)) starred_gists = 
retrieve_data(args, starred_gists_template, single_request=False) # flag each repo as a starred gist for downstream processing for item in starred_gists: From f37825418867fb76e0aeb42d0fdf3a9aeb65b88e Mon Sep 17 00:00:00 2001 From: "W. Harrison Wright" Date: Wed, 7 Feb 2018 21:46:59 -0600 Subject: [PATCH 3/3] Short circuit gists backup process --- bin/github-backup | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index 80195c6..d3ca674 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -611,14 +611,16 @@ def backup_repositories(args, output_directory, repositories): bare_clone=args.bare_clone, lfs_clone=args.lfs_clone) - # dump gist information to a file as well if repository.get('is_gist'): + # dump gist information to a file as well output_file = '{0}/gist.json'.format(repo_cwd) with codecs.open(output_file, 'w', encoding='utf-8') as f: json_dump(repository, f) + continue # don't try to back up anything else for a gist; it doesn't exist + download_wiki = (args.include_wiki or args.include_everything) - if repository.get('has_wiki') and download_wiki: + if repository['has_wiki'] and download_wiki: fetch_repository(repository['name'], repo_url.replace('.git', '.wiki.git'), os.path.join(repo_cwd, 'wiki'),