From 125cfca05eae096053fd1598f67aac916727bf01 Mon Sep 17 00:00:00 2001
From: Harrison Wright
Date: Wed, 23 Mar 2022 19:05:36 -0500
Subject: [PATCH 1/2] Refactor logging and add support for quiet flag

---
 bin/github-backup              |  35 ++++++-----
 github_backup/github_backup.py | 106 +++++++++++++-------------------
 2 files changed, 62 insertions(+), 79 deletions(-)

diff --git a/bin/github-backup b/bin/github-backup
index 25f6ddd..faea49f 100755
--- a/bin/github-backup
+++ b/bin/github-backup
@@ -2,38 +2,41 @@
 import os, sys, logging
 
-from github_backup.github_backup import (
-    backup_account,
-    backup_repositories,
-    check_git_lfs_install,
-    filter_repositories,
-    get_authenticated_user,
-    log_info,
-    log_warning,
-    mkdir_p,
-    parse_args,
-    retrieve_repositories,
-)
-
 logging.basicConfig(
     format='%(asctime)s.%(msecs)03d: %(message)s',
     datefmt='%Y-%m-%dT%H:%M:%S',
     level=logging.INFO
 )
 
+from github_backup.github_backup import (
+    backup_account,
+    backup_repositories,
+    check_git_lfs_install,
+    filter_repositories,
+    get_authenticated_user,
+    logger,
+    mkdir_p,
+    parse_args,
+    retrieve_repositories,
+)
+
+
 def main():
     args = parse_args()
 
+    if args.quiet:
+        logger.setLevel(logging.WARNING)
+
     output_directory = os.path.realpath(args.output_directory)
     if not os.path.isdir(output_directory):
-        log_info('Create output directory {0}'.format(output_directory))
+        logger.info('Create output directory {0}'.format(output_directory))
         mkdir_p(output_directory)
 
     if args.lfs_clone:
         check_git_lfs_install()
 
     if not args.as_app:
-        log_info('Backing up user {0} to {1}'.format(args.user, output_directory))
+        logger.info('Backing up user {0} to {1}'.format(args.user, output_directory))
         authenticated_user = get_authenticated_user(args)
     else:
         authenticated_user = {'login': None}
@@ -48,5 +51,5 @@ if __name__ == '__main__':
     try:
         main()
     except Exception as e:
-        log_warning(str(e))
+        logger.warning(str(e))
         sys.exit(1)
diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py
index e9217cc..fd4003d 100644
--- a/github_backup/github_backup.py
+++ b/github_backup/github_backup.py
@@ -7,7 +7,6 @@ import argparse
 import base64
 import calendar
 import codecs
-import datetime
 import errno
 import getpass
 import json
@@ -37,31 +36,7 @@ except ImportError:
 
 FNULL = open(os.devnull, 'w')
 
-
-def _get_log_date():
-    return datetime.datetime.isoformat(datetime.datetime.now())
-
-
-def log_info(message):
-    """
-    Log message (str) or messages (List[str]) to stdout
-    """
-    if type(message) == str:
-        message = [message]
-
-    for msg in message:
-        logging.info(msg)
-
-
-def log_warning(message):
-    """
-    Log message (str) or messages (List[str]) to stderr
-    """
-    if type(message) == str:
-        message = [message]
-
-    for msg in message:
-        logging.warning(msg)
+logger = logging.getLogger(__name__)
 
 
 def logging_subprocess(popenargs,
                        logger,
                        stdout_log_level=logging.DEBUG,
                        stderr_log_level=logging.ERROR,
                        **kwargs):
     """
     Variant of subprocess.call that accepts a logger instead of stdout/stderr,
     and logs stdout messages via logger.debug and stderr messages via
     logger.error.
     """
     child = subprocess.Popen(popenargs, stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE, **kwargs)
     if sys.platform == 'win32':
-        log_info("Windows operating system detected - no subprocess logging will be returned")
+        logger.info("Windows operating system detected - no subprocess logging will be returned")
 
     log_level = {child.stdout: stdout_log_level,
                  child.stderr: stderr_log_level}
@@ -139,6 +114,11 @@ def parse_args(args=None):
                         metavar='USER',
                         type=str,
                         help='github username')
+    parser.add_argument('-q',
+                        '--quiet',
+                        action='store_true',
+                        dest='quiet',
+                        help='suppress non-error log messages')
     parser.add_argument('-u',
                         '--username',
                         dest='username',
                         help='username for basic auth')
@@ -441,13 +421,13 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False):
         try:
             response = json.loads(r.read().decode('utf-8'))
         except IncompleteRead:
-            log_warning("Incomplete read error detected")
+            logger.warning("Incomplete read error detected")
             read_error = True
         except json.decoder.JSONDecodeError:
-            log_warning("JSON decode error detected")
+            logger.warning("JSON decode error detected")
             read_error = True
         except TimeoutError:
-            log_warning("Tiemout error detected")
+            logger.warning("Timeout error detected")
             read_error = True
         else:
             read_error = False
@@ -455,7 +435,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False):
         # be gentle with API request limit and throttle requests if remaining requests getting low
         limit_remaining = int(r.headers.get('x-ratelimit-remaining', 0))
         if args.throttle_limit and limit_remaining <= args.throttle_limit:
-            log_info(
+            logger.info(
                 'API request limit hit: {} requests left, pausing further requests for {}s'.format(
                     limit_remaining, args.throttle_pause))
@@ -463,7 +443,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False):
 
         retries = 0
         while retries < 3 and (status_code == 502 or read_error):
-            log_warning('API request failed. Retrying in 5 seconds')
+            logger.warning('API request failed. Retrying in 5 seconds')
             retries += 1
             time.sleep(5)
             request = _construct_request(per_page, page, query_args, template, auth, as_app=args.as_app)  # noqa
@@ -474,13 +454,13 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False):
                 response = json.loads(r.read().decode('utf-8'))
                 read_error = False
             except IncompleteRead:
-                log_warning("Incomplete read error detected")
+                logger.warning("Incomplete read error detected")
                 read_error = True
             except json.decoder.JSONDecodeError:
-                log_warning("JSON decode error detected")
+                logger.warning("JSON decode error detected")
                 read_error = True
             except TimeoutError:
-                log_warning("Tiemout error detected")
+                logger.warning("Timeout error detected")
                 read_error = True
 
         if status_code != 200:
@@ -532,12 +512,12 @@ def _get_response(request, auth, template):
             errors, should_continue = _request_http_error(exc, auth, errors)  # noqa
             r = exc
         except URLError as e:
-            log_warning(e.reason)
+            logger.warning(e.reason)
             should_continue = _request_url_error(template, retry_timeout)
             if not should_continue:
                 raise
         except socket.error as e:
-            log_warning(e.strerror)
+            logger.warning(e.strerror)
             should_continue = _request_url_error(template, retry_timeout)
             if not should_continue:
                 raise
@@ -563,7 +543,7 @@ def _construct_request(per_page, page, query_args, template, auth, as_app=None):
             auth = auth.encode('ascii')
         request.add_header('Authorization', 'token '.encode('ascii') + auth)
         request.add_header('Accept', 'application/vnd.github.machine-man-preview+json')
-    log_info('Requesting {}?{}'.format(template, querystring))
+    logger.info('Requesting {}?{}'.format(template, querystring))
     return request
 
 
@@ -587,10 +567,10 @@ def _request_http_error(exc, auth, errors):
             delta = max(10, reset - gm_now)
 
         limit = headers.get('x-ratelimit-limit')
-        log_warning('Exceeded rate limit of {} requests; waiting {} seconds to reset'.format(limit, delta))  # noqa
+        logger.warning('Exceeded rate limit of {} requests; waiting {} seconds to reset'.format(limit, delta))  # noqa
 
         if auth is None:
-            log_info('Hint: Authenticate to raise your GitHub rate limit')
+            logger.info('Hint: Authenticate to raise your GitHub rate limit')
 
         time.sleep(delta)
         should_continue = True
@@ -600,7 +580,7 @@ def _request_http_error(exc, auth, errors):
 
 def _request_url_error(template, retry_timeout):
     # Incase of a connection timing out, we can retry a few time
     # But we won't crash and not back-up the rest now
-    log_info('{} timed out'.format(template))
+    logger.info('{} timed out'.format(template))
     retry_timeout -= 1
 
     if retry_timeout >= 0:
@@ -645,14 +625,14 @@ def download_file(url, path, auth):
                 f.write(chunk)
     except HTTPError as exc:
         # Gracefully handle 404 responses (and others) when downloading from S3
-        log_warning('Skipping download of asset {0} due to HTTPError: {1}'.format(url, exc.reason))
+        logger.warning('Skipping download of asset {0} due to HTTPError: {1}'.format(url, exc.reason))
     except URLError as e:
         # Gracefully handle other URL errors
-        log_warning('Skipping download of asset {0} due to URLError: {1}'.format(url, e.reason))
+        logger.warning('Skipping download of asset {0} due to URLError: {1}'.format(url, e.reason))
     except socket.error as e:
         # Gracefully handle socket errors
         # TODO: Implement retry logic
-        log_warning('Skipping download of asset {0} due to socker error: {1}'.format(url, e.strerror))
+        logger.warning('Skipping download of asset {0} due to socket error: {1}'.format(url, e.strerror))
 
 
 def get_authenticated_user(args):
@@ -668,7 +648,7 @@ def check_git_lfs_install():
 
 
 def retrieve_repositories(args, authenticated_user):
-    log_info('Retrieving repositories')
+    logger.info('Retrieving repositories')
     single_request = False
     if args.user == authenticated_user['login']:
         # we must use the /user/repos API to be able to access private repos
         template = 'https://{0}/user/repos'.format(
             get_github_api_host(args))
     else:
         if args.private and not args.organization:
-            log_warning('Authenticated user is different from user being backed up, thus private repositories cannot be accessed')
+            logger.warning('Authenticated user is different from user being backed up, thus private repositories cannot be accessed')
 
         template = 'https://{0}/users/{1}/repos'.format(
             get_github_api_host(args),
             args.user)
@@ -724,7 +704,7 @@ def retrieve_repositories(args, authenticated_user):
 
 
 def filter_repositories(args, unfiltered_repositories):
-    log_info('Filtering repositories')
+    logger.info('Filtering repositories')
     repositories = []
 
     for r in unfiltered_repositories:
@@ -755,7 +735,7 @@ def filter_repositories(args, unfiltered_repositories):
 
 
 def backup_repositories(args, output_directory, repositories):
-    log_info('Backing up repositories')
+    logger.info('Backing up repositories')
     repos_template = 'https://{0}/repos'.format(get_github_api_host(args))
 
     if args.incremental:
@@ -837,7 +817,7 @@ def backup_issues(args, repo_cwd, repository, repos_template):
     if args.skip_existing and has_issues_dir:
         return
 
-    log_info('Retrieving {0} issues'.format(repository['full_name']))
+    logger.info('Retrieving {0} issues'.format(repository['full_name']))
     issue_cwd = os.path.join(repo_cwd, 'issues')
 
     mkdir_p(repo_cwd, issue_cwd)
@@ -873,7 +853,7 @@ def backup_issues(args, repo_cwd, repository, repos_template):
         issues_skipped_message = ' (skipped {0} pull requests)'.format(
             issues_skipped)
 
-    log_info('Saving {0} issues to disk{1}'.format(
+    logger.info('Saving {0} issues to disk{1}'.format(
         len(list(issues.keys())), issues_skipped_message))
     comments_template = _issue_template + '/{0}/comments'
     events_template = _issue_template + '/{0}/events'
@@ -895,7 +875,7 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
     if args.skip_existing and has_pulls_dir:
         return
 
-    log_info('Retrieving {0} pull requests'.format(repository['full_name']))  # noqa
+    logger.info('Retrieving {0} pull requests'.format(repository['full_name']))  # noqa
     pulls_cwd = os.path.join(repo_cwd, 'pulls')
     mkdir_p(repo_cwd, pulls_cwd)
@@ -939,7 +919,7 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
                 single_request=True
             )[0]
 
-    log_info('Saving {0} pull requests to disk'.format(
+    logger.info('Saving {0} pull requests to disk'.format(
         len(list(pulls.keys()))))
     comments_template = _pulls_template + '/{0}/comments'
     commits_template = _pulls_template + '/{0}/commits'
@@ -961,7 +941,7 @@ def backup_milestones(args, repo_cwd, repository, repos_template):
     if args.skip_existing and os.path.isdir(milestone_cwd):
         return
 
-    log_info('Retrieving {0} milestones'.format(repository['full_name']))
+    logger.info('Retrieving {0} milestones'.format(repository['full_name']))
     mkdir_p(repo_cwd, milestone_cwd)
 
     template = '{0}/{1}/milestones'.format(repos_template,
@@ -977,7 +957,7 @@ def backup_milestones(args, repo_cwd, repository, repos_template):
     for milestone in _milestones:
         milestones[milestone['number']] = milestone
 
-    log_info('Saving {0} milestones to disk'.format(
+    logger.info('Saving {0} milestones to disk'.format(
         len(list(milestones.keys()))))
     for number, milestone in list(milestones.items()):
         milestone_file = '{0}/{1}.json'.format(milestone_cwd, number)
@@ -1000,7 +980,7 @@ def backup_labels(args, repo_cwd, repository, repos_template):
 
 def backup_hooks(args, repo_cwd, repository, repos_template):
     auth = get_auth(args)
     if not auth:
-        log_info("Skipping hooks since no authentication provided")
+        logger.info("Skipping hooks since no authentication provided")
         return
     hook_cwd = os.path.join(repo_cwd, 'hooks')
     output_file = '{0}/hooks.json'.format(hook_cwd)
@@ -1013,7 +993,7 @@ def backup_hooks(args, repo_cwd, repository, repos_template):
                          output_file,
                          hook_cwd)
     except SystemExit:
-        log_info("Unable to read hooks, skipping")
+        logger.info("Unable to read hooks, skipping")
 
 
 def backup_releases(args, repo_cwd, repository, repos_template, include_assets=False):
@@ -1021,7 +1001,7 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F
 
     # give release files somewhere to live & log intent
     release_cwd = os.path.join(repo_cwd, 'releases')
-    log_info('Retrieving {0} releases'.format(repository_fullname))
+    logger.info('Retrieving {0} releases'.format(repository_fullname))
     mkdir_p(repo_cwd, release_cwd)
 
     query_args = {}
@@ -1030,7 +1010,7 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F
     releases = retrieve_data(args, release_template, query_args=query_args)
 
     # for each release, store it
-    log_info('Saving {0} releases to disk'.format(len(releases)))
+    logger.info('Saving {0} releases to disk'.format(len(releases)))
     for release in releases:
         release_name = release['tag_name']
         release_name_safe = release_name.replace('/', '__')
@@ -1075,12 +1055,12 @@ def fetch_repository(name,
                                    stderr=FNULL,
                                    shell=True)
     if initialized == 128:
-        log_info("Skipping {0} ({1}) since it's not initialized".format(
+        logger.info("Skipping {0} ({1}) since it's not initialized".format(
             name, masked_remote_url))
         return
 
     if clone_exists:
-        log_info('Updating {0} in {1}'.format(name, local_dir))
+        logger.info('Updating {0} in {1}'.format(name, local_dir))
 
         remotes = subprocess.check_output(['git', 'remote', 'show'],
                                           cwd=local_dir)
@@ -1101,7 +1081,7 @@ def fetch_repository(name,
             git_command = ['git', 'fetch', '--all', '--force', '--tags', '--prune']
             logging_subprocess(git_command, None, cwd=local_dir)
     else:
-        log_info('Cloning {0} repository from {1} to {2}'.format(
+        logger.info('Cloning {0} repository from {1} to {2}'.format(
             name,
             masked_remote_url,
             local_dir))
@@ -1161,11 +1141,11 @@ def backup_account(args, output_directory):
 
 def _backup_data(args, name, template, output_file, output_directory):
     skip_existing = args.skip_existing
     if not skip_existing or not os.path.exists(output_file):
-        log_info('Retrieving {0} {1}'.format(args.user, name))
+        logger.info('Retrieving {0} {1}'.format(args.user, name))
         mkdir_p(output_directory)
         data = retrieve_data(args, template)
 
-        log_info('Writing {0} {1} to disk'.format(len(data), name))
+        logger.info('Writing {0} {1} to disk'.format(len(data), name))
         with codecs.open(output_file, 'w', encoding='utf-8') as f:
             json_dump(data, f)

From 7437e3abb1aed35b3dfd2f208bbff8c5a00e523c Mon Sep 17 00:00:00 2001
From: Halvor Holsten Strand
Date: Mon, 9 Oct 2023 12:01:32 +0200
Subject: [PATCH 2/2] Merge pull request, while keeping -q --quiet flag.

Most changes were already included, only adjusted with black formatting.
---
 .gitchangelog.rc                     |  117 +++
 .github/workflows/tagged-release.yml |   19 +
 CHANGES.rst                          |  495 ++--------
 README.rst                           |   31 +-
 bin/github-backup                    |    7 +-
 github_backup/__init__.py            |    2 +-
 github_backup/github_backup.py       | 1305 +++++++++++++++-----------
 release                              |   12 +-
 release-requirements.txt             |   31 +
 setup.py                             |   38 +-
 10 files changed, 1058 insertions(+), 999 deletions(-)
 create mode 100644 .gitchangelog.rc
 create mode 100644 .github/workflows/tagged-release.yml
 create mode 100644 release-requirements.txt

diff --git a/.gitchangelog.rc b/.gitchangelog.rc
new file mode 100644
index 0000000..842973f
--- /dev/null
+++ b/.gitchangelog.rc
@@ -0,0 +1,117 @@
+#
+# Format
+#
+#   ACTION: [AUDIENCE:] COMMIT_MSG [@TAG ...]
+#
+# Description
+#
+#   ACTION is one of 'chg', 'fix', 'new'
+#
+#       Is WHAT the change is about.
+#
+#       'chg' is for refactor, small improvement, cosmetic changes...
+#       'fix' is for bug fixes
+#       'new' is for new features, big improvement
+#
+#   SUBJECT is optional and one of 'dev', 'usr', 'pkg', 'test', 'doc'
+#
+#       Is WHO is concerned by the change.
+#
+#       'dev'  is for developers (API changes, refactors...)
+#       'usr'  is for final users (UI changes)
+#       'pkg'  is for packagers (packaging changes)
+#       'test' is for testers (test only related changes)
+#       'doc'  is for doc guys (doc only changes)
+#
+#   COMMIT_MSG is ... well ... the commit message itself.
+#
+#   TAGs are additional adjectives as 'refactor' 'minor' 'cosmetic'
+#
+#       'refactor' is obviously for refactoring code only
+#       'minor' is for a very meaningless change (a typo, adding a comment)
+#       'cosmetic' is for cosmetic driven change (re-indentation, 80-col...)
+#
+# Example:
+#
+#   new: usr: support of bazaar implemented
+#   chg: re-indented some lines @cosmetic
+#   new: dev: updated code to be compatible with last version of killer lib.
+#   fix: pkg: updated year of licence coverage.
+#   new: test: added a bunch of test around user usability of feature X.
+#   fix: typo in spelling my name in comment. @minor
+#
+# Please note that multi-line commit messages are supported, and only the
+# first line will be considered as the "summary" of the commit message. So
+# tags, and other rules only apply to the summary. The body of the commit
+# message will be displayed in the changelog with minor reformatting.
+
+#
+# ``ignore_regexps`` is a line of regexps
+#
+# Any commit having its full commit message matching any regexp listed here
+# will be ignored and won't be reported in the changelog.
+#
+ignore_regexps = [
+    r'(?i)^(Merge pull request|Merge branch|Release|Update)',
+]
+
+
+#
+# ``replace_regexps`` is a dict associating a regexp pattern and its replacement
+#
+# It will be applied to get the summary line from the full commit message.
+#
+# Note that you can provide multiple replacement patterns, they will be all
+# tried. If None matches, the summary line will be the full commit message.
+#
+replace_regexps = {
+    # current format (ie: 'chg: dev: my commit msg @tag1 @tag2')
+
+    r'^([cC]hg|[fF]ix|[nN]ew)\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n@]*)(@[a-z]+\s+)*$':
+    r'\4',
+}
+
+
+# ``section_regexps`` is a list of 2-tuples associating a string label and a
+# list of regexp
+#
+# Commit messages will be classified in sections thanks to this. Section
+# titles are the label, and a commit is classified under this section if any
+# of the regexps associated is matching.
+#
+section_regexps = [
+    ('New', [
+        r'^[nN]ew\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n]*)$',
+    ]),
+    ('Changes', [
+        r'^[cC]hg\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n]*)$',
+    ]),
+    ('Fix', [
+        r'^[fF]ix\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n]*)$',
+    ]),
+    ('Other', None  # Match all lines
+    ),
+
+]
+
+# ``body_split_regexp`` is a regexp
+#
+# Commit message body (not the summary) if existing will be split
+# (new line) on this regexp
+#
+body_split_regexp = r'[\n-]'
+
+
+# ``tag_filter_regexp`` is a regexp
+#
+# Tags that will be used for the changelog must match this regexp.
+#
+# tag_filter_regexp = r'^[0-9]+$'
+tag_filter_regexp = r'^(?:[vV])?[0-9\.]+$'
+
+
+# ``unreleased_version_label`` is a string
+#
+# This label will be used as the changelog Title of the last set of changes
+# between last valid tag and HEAD if any.
+unreleased_version_label = "%%version%% (unreleased)"
diff --git a/.github/workflows/tagged-release.yml b/.github/workflows/tagged-release.yml
new file mode 100644
index 0000000..846c457
--- /dev/null
+++ b/.github/workflows/tagged-release.yml
@@ -0,0 +1,19 @@
+---
+name: "tagged-release"
+
+# yamllint disable-line rule:truthy
+on:
+  push:
+    tags:
+      - '*'
+
+jobs:
+  tagged-release:
+    name: tagged-release
+    runs-on: ubuntu-20.04
+
+    steps:
+      - uses: "marvinpinto/action-automatic-releases@v1.2.1"
+        with:
+          repo_token: "${{ secrets.GITHUB_TOKEN }}"
+          prerelease: false
diff --git a/CHANGES.rst b/CHANGES.rst
index 47b335d..b4f774b 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,9 +1,87 @@
 Changelog
 =========
 
-0.39.0 (2021-03-18)
+0.43.1 (2023-05-29)
 -------------------
+- Chore: add release requirements. [Jose Diaz-Gonzalez]
+
+
+0.43.0 (2023-05-29)
+-------------------
+
+Fix
+~~~
+- Do not update readme. [Jose Diaz-Gonzalez]
+- Adjust for black. [Jose Diaz-Gonzalez]
+- Adjust for black. [Jose Diaz-Gonzalez]
+- Adjust for black. [Jose Diaz-Gonzalez]
+
+Other
+~~~~~
+- Feat: commit gitchangelog.rc to repo so anyone can generate a
+  changelog. [Jose Diaz-Gonzalez]
+- Feat: add release tagging. [Jose Diaz-Gonzalez]
+- Chore: formatting. [Jose Diaz-Gonzalez]
+- Chore: run black. [Jose Diaz-Gonzalez]
+- Add --log-level command line argument. [Enrico Tröger]
+
+  Support changing the log level to the desired value easily.
+  For example, this is useful to suppress progress messages but
+  keep logging warnings and errors.
+- Check both updated_at and pushed_at properties. [Ken Bailey]
+
+  Check both updated_at and pushed_at dates to get the last_update to reduce data retrieved on incremental api calls using since.
+
+
+0.42.0 (2022-11-28)
+-------------------
+- Add option to exclude repositories. [npounder]
+- Backup regular pull request comments as well. [Oneric]
+
+  Before, only review comments were backed up;
+  regular comments need to be fetched via issue API.
+- Fix bug forever retry when request url error. [kornpisey]
+- Added --no-prune option to disable prune option when doing git fetch.
+  [kornpisey]
+
+
+0.41.0 (2022-03-02)
+-------------------
+- Git lfs clone doe snot respect --mirror. [Louis Parisot]
+
+
+0.40.2 (2021-12-29)
+-------------------
+- Fix lint issues raised by Flake8. [atinary-afoulon]
+
+  According to job:
+  [ https://app.circleci.com/pipelines/github/josegonzalez/python-github-backup/30/workflows/74eb93f2-2505-435d-b728-03b3cc04c14a/jobs/23 ]
+
+  Failed on the following checks:
+  ./github_backup/github_backup.py:20:1: F811 redefinition of unused 'logging' from line 14
+  ./github_backup/github_backup.py:45:1: E302 expected 2 blank lines, found 1
+  ./github_backup/github_backup.py:136:20: E251 unexpected spaces around keyword / parameter equals
+
+
+0.40.1 (2021-09-22)
+-------------------
+- Revert to fetch. [Harrison Wright]
+
+
+0.40.0 (2021-07-12)
+-------------------
+- Add retry on certain network errors. [Jacek Nykis]
+
+  This change includes certain network level errors in the retry logic.
+  It partially address #110 but I think more comprehensive fix would be useful.
+- Pull changes from remote. [Jonas]
+
+  use `git pull` to pull actual files from the remote instead of using `fetch` for only the metadata
+
+
+0.39.0 (2021-03-19)
+-------------------
 
 Fix
 ~~~
@@ -11,16 +89,8 @@ Fix
 
 Other
 ~~~~~
-- Merge pull request #173 from gallofeliz/make-compatible-python-call.
-  [Jose Diaz-Gonzalez]
-
-  Try to make compatible code with direct Python call ; reduce the hard link of the code with the cli
 - Try to make compatible code with direct Python call ; reduce the hard
   link of the code with the cli. [Gallo Feliz]
-- Merge pull request #174 from atorrescogollo/master. [Jose Diaz-
-  Gonzalez]
-
-  Fixed release_name with slash bug
 - Fixed release_name with slash bug. [Álvaro Torres Cogollo]
@@ -38,15 +108,6 @@ Fix
 
 Other
 ~~~~~
-- Release version 0.38.0. [Jose Diaz-Gonzalez]
-- Merge pull request #172 from samanthaq/always-use-oauth-when-provided.
-  [Jose Diaz-Gonzalez]
-
-  fix: Always clone with OAuth token when provided
-- Merge pull request #170 from Mindavi/bugfix/broken-url. [Jose Diaz-
-  Gonzalez]
-
-  Fix broken and incorrect link to github repository
 - Change broken link to a fork to a working link to upstream. [Rick van
   Schijndel]
@@ -58,10 +119,6 @@ Fix
 ~~~
 - Use distutils.core on error. [Jose Diaz-Gonzalez]
 
-Other
-~~~~~
-- Release version 0.37.2. [Jose Diaz-Gonzalez]
-
 
 0.37.1 (2021-01-02)
 -------------------
@@ -79,46 +136,24 @@ Fix
 
   - Set long description type
   - Gitignore the temporary readme file
 
-Other
-~~~~~
-- Release version 0.37.1. [Jose Diaz-Gonzalez]
-
 
 0.37.0 (2021-01-02)
 -------------------
-- Release version 0.37.0. [Jose Diaz-Gonzalez]
-- Merge pull request #158 from albertyw/python3. [Jose Diaz-Gonzalez]
-
-  Remove support for python 2
 - Add support for python 3.7 and 3.8 in package classifiers. [Albert
   Wang]
 - Remove support for python 2.7 in package classifiers. [Albert Wang]
 - Remove python 2 specific import logic. [Albert Wang]
 - Remove python 2 specific logic. [Albert Wang]
-- Merge pull request #165 from garymoon/add-skip-archived. [Jose Diaz-
-  Gonzalez]
-
-  Add option to skip archived repositories
 - Add ability to skip archived repositories. [Gary Moon]
 
 
 0.36.0 (2020-08-29)
 -------------------
-- Release version 0.36.0. [Jose Diaz-Gonzalez]
-- Merge pull request #157 from albertyw/lint. [Jose Diaz-Gonzalez]
 - Add flake8 instructions to readme. [Albert Wang]
 - Fix regex string. [Albert Wang]
-- Update boolean check. [Albert Wang]
 - Fix whitespace issues. [Albert Wang]
 - Do not use bare excepts. [Albert Wang]
-- Merge pull request #161 from albertyw/circleci-project-setup. [Jose
-  Diaz-Gonzalez]
-
-  Add circleci config
 - Add .circleci/config.yml. [Albert Wang]
-- Merge pull request #160 from wbolster/patch-1. [Jose Diaz-Gonzalez]
-
-  Include --private flag in example
 - Include --private flag in example. [wouter bolsterlee]
 
   By default, private repositories are not included. This is surprising.
@@ -128,38 +163,16 @@ Other
 
 0.35.0 (2020-08-05)
 -------------------
-- Release version 0.35.0. [Jose Diaz-Gonzalez]
-- Merge pull request #156 from samanthaq/restore-optional-throttling.
-  [Jose Diaz-Gonzalez]
-
-  Make API request throttling optional
 - Make API request throttling optional. [Samantha Baldwin]
 
 
 0.34.0 (2020-07-24)
 -------------------
-- Release version 0.34.0. [Jose Diaz-Gonzalez]
-- Merge pull request #153 from 0x6d617474/gist_ssh. [Jose Diaz-Gonzalez]
-
-  Add logic for transforming gist repository urls to ssh
 - Add logic for transforming gist repository urls to ssh. [Matt Fields]
 
 
-0.33.1 (2020-05-28)
--------------------
-- Release version 0.33.1. [Jose Diaz-Gonzalez]
-- Merge pull request #151 from garymoon/readme-update-0.33. [Jose Diaz-
-  Gonzalez]
-- Update the readme for new switches added in 0.33. [Gary Moon]
-
-
 0.33.0 (2020-04-13)
 -------------------
-- Release version 0.33.0. [Jose Diaz-Gonzalez]
-- Merge pull request #149 from eht16/simple_api_request_throttling.
-  [Jose Diaz-Gonzalez]
-
-  Add basic API request throttling
 - Add basic API request throttling. [Enrico Tröger]
 
   A simple approach to throttle API requests and so keep within the rate
@@ -171,50 +184,23 @@ Other
 
 0.32.0 (2020-04-13)
 -------------------
-- Release version 0.32.0. [Jose Diaz-Gonzalez]
-- Merge pull request #148 from eht16/logging_with_timestamp. [Jose Diaz-
-  Gonzalez]
-
-  Add timestamp to log messages
 - Add timestamp to log messages. [Enrico Tröger]
-- Merge pull request #147 from tomhoover/update-readme. [Jose Diaz-
-  Gonzalez]
-
-  Update README.rst to match 'github-backup -h'
-- Update README.rst to match 'github-backup -h' [Tom Hoover]
 
 
 0.31.0 (2020-02-25)
 -------------------
-- Release version 0.31.0. [Jose Diaz-Gonzalez]
-- Merge pull request #146 from timm3/upstream-123. [Jose Diaz-Gonzalez]
-
-  Authenticate as Github App
 - #123 update: changed --as-app 'help' description. [ethan]
 - #123: Support Authenticating As Github Application. [ethan]
 
 
-0.30.0 (2020-02-14)
--------------------
-- Release version 0.30.0. [Jose Diaz-Gonzalez]
-
-
 0.29.0 (2020-02-14)
 -------------------
-- Release version 0.29.0. [Jose Diaz-Gonzalez]
-- Merge pull request #145 from timm3/50-v0.28.0. [Jose Diaz-Gonzalez]
-
-  #50 - refactor for friendlier import
 - #50 update: keep main() in bin. [ethan]
 - #50 - refactor for friendlier import. [ethan]
 
 
 0.28.0 (2020-02-03)
 -------------------
-- Release version 0.28.0. [Jose Diaz-Gonzalez]
-- Merge pull request #143 from smiley/patch-1. [Jose Diaz-Gonzalez]
-
-  Remove deprecated (and removed) "git lfs fetch" flags
 - Remove deprecated (and removed) git lfs flags. [smiley]
 
   "--tags" and "--force" were removed at some point from "git lfs fetch".
   This broke our backup script.
@@ -222,11 +208,6 @@ Other
 
 0.27.0 (2020-01-22)
 -------------------
-- Release version 0.27.0. [Jose Diaz-Gonzalez]
-- Merge pull request #142 from einsteinx2/issue/141-import-error-
-  version. [Jose Diaz-Gonzalez]
-
-  Fixed script fails if not installed from pip
 - Fixed script fails if not installed from pip. [Ben Baron]
 
   At the top of the script, the line from github_backup import __version__ gets the script's version number to use if the script is called with the -v or --version flags. The problem is that if the script hasn't been installed via pip (for example I cloned the repo directly to my backup server), the script will fail due to an import exception.
@@ -234,26 +215,14 @@ Other
 
   Also presumably it will always use the version number from pip even if running a modified version from git or a fork or something, though this does not fix that as I have no idea how to check if it's running the pip installed version or not. But at least the script will now work fine if cloned from git or just copied to another machine. closes https://github.com/josegonzalez/python-github-backup/issues/141
-- Merge pull request #136 from einsteinx2/issue/88-macos-keychain-
-  broken-python3. [Jose Diaz-Gonzalez]
-
-  Fixed macOS keychain access when using Python 3
 - Fixed macOS keychain access when using Python 3. [Ben Baron]
 
   Python 3 is returning bytes rather than a string, so the string concatenation to create the auth variable was throwing an exception which the script was interpreting to mean it couldn't find the password. Adding a conversion to string first fixed the issue.
-- Merge pull request #137 from einsteinx2/issue/134-only-use-auth-token-
-  when-needed. [Jose Diaz-Gonzalez]
-
-  Public repos no longer include the auth token
 - Public repos no longer include the auth token. [Ben Baron]
 
   When backing up repositories using an auth token and https, the GitHub personal auth token is leaked in each backed up repository. It is included in the URL of each repository's git remote url. This is not needed as they are public and can be accessed without the token and can cause issues in the future if the token is ever changed, so I think it makes more sense not to have the token stored in each repo backup. I think the token should only be "leaked" like this out of necessity, e.g. it's a private repository and the --prefer-ssh option was not chosen so https with auth token was required to perform the clone.
-- Merge pull request #130 from einsteinx2/issue/129-fix-crash-on-
-  release-asset-download-error. [Jose Diaz-Gonzalez]
-
-  Crash when an release asset doesn't exist
 - Fixed comment typo. [Ben Baron]
 - Switched log_info to log_warning in download_file. [Ben Baron]
 - Crash when an release asset doesn't exist. [Ben Baron]
@@ -261,10 +230,6 @@ Other
 
   Currently, the script crashes whenever a release asset is unable to download (for example a 404 response). This change instead logs the failure and allows the script to continue. No retry logic is enabled, but at least it prevents the crash and allows the backup to complete. Retry logic can be implemented later if wanted. closes https://github.com/josegonzalez/python-github-backup/issues/129
-- Merge pull request #132 from einsteinx2/issue/126-prevent-overwriting-
-  release-assets. [Jose Diaz-Gonzalez]
-
-  Separate release assets and skip re-downloading
 - Moved asset downloading loop inside the if block. [Ben Baron]
 - Separate release assets and skip re-downloading. [Ben Baron]
@@ -275,36 +240,21 @@ Other
 
   This change also now checks if the asset file already exists on disk and skips downloading it. This drastically speeds up addiotnal syncs as it no longer downloads every single release every single time. It will now only download new releases which I believe is the expected behavior. closes https://github.com/josegonzalez/python-github-backup/issues/126
-- Merge pull request #131 from einsteinx2/improve-gitignore. [Jose Diaz-
-  Gonzalez]
-
-  Improved gitignore, macOS files and IDE configs
 - Added newline to end of file. [Ben Baron]
 - Improved gitignore, macOS files and IDE configs. [Ben Baron]
 
   Ignores the annoying hidden macOS files .DS_Store and ._* as well as the IDE configuration folders for contributors using the popular Visual Studio Code and Atom IDEs (more can be added later as needed).
-- Update ISSUE_TEMPLATE.md. [Jose Diaz-Gonzalez]
 
 
 0.26.0 (2019-09-23)
 -------------------
-- Release version 0.26.0. [Jose Diaz-Gonzalez]
-- Merge pull request #128 from Snawoot/master. [Jose Diaz-Gonzalez]
-
-  Workaround gist clone in `--prefer-ssh` mode
 - Workaround gist clone in `--prefer-ssh` mode. [Vladislav Yarmak]
 - Create PULL_REQUEST.md. [Jose Diaz-Gonzalez]
 - Create ISSUE_TEMPLATE.md. [Jose Diaz-Gonzalez]
-- Update README.rst. [Jose Diaz-Gonzalez]
-- Update README.rst. [Jose Diaz-Gonzalez]
 
 
 0.25.0 (2019-07-03)
 -------------------
-- Release version 0.25.0. [Jose Diaz-Gonzalez]
-- Merge pull request #120 from 8h2a/patch-1. [Jose Diaz-Gonzalez]
-
-  Issue 119: Change retrieve_data to be a generator
 - Issue 119: Change retrieve_data to be a generator. [2a]
 
   See issue #119.
@@ -312,43 +262,21 @@ Other
 
 0.24.0 (2019-06-27)
 -------------------
-- Release version 0.24.0. [Jose Diaz-Gonzalez]
-- Merge pull request #117 from QuicketSolutions/master. [Jose Diaz-
-  Gonzalez]
-
-  Add option for Releases
-- Merge pull request #5 from QuicketSolutions/QKT-45. [Ethan Timm]
 - QKT-45: include assets - update readme. [Ethan Timm]
 
   update readme with flag information for including assets alongside their respective releases
-- Merge pull request #4 from whwright/wip-releases. [Ethan Timm]
-
-  Download github assets
 - Make assets it's own flag. [Harrison Wright]
 - Fix super call for python2. [Harrison Wright]
 - Fix redirect to s3. [Harrison Wright]
 - WIP: download assets. [Harrison Wright]
-- Merge pull request #3 from QuicketSolutions/QKT-42. [Ethan Timm]
 - QKT-42: releases - add readme info. [ethan]
-- Merge pull request #2 from QuicketSolutions/QKT-42. [Ethan Timm]
-
-  QKT-42 update: shorter command flag
 - QKT-42 update: shorter command flag. [ethan]
-- Merge pull request #1 from QuicketSolutions/QKT-42. [Ethan Timm]
 - QKT-42: support saving release information. [ethan]
-- Merge pull request #118 from whwright/115-fix-pull-details. [Jose
-  Diaz-Gonzalez]
-
-  Fix pull details
 - Fix pull details. [Harrison Wright]
 
 
 0.23.0 (2019-06-04)
 -------------------
-- Release version 0.23.0. [Jose Diaz-Gonzalez]
-- Merge pull request #113 from kleag/master. [Jose Diaz-Gonzalez]
-
-  Avoid to crash in case of HTTP 502 error
 - Avoid to crash in case of HTTP 502 error. [Gael de Chalendar]
 
   Survive also on socket.error connections like on HTTPError or URLError.
@@ -365,32 +293,15 @@ Fix
 
   Refs #106
 
-Other
-~~~~~
-- Release version 0.22.2. [Jose Diaz-Gonzalez]
-- Merge pull request #107 from josegonzalez/patch-1. [Jose Diaz-
-  Gonzalez]
-
-  fix: warn instead of error
-
 
 0.22.1 (2019-02-21)
 -------------------
-- Release version 0.22.1. [Jose Diaz-Gonzalez]
-- Merge pull request #106 from jstetic/master. [Jose Diaz-Gonzalez]
-
-  Log URL error
 - Log URL error https://github.com/josegonzalez/python-github-backup/issues/105. [JOHN STETIC]
 
 
 0.22.0 (2019-02-01)
 -------------------
-- Release version 0.22.0. [Jose Diaz-Gonzalez]
-- Merge pull request #103 from whwright/98-better-logging. [Jose Diaz-
-  Gonzalez]
-
-  Fix accidental system exit with better logging strategy
 - Remove unnecessary sys.exit call. [W. Harrison Wright]
 - Add org check to avoid incorrect log output. [W. Harrison Wright]
 - Fix accidental system exit with better logging strategy. [W. Harrison
   Wright]
@@ -399,10 +310,6 @@ Other
 
 0.21.1 (2018-12-25)
 -------------------
-- Release version 0.21.1. [Jose Diaz-Gonzalez]
-- Merge pull request #101 from ecki/patch-2. [Jose Diaz-Gonzalez]
-
-  Mark options which are not included in --all
 - Mark options which are not included in --all. [Bernd]
 
   As discussed in Issue #100
@@ -410,22 +317,12 @@ Other
 
 0.21.0 (2018-11-28)
 -------------------
-- Release version 0.21.0. [Jose Diaz-Gonzalez]
-- Merge pull request #97 from whwright/94-fix-user-repos. [Jose Diaz-
-  Gonzalez]
-
-  Correctly download repos when user arg != authenticated user
 - Correctly download repos when user arg != authenticated user. [W.
   Harrison Wright]
 
 
 0.20.1 (2018-09-29)
 -------------------
-- Release version 0.20.1. [Jose Diaz-Gonzalez]
-- Merge pull request #92 from whwright/87-fix-starred-bug. [Jose Diaz-
-  Gonzalez]
-
-  Clone the specified user's starred repos/gists, not the authenticated user
 - Clone the specified user's gists, not the authenticated user. [W.
   Harrison Wright]
 - Clone the specified user's starred repos, not the authenticated user.
   [W. Harrison Wright]
@@ -434,7 +331,6 @@ Other
 
 0.20.0 (2018-03-24)
 -------------------
-- Release version 0.20.0. [Jose Diaz-Gonzalez]
 - Chore: drop Python 2.6. [Jose Diaz-Gonzalez]
 - Feat: simplify release script. [Jose Diaz-Gonzalez]
@@ -446,33 +342,15 @@ Fix
 ~~~
 - Cleanup pep8 violations. [Jose Diaz-Gonzalez]
 
-Other
-~~~~~
-- Release version 0.19.2. [Jose Diaz-Gonzalez]
-
-
-0.19.1 (2018-03-24)
--------------------
-- Release version 0.19.1. [Jose Diaz-Gonzalez]
-
 
 0.19.0 (2018-03-24)
 -------------------
-- Release version 0.19.0. [Jose Diaz-Gonzalez]
-- Merge pull request #77 from mayflower/pull-details. [Jose Diaz-
-  Gonzalez]
-
-  Pull Details
 - Add additional output for the current request. [Robin Gloster]
 
   This is useful to have some progress indication for huge repositories.
 - Add option to backup additional PR details. [Robin Gloster]
 
   Some payload is only included when requesting a single pull request
-- Merge pull request #84 from johbo/fix-python36-skip-existing. [Jose
-  Diaz-Gonzalez]
-
-  Mark string as binary in comparison for skip_existing
 - Mark string as binary in comparison for skip_existing. [Johannes
   Bornhold]
@@ -483,20 +361,11 @@ Other
 
 0.18.0 (2018-02-22)
 -------------------
-- Release version 0.18.0. [Jose Diaz-Gonzalez]
-- Merge pull request #82 from sgreene570/add-followers. [Jose Diaz-
-  Gonzalez]
-
-  Add option to fetch followers/following JSON data
 - Add option to fetch followers/following JSON data. [Stephen Greene]
 
 
 0.17.0 (2018-02-20)
 -------------------
-- Release version 0.17.0. [Jose Diaz-Gonzalez]
-- Merge pull request #81 from whwright/gists. [Jose Diaz-Gonzalez]
-
-  Add ability to back up gists
 - Short circuit gists backup process. [W. Harrison Wright]
 - Formatting. [W. Harrison Wright]
 - Add ability to backup gists. [W. Harrison Wright]
@@ -504,94 +373,41 @@ Other
 
 0.16.0 (2018-01-22)
 -------------------
-- Release version 0.16.0. [Jose Diaz-Gonzalez]
-- Merge pull request #78 from whwright/clone-starred-repos. [Jose Diaz-
-  Gonzalez]
-
-  Clone starred repos
-- Update README.rst. [Jose Diaz-Gonzalez]
-- Update documentation. [W. Harrison Wright]
 - Change option to --all-starred. [W. Harrison Wright]
 - JK don't update documentation. [W. Harrison Wright]
-- Update documentation. [W. Harrison Wright]
 - Put starred clone repoistories under a new option. [W. Harrison
   Wright]
 - Add comment. [W. Harrison Wright]
 - Add ability to clone starred repos. [W. Harrison Wright]
 
 
-0.15.0 (2017-12-11)
--------------------
-- Release version 0.15.0. [Jose Diaz-Gonzalez]
-- Merge pull request #75 from slibby/slibby-patch-windows. [Jose Diaz-
-  Gonzalez]
-
-  update check_io() to allow scripts to run on Windows
-- Update logging_subprocess function. [Sam Libby]
-
-  1. added newline for return
-  2. added one-time warning (once per subprocess)
-- Update check_io() to allow scripts to run on Windows. [Sam Libby]
-
-
 0.14.1 (2017-10-11)
 -------------------
-- Release version 0.14.1. [Jose Diaz-Gonzalez]
-- Merge pull request #70 from epfremmer/patch-1. [Jose Diaz-Gonzalez]
-
-  Fix arg not defined error
 - Fix arg not defined error. [Edward Pfremmer]
 
 
 0.14.0 (2017-10-11)
 -------------------
-- Release version 0.14.0. [Jose Diaz-Gonzalez]
-- Merge pull request #68 from pieterclaerhout/master. [Jose Diaz-
-  Gonzalez]
-
-  Added support for LFS clones
-- Updated the readme. [pieterclaerhout]
 - Added a check to see if git-lfs is installed when doing an LFS clone.
   [pieterclaerhout]
 - Added support for LFS clones. [pieterclaerhout]
-- Merge pull request #66 from albertyw/python3. [Jose Diaz-Gonzalez]
-
-  Explicitly support python 3
 - Add pypi info to readme. [Albert Wang]
 - Explicitly support python 3 in package description. [Albert Wang]
-- Merge pull request #65 from mumblez/master. [Jose Diaz-Gonzalez]
-
-  add couple examples to help new users
 - Add couple examples to help new users. [Yusuf Tran]
 
 
 0.13.2 (2017-05-06)
 -------------------
-- Release version 0.13.2. [Jose Diaz-Gonzalez]
-- Merge pull request #64 from karlicoss/fix-remotes. [Jose Diaz-
-  Gonzalez]
-
-  Fix remotes while updating repository
 - Fix remotes while updating repository. [Dima Gerasimov]
 
 
 0.13.1 (2017-04-11)
 -------------------
-- Release version 0.13.1. [Jose Diaz-Gonzalez]
-- Merge pull request #61 from McNetic/fix_empty_updated_at. [Jose Diaz-
-  Gonzalez]
-
-  Fix error when repository has no updated_at value
 - Fix error when repository has no updated_at value. [Nicolai Ehemann]
 
 
 0.13.0 (2017-04-05)
 -------------------
-- Release version 0.13.0. [Jose Diaz-Gonzalez]
-- Merge pull request #59 from martintoreilly/master. [Jose Diaz-
-  Gonzalez]
-
-  Add support for storing PAT in OSX keychain
 - Add OS check for OSX specific keychain args. [Martin O'Reilly]
 
   Keychain arguments are only supported on Mac OSX.
@@ -609,19 +425,11 @@ Other
 
 0.12.1 (2017-03-27)
 -------------------
-- Release version 0.12.1. [Jose Diaz-Gonzalez]
-- Merge pull request #57 from acdha/reuse-existing-remotes. [Jose Diaz-
-  Gonzalez]
-
-  Avoid remote branch name churn
 - Avoid remote branch name churn. [Chris Adams]
 
   This avoids the backup output having lots of "[new branch]" messages
   because removing the old remote name removed all of the existing
   branch references.
-- Merge pull request #55 from amaczuga/master. [Jose Diaz-Gonzalez]
-
-  Fix detection of bare git directories
 - Fix detection of bare git directories. [Andrzej Maczuga]
@@ -636,49 +444,22 @@ Fix
 
 Other
 ~~~~~
-- Release version 0.12.0. [Jose Diaz-Gonzalez]
 - Pep8: E501 line too long (83 > 79 characters) [Jose Diaz-Gonzalez]
 - Pep8: E128 continuation line under-indented for visual indent. [Jose
   Diaz-Gonzalez]
-- Merge pull request #54 from amaczuga/master. [Jose Diaz-Gonzalez]
-
-  Support archivization using bare git clones
 - Support archivization using bare git clones. [Andrzej Maczuga]
-- Merge pull request #53 from trel/master. [Jose Diaz-Gonzalez]
-
-  fix typo, 3x
 - Fix typo, 3x. [Terrell Russell]
 
 
 0.11.0 (2016-10-26)
 -------------------
-- Release version 0.11.0. [Jose Diaz-Gonzalez]
-- Merge pull request #52 from bjodah/fix-gh-51. [Jose Diaz-Gonzalez]
-
-  Support --token file:///home/user/token.txt (fixes gh-51)
 - Support --token file:///home/user/token.txt (fixes gh-51) [Björn
   Dahlgren]
-- Merge pull request #48 from albertyw/python3. [Jose Diaz-Gonzalez]
-
-  Support Python 3
 - Fix some linting. [Albert Wang]
 - Fix byte/string conversion for python 3. [Albert Wang]
 - Support python 3. [Albert Wang]
-- Merge pull request #46 from remram44/encode-password. [Jose Diaz-
-  Gonzalez]
-
-  Encode special characters in password
 - Encode special characters in password. [Remi Rampin]
-- Merge pull request #45 from remram44/cli-programname. [Jose Diaz-
-  Gonzalez]
-
-  Fix program name
-- Update README.rst. [Remi Rampin]
 - Don't pretend program name is "Github Backup" [Remi Rampin]
-- Merge pull request #44 from remram44/readme-git-https. [Jose Diaz-
-  Gonzalez]
-
-  Don't install over insecure connection
 - Don't install over insecure connection. [Remi Rampin]
 
   The git:// protocol is unauthenticated and unencrypted, and no longer
   advertised by GitHub. Using HTTPS shouldn't impact performance.
@@ -686,10 +467,6 @@ Other
 
 0.10.3 (2016-08-21)
 -------------------
-- Release version 0.10.3. [Jose Diaz-Gonzalez]
-- Merge pull request #30 from jonasrmichel/master. [Jose Diaz-Gonzalez]
-
-  Fixes #29
 - Fixes #29. [Jonas Michel]
 
   Reporting an error when the user's rate limit is exceeded causes
@@ -707,23 +484,13 @@ Other
 
 0.10.2 (2016-08-21)
 -------------------
-- Release version 0.10.2. [Jose Diaz-Gonzalez]
 - Add a note regarding git version requirement. [Jose Diaz-Gonzalez]
 
   Closes #37
 
 
-0.10.1 (2016-08-21)
--------------------
-- Release version 0.10.1. [Jose Diaz-Gonzalez]
-
-
 0.10.0 (2016-08-18)
 -------------------
-- Release version 0.10.0. [Jose Diaz-Gonzalez]
-- Merge pull request #42 from robertwb/master. [Jose Diaz-Gonzalez]
-
-  Implement incremental updates
 - Implement incremental updates. [Robert Bradshaw]
 
   Guarded with an --incremental flag.
@@ -736,21 +503,11 @@ Other
 
 0.9.0 (2016-03-29)
 ------------------
-- Release version 0.9.0. [Jose Diaz-Gonzalez]
-- Merge pull request #36 from zlabjp/fix-cloning-private-repos. [Jose
-  Diaz-Gonzalez]
-
-  Fix cloning private repos with basic auth or token
 - Fix cloning private repos with basic auth or token. [Kazuki Suda]
 
 
 0.8.0 (2016-02-14)
 ------------------
-- Release version 0.8.0. [Jose Diaz-Gonzalez]
-- Merge pull request #35 from eht16/issue23_store_pullrequests_once.
-  [Jose Diaz-Gonzalez]
-
-  Don't store issues which are actually pull requests
 - Don't store issues which are actually pull requests. [Enrico Tröger]
 
   This prevents storing pull requests twice since the Github API returns
@@ -761,65 +518,31 @@ Other
 
 0.7.0 (2016-02-02)
 ------------------
-- Release version 0.7.0. [Jose Diaz-Gonzalez]
-- Merge pull request #32 from albertyw/soft-fail-hooks. [Jose Diaz-
-  Gonzalez]
-
-  Softly fail if not able to read hooks
 - Softly fail if not able to read hooks. [Albert Wang]
-- Merge pull request #33 from albertyw/update-readme. [Jose Diaz-
-  Gonzalez]
-
-  Add note about 2-factor auth in readme
 - Add note about 2-factor auth. [Albert Wang]
-- Merge pull request #31 from albertyw/fix-private-repos. [Jose Diaz-
-  Gonzalez]
-
-  Fix reading user's private repositories
 - Make user repository search go through endpoint capable of reading
   private repositories. [Albert Wang]
-- Merge pull request #28 from alexmojaki/getpass. [Jose Diaz-Gonzalez]
-
-  Prompt for password if only username given
-- Update README with new CLI usage. [Alex Hall]
 - Prompt for password if only username given. [Alex Hall]
 
 
 0.6.0 (2015-11-10)
 ------------------
-- Release version 0.6.0. [Jose Diaz-Gonzalez]
 - Force proper remote url. [Jose Diaz-Gonzalez]
-- Merge pull request #24 from eht16/add_backup_hooks. [Jose Diaz-
-  Gonzalez]
-
-  Add backup hooks
 - Improve error handling in case of HTTP errors. [Enrico Tröger]
 
   In case of a HTTP status code 404, the returned 'r' was never assigned.
   In case of URL errors which are not timeouts, we probably should bail out.
 - Add --hooks to also include web hooks into the backup. [Enrico Tröger]
-- Merge pull request #22 from eht16/issue_17_create_output_directory.
-  [Jose Diaz-Gonzalez]
-
-  Create the user specified output directory if it does not exist
 - Create the user specified output directory if it does not exist.
   [Enrico Tröger]
 
   Fixes #17.
-- Merge pull request #21 from eht16/fix_get_response_missing_auth. [Jose
-  Diaz-Gonzalez]
-
-  Add missing auth argument to _get_response()
 - Add missing auth argument to _get_response() [Enrico Tröger]
 
   When running unauthenticated and Github starts rate-limiting the client,
   github-backup crashes because the used auth variable in _get_response()
   was not available. This change should fix it.
-- Merge pull request #20 from
-  eht16/improve_error_msg_on_non_existing_repo. [Jose Diaz-Gonzalez]
-
-  Add repository URL to error message for non-existing repositories
 - Add repository URL to error message for non-existing repositories.
   [Enrico Tröger]
@@ -830,69 +553,28 @@ Other
 
 0.5.0 (2015-10-10)
 ------------------
-- Release version 0.5.0. [Jose Diaz-Gonzalez]
 - Add release script. [Jose Diaz-Gonzalez]
 - Refactor to both simplify codepath as well as follow PEP8 standards.
   [Jose Diaz-Gonzalez]
-- Merge pull request #19 from Embed-Engineering/retry-timeout. [Jose
  Diaz-Gonzalez]
-
-  Retry 3 times when the connection times out
 - Retry 3 times when the connection times out. [Mathijs Jonker]
-- Merge pull request #15 from kromkrom/master. [Jose Diaz-Gonzalez]
-
-  Preserve Unicode characters in the output file
-- Update github-backup. [Kirill Grushetsky]
-- Update github-backup. [Kirill Grushetsky]
 - Made unicode output defalut. [Kirill Grushetsky]
 - Import alphabetised. [Kirill Grushetsky]
 - Preserve Unicode characters in the output file. [Kirill Grushetsky]
 
   Added option to preserve Unicode characters in the output file
-- Merge pull request #14 from aensley/master. [Jose Diaz-Gonzalez]
-
-  Added backup of labels and milestones.
 - Josegonzales/python-github-backup#12 Added backup of labels and
   milestones. [aensley]
-- Merge pull request #11 from Embed-Engineering/master. [Jose Diaz-
-  Gonzalez]
-
-  Added test for uninitialized repo's (or wiki's)
 - Fixed indent. [Mathijs Jonker]
-- Update github-backup. [mjonker-embed]
 - Skip unitialized repo's. [mjonker-embed]
 
   These gave me errors which caused mails from crontab.
-- Merge pull request #10 from Embed-Engineering/master. [Jose Diaz-
-  Gonzalez]
-
-  Added prefer-ssh
 - Added prefer-ssh. [mjonker-embed]
 
   Was needed for my back-up setup, code includes this but readme wasn't
   updated
-- Merge pull request #9 from acdha/ratelimit-retries. [Jose Diaz-
-  Gonzalez]
-
-  Retry API requests which failed due to rate-limiting
 - Retry API requests which failed due to rate-limiting. [Chris Adams]
 
   This allows operation to continue, albeit at a slower pace, if you
   have enough data to trigger the API rate limits
-- Release 0.4.0. [Jose Diaz-Gonzalez]
-- Merge pull request #7 from acdha/repo-backup-overhaul. [Jose Diaz-
-  Gonzalez]
-
-  Repo backup overhaul
-- Update repository back up handling for wikis. [Chris Adams]
-
-  * Now wikis will follow the same logic as the main repo
-  checkout for --prefer-ssh.
-  * The regular repository and wiki paths both use the same
-  function to handle either cloning or updating a local copy
-  of the remote repo
-  * All git updates will now use “git fetch --all --tags”
-  to ensure that tags and branches other than master will
-  also be backed up
 - Logging_subprocess: always log when a command fails. [Chris Adams]
 
   Previously git clones could fail without any indication
@@ -907,19 +589,10 @@ Other
 
   The previous commit used the wrong URL for a private repo.
   This was masked by the lack of error loging in logging_subprocess
   (which will be in a separate branch)
-- Merge pull request #6 from acdha/allow-clone-over-ssh. [Jose Diaz-
-  Gonzalez]
-
-  Add an option to prefer checkouts over SSH
 - Add an option to prefer checkouts over SSH. [Chris Adams]
 
   This is really useful with private repos to avoid being nagged
   for credentials for every repository
-- Release 0.3.0. [Jose Diaz-Gonzalez]
-- Merge pull request #4 from klaude/pull_request_support. [Jose Diaz-
-  Gonzalez]
-
-  Add pull request support
 - Add pull request support. [Kevin Laude]
 
   Back up reporitory pull requests by passing the --include-pulls
@@ -931,10 +604,6 @@ Other
 
   Pull requests are automatically backed up when the --all argument is
   uesd.
-- Merge pull request #5 from klaude/github-enterprise-support. [Jose
-  Diaz-Gonzalez]
-
-  Add GitHub Enterprise Support
 - Add GitHub Enterprise support. [Kevin Laude]
 
   Pass the -H or --github-host argument with a GitHub Enterprise hostname
@@ -944,13 +613,9 @@ Other
 
 0.2.0 (2014-09-22)
 ------------------
-- Release 0.2.0. [Jose Diaz-Gonzalez]
 - Add support for retrieving repositories. Closes #1. [Jose Diaz-
   Gonzalez]
 - Fix PEP8 violations. [Jose Diaz-Gonzalez]
-- Merge pull request #2 from johnyf/master. [Jose Diaz-Gonzalez]
-
-  add authorization to header only if specified by user
 - Add authorization to header only if specified by user. [Ioannis
   Filippidis]
 - Fill out readme more. [Jose Diaz-Gonzalez]
diff --git a/README.rst b/README.rst
index b7cd93b..2e4dfa4 100644
--- a/README.rst
+++ b/README.rst
@@ -4,7 +4,7 @@ github-backup
 
 |PyPI| |Python Versions|
 
-    This project is considered feature complete for the primary maintainer. If you would like a bugfix or enhancement and cannot sponsor the work, pull requests are welcome. Feel free to contact the maintainer for consulting estimates if desired.
+    This project is considered feature complete for the primary maintainer. If you would like a bugfix or enhancement and can not sponsor the work, pull requests are welcome. Feel free to contact the maintainer for consulting estimates if desired.
 backup a github user or organization
@@ -29,20 +29,21 @@ Usage
 
 CLI Usage is as follows::
 
-    github-backup [-h] [-u USERNAME] [-p PASSWORD] [-t TOKEN] [--as-app]
-                  [-o OUTPUT_DIRECTORY] [-i] [--starred] [--all-starred]
-                  [--watched] [--followers] [--following] [--all]
-                  [--issues] [--issue-comments] [--issue-events] [--pulls]
+    github-backup [-h] [-u USERNAME] [-p PASSWORD] [-t TOKEN_CLASSIC]
+                  [-f TOKEN_FINE] [--as-app] [-o OUTPUT_DIRECTORY]
+                  [-l LOG_LEVEL] [-i] [--starred] [--all-starred]
+                  [--watched] [--followers] [--following] [--all] [--issues]
+                  [--issue-comments] [--issue-events] [--pulls]
                   [--pull-comments] [--pull-commits] [--pull-details]
                   [--labels] [--hooks] [--milestones] [--repositories]
                   [--bare] [--lfs] [--wikis] [--gists] [--starred-gists]
-                  [--skip-existing] [-L [LANGUAGES [LANGUAGES ...]]]
+                  [--skip-archived] [--skip-existing] [-L [LANGUAGES ...]]
                   [-N NAME_REGEX] [-H GITHUB_HOST] [-O] [-R REPOSITORY] [-P]
                   [-F] [--prefer-ssh] [-v]
                   [--keychain-name OSX_KEYCHAIN_ITEM_NAME]
                   [--keychain-account OSX_KEYCHAIN_ITEM_ACCOUNT]
-                  [--releases] [--assets] [--throttle-limit THROTTLE_LIMIT]
-                  [--throttle-pause THROTTLE_PAUSE]
+                  [--releases] [--assets] [--exclude [REPOSITORY [REPOSITORY ...]]]
+                  [--throttle-limit THROTTLE_LIMIT] [--throttle-pause THROTTLE_PAUSE]
                   USER
 
 Backup a github account
@@ -57,12 +58,18 @@ CLI Usage is as follows::
     -p PASSWORD, --password PASSWORD
                           password for basic auth. If a username is given but
                           not a password, the password will be prompted for.
-    -t TOKEN, --token TOKEN
+    -f TOKEN_FINE, --token-fine TOKEN_FINE
+                          fine-grained personal access token or path to token
+                          (file://...)
+    -t TOKEN_CLASSIC, --token TOKEN_CLASSIC
                           personal access, OAuth, or JSON Web token, or path
                           to token (file://...)
     --as-app              authenticate as github app instead of as a user.
     -o OUTPUT_DIRECTORY, --output-directory OUTPUT_DIRECTORY
                           directory at which to backup the repositories
+    -l LOG_LEVEL, --log-level LOG_LEVEL
+                          log level to use (default: info, possible levels:
+                          debug, info, warning, error, critical)
     -i, --incremental     incremental backup
     --starred             include JSON output of starred repositories in backup
     --all-starred         include starred repositories in backup [*]
@@ -112,6 +119,8 @@ CLI Usage is as follows::
                           binaries
     --assets              include assets alongside release information; only
                           applies if including releases
+    --exclude [REPOSITORY [REPOSITORY ...]]
+                          names of repositories to exclude from backup.
     --throttle-limit THROTTLE_LIMIT
                           start throttling of GitHub API requests after this
                           amount of API requests remain
@@ -158,13 +167,13 @@ Backup all repositories, including private ones::
 
     export ACCESS_TOKEN=SOME-GITHUB-TOKEN
     github-backup WhiteHouse --token $ACCESS_TOKEN --organization --output-directory /tmp/white-house --repositories --private
 
-Backup a single organization repository with everything else (wiki, pull requests, comments, issues etc)::
+Use a fine-grained access token to backup a single organization repository with everything else (wiki, pull requests, comments, issues etc)::
 
     export ACCESS_TOKEN=SOME-GITHUB-TOKEN
     ORGANIZATION=docker
     REPO=cli # e.g. git@github.com:docker/cli.git
-    github-backup $ORGANIZATION -P -t $ACCESS_TOKEN -o . --all -O -R $REPO
+    github-backup $ORGANIZATION -P -f $ACCESS_TOKEN -o . --all -O -R $REPO
 
 Testing
 =======
diff --git a/bin/github-backup b/bin/github-backup
index faea49f..b983cdf 100755
--- a/bin/github-backup
+++ b/bin/github-backup
@@ -35,6 +35,11 @@ def main():
     if args.lfs_clone:
         check_git_lfs_install()
 
+    if args.log_level:
+        log_level = logging.getLevelName(args.log_level.upper())
+        if isinstance(log_level, int):
+            logger.root.setLevel(log_level)
+
     if not args.as_app:
         logger.info('Backing up user {0} to {1}'.format(args.user, output_directory))
         authenticated_user = get_authenticated_user(args)
@@ -51,5 +56,5 @@ if __name__ == '__main__':
     try:
         main()
     except Exception as e:
-        logger.warning(str(e))
+        logger.error(str(e))
         sys.exit(1)
diff --git a/github_backup/__init__.py b/github_backup/__init__.py
index b228564..fb8a056 100644
--- a/github_backup/__init__.py
+++ b/github_backup/__init__.py
@@ -1 +1 @@
-__version__ = '0.40.2'
+__version__ = "0.43.1"
diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py
index fd4003d..1d79b9b 100644
--- a/github_backup/github_backup.py
+++ b/github_backup/github_backup.py
@@ -30,40 +30,42 @@ from http.client import IncompleteRead
 
 try:
     from . import __version__
+
     VERSION = __version__
 except ImportError:
-    VERSION = 'unknown'
-
-FNULL = open(os.devnull, 'w')
+    VERSION = "unknown"
 
+FNULL = open(os.devnull, "w")
+FILE_URI_PREFIX = "file://"
 logger = logging.getLogger(__name__)
 
 
-def logging_subprocess(popenargs,
-                       logger,
-                       stdout_log_level=logging.DEBUG,
-                       stderr_log_level=logging.ERROR,
-                       **kwargs):
+def logging_subprocess(
+    popenargs,
+    logger,
+    stdout_log_level=logging.DEBUG,
+    stderr_log_level=logging.ERROR,
+    **kwargs
+):
     """
     Variant of subprocess.call that accepts a logger instead of stdout/stderr,
     and logs stdout messages via logger.debug and stderr messages via
     logger.error.
""" - child = subprocess.Popen(popenargs, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, **kwargs) - if sys.platform == 'win32': - logger.info("Windows operating system detected - no subprocess logging will be returned") + child = subprocess.Popen( + popenargs, stdout=subprocess.PIPE, stderr=subprocess.PIPE, **kwargs + ) + if sys.platform == "win32": + logger.info( + "Windows operating system detected - no subprocess logging will be returned" + ) - log_level = {child.stdout: stdout_log_level, - child.stderr: stderr_log_level} + log_level = {child.stdout: stdout_log_level, child.stderr: stderr_log_level} def check_io(): - if sys.platform == 'win32': + if sys.platform == "win32": return - ready_to_read = select.select([child.stdout, child.stderr], - [], - [], - 1000)[0] + ready_to_read = select.select([child.stdout, child.stderr], [], [], 1000)[0] for io in ready_to_read: line = io.readline() if not logger: @@ -80,8 +82,8 @@ def logging_subprocess(popenargs, rc = child.wait() if rc != 0: - print('{} returned {}:'.format(popenargs[0], rc), file=sys.stderr) - print('\t', ' '.join(popenargs), file=sys.stderr) + print("{} returned {}:".format(popenargs[0], rc), file=sys.stderr) + print("\t", " ".join(popenargs), file=sys.stderr) return rc @@ -97,213 +99,314 @@ def mkdir_p(*args): raise -def mask_password(url, secret='*****'): +def mask_password(url, secret="*****"): parsed = urlparse(url) if not parsed.password: return url - elif parsed.password == 'x-oauth-basic': + elif parsed.password == "x-oauth-basic": return url.replace(parsed.username, secret) return url.replace(parsed.password, secret) def parse_args(args=None): - parser = argparse.ArgumentParser(description='Backup a github account') - parser.add_argument('user', - metavar='USER', - type=str, - help='github username') - parser.add_argument('-q', - '--quiet', - action='store_true', - dest='quiet', - help='supress non-error log messages') - parser.add_argument('-u', - '--username', - dest='username', - help='username for basic auth') - parser.add_argument('-p', - '--password', - dest='password', - help='password for basic auth. 
' - 'If a username is given but not a password, the ' - 'password will be prompted for.') - parser.add_argument('-t', - '--token', - dest='token', - help='personal access, OAuth, or JSON Web token, or path to token (file://...)') # noqa - parser.add_argument('--as-app', - action='store_true', - dest='as_app', - help='authenticate as github app instead of as a user.') - parser.add_argument('-o', - '--output-directory', - default='.', - dest='output_directory', - help='directory at which to backup the repositories') - parser.add_argument('-i', - '--incremental', - action='store_true', - dest='incremental', - help='incremental backup') - parser.add_argument('--starred', - action='store_true', - dest='include_starred', - help='include JSON output of starred repositories in backup') - parser.add_argument('--all-starred', - action='store_true', - dest='all_starred', - help='include starred repositories in backup [*]') - parser.add_argument('--watched', - action='store_true', - dest='include_watched', - help='include JSON output of watched repositories in backup') - parser.add_argument('--followers', - action='store_true', - dest='include_followers', - help='include JSON output of followers in backup') - parser.add_argument('--following', - action='store_true', - dest='include_following', - help='include JSON output of following users in backup') - parser.add_argument('--all', - action='store_true', - dest='include_everything', - help='include everything in backup (not including [*])') - parser.add_argument('--issues', - action='store_true', - dest='include_issues', - help='include issues in backup') - parser.add_argument('--issue-comments', - action='store_true', - dest='include_issue_comments', - help='include issue comments in backup') - parser.add_argument('--issue-events', - action='store_true', - dest='include_issue_events', - help='include issue events in backup') - parser.add_argument('--pulls', - action='store_true', - dest='include_pulls', - help='include pull requests in backup') - parser.add_argument('--pull-comments', - action='store_true', - dest='include_pull_comments', - help='include pull request review comments in backup') - parser.add_argument('--pull-commits', - action='store_true', - dest='include_pull_commits', - help='include pull request commits in backup') - parser.add_argument('--pull-details', - action='store_true', - dest='include_pull_details', - help='include more pull request details in backup [*]') - parser.add_argument('--labels', - action='store_true', - dest='include_labels', - help='include labels in backup') - parser.add_argument('--hooks', - action='store_true', - dest='include_hooks', - help='include hooks in backup (works only when authenticated)') # noqa - parser.add_argument('--milestones', - action='store_true', - dest='include_milestones', - help='include milestones in backup') - parser.add_argument('--repositories', - action='store_true', - dest='include_repository', - help='include repository clone in backup') - parser.add_argument('--bare', - action='store_true', - dest='bare_clone', - help='clone bare repositories') - parser.add_argument('--lfs', - action='store_true', - dest='lfs_clone', - help='clone LFS repositories (requires Git LFS to be installed, https://git-lfs.github.com) [*]') - parser.add_argument('--wikis', - action='store_true', - dest='include_wiki', - help='include wiki clone in backup') - parser.add_argument('--gists', - action='store_true', - dest='include_gists', - help='include gists in backup [*]') - 
parser.add_argument('--starred-gists',
-                        action='store_true',
-                        dest='include_starred_gists',
-                        help='include starred gists in backup [*]')
-    parser.add_argument('--skip-archived',
-                        action='store_true',
-                        dest='skip_archived',
-                        help='skip project if it is archived')
-    parser.add_argument('--skip-existing',
-                        action='store_true',
-                        dest='skip_existing',
-                        help='skip project if a backup directory exists')
-    parser.add_argument('-L',
-                        '--languages',
-                        dest='languages',
-                        help='only allow these languages',
-                        nargs='*')
-    parser.add_argument('-N',
-                        '--name-regex',
-                        dest='name_regex',
-                        help='python regex to match names against')
-    parser.add_argument('-H',
-                        '--github-host',
-                        dest='github_host',
-                        help='GitHub Enterprise hostname')
-    parser.add_argument('-O',
-                        '--organization',
-                        action='store_true',
-                        dest='organization',
-                        help='whether or not this is an organization user')
-    parser.add_argument('-R',
-                        '--repository',
-                        dest='repository',
-                        help='name of repository to limit backup to')
-    parser.add_argument('-P', '--private',
-                        action='store_true',
-                        dest='private',
-                        help='include private repositories [*]')
-    parser.add_argument('-F', '--fork',
-                        action='store_true',
-                        dest='fork',
-                        help='include forked repositories [*]')
-    parser.add_argument('--prefer-ssh',
-                        action='store_true',
-                        help='Clone repositories using SSH instead of HTTPS')
-    parser.add_argument('-v', '--version',
-                        action='version',
-                        version='%(prog)s ' + VERSION)
-    parser.add_argument('--keychain-name',
-                        dest='osx_keychain_item_name',
-                        help='OSX ONLY: name field of password item in OSX keychain that holds the personal access or OAuth token')
-    parser.add_argument('--keychain-account',
-                        dest='osx_keychain_item_account',
-                        help='OSX ONLY: account field of password item in OSX keychain that holds the personal access or OAuth token')
-    parser.add_argument('--releases',
-                        action='store_true',
-                        dest='include_releases',
-                        help='include release information, not including assets or binaries'
-                        )
-    parser.add_argument('--assets',
-                        action='store_true',
-                        dest='include_assets',
-                        help='include assets alongside release information; only applies if including releases')
-    parser.add_argument('--throttle-limit',
-                        dest='throttle_limit',
-                        type=int,
-                        default=0,
-                        help='start throttling of GitHub API requests after this amount of API requests remain')
-    parser.add_argument('--throttle-pause',
-                        dest='throttle_pause',
-                        type=float,
-                        default=30.0,
-                        help='wait this amount of seconds when API request throttling is active (default: 30.0, requires --throttle-limit to be set)')
+    parser = argparse.ArgumentParser(description="Backup a github account")
+    parser.add_argument("user", metavar="USER", type=str, help="github username")
+    parser.add_argument(
+        "-u", "--username", dest="username", help="username for basic auth"
+    )
+    parser.add_argument(
+        "-p",
+        "--password",
+        dest="password",
+        help="password for basic auth. "
+        "If a username is given but not a password, the "
+        "password will be prompted for.",
+    )
+    parser.add_argument(
+        "-t",
+        "--token",
+        dest="token_classic",
+        help="personal access, OAuth, or JSON Web token, or path to token (file://...)",
+    )  # noqa
+    parser.add_argument(
+        "-f",
+        "--token-fine",
+        dest="token_fine",
+        help="fine-grained personal access token (github_pat_....), or path to token (file://...)",
+    )  # noqa
+    parser.add_argument(
+        "-q",
+        "--quiet",
+        action="store_true",
+        dest="quiet",
+        help="suppress log messages less severe than warning, e.g. 
info", + ) + parser.add_argument( + "--as-app", + action="store_true", + dest="as_app", + help="authenticate as github app instead of as a user.", + ) + parser.add_argument( + "-o", + "--output-directory", + default=".", + dest="output_directory", + help="directory at which to backup the repositories", + ) + parser.add_argument( + "-l", + "--log-level", + default="info", + dest="log_level", + help="log level to use (default: info, possible levels: debug, info, warning, error, critical)", + ) + parser.add_argument( + "-i", + "--incremental", + action="store_true", + dest="incremental", + help="incremental backup", + ) + parser.add_argument( + "--starred", + action="store_true", + dest="include_starred", + help="include JSON output of starred repositories in backup", + ) + parser.add_argument( + "--all-starred", + action="store_true", + dest="all_starred", + help="include starred repositories in backup [*]", + ) + parser.add_argument( + "--watched", + action="store_true", + dest="include_watched", + help="include JSON output of watched repositories in backup", + ) + parser.add_argument( + "--followers", + action="store_true", + dest="include_followers", + help="include JSON output of followers in backup", + ) + parser.add_argument( + "--following", + action="store_true", + dest="include_following", + help="include JSON output of following users in backup", + ) + parser.add_argument( + "--all", + action="store_true", + dest="include_everything", + help="include everything in backup (not including [*])", + ) + parser.add_argument( + "--issues", + action="store_true", + dest="include_issues", + help="include issues in backup", + ) + parser.add_argument( + "--issue-comments", + action="store_true", + dest="include_issue_comments", + help="include issue comments in backup", + ) + parser.add_argument( + "--issue-events", + action="store_true", + dest="include_issue_events", + help="include issue events in backup", + ) + parser.add_argument( + "--pulls", + action="store_true", + dest="include_pulls", + help="include pull requests in backup", + ) + parser.add_argument( + "--pull-comments", + action="store_true", + dest="include_pull_comments", + help="include pull request review comments in backup", + ) + parser.add_argument( + "--pull-commits", + action="store_true", + dest="include_pull_commits", + help="include pull request commits in backup", + ) + parser.add_argument( + "--pull-details", + action="store_true", + dest="include_pull_details", + help="include more pull request details in backup [*]", + ) + parser.add_argument( + "--labels", + action="store_true", + dest="include_labels", + help="include labels in backup", + ) + parser.add_argument( + "--hooks", + action="store_true", + dest="include_hooks", + help="include hooks in backup (works only when authenticated)", + ) # noqa + parser.add_argument( + "--milestones", + action="store_true", + dest="include_milestones", + help="include milestones in backup", + ) + parser.add_argument( + "--repositories", + action="store_true", + dest="include_repository", + help="include repository clone in backup", + ) + parser.add_argument( + "--bare", action="store_true", dest="bare_clone", help="clone bare repositories" + ) + parser.add_argument( + "--no-prune", + action="store_true", + dest="no_prune", + help="disable prune option for git fetch", + ) + parser.add_argument( + "--lfs", + action="store_true", + dest="lfs_clone", + help="clone LFS repositories (requires Git LFS to be installed, https://git-lfs.github.com) [*]", + ) + parser.add_argument( + 
"--wikis", + action="store_true", + dest="include_wiki", + help="include wiki clone in backup", + ) + parser.add_argument( + "--gists", + action="store_true", + dest="include_gists", + help="include gists in backup [*]", + ) + parser.add_argument( + "--starred-gists", + action="store_true", + dest="include_starred_gists", + help="include starred gists in backup [*]", + ) + parser.add_argument( + "--skip-archived", + action="store_true", + dest="skip_archived", + help="skip project if it is archived", + ) + parser.add_argument( + "--skip-existing", + action="store_true", + dest="skip_existing", + help="skip project if a backup directory exists", + ) + parser.add_argument( + "-L", + "--languages", + dest="languages", + help="only allow these languages", + nargs="*", + ) + parser.add_argument( + "-N", + "--name-regex", + dest="name_regex", + help="python regex to match names against", + ) + parser.add_argument( + "-H", "--github-host", dest="github_host", help="GitHub Enterprise hostname" + ) + parser.add_argument( + "-O", + "--organization", + action="store_true", + dest="organization", + help="whether or not this is an organization user", + ) + parser.add_argument( + "-R", + "--repository", + dest="repository", + help="name of repository to limit backup to", + ) + parser.add_argument( + "-P", + "--private", + action="store_true", + dest="private", + help="include private repositories [*]", + ) + parser.add_argument( + "-F", + "--fork", + action="store_true", + dest="fork", + help="include forked repositories [*]", + ) + parser.add_argument( + "--prefer-ssh", + action="store_true", + help="Clone repositories using SSH instead of HTTPS", + ) + parser.add_argument( + "-v", "--version", action="version", version="%(prog)s " + VERSION + ) + parser.add_argument( + "--keychain-name", + dest="osx_keychain_item_name", + help="OSX ONLY: name field of password item in OSX keychain that holds the personal access or OAuth token", + ) + parser.add_argument( + "--keychain-account", + dest="osx_keychain_item_account", + help="OSX ONLY: account field of password item in OSX keychain that holds the personal access or OAuth token", + ) + parser.add_argument( + "--releases", + action="store_true", + dest="include_releases", + help="include release information, not including assets or binaries", + ) + parser.add_argument( + "--assets", + action="store_true", + dest="include_assets", + help="include assets alongside release information; only applies if including releases", + ) + parser.add_argument( + "--throttle-limit", + dest="throttle_limit", + type=int, + default=0, + help="start throttling of GitHub API requests after this amount of API requests remain", + ) + parser.add_argument( + "--throttle-pause", + dest="throttle_pause", + type=float, + default=30.0, + help="wait this amount of seconds when API request throttling is active (default: 30.0, requires --throttle-limit to be set)", + ) + parser.add_argument( + "--exclude", dest="exclude", help="names of repositories to exclude", nargs="*" + ) return parser.parse_args(args) @@ -312,35 +415,57 @@ def get_auth(args, encode=True, for_git_cli=False): if args.osx_keychain_item_name: if not args.osx_keychain_item_account: - raise Exception('You must specify both name and account fields for osx keychain password items') + raise Exception( + "You must specify both name and account fields for osx keychain password items" + ) else: - if platform.system() != 'Darwin': + if platform.system() != "Darwin": raise Exception("Keychain arguments are only supported on Mac 
OSX") try: - with open(os.devnull, 'w') as devnull: - token = (subprocess.check_output([ - 'security', 'find-generic-password', - '-s', args.osx_keychain_item_name, - '-a', args.osx_keychain_item_account, - '-w'], stderr=devnull).strip()) - token = token.decode('utf-8') - auth = token + ':' + 'x-oauth-basic' + with open(os.devnull, "w") as devnull: + token = subprocess.check_output( + [ + "security", + "find-generic-password", + "-s", + args.osx_keychain_item_name, + "-a", + args.osx_keychain_item_account, + "-w", + ], + stderr=devnull, + ).strip() + token = token.decode("utf-8") + auth = token + ":" + "x-oauth-basic" except subprocess.SubprocessError: - raise Exception('No password item matching the provided name and account could be found in the osx keychain.') + raise Exception( + "No password item matching the provided name and account could be found in the osx keychain." + ) elif args.osx_keychain_item_account: - raise Exception('You must specify both name and account fields for osx keychain password items') - elif args.token: - _path_specifier = 'file://' - if args.token.startswith(_path_specifier): - args.token = open(args.token[len(_path_specifier):], - 'rt').readline().strip() + raise Exception( + "You must specify both name and account fields for osx keychain password items" + ) + elif args.token_fine: + if args.token_fine.startswith(FILE_URI_PREFIX): + args.token_fine = read_file_contents(args.token_fine) + + if args.token_fine.startswith("github_pat_"): + auth = args.token_fine + else: + raise Exception( + "Fine-grained token supplied does not look like a GitHub PAT" + ) + elif args.token_classic: + if args.token_classic.startswith(FILE_URI_PREFIX): + args.token_classic = read_file_contents(args.token_classic) + if not args.as_app: - auth = args.token + ':' + 'x-oauth-basic' + auth = args.token_classic + ":" + "x-oauth-basic" else: if not for_git_cli: - auth = args.token + auth = args.token_classic else: - auth = 'x-access-token:' + args.token + auth = "x-access-token:" + args.token_classic elif args.username: if not args.password: args.password = getpass.getpass() @@ -348,24 +473,24 @@ def get_auth(args, encode=True, for_git_cli=False): password = args.password else: password = urlquote(args.password) - auth = args.username + ':' + password + auth = args.username + ":" + password elif args.password: - raise Exception('You must specify a username for basic auth') + raise Exception("You must specify a username for basic auth") if not auth: return None - if not encode: + if not encode or args.token_fine is not None: return auth - return base64.b64encode(auth.encode('ascii')) + return base64.b64encode(auth.encode("ascii")) def get_github_api_host(args): if args.github_host: - host = args.github_host + '/api/v3' + host = args.github_host + "/api/v3" else: - host = 'api.github.com' + host = "api.github.com" return host @@ -374,33 +499,44 @@ def get_github_host(args): if args.github_host: host = args.github_host else: - host = 'github.com' + host = "github.com" return host +def read_file_contents(file_uri): + return open(file_uri[len(FILE_URI_PREFIX) :], "rt").readline().strip() + + def get_github_repo_url(args, repository): - if repository.get('is_gist'): + if repository.get("is_gist"): if args.prefer_ssh: # The git_pull_url value is always https for gists, so we need to transform it to ssh form - repo_url = re.sub(r'^https?:\/\/(.+)\/(.+)\.git$', r'git@\1:\2.git', repository['git_pull_url']) - repo_url = re.sub(r'^git@gist\.', 'git@', repo_url) # strip gist subdomain for better 
hostkey compatibility + repo_url = re.sub( + r"^https?:\/\/(.+)\/(.+)\.git$", + r"git@\1:\2.git", + repository["git_pull_url"], + ) + repo_url = re.sub( + r"^git@gist\.", "git@", repo_url + ) # strip gist subdomain for better hostkey compatibility else: - repo_url = repository['git_pull_url'] + repo_url = repository["git_pull_url"] return repo_url if args.prefer_ssh: - return repository['ssh_url'] + return repository["ssh_url"] auth = get_auth(args, encode=False, for_git_cli=True) if auth: - repo_url = 'https://{0}@{1}/{2}/{3}.git'.format( - auth, + repo_url = "https://{0}@{1}/{2}/{3}.git".format( + auth if args.token_fine is None else "oauth2:" + auth, get_github_host(args), - repository['owner']['login'], - repository['name']) + repository["owner"]["login"], + repository["name"], + ) else: - repo_url = repository['clone_url'] + repo_url = repository["clone_url"] return repo_url @@ -413,13 +549,21 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): while True: page = page + 1 - request = _construct_request(per_page, page, query_args, template, auth, as_app=args.as_app) # noqa + request = _construct_request( + per_page, + page, + query_args, + template, + auth, + as_app=args.as_app, + fine=True if args.token_fine is not None else False, + ) # noqa r, errors = _get_response(request, auth, template) status_code = int(r.getcode()) # Check if we got correct data try: - response = json.loads(r.read().decode('utf-8')) + response = json.loads(r.read().decode("utf-8")) except IncompleteRead: logger.warning("Incomplete read error detected") read_error = True @@ -433,25 +577,34 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): read_error = False # be gentle with API request limit and throttle requests if remaining requests getting low - limit_remaining = int(r.headers.get('x-ratelimit-remaining', 0)) + limit_remaining = int(r.headers.get("x-ratelimit-remaining", 0)) if args.throttle_limit and limit_remaining <= args.throttle_limit: logger.info( - 'API request limit hit: {} requests left, pausing further requests for {}s'.format( - limit_remaining, - args.throttle_pause)) + "API request limit hit: {} requests left, pausing further requests for {}s".format( + limit_remaining, args.throttle_pause + ) + ) time.sleep(args.throttle_pause) retries = 0 while retries < 3 and (status_code == 502 or read_error): - logger.warning('API request failed. Retrying in 5 seconds') + logger.warning("API request failed. 
Retrying in 5 seconds") retries += 1 time.sleep(5) - request = _construct_request(per_page, page, query_args, template, auth, as_app=args.as_app) # noqa + request = _construct_request( + per_page, + page, + query_args, + template, + auth, + as_app=args.as_app, + fine=True if args.token_fine is not None else False, + ) # noqa r, errors = _get_response(request, auth, template) status_code = int(r.getcode()) try: - response = json.loads(r.read().decode('utf-8')) + response = json.loads(r.read().decode("utf-8")) read_error = False except IncompleteRead: logger.warning("Incomplete read error detected") @@ -464,14 +617,14 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): read_error = True if status_code != 200: - template = 'API request returned HTTP {0}: {1}' + template = "API request returned HTTP {0}: {1}" errors.append(template.format(status_code, r.reason)) - raise Exception(', '.join(errors)) + raise Exception(", ".join(errors)) if read_error: - template = 'API request problem reading response for {0}' + template = "API request problem reading response for {0}" errors.append(template.format(request)) - raise Exception(', '.join(errors)) + raise Exception(", ".join(errors)) if len(errors) == 0: if type(response) == list: @@ -483,7 +636,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): yield response if len(errors) > 0: - raise Exception(', '.join(errors)) + raise Exception(", ".join(errors)) if single_request: break @@ -513,12 +666,12 @@ def _get_response(request, auth, template): r = exc except URLError as e: logger.warning(e.reason) - should_continue = _request_url_error(template, retry_timeout) + should_continue, retry_timeout = _request_url_error(template, retry_timeout) if not should_continue: raise except socket.error as e: logger.warning(e.strerror) - should_continue = _request_url_error(template, retry_timeout) + should_continue, retry_timeout = _request_url_error(template, retry_timeout) if not should_continue: raise @@ -529,21 +682,30 @@ def _get_response(request, auth, template): return r, errors -def _construct_request(per_page, page, query_args, template, auth, as_app=None): - querystring = urlencode(dict(list({ - 'per_page': per_page, - 'page': page - }.items()) + list(query_args.items()))) +def _construct_request( + per_page, page, query_args, template, auth, as_app=None, fine=False +): + querystring = urlencode( + dict( + list({"per_page": per_page, "page": page}.items()) + + list(query_args.items()) + ) + ) - request = Request(template + '?' + querystring) + request = Request(template + "?" 
+ querystring)
 
     if auth is not None:
         if not as_app:
-            request.add_header('Authorization', 'Basic '.encode('ascii') + auth)
+            if fine:
+                request.add_header("Authorization", "token " + auth)
+            else:
+                request.add_header("Authorization", "Basic ".encode("ascii") + auth)
         else:
-            auth = auth.encode('ascii')
-            request.add_header('Authorization', 'token '.encode('ascii') + auth)
-            request.add_header('Accept', 'application/vnd.github.machine-man-preview+json')
-    logger.info('Requesting {}?{}'.format(template, querystring))
+            auth = auth.encode("ascii")
+            request.add_header("Authorization", "token ".encode("ascii") + auth)
+            request.add_header(
+                "Accept", "application/vnd.github.machine-man-preview+json"
+            )
+    logger.info("Requesting {}?{}".format(template, querystring))
 
     return request
 
@@ -554,7 +716,7 @@ def _request_http_error(exc, auth, errors):
     should_continue = False
     headers = exc.headers
 
-    limit_remaining = int(headers.get('x-ratelimit-remaining', 0))
+    limit_remaining = int(headers.get("x-ratelimit-remaining", 0))
 
     if exc.code == 403 and limit_remaining < 1:
         # The X-RateLimit-Reset header includes a
@@ -562,15 +724,19 @@
         # timestamp telling us when the limit will reset
         # so we can calculate how long to wait rather
         # than inefficiently polling:
         gm_now = calendar.timegm(time.gmtime())
-        reset = int(headers.get('x-ratelimit-reset', 0)) or gm_now
+        reset = int(headers.get("x-ratelimit-reset", 0)) or gm_now
         # We'll never sleep for less than 10 seconds:
         delta = max(10, reset - gm_now)
 
-        limit = headers.get('x-ratelimit-limit')
-        logger.warning('Exceeded rate limit of {} requests; waiting {} seconds to reset'.format(limit, delta))  # noqa
+        limit = headers.get("x-ratelimit-limit")
+        logger.warning(
+            "Exceeded rate limit of {} requests; waiting {} seconds to reset".format(
+                limit, delta
+            )
+        )  # noqa
 
         if auth is None:
-            logger.info('Hint: Authenticate to raise your GitHub rate limit')
+            logger.info("Hint: Authenticate to raise your GitHub rate limit")
 
         time.sleep(delta)
         should_continue = True
@@ -578,16 +744,15 @@ def _request_url_error(template, retry_timeout):
-    # Incase of a connection timing out, we can retry a few time
+    # In case of a connection timing out, we can retry a few times
     # But we won't crash and not back-up the rest now
-    logger.info('{} timed out'.format(template))
+    logger.info("'{}' timed out".format(template))
     retry_timeout -= 1
 
     if retry_timeout >= 0:
-        return True
+        return True, retry_timeout
 
-    raise Exception('{} timed out to much, skipping!')
-    return False
+    raise Exception("'{}' timed out too much, skipping!".format(template))
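
The tuple return above keeps the caller's retry countdown in sync; a minimal sketch of the calling pattern it enables (not part of the diff, simplified from _get_response, with make_request standing in for urlopen and the initial budget an illustrative value)::

    retry_timeout = 3  # illustrative initial retry budget
    while True:
        try:
            r = make_request()  # stand-in for urlopen(request)
            break
        except URLError as e:
            logger.warning(e.reason)
            # _request_url_error raises once the budget is exhausted
            should_continue, retry_timeout = _request_url_error(template, retry_timeout)
            if not should_continue:
                raise
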
""" + def redirect_request(self, req, fp, code, msg, headers, newurl): - request = super(S3HTTPRedirectHandler, self).redirect_request(req, fp, code, msg, headers, newurl) - del request.headers['Authorization'] + request = super(S3HTTPRedirectHandler, self).redirect_request( + req, fp, code, msg, headers, newurl + ) + del request.headers["Authorization"] return request @@ -609,15 +777,15 @@ def download_file(url, path, auth): return request = Request(url) - request.add_header('Accept', 'application/octet-stream') - request.add_header('Authorization', 'Basic '.encode('ascii') + auth) + request.add_header("Accept", "application/octet-stream") + request.add_header("Authorization", "Basic ".encode("ascii") + auth) opener = build_opener(S3HTTPRedirectHandler) try: response = opener.open(request) chunk_size = 16 * 1024 - with open(path, 'wb') as f: + with open(path, "wb") as f: while True: chunk = response.read(chunk_size) if not chunk: @@ -625,91 +793,110 @@ def download_file(url, path, auth): f.write(chunk) except HTTPError as exc: # Gracefully handle 404 responses (and others) when downloading from S3 - logger.warning('Skipping download of asset {0} due to HTTPError: {1}'.format(url, exc.reason)) + logger.warning( + "Skipping download of asset {0} due to HTTPError: {1}".format( + url, exc.reason + ) + ) except URLError as e: # Gracefully handle other URL errors - logger.warning('Skipping download of asset {0} due to URLError: {1}'.format(url, e.reason)) + logger.warning( + "Skipping download of asset {0} due to URLError: {1}".format(url, e.reason) + ) except socket.error as e: # Gracefully handle socket errors # TODO: Implement retry logic - logger.warning('Skipping download of asset {0} due to socker error: {1}'.format(url, e.strerror)) + logger.warning( + "Skipping download of asset {0} due to socker error: {1}".format( + url, e.strerror + ) + ) def get_authenticated_user(args): - template = 'https://{0}/user'.format(get_github_api_host(args)) + template = "https://{0}/user".format(get_github_api_host(args)) data = retrieve_data(args, template, single_request=True) return data[0] def check_git_lfs_install(): - exit_code = subprocess.call(['git', 'lfs', 'version']) + exit_code = subprocess.call(["git", "lfs", "version"]) if exit_code != 0: - raise Exception('The argument --lfs requires you to have Git LFS installed.\nYou can get it from https://git-lfs.github.com.') + raise Exception( + "The argument --lfs requires you to have Git LFS installed.\nYou can get it from https://git-lfs.github.com." 
+ ) def retrieve_repositories(args, authenticated_user): - logger.info('Retrieving repositories') + logger.info("Retrieving repositories") single_request = False - if args.user == authenticated_user['login']: + if args.user == authenticated_user["login"]: # we must use the /user/repos API to be able to access private repos - template = 'https://{0}/user/repos'.format( - get_github_api_host(args)) + template = "https://{0}/user/repos".format(get_github_api_host(args)) else: if args.private and not args.organization: - logger.warning('Authenticated user is different from user being backed up, thus private repositories cannot be accessed') - template = 'https://{0}/users/{1}/repos'.format( - get_github_api_host(args), - args.user) + logger.warning( + "Authenticated user is different from user being backed up, thus private repositories cannot be accessed" + ) + template = "https://{0}/users/{1}/repos".format( + get_github_api_host(args), args.user + ) if args.organization: - template = 'https://{0}/orgs/{1}/repos'.format( - get_github_api_host(args), - args.user) + template = "https://{0}/orgs/{1}/repos".format( + get_github_api_host(args), args.user + ) if args.repository: single_request = True - template = 'https://{0}/repos/{1}/{2}'.format( - get_github_api_host(args), - args.user, - args.repository) + template = "https://{0}/repos/{1}/{2}".format( + get_github_api_host(args), args.user, args.repository + ) repos = retrieve_data(args, template, single_request=single_request) if args.all_starred: - starred_template = 'https://{0}/users/{1}/starred'.format(get_github_api_host(args), args.user) + starred_template = "https://{0}/users/{1}/starred".format( + get_github_api_host(args), args.user + ) starred_repos = retrieve_data(args, starred_template, single_request=False) # flag each repo as starred for downstream processing for item in starred_repos: - item.update({'is_starred': True}) + item.update({"is_starred": True}) repos.extend(starred_repos) if args.include_gists: - gists_template = 'https://{0}/users/{1}/gists'.format(get_github_api_host(args), args.user) + gists_template = "https://{0}/users/{1}/gists".format( + get_github_api_host(args), args.user + ) gists = retrieve_data(args, gists_template, single_request=False) # flag each repo as a gist for downstream processing for item in gists: - item.update({'is_gist': True}) + item.update({"is_gist": True}) repos.extend(gists) if args.include_starred_gists: - starred_gists_template = 'https://{0}/gists/starred'.format(get_github_api_host(args)) - starred_gists = retrieve_data(args, starred_gists_template, single_request=False) + starred_gists_template = "https://{0}/gists/starred".format( + get_github_api_host(args) + ) + starred_gists = retrieve_data( + args, starred_gists_template, single_request=False + ) # flag each repo as a starred gist for downstream processing for item in starred_gists: - item.update({'is_gist': True, - 'is_starred': True}) + item.update({"is_gist": True, "is_starred": True}) repos.extend(starred_gists) return repos def filter_repositories(args, unfiltered_repositories): - logger.info('Filtering repositories') + logger.info("Filtering repositories") repositories = [] for r in unfiltered_repositories: # gists can be anonymous, so need to safely check owner - if r.get('owner', {}).get('login') == args.user or r.get('is_starred'): + if r.get("owner", {}).get("login") == args.user or r.get("is_starred"): repositories.append(r) name_regex = None @@ -721,26 +908,33 @@ def filter_repositories(args, 
unfiltered_repositories): languages = [x.lower() for x in args.languages] if not args.fork: - repositories = [r for r in repositories if not r.get('fork')] + repositories = [r for r in repositories if not r.get("fork")] if not args.private: - repositories = [r for r in repositories if not r.get('private') or r.get('public')] + repositories = [ + r for r in repositories if not r.get("private") or r.get("public") + ] if languages: - repositories = [r for r in repositories if r.get('language') and r.get('language').lower() in languages] # noqa + repositories = [ + r + for r in repositories + if r.get("language") and r.get("language").lower() in languages + ] # noqa if name_regex: - repositories = [r for r in repositories if name_regex.match(r['name'])] + repositories = [r for r in repositories if name_regex.match(r["name"])] if args.skip_archived: - repositories = [r for r in repositories if not r.get('archived')] + repositories = [r for r in repositories if not r.get("archived")] + if args.exclude: + repositories = [r for r in repositories if r["name"] not in args.exclude] return repositories def backup_repositories(args, output_directory, repositories): - logger.info('Backing up repositories') - repos_template = 'https://{0}/repos'.format(get_github_api_host(args)) + logger.info("Backing up repositories") + repos_template = "https://{0}/repos".format(get_github_api_host(args)) if args.incremental: - last_update = max(list(repository['updated_at'] for repository in repositories) or [time.strftime('%Y-%m-%dT%H:%M:%SZ', time.localtime())]) # noqa - last_update_path = os.path.join(output_directory, 'last_update') + last_update_path = os.path.join(output_directory, "last_update") if os.path.exists(last_update_path): args.since = open(last_update_path).read().strip() else: @@ -748,47 +942,70 @@ def backup_repositories(args, output_directory, repositories): else: args.since = None + last_update = "0000-00-00T00:00:00Z" for repository in repositories: - if repository.get('is_gist'): - repo_cwd = os.path.join(output_directory, 'gists', repository['id']) - elif repository.get('is_starred'): + if "updated_at" in repository and repository["updated_at"] > last_update: + last_update = repository["updated_at"] + elif "pushed_at" in repository and repository["pushed_at"] > last_update: + last_update = repository["pushed_at"] + + if repository.get("is_gist"): + repo_cwd = os.path.join(output_directory, "gists", repository["id"]) + elif repository.get("is_starred"): # put starred repos in -o/starred/${owner}/${repo} to prevent collision of # any repositories with the same name - repo_cwd = os.path.join(output_directory, 'starred', repository['owner']['login'], repository['name']) + repo_cwd = os.path.join( + output_directory, + "starred", + repository["owner"]["login"], + repository["name"], + ) else: - repo_cwd = os.path.join(output_directory, 'repositories', repository['name']) + repo_cwd = os.path.join( + output_directory, "repositories", repository["name"] + ) - repo_dir = os.path.join(repo_cwd, 'repository') + repo_dir = os.path.join(repo_cwd, "repository") repo_url = get_github_repo_url(args, repository) - include_gists = (args.include_gists or args.include_starred_gists) - if (args.include_repository or args.include_everything) \ - or (include_gists and repository.get('is_gist')): - repo_name = repository.get('name') if not repository.get('is_gist') else repository.get('id') - fetch_repository(repo_name, - repo_url, - repo_dir, - skip_existing=args.skip_existing, - bare_clone=args.bare_clone, - 
lfs_clone=args.lfs_clone) + include_gists = args.include_gists or args.include_starred_gists + if (args.include_repository or args.include_everything) or ( + include_gists and repository.get("is_gist") + ): + repo_name = ( + repository.get("name") + if not repository.get("is_gist") + else repository.get("id") + ) + fetch_repository( + repo_name, + repo_url, + repo_dir, + skip_existing=args.skip_existing, + bare_clone=args.bare_clone, + lfs_clone=args.lfs_clone, + no_prune=args.no_prune, + ) - if repository.get('is_gist'): + if repository.get("is_gist"): # dump gist information to a file as well - output_file = '{0}/gist.json'.format(repo_cwd) - with codecs.open(output_file, 'w', encoding='utf-8') as f: + output_file = "{0}/gist.json".format(repo_cwd) + with codecs.open(output_file, "w", encoding="utf-8") as f: json_dump(repository, f) continue # don't try to back anything else for a gist; it doesn't exist - download_wiki = (args.include_wiki or args.include_everything) - if repository['has_wiki'] and download_wiki: - fetch_repository(repository['name'], - repo_url.replace('.git', '.wiki.git'), - os.path.join(repo_cwd, 'wiki'), - skip_existing=args.skip_existing, - bare_clone=args.bare_clone, - lfs_clone=args.lfs_clone) - + download_wiki = args.include_wiki or args.include_everything + if repository["has_wiki"] and download_wiki: + fetch_repository( + repository["name"], + repo_url.replace(".git", ".wiki.git"), + os.path.join(repo_cwd, "wiki"), + skip_existing=args.skip_existing, + bare_clone=args.bare_clone, + lfs_clone=args.lfs_clone, + no_prune=args.no_prune, + ) if args.include_issues or args.include_everything: backup_issues(args, repo_cwd, repository, repos_template) @@ -805,176 +1022,169 @@ def backup_repositories(args, output_directory, repositories): backup_hooks(args, repo_cwd, repository, repos_template) if args.include_releases or args.include_everything: - backup_releases(args, repo_cwd, repository, repos_template, - include_assets=args.include_assets or args.include_everything) + backup_releases( + args, + repo_cwd, + repository, + repos_template, + include_assets=args.include_assets or args.include_everything, + ) if args.incremental: - open(last_update_path, 'w').write(last_update) + if last_update == "0000-00-00T00:00:00Z": + last_update = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.localtime()) + + open(last_update_path, "w").write(last_update) def backup_issues(args, repo_cwd, repository, repos_template): - has_issues_dir = os.path.isdir('{0}/issues/.git'.format(repo_cwd)) + has_issues_dir = os.path.isdir("{0}/issues/.git".format(repo_cwd)) if args.skip_existing and has_issues_dir: return - logger.info('Retrieving {0} issues'.format(repository['full_name'])) - issue_cwd = os.path.join(repo_cwd, 'issues') + logger.info("Retrieving {0} issues".format(repository["full_name"])) + issue_cwd = os.path.join(repo_cwd, "issues") mkdir_p(repo_cwd, issue_cwd) issues = {} issues_skipped = 0 - issues_skipped_message = '' - _issue_template = '{0}/{1}/issues'.format(repos_template, - repository['full_name']) + issues_skipped_message = "" + _issue_template = "{0}/{1}/issues".format(repos_template, repository["full_name"]) should_include_pulls = args.include_pulls or args.include_everything - issue_states = ['open', 'closed'] + issue_states = ["open", "closed"] for issue_state in issue_states: - query_args = { - 'filter': 'all', - 'state': issue_state - } + query_args = {"filter": "all", "state": issue_state} if args.since: - query_args['since'] = args.since + query_args["since"] = 
args.since - _issues = retrieve_data(args, - _issue_template, - query_args=query_args) + _issues = retrieve_data(args, _issue_template, query_args=query_args) for issue in _issues: # skip pull requests which are also returned as issues # if retrieving pull requests is requested as well - if 'pull_request' in issue and should_include_pulls: + if "pull_request" in issue and should_include_pulls: issues_skipped += 1 continue - issues[issue['number']] = issue + issues[issue["number"]] = issue if issues_skipped: - issues_skipped_message = ' (skipped {0} pull requests)'.format( - issues_skipped) + issues_skipped_message = " (skipped {0} pull requests)".format(issues_skipped) - logger.info('Saving {0} issues to disk{1}'.format( - len(list(issues.keys())), issues_skipped_message)) - comments_template = _issue_template + '/{0}/comments' - events_template = _issue_template + '/{0}/events' + logger.info( + "Saving {0} issues to disk{1}".format( + len(list(issues.keys())), issues_skipped_message + ) + ) + comments_template = _issue_template + "/{0}/comments" + events_template = _issue_template + "/{0}/events" for number, issue in list(issues.items()): if args.include_issue_comments or args.include_everything: template = comments_template.format(number) - issues[number]['comment_data'] = retrieve_data(args, template) + issues[number]["comment_data"] = retrieve_data(args, template) if args.include_issue_events or args.include_everything: template = events_template.format(number) - issues[number]['event_data'] = retrieve_data(args, template) + issues[number]["event_data"] = retrieve_data(args, template) - issue_file = '{0}/{1}.json'.format(issue_cwd, number) - with codecs.open(issue_file, 'w', encoding='utf-8') as f: + issue_file = "{0}/{1}.json".format(issue_cwd, number) + with codecs.open(issue_file, "w", encoding="utf-8") as f: json_dump(issue, f) def backup_pulls(args, repo_cwd, repository, repos_template): - has_pulls_dir = os.path.isdir('{0}/pulls/.git'.format(repo_cwd)) + has_pulls_dir = os.path.isdir("{0}/pulls/.git".format(repo_cwd)) if args.skip_existing and has_pulls_dir: return - logger.info('Retrieving {0} pull requests'.format(repository['full_name'])) # noqa - pulls_cwd = os.path.join(repo_cwd, 'pulls') + logger.info("Retrieving {0} pull requests".format(repository["full_name"])) # noqa + pulls_cwd = os.path.join(repo_cwd, "pulls") mkdir_p(repo_cwd, pulls_cwd) pulls = {} - _pulls_template = '{0}/{1}/pulls'.format(repos_template, - repository['full_name']) + _pulls_template = "{0}/{1}/pulls".format(repos_template, repository["full_name"]) + _issue_template = "{0}/{1}/issues".format(repos_template, repository["full_name"]) query_args = { - 'filter': 'all', - 'state': 'all', - 'sort': 'updated', - 'direction': 'desc', + "filter": "all", + "state": "all", + "sort": "updated", + "direction": "desc", } if not args.include_pull_details: - pull_states = ['open', 'closed'] + pull_states = ["open", "closed"] for pull_state in pull_states: - query_args['state'] = pull_state - _pulls = retrieve_data_gen( - args, - _pulls_template, - query_args=query_args - ) + query_args["state"] = pull_state + _pulls = retrieve_data_gen(args, _pulls_template, query_args=query_args) for pull in _pulls: - if args.since and pull['updated_at'] < args.since: + if args.since and pull["updated_at"] < args.since: break - if not args.since or pull['updated_at'] >= args.since: - pulls[pull['number']] = pull + if not args.since or pull["updated_at"] >= args.since: + pulls[pull["number"]] = pull else: - _pulls = 
retrieve_data_gen( - args, - _pulls_template, - query_args=query_args - ) + _pulls = retrieve_data_gen(args, _pulls_template, query_args=query_args) for pull in _pulls: - if args.since and pull['updated_at'] < args.since: + if args.since and pull["updated_at"] < args.since: break - if not args.since or pull['updated_at'] >= args.since: - pulls[pull['number']] = retrieve_data( + if not args.since or pull["updated_at"] >= args.since: + pulls[pull["number"]] = retrieve_data( args, - _pulls_template + '/{}'.format(pull['number']), - single_request=True + _pulls_template + "/{}".format(pull["number"]), + single_request=True, )[0] - logger.info('Saving {0} pull requests to disk'.format( - len(list(pulls.keys())))) - comments_template = _pulls_template + '/{0}/comments' - commits_template = _pulls_template + '/{0}/commits' + logger.info("Saving {0} pull requests to disk".format(len(list(pulls.keys())))) + # Comments from pulls API are only _review_ comments + # regular comments need to be fetched via issue API. + # For backwards compatibility with versions <= 0.41.0 + # keep name "comment_data" for review comments + comments_regular_template = _issue_template + "/{0}/comments" + comments_template = _pulls_template + "/{0}/comments" + commits_template = _pulls_template + "/{0}/commits" for number, pull in list(pulls.items()): if args.include_pull_comments or args.include_everything: + template = comments_regular_template.format(number) + pulls[number]["comment_regular_data"] = retrieve_data(args, template) template = comments_template.format(number) - pulls[number]['comment_data'] = retrieve_data(args, template) + pulls[number]["comment_data"] = retrieve_data(args, template) if args.include_pull_commits or args.include_everything: template = commits_template.format(number) - pulls[number]['commit_data'] = retrieve_data(args, template) + pulls[number]["commit_data"] = retrieve_data(args, template) - pull_file = '{0}/{1}.json'.format(pulls_cwd, number) - with codecs.open(pull_file, 'w', encoding='utf-8') as f: + pull_file = "{0}/{1}.json".format(pulls_cwd, number) + with codecs.open(pull_file, "w", encoding="utf-8") as f: json_dump(pull, f) def backup_milestones(args, repo_cwd, repository, repos_template): - milestone_cwd = os.path.join(repo_cwd, 'milestones') + milestone_cwd = os.path.join(repo_cwd, "milestones") if args.skip_existing and os.path.isdir(milestone_cwd): return - logger.info('Retrieving {0} milestones'.format(repository['full_name'])) + logger.info("Retrieving {0} milestones".format(repository["full_name"])) mkdir_p(repo_cwd, milestone_cwd) - template = '{0}/{1}/milestones'.format(repos_template, - repository['full_name']) + template = "{0}/{1}/milestones".format(repos_template, repository["full_name"]) - query_args = { - 'state': 'all' - } + query_args = {"state": "all"} _milestones = retrieve_data(args, template, query_args=query_args) milestones = {} for milestone in _milestones: - milestones[milestone['number']] = milestone + milestones[milestone["number"]] = milestone - logger.info('Saving {0} milestones to disk'.format( - len(list(milestones.keys())))) + logger.info("Saving {0} milestones to disk".format(len(list(milestones.keys())))) for number, milestone in list(milestones.items()): - milestone_file = '{0}/{1}.json'.format(milestone_cwd, number) - with codecs.open(milestone_file, 'w', encoding='utf-8') as f: + milestone_file = "{0}/{1}.json".format(milestone_cwd, number) + with codecs.open(milestone_file, "w", encoding="utf-8") as f: json_dump(milestone, f) def 
backup_labels(args, repo_cwd, repository, repos_template): - label_cwd = os.path.join(repo_cwd, 'labels') - output_file = '{0}/labels.json'.format(label_cwd) - template = '{0}/{1}/labels'.format(repos_template, - repository['full_name']) - _backup_data(args, - 'labels', - template, - output_file, - label_cwd) + label_cwd = os.path.join(repo_cwd, "labels") + output_file = "{0}/labels.json".format(label_cwd) + template = "{0}/{1}/labels".format(repos_template, repository["full_name"]) + _backup_data(args, "labels", template, output_file, label_cwd) def backup_hooks(args, repo_cwd, repository, repos_template): @@ -982,178 +1192,185 @@ def backup_hooks(args, repo_cwd, repository, repos_template): if not auth: logger.info("Skipping hooks since no authentication provided") return - hook_cwd = os.path.join(repo_cwd, 'hooks') - output_file = '{0}/hooks.json'.format(hook_cwd) - template = '{0}/{1}/hooks'.format(repos_template, - repository['full_name']) + hook_cwd = os.path.join(repo_cwd, "hooks") + output_file = "{0}/hooks.json".format(hook_cwd) + template = "{0}/{1}/hooks".format(repos_template, repository["full_name"]) try: - _backup_data(args, - 'hooks', - template, - output_file, - hook_cwd) + _backup_data(args, "hooks", template, output_file, hook_cwd) except SystemExit: logger.info("Unable to read hooks, skipping") def backup_releases(args, repo_cwd, repository, repos_template, include_assets=False): - repository_fullname = repository['full_name'] + repository_fullname = repository["full_name"] # give release files somewhere to live & log intent - release_cwd = os.path.join(repo_cwd, 'releases') - logger.info('Retrieving {0} releases'.format(repository_fullname)) + release_cwd = os.path.join(repo_cwd, "releases") + logger.info("Retrieving {0} releases".format(repository_fullname)) mkdir_p(repo_cwd, release_cwd) query_args = {} - release_template = '{0}/{1}/releases'.format(repos_template, repository_fullname) + release_template = "{0}/{1}/releases".format(repos_template, repository_fullname) releases = retrieve_data(args, release_template, query_args=query_args) # for each release, store it - logger.info('Saving {0} releases to disk'.format(len(releases))) + logger.info("Saving {0} releases to disk".format(len(releases))) for release in releases: - release_name = release['tag_name'] - release_name_safe = release_name.replace('/', '__') - output_filepath = os.path.join(release_cwd, '{0}.json'.format(release_name_safe)) - with codecs.open(output_filepath, 'w+', encoding='utf-8') as f: + release_name = release["tag_name"] + release_name_safe = release_name.replace("/", "__") + output_filepath = os.path.join( + release_cwd, "{0}.json".format(release_name_safe) + ) + with codecs.open(output_filepath, "w+", encoding="utf-8") as f: json_dump(release, f) if include_assets: - assets = retrieve_data(args, release['assets_url']) + assets = retrieve_data(args, release["assets_url"]) if len(assets) > 0: # give release asset files somewhere to live & download them (not including source archives) release_assets_cwd = os.path.join(release_cwd, release_name_safe) mkdir_p(release_assets_cwd) for asset in assets: - download_file(asset['url'], os.path.join(release_assets_cwd, asset['name']), get_auth(args)) + download_file( + asset["url"], + os.path.join(release_assets_cwd, asset["name"]), + get_auth(args), + ) -def fetch_repository(name, - remote_url, - local_dir, - skip_existing=False, - bare_clone=False, - lfs_clone=False): +def fetch_repository( + name, + remote_url, + local_dir, + skip_existing=False, + 
bare_clone=False, + lfs_clone=False, + no_prune=False, +): if bare_clone: if os.path.exists(local_dir): - clone_exists = subprocess.check_output(['git', - 'rev-parse', - '--is-bare-repository'], - cwd=local_dir) == b"true\n" + clone_exists = ( + subprocess.check_output( + ["git", "rev-parse", "--is-bare-repository"], cwd=local_dir + ) + == b"true\n" + ) else: clone_exists = False else: - clone_exists = os.path.exists(os.path.join(local_dir, '.git')) + clone_exists = os.path.exists(os.path.join(local_dir, ".git")) if clone_exists and skip_existing: return masked_remote_url = mask_password(remote_url) - initialized = subprocess.call('git ls-remote ' + remote_url, - stdout=FNULL, - stderr=FNULL, - shell=True) + initialized = subprocess.call( + "git ls-remote " + remote_url, stdout=FNULL, stderr=FNULL, shell=True + ) if initialized == 128: - logger.info("Skipping {0} ({1}) since it's not initialized".format( - name, masked_remote_url)) + logger.info( + "Skipping {0} ({1}) since it's not initialized".format( + name, masked_remote_url + ) + ) return if clone_exists: - logger.info('Updating {0} in {1}'.format(name, local_dir)) + logger.info("Updating {0} in {1}".format(name, local_dir)) - remotes = subprocess.check_output(['git', 'remote', 'show'], - cwd=local_dir) - remotes = [i.strip() for i in remotes.decode('utf-8').splitlines()] + remotes = subprocess.check_output(["git", "remote", "show"], cwd=local_dir) + remotes = [i.strip() for i in remotes.decode("utf-8").splitlines()] - if 'origin' not in remotes: - git_command = ['git', 'remote', 'rm', 'origin'] + if "origin" not in remotes: + git_command = ["git", "remote", "rm", "origin"] logging_subprocess(git_command, None, cwd=local_dir) - git_command = ['git', 'remote', 'add', 'origin', remote_url] + git_command = ["git", "remote", "add", "origin", remote_url] logging_subprocess(git_command, None, cwd=local_dir) else: - git_command = ['git', 'remote', 'set-url', 'origin', remote_url] + git_command = ["git", "remote", "set-url", "origin", remote_url] logging_subprocess(git_command, None, cwd=local_dir) if lfs_clone: - git_command = ['git', 'lfs', 'fetch', '--all', '--prune'] + git_command = ["git", "lfs", "fetch", "--all", "--prune"] else: - git_command = ['git', 'fetch', '--all', '--force', '--tags', '--prune'] + git_command = ["git", "fetch", "--all", "--force", "--tags", "--prune"] + if no_prune: + git_command.pop() logging_subprocess(git_command, None, cwd=local_dir) else: - logger.info('Cloning {0} repository from {1} to {2}'.format( - name, - masked_remote_url, - local_dir)) + logger.info( + "Cloning {0} repository from {1} to {2}".format( + name, masked_remote_url, local_dir + ) + ) if bare_clone: + git_command = ["git", "clone", "--mirror", remote_url, local_dir] + logging_subprocess(git_command, None) if lfs_clone: - git_command = ['git', 'lfs', 'clone', '--mirror', remote_url, local_dir] - else: - git_command = ['git', 'clone', '--mirror', remote_url, local_dir] + git_command = ["git", "lfs", "fetch", "--all", "--prune"] + if no_prune: + git_command.pop() + logging_subprocess(git_command, None, cwd=local_dir) else: if lfs_clone: - git_command = ['git', 'lfs', 'clone', remote_url, local_dir] + git_command = ["git", "lfs", "clone", remote_url, local_dir] else: - git_command = ['git', 'clone', remote_url, local_dir] - logging_subprocess(git_command, None) + git_command = ["git", "clone", remote_url, local_dir] + logging_subprocess(git_command, None) def backup_account(args, output_directory): - account_cwd = os.path.join(output_directory, 
'account') + account_cwd = os.path.join(output_directory, "account") if args.include_starred or args.include_everything: output_file = "{0}/starred.json".format(account_cwd) - template = "https://{0}/users/{1}/starred".format(get_github_api_host(args), args.user) - _backup_data(args, - "starred repositories", - template, - output_file, - account_cwd) + template = "https://{0}/users/{1}/starred".format( + get_github_api_host(args), args.user + ) + _backup_data(args, "starred repositories", template, output_file, account_cwd) if args.include_watched or args.include_everything: output_file = "{0}/watched.json".format(account_cwd) - template = "https://{0}/users/{1}/subscriptions".format(get_github_api_host(args), args.user) - _backup_data(args, - "watched repositories", - template, - output_file, - account_cwd) + template = "https://{0}/users/{1}/subscriptions".format( + get_github_api_host(args), args.user + ) + _backup_data(args, "watched repositories", template, output_file, account_cwd) if args.include_followers or args.include_everything: output_file = "{0}/followers.json".format(account_cwd) - template = "https://{0}/users/{1}/followers".format(get_github_api_host(args), args.user) - _backup_data(args, - "followers", - template, - output_file, - account_cwd) + template = "https://{0}/users/{1}/followers".format( + get_github_api_host(args), args.user + ) + _backup_data(args, "followers", template, output_file, account_cwd) if args.include_following or args.include_everything: output_file = "{0}/following.json".format(account_cwd) - template = "https://{0}/users/{1}/following".format(get_github_api_host(args), args.user) - _backup_data(args, - "following", - template, - output_file, - account_cwd) + template = "https://{0}/users/{1}/following".format( + get_github_api_host(args), args.user + ) + _backup_data(args, "following", template, output_file, account_cwd) def _backup_data(args, name, template, output_file, output_directory): skip_existing = args.skip_existing if not skip_existing or not os.path.exists(output_file): - logger.info('Retrieving {0} {1}'.format(args.user, name)) + logger.info("Retrieving {0} {1}".format(args.user, name)) mkdir_p(output_directory) data = retrieve_data(args, template) - logger.info('Writing {0} {1} to disk'.format(len(data), name)) - with codecs.open(output_file, 'w', encoding='utf-8') as f: + logger.info("Writing {0} {1} to disk".format(len(data), name)) + with codecs.open(output_file, "w", encoding="utf-8") as f: json_dump(data, f) def json_dump(data, output_file): - json.dump(data, - output_file, - ensure_ascii=False, - sort_keys=True, - indent=4, - separators=(',', ': ')) + json.dump( + data, + output_file, + ensure_ascii=False, + sort_keys=True, + indent=4, + separators=(",", ": "), + ) diff --git a/release b/release index c48de82..21a14f7 100755 --- a/release +++ b/release @@ -6,7 +6,7 @@ if [[ ! 
diff --git a/release b/release
index c48de82..21a14f7 100755
--- a/release
+++ b/release
@@ -6,7 +6,7 @@ if [[ ! -f setup.py ]]; then
     exit 1
 fi
 
-PACKAGE_NAME="$(cat setup.py | grep "name='" | head | cut -d "'" -f2)"
+PACKAGE_NAME="$(cat setup.py | grep 'name="' | head | cut -d '"' -f2)"
 INIT_PACKAGE_NAME="$(echo "${PACKAGE_NAME//-/_}")"
 PUBLIC="true"
 
@@ -86,18 +86,12 @@ TMPFILE=$(mktemp /tmp/${tempfoo}.XXXXXX) || {
     exit 1
 }
 
-find_this="__version__ = '$current_version'"
-replace_with="__version__ = '$next_version'"
+find_this="__version__ = \"$current_version\""
+replace_with="__version__ = \"$next_version\""
 
 echo -e "${YELLOW}--->${COLOR_OFF} Updating ${INIT_PACKAGE_NAME}/__init__.py"
 sed "s/$find_this/$replace_with/" ${INIT_PACKAGE_NAME}/__init__.py > $TMPFILE && mv $TMPFILE ${INIT_PACKAGE_NAME}/__init__.py
 
-find_this="${PACKAGE_NAME}.git@$current_version"
-replace_with="${PACKAGE_NAME}.git@$next_version"
-
-echo -e "${YELLOW}--->${COLOR_OFF} Updating README.rst"
-sed "s/$find_this/$replace_with/" README.rst > $TMPFILE && mv $TMPFILE README.rst
-
 if [ -f docs/conf.py ]; then
     echo -e "${YELLOW}--->${COLOR_OFF} Updating docs"
     find_this="version = '${current_version}'"
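
Note the coupling in the release hunk above: the script now greps for name=" with a double quote, which only matches once the setup.py hunks below move that file to double-quoted strings, so the two changes have to land together. A rough Python equivalent of the grep/cut extraction, offered as an illustration rather than as part of the patch:

    import re

    # Mimics: grep 'name="' setup.py | head | cut -d '"' -f2
    def package_name(path="setup.py"):
        with open(path) as f:
            for line in f:
                match = re.search(r'name="([^"]*)"', line)
                if match:
                    return match.group(1)  # e.g. "github-backup"
        return None
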
diff --git a/release-requirements.txt b/release-requirements.txt
new file mode 100644
index 0000000..1571464
--- /dev/null
+++ b/release-requirements.txt
@@ -0,0 +1,31 @@
+bleach==6.0.0
+certifi==2023.5.7
+charset-normalizer==3.1.0
+colorama==0.4.6
+docutils==0.20.1
+flake8==6.0.0
+gitchangelog==3.0.4
+idna==3.4
+importlib-metadata==6.6.0
+jaraco.classes==3.2.3
+keyring==23.13.1
+markdown-it-py==2.2.0
+mccabe==0.7.0
+mdurl==0.1.2
+more-itertools==9.1.0
+pkginfo==1.9.6
+pycodestyle==2.10.0
+pyflakes==3.0.1
+Pygments==2.15.1
+readme-renderer==37.3
+requests==2.31.0
+requests-toolbelt==1.0.0
+restructuredtext-lint==1.4.0
+rfc3986==2.0.0
+rich==13.3.5
+six==1.16.0
+tqdm==4.65.0
+twine==4.0.2
+urllib3==2.0.2
+webencodings==0.5.1
+zipp==3.15.0
diff --git a/setup.py b/setup.py
index 3b4df41..898e4fb 100644
--- a/setup.py
+++ b/setup.py
@@ -5,6 +5,7 @@ from github_backup import __version__
 
 try:
     from setuptools import setup
+    setup  # workaround for pyflakes issue #13
 except ImportError:
     from distutils.core import setup
 
@@ -15,6 +16,7 @@ except ImportError:
 # http://www.eby-sarna.com/pipermail/peak/2010-May/003357.html)
 try:
     import multiprocessing
+    multiprocessing
 except ImportError:
     pass
 
@@ -25,26 +27,26 @@ def open_file(fname):
 
 
 setup(
-    name='github-backup',
+    name="github-backup",
     version=__version__,
-    author='Jose Diaz-Gonzalez',
-    author_email='github-backup@josediazgonzalez.com',
-    packages=['github_backup'],
-    scripts=['bin/github-backup'],
-    url='http://github.com/josegonzalez/python-github-backup',
-    license='MIT',
+    author="Jose Diaz-Gonzalez",
+    author_email="github-backup@josediazgonzalez.com",
+    packages=["github_backup"],
+    scripts=["bin/github-backup"],
+    url="http://github.com/josegonzalez/python-github-backup",
+    license="MIT",
     classifiers=[
-        'Development Status :: 5 - Production/Stable',
-        'Topic :: System :: Archiving :: Backup',
-        'License :: OSI Approved :: MIT License',
-        'Programming Language :: Python :: 3.5',
-        'Programming Language :: Python :: 3.6',
-        'Programming Language :: Python :: 3.7',
-        'Programming Language :: Python :: 3.8',
+        "Development Status :: 5 - Production/Stable",
+        "Topic :: System :: Archiving :: Backup",
+        "License :: OSI Approved :: MIT License",
+        "Programming Language :: Python :: 3.5",
+        "Programming Language :: Python :: 3.6",
+        "Programming Language :: Python :: 3.7",
+        "Programming Language :: Python :: 3.8",
     ],
-    description='backup a github user or organization',
-    long_description=open_file('README.rst').read(),
-    long_description_content_type='text/x-rst',
-    install_requires=open_file('requirements.txt').readlines(),
+    description="backup a github user or organization",
+    long_description=open_file("README.rst").read(),
+    long_description_content_type="text/x-rst",
+    install_requires=open_file("requirements.txt").readlines(),
     zip_safe=True,
 )
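
One last pattern worth calling out: the bare setup and multiprocessing expression statements added in the setup.py hunks are a lint idiom, not dead code. Referencing the imported name once keeps pyflakes from flagging the import as unused inside the try/except fallback, as the in-line comment citing pyflakes issue #13 indicates. Reduced to its essence:

    try:
        from setuptools import setup
        setup  # reference the name so pyflakes counts the import as used
    except ImportError:
        from distutils.core import setup
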