From cca8a851ad4370c9d421029baba89dc077d17b50 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Mon, 29 May 2023 18:32:39 -0400 Subject: [PATCH] chore: run black --- github_backup/__init__.py | 2 +- github_backup/github_backup.py | 1249 ++++++++++++++++++-------------- setup.py | 38 +- 3 files changed, 716 insertions(+), 573 deletions(-) diff --git a/github_backup/__init__.py b/github_backup/__init__.py index ccd8b38..92717f7 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.42.0' +__version__ = "0.42.0" diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 0a7765a..7bf021d 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -31,11 +31,12 @@ from http.client import IncompleteRead try: from . import __version__ + VERSION = __version__ except ImportError: - VERSION = 'unknown' + VERSION = "unknown" -FNULL = open(os.devnull, 'w') +FNULL = open(os.devnull, "w") def _get_log_date(): @@ -64,31 +65,32 @@ def log_warning(message): logging.warning(msg) -def logging_subprocess(popenargs, - logger, - stdout_log_level=logging.DEBUG, - stderr_log_level=logging.ERROR, - **kwargs): +def logging_subprocess( + popenargs, + logger, + stdout_log_level=logging.DEBUG, + stderr_log_level=logging.ERROR, + **kwargs +): """ Variant of subprocess.call that accepts a logger instead of stdout/stderr, and logs stdout messages via logger.debug and stderr messages via logger.error. """ - child = subprocess.Popen(popenargs, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, **kwargs) - if sys.platform == 'win32': - log_info("Windows operating system detected - no subprocess logging will be returned") + child = subprocess.Popen( + popenargs, stdout=subprocess.PIPE, stderr=subprocess.PIPE, **kwargs + ) + if sys.platform == "win32": + log_info( + "Windows operating system detected - no subprocess logging will be returned" + ) - log_level = {child.stdout: stdout_log_level, - child.stderr: stderr_log_level} + log_level = {child.stdout: stdout_log_level, child.stderr: stderr_log_level} def check_io(): - if sys.platform == 'win32': + if sys.platform == "win32": return - ready_to_read = select.select([child.stdout, child.stderr], - [], - [], - 1000)[0] + ready_to_read = select.select([child.stdout, child.stderr], [], [], 1000)[0] for io in ready_to_read: line = io.readline() if not logger: @@ -105,8 +107,8 @@ def logging_subprocess(popenargs, rc = child.wait() if rc != 0: - print('{} returned {}:'.format(popenargs[0], rc), file=sys.stderr) - print('\t', ' '.join(popenargs), file=sys.stderr) + print("{} returned {}:".format(popenargs[0], rc), file=sys.stderr) + print("\t", " ".join(popenargs), file=sys.stderr) return rc @@ -122,221 +124,301 @@ def mkdir_p(*args): raise -def mask_password(url, secret='*****'): +def mask_password(url, secret="*****"): parsed = urlparse(url) if not parsed.password: return url - elif parsed.password == 'x-oauth-basic': + elif parsed.password == "x-oauth-basic": return url.replace(parsed.username, secret) return url.replace(parsed.password, secret) def parse_args(args=None): - parser = argparse.ArgumentParser(description='Backup a github account') - parser.add_argument('user', - metavar='USER', - type=str, - help='github username') - parser.add_argument('-u', - '--username', - dest='username', - help='username for basic auth') - parser.add_argument('-p', - '--password', - dest='password', - help='password for basic auth. 
' - 'If a username is given but not a password, the ' - 'password will be prompted for.') - parser.add_argument('-t', - '--token', - dest='token', - help='personal access, OAuth, or JSON Web token, or path to token (file://...)') # noqa - parser.add_argument('--as-app', - action='store_true', - dest='as_app', - help='authenticate as github app instead of as a user.') - parser.add_argument('-o', - '--output-directory', - default='.', - dest='output_directory', - help='directory at which to backup the repositories') - parser.add_argument('-l', - '--log-level', - default='info', - dest='log_level', - help='log level to use (default: info, possible levels: debug, info, warning, error, critical)') - parser.add_argument('-i', - '--incremental', - action='store_true', - dest='incremental', - help='incremental backup') - parser.add_argument('--starred', - action='store_true', - dest='include_starred', - help='include JSON output of starred repositories in backup') - parser.add_argument('--all-starred', - action='store_true', - dest='all_starred', - help='include starred repositories in backup [*]') - parser.add_argument('--watched', - action='store_true', - dest='include_watched', - help='include JSON output of watched repositories in backup') - parser.add_argument('--followers', - action='store_true', - dest='include_followers', - help='include JSON output of followers in backup') - parser.add_argument('--following', - action='store_true', - dest='include_following', - help='include JSON output of following users in backup') - parser.add_argument('--all', - action='store_true', - dest='include_everything', - help='include everything in backup (not including [*])') - parser.add_argument('--issues', - action='store_true', - dest='include_issues', - help='include issues in backup') - parser.add_argument('--issue-comments', - action='store_true', - dest='include_issue_comments', - help='include issue comments in backup') - parser.add_argument('--issue-events', - action='store_true', - dest='include_issue_events', - help='include issue events in backup') - parser.add_argument('--pulls', - action='store_true', - dest='include_pulls', - help='include pull requests in backup') - parser.add_argument('--pull-comments', - action='store_true', - dest='include_pull_comments', - help='include pull request review comments in backup') - parser.add_argument('--pull-commits', - action='store_true', - dest='include_pull_commits', - help='include pull request commits in backup') - parser.add_argument('--pull-details', - action='store_true', - dest='include_pull_details', - help='include more pull request details in backup [*]') - parser.add_argument('--labels', - action='store_true', - dest='include_labels', - help='include labels in backup') - parser.add_argument('--hooks', - action='store_true', - dest='include_hooks', - help='include hooks in backup (works only when authenticated)') # noqa - parser.add_argument('--milestones', - action='store_true', - dest='include_milestones', - help='include milestones in backup') - parser.add_argument('--repositories', - action='store_true', - dest='include_repository', - help='include repository clone in backup') - parser.add_argument('--bare', - action='store_true', - dest='bare_clone', - help='clone bare repositories') - parser.add_argument('--no-prune', - action='store_true', - dest='no_prune', - help='disable prune option for git fetch') - parser.add_argument('--lfs', - action='store_true', - dest='lfs_clone', - help='clone LFS repositories (requires Git LFS to be 
installed, https://git-lfs.github.com) [*]') - parser.add_argument('--wikis', - action='store_true', - dest='include_wiki', - help='include wiki clone in backup') - parser.add_argument('--gists', - action='store_true', - dest='include_gists', - help='include gists in backup [*]') - parser.add_argument('--starred-gists', - action='store_true', - dest='include_starred_gists', - help='include starred gists in backup [*]') - parser.add_argument('--skip-archived', - action='store_true', - dest='skip_archived', - help='skip project if it is archived') - parser.add_argument('--skip-existing', - action='store_true', - dest='skip_existing', - help='skip project if a backup directory exists') - parser.add_argument('-L', - '--languages', - dest='languages', - help='only allow these languages', - nargs='*') - parser.add_argument('-N', - '--name-regex', - dest='name_regex', - help='python regex to match names against') - parser.add_argument('-H', - '--github-host', - dest='github_host', - help='GitHub Enterprise hostname') - parser.add_argument('-O', - '--organization', - action='store_true', - dest='organization', - help='whether or not this is an organization user') - parser.add_argument('-R', - '--repository', - dest='repository', - help='name of repository to limit backup to') - parser.add_argument('-P', '--private', - action='store_true', - dest='private', - help='include private repositories [*]') - parser.add_argument('-F', '--fork', - action='store_true', - dest='fork', - help='include forked repositories [*]') - parser.add_argument('--prefer-ssh', - action='store_true', - help='Clone repositories using SSH instead of HTTPS') - parser.add_argument('-v', '--version', - action='version', - version='%(prog)s ' + VERSION) - parser.add_argument('--keychain-name', - dest='osx_keychain_item_name', - help='OSX ONLY: name field of password item in OSX keychain that holds the personal access or OAuth token') - parser.add_argument('--keychain-account', - dest='osx_keychain_item_account', - help='OSX ONLY: account field of password item in OSX keychain that holds the personal access or OAuth token') - parser.add_argument('--releases', - action='store_true', - dest='include_releases', - help='include release information, not including assets or binaries' - ) - parser.add_argument('--assets', - action='store_true', - dest='include_assets', - help='include assets alongside release information; only applies if including releases') - parser.add_argument('--throttle-limit', - dest='throttle_limit', - type=int, - default=0, - help='start throttling of GitHub API requests after this amount of API requests remain') - parser.add_argument('--throttle-pause', - dest='throttle_pause', - type=float, - default=30.0, - help='wait this amount of seconds when API request throttling is active (default: 30.0, requires --throttle-limit to be set)') - parser.add_argument('--exclude', - dest='exclude', - help='names of repositories to exclude', - nargs="*") + parser = argparse.ArgumentParser(description="Backup a github account") + parser.add_argument("user", metavar="USER", type=str, help="github username") + parser.add_argument( + "-u", "--username", dest="username", help="username for basic auth" + ) + parser.add_argument( + "-p", + "--password", + dest="password", + help="password for basic auth. 
" + "If a username is given but not a password, the " + "password will be prompted for.", + ) + parser.add_argument( + "-t", + "--token", + dest="token", + help="personal access, OAuth, or JSON Web token, or path to token (file://...)", + ) # noqa + parser.add_argument( + "--as-app", + action="store_true", + dest="as_app", + help="authenticate as github app instead of as a user.", + ) + parser.add_argument( + "-o", + "--output-directory", + default=".", + dest="output_directory", + help="directory at which to backup the repositories", + ) + parser.add_argument( + "-l", + "--log-level", + default="info", + dest="log_level", + help="log level to use (default: info, possible levels: debug, info, warning, error, critical)", + ) + parser.add_argument( + "-i", + "--incremental", + action="store_true", + dest="incremental", + help="incremental backup", + ) + parser.add_argument( + "--starred", + action="store_true", + dest="include_starred", + help="include JSON output of starred repositories in backup", + ) + parser.add_argument( + "--all-starred", + action="store_true", + dest="all_starred", + help="include starred repositories in backup [*]", + ) + parser.add_argument( + "--watched", + action="store_true", + dest="include_watched", + help="include JSON output of watched repositories in backup", + ) + parser.add_argument( + "--followers", + action="store_true", + dest="include_followers", + help="include JSON output of followers in backup", + ) + parser.add_argument( + "--following", + action="store_true", + dest="include_following", + help="include JSON output of following users in backup", + ) + parser.add_argument( + "--all", + action="store_true", + dest="include_everything", + help="include everything in backup (not including [*])", + ) + parser.add_argument( + "--issues", + action="store_true", + dest="include_issues", + help="include issues in backup", + ) + parser.add_argument( + "--issue-comments", + action="store_true", + dest="include_issue_comments", + help="include issue comments in backup", + ) + parser.add_argument( + "--issue-events", + action="store_true", + dest="include_issue_events", + help="include issue events in backup", + ) + parser.add_argument( + "--pulls", + action="store_true", + dest="include_pulls", + help="include pull requests in backup", + ) + parser.add_argument( + "--pull-comments", + action="store_true", + dest="include_pull_comments", + help="include pull request review comments in backup", + ) + parser.add_argument( + "--pull-commits", + action="store_true", + dest="include_pull_commits", + help="include pull request commits in backup", + ) + parser.add_argument( + "--pull-details", + action="store_true", + dest="include_pull_details", + help="include more pull request details in backup [*]", + ) + parser.add_argument( + "--labels", + action="store_true", + dest="include_labels", + help="include labels in backup", + ) + parser.add_argument( + "--hooks", + action="store_true", + dest="include_hooks", + help="include hooks in backup (works only when authenticated)", + ) # noqa + parser.add_argument( + "--milestones", + action="store_true", + dest="include_milestones", + help="include milestones in backup", + ) + parser.add_argument( + "--repositories", + action="store_true", + dest="include_repository", + help="include repository clone in backup", + ) + parser.add_argument( + "--bare", action="store_true", dest="bare_clone", help="clone bare repositories" + ) + parser.add_argument( + "--no-prune", + action="store_true", + dest="no_prune", + help="disable 
prune option for git fetch", + ) + parser.add_argument( + "--lfs", + action="store_true", + dest="lfs_clone", + help="clone LFS repositories (requires Git LFS to be installed, https://git-lfs.github.com) [*]", + ) + parser.add_argument( + "--wikis", + action="store_true", + dest="include_wiki", + help="include wiki clone in backup", + ) + parser.add_argument( + "--gists", + action="store_true", + dest="include_gists", + help="include gists in backup [*]", + ) + parser.add_argument( + "--starred-gists", + action="store_true", + dest="include_starred_gists", + help="include starred gists in backup [*]", + ) + parser.add_argument( + "--skip-archived", + action="store_true", + dest="skip_archived", + help="skip project if it is archived", + ) + parser.add_argument( + "--skip-existing", + action="store_true", + dest="skip_existing", + help="skip project if a backup directory exists", + ) + parser.add_argument( + "-L", + "--languages", + dest="languages", + help="only allow these languages", + nargs="*", + ) + parser.add_argument( + "-N", + "--name-regex", + dest="name_regex", + help="python regex to match names against", + ) + parser.add_argument( + "-H", "--github-host", dest="github_host", help="GitHub Enterprise hostname" + ) + parser.add_argument( + "-O", + "--organization", + action="store_true", + dest="organization", + help="whether or not this is an organization user", + ) + parser.add_argument( + "-R", + "--repository", + dest="repository", + help="name of repository to limit backup to", + ) + parser.add_argument( + "-P", + "--private", + action="store_true", + dest="private", + help="include private repositories [*]", + ) + parser.add_argument( + "-F", + "--fork", + action="store_true", + dest="fork", + help="include forked repositories [*]", + ) + parser.add_argument( + "--prefer-ssh", + action="store_true", + help="Clone repositories using SSH instead of HTTPS", + ) + parser.add_argument( + "-v", "--version", action="version", version="%(prog)s " + VERSION + ) + parser.add_argument( + "--keychain-name", + dest="osx_keychain_item_name", + help="OSX ONLY: name field of password item in OSX keychain that holds the personal access or OAuth token", + ) + parser.add_argument( + "--keychain-account", + dest="osx_keychain_item_account", + help="OSX ONLY: account field of password item in OSX keychain that holds the personal access or OAuth token", + ) + parser.add_argument( + "--releases", + action="store_true", + dest="include_releases", + help="include release information, not including assets or binaries", + ) + parser.add_argument( + "--assets", + action="store_true", + dest="include_assets", + help="include assets alongside release information; only applies if including releases", + ) + parser.add_argument( + "--throttle-limit", + dest="throttle_limit", + type=int, + default=0, + help="start throttling of GitHub API requests after this amount of API requests remain", + ) + parser.add_argument( + "--throttle-pause", + dest="throttle_pause", + type=float, + default=30.0, + help="wait this amount of seconds when API request throttling is active (default: 30.0, requires --throttle-limit to be set)", + ) + parser.add_argument( + "--exclude", dest="exclude", help="names of repositories to exclude", nargs="*" + ) return parser.parse_args(args) @@ -345,35 +427,49 @@ def get_auth(args, encode=True, for_git_cli=False): if args.osx_keychain_item_name: if not args.osx_keychain_item_account: - raise Exception('You must specify both name and account fields for osx keychain password items') + raise 
Exception( + "You must specify both name and account fields for osx keychain password items" + ) else: - if platform.system() != 'Darwin': + if platform.system() != "Darwin": raise Exception("Keychain arguments are only supported on Mac OSX") try: - with open(os.devnull, 'w') as devnull: - token = (subprocess.check_output([ - 'security', 'find-generic-password', - '-s', args.osx_keychain_item_name, - '-a', args.osx_keychain_item_account, - '-w'], stderr=devnull).strip()) - token = token.decode('utf-8') - auth = token + ':' + 'x-oauth-basic' + with open(os.devnull, "w") as devnull: + token = subprocess.check_output( + [ + "security", + "find-generic-password", + "-s", + args.osx_keychain_item_name, + "-a", + args.osx_keychain_item_account, + "-w", + ], + stderr=devnull, + ).strip() + token = token.decode("utf-8") + auth = token + ":" + "x-oauth-basic" except subprocess.SubprocessError: - raise Exception('No password item matching the provided name and account could be found in the osx keychain.') + raise Exception( + "No password item matching the provided name and account could be found in the osx keychain." + ) elif args.osx_keychain_item_account: - raise Exception('You must specify both name and account fields for osx keychain password items') + raise Exception( + "You must specify both name and account fields for osx keychain password items" + ) elif args.token: - _path_specifier = 'file://' + _path_specifier = "file://" if args.token.startswith(_path_specifier): - args.token = open(args.token[len(_path_specifier):], - 'rt').readline().strip() + args.token = ( + open(args.token[len(_path_specifier) :], "rt").readline().strip() + ) if not args.as_app: - auth = args.token + ':' + 'x-oauth-basic' + auth = args.token + ":" + "x-oauth-basic" else: if not for_git_cli: auth = args.token else: - auth = 'x-access-token:' + args.token + auth = "x-access-token:" + args.token elif args.username: if not args.password: args.password = getpass.getpass() @@ -381,9 +477,9 @@ def get_auth(args, encode=True, for_git_cli=False): password = args.password else: password = urlquote(args.password) - auth = args.username + ':' + password + auth = args.username + ":" + password elif args.password: - raise Exception('You must specify a username for basic auth') + raise Exception("You must specify a username for basic auth") if not auth: return None @@ -391,14 +487,14 @@ def get_auth(args, encode=True, for_git_cli=False): if not encode: return auth - return base64.b64encode(auth.encode('ascii')) + return base64.b64encode(auth.encode("ascii")) def get_github_api_host(args): if args.github_host: - host = args.github_host + '/api/v3' + host = args.github_host + "/api/v3" else: - host = 'api.github.com' + host = "api.github.com" return host @@ -407,33 +503,40 @@ def get_github_host(args): if args.github_host: host = args.github_host else: - host = 'github.com' + host = "github.com" return host def get_github_repo_url(args, repository): - if repository.get('is_gist'): + if repository.get("is_gist"): if args.prefer_ssh: # The git_pull_url value is always https for gists, so we need to transform it to ssh form - repo_url = re.sub(r'^https?:\/\/(.+)\/(.+)\.git$', r'git@\1:\2.git', repository['git_pull_url']) - repo_url = re.sub(r'^git@gist\.', 'git@', repo_url) # strip gist subdomain for better hostkey compatibility + repo_url = re.sub( + r"^https?:\/\/(.+)\/(.+)\.git$", + r"git@\1:\2.git", + repository["git_pull_url"], + ) + repo_url = re.sub( + r"^git@gist\.", "git@", repo_url + ) # strip gist subdomain for better 
hostkey compatibility else: - repo_url = repository['git_pull_url'] + repo_url = repository["git_pull_url"] return repo_url if args.prefer_ssh: - return repository['ssh_url'] + return repository["ssh_url"] auth = get_auth(args, encode=False, for_git_cli=True) if auth: - repo_url = 'https://{0}@{1}/{2}/{3}.git'.format( + repo_url = "https://{0}@{1}/{2}/{3}.git".format( auth, get_github_host(args), - repository['owner']['login'], - repository['name']) + repository["owner"]["login"], + repository["name"], + ) else: - repo_url = repository['clone_url'] + repo_url = repository["clone_url"] return repo_url @@ -446,13 +549,15 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): while True: page = page + 1 - request = _construct_request(per_page, page, query_args, template, auth, as_app=args.as_app) # noqa + request = _construct_request( + per_page, page, query_args, template, auth, as_app=args.as_app + ) # noqa r, errors = _get_response(request, auth, template) status_code = int(r.getcode()) # Check if we got correct data try: - response = json.loads(r.read().decode('utf-8')) + response = json.loads(r.read().decode("utf-8")) except IncompleteRead: log_warning("Incomplete read error detected") read_error = True @@ -466,25 +571,28 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): read_error = False # be gentle with API request limit and throttle requests if remaining requests getting low - limit_remaining = int(r.headers.get('x-ratelimit-remaining', 0)) + limit_remaining = int(r.headers.get("x-ratelimit-remaining", 0)) if args.throttle_limit and limit_remaining <= args.throttle_limit: log_info( - 'API request limit hit: {} requests left, pausing further requests for {}s'.format( - limit_remaining, - args.throttle_pause)) + "API request limit hit: {} requests left, pausing further requests for {}s".format( + limit_remaining, args.throttle_pause + ) + ) time.sleep(args.throttle_pause) retries = 0 while retries < 3 and (status_code == 502 or read_error): - log_warning('API request failed. Retrying in 5 seconds') + log_warning("API request failed. 
Retrying in 5 seconds") retries += 1 time.sleep(5) - request = _construct_request(per_page, page, query_args, template, auth, as_app=args.as_app) # noqa + request = _construct_request( + per_page, page, query_args, template, auth, as_app=args.as_app + ) # noqa r, errors = _get_response(request, auth, template) status_code = int(r.getcode()) try: - response = json.loads(r.read().decode('utf-8')) + response = json.loads(r.read().decode("utf-8")) read_error = False except IncompleteRead: log_warning("Incomplete read error detected") @@ -497,14 +605,14 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): read_error = True if status_code != 200: - template = 'API request returned HTTP {0}: {1}' + template = "API request returned HTTP {0}: {1}" errors.append(template.format(status_code, r.reason)) - raise Exception(', '.join(errors)) + raise Exception(", ".join(errors)) if read_error: - template = 'API request problem reading response for {0}' + template = "API request problem reading response for {0}" errors.append(template.format(request)) - raise Exception(', '.join(errors)) + raise Exception(", ".join(errors)) if len(errors) == 0: if type(response) == list: @@ -516,7 +624,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): yield response if len(errors) > 0: - raise Exception(', '.join(errors)) + raise Exception(", ".join(errors)) if single_request: break @@ -563,20 +671,24 @@ def _get_response(request, auth, template): def _construct_request(per_page, page, query_args, template, auth, as_app=None): - querystring = urlencode(dict(list({ - 'per_page': per_page, - 'page': page - }.items()) + list(query_args.items()))) + querystring = urlencode( + dict( + list({"per_page": per_page, "page": page}.items()) + + list(query_args.items()) + ) + ) - request = Request(template + '?' + querystring) + request = Request(template + "?" 
+ querystring) if auth is not None: if not as_app: - request.add_header('Authorization', 'Basic '.encode('ascii') + auth) + request.add_header("Authorization", "Basic ".encode("ascii") + auth) else: - auth = auth.encode('ascii') - request.add_header('Authorization', 'token '.encode('ascii') + auth) - request.add_header('Accept', 'application/vnd.github.machine-man-preview+json') - log_info('Requesting {}?{}'.format(template, querystring)) + auth = auth.encode("ascii") + request.add_header("Authorization", "token ".encode("ascii") + auth) + request.add_header( + "Accept", "application/vnd.github.machine-man-preview+json" + ) + log_info("Requesting {}?{}".format(template, querystring)) return request @@ -587,7 +699,7 @@ def _request_http_error(exc, auth, errors): should_continue = False headers = exc.headers - limit_remaining = int(headers.get('x-ratelimit-remaining', 0)) + limit_remaining = int(headers.get("x-ratelimit-remaining", 0)) if exc.code == 403 and limit_remaining < 1: # The X-RateLimit-Reset header includes a @@ -595,15 +707,19 @@ def _request_http_error(exc, auth, errors): # so we can calculate how long to wait rather # than inefficiently polling: gm_now = calendar.timegm(time.gmtime()) - reset = int(headers.get('x-ratelimit-reset', 0)) or gm_now + reset = int(headers.get("x-ratelimit-reset", 0)) or gm_now # We'll never sleep for less than 10 seconds: delta = max(10, reset - gm_now) - limit = headers.get('x-ratelimit-limit') - log_warning('Exceeded rate limit of {} requests; waiting {} seconds to reset'.format(limit, delta)) # noqa + limit = headers.get("x-ratelimit-limit") + log_warning( + "Exceeded rate limit of {} requests; waiting {} seconds to reset".format( + limit, delta + ) + ) # noqa if auth is None: - log_info('Hint: Authenticate to raise your GitHub rate limit') + log_info("Hint: Authenticate to raise your GitHub rate limit") time.sleep(delta) should_continue = True @@ -613,13 +729,13 @@ def _request_http_error(exc, auth, errors): def _request_url_error(template, retry_timeout): # In case of a connection timing out, we can retry a few time # But we won't crash and not back-up the rest now - log_info('{} timed out'.format(template)) + log_info("{} timed out".format(template)) retry_timeout -= 1 if retry_timeout >= 0: return True, retry_timeout - raise Exception('{} timed out to much, skipping!') + raise Exception("{} timed out to much, skipping!") class S3HTTPRedirectHandler(HTTPRedirectHandler): @@ -629,9 +745,12 @@ class S3HTTPRedirectHandler(HTTPRedirectHandler): urllib will add the Authorization header to the redirected request to S3, which will result in a 400, so we should remove said header on redirect. 
""" + def redirect_request(self, req, fp, code, msg, headers, newurl): - request = super(S3HTTPRedirectHandler, self).redirect_request(req, fp, code, msg, headers, newurl) - del request.headers['Authorization'] + request = super(S3HTTPRedirectHandler, self).redirect_request( + req, fp, code, msg, headers, newurl + ) + del request.headers["Authorization"] return request @@ -641,15 +760,15 @@ def download_file(url, path, auth): return request = Request(url) - request.add_header('Accept', 'application/octet-stream') - request.add_header('Authorization', 'Basic '.encode('ascii') + auth) + request.add_header("Accept", "application/octet-stream") + request.add_header("Authorization", "Basic ".encode("ascii") + auth) opener = build_opener(S3HTTPRedirectHandler) try: response = opener.open(request) chunk_size = 16 * 1024 - with open(path, 'wb') as f: + with open(path, "wb") as f: while True: chunk = response.read(chunk_size) if not chunk: @@ -657,91 +776,110 @@ def download_file(url, path, auth): f.write(chunk) except HTTPError as exc: # Gracefully handle 404 responses (and others) when downloading from S3 - log_warning('Skipping download of asset {0} due to HTTPError: {1}'.format(url, exc.reason)) + log_warning( + "Skipping download of asset {0} due to HTTPError: {1}".format( + url, exc.reason + ) + ) except URLError as e: # Gracefully handle other URL errors - log_warning('Skipping download of asset {0} due to URLError: {1}'.format(url, e.reason)) + log_warning( + "Skipping download of asset {0} due to URLError: {1}".format(url, e.reason) + ) except socket.error as e: # Gracefully handle socket errors # TODO: Implement retry logic - log_warning('Skipping download of asset {0} due to socker error: {1}'.format(url, e.strerror)) + log_warning( + "Skipping download of asset {0} due to socker error: {1}".format( + url, e.strerror + ) + ) def get_authenticated_user(args): - template = 'https://{0}/user'.format(get_github_api_host(args)) + template = "https://{0}/user".format(get_github_api_host(args)) data = retrieve_data(args, template, single_request=True) return data[0] def check_git_lfs_install(): - exit_code = subprocess.call(['git', 'lfs', 'version']) + exit_code = subprocess.call(["git", "lfs", "version"]) if exit_code != 0: - raise Exception('The argument --lfs requires you to have Git LFS installed.\nYou can get it from https://git-lfs.github.com.') + raise Exception( + "The argument --lfs requires you to have Git LFS installed.\nYou can get it from https://git-lfs.github.com." 
+ ) def retrieve_repositories(args, authenticated_user): - log_info('Retrieving repositories') + log_info("Retrieving repositories") single_request = False - if args.user == authenticated_user['login']: + if args.user == authenticated_user["login"]: # we must use the /user/repos API to be able to access private repos - template = 'https://{0}/user/repos'.format( - get_github_api_host(args)) + template = "https://{0}/user/repos".format(get_github_api_host(args)) else: if args.private and not args.organization: - log_warning('Authenticated user is different from user being backed up, thus private repositories cannot be accessed') - template = 'https://{0}/users/{1}/repos'.format( - get_github_api_host(args), - args.user) + log_warning( + "Authenticated user is different from user being backed up, thus private repositories cannot be accessed" + ) + template = "https://{0}/users/{1}/repos".format( + get_github_api_host(args), args.user + ) if args.organization: - template = 'https://{0}/orgs/{1}/repos'.format( - get_github_api_host(args), - args.user) + template = "https://{0}/orgs/{1}/repos".format( + get_github_api_host(args), args.user + ) if args.repository: single_request = True - template = 'https://{0}/repos/{1}/{2}'.format( - get_github_api_host(args), - args.user, - args.repository) + template = "https://{0}/repos/{1}/{2}".format( + get_github_api_host(args), args.user, args.repository + ) repos = retrieve_data(args, template, single_request=single_request) if args.all_starred: - starred_template = 'https://{0}/users/{1}/starred'.format(get_github_api_host(args), args.user) + starred_template = "https://{0}/users/{1}/starred".format( + get_github_api_host(args), args.user + ) starred_repos = retrieve_data(args, starred_template, single_request=False) # flag each repo as starred for downstream processing for item in starred_repos: - item.update({'is_starred': True}) + item.update({"is_starred": True}) repos.extend(starred_repos) if args.include_gists: - gists_template = 'https://{0}/users/{1}/gists'.format(get_github_api_host(args), args.user) + gists_template = "https://{0}/users/{1}/gists".format( + get_github_api_host(args), args.user + ) gists = retrieve_data(args, gists_template, single_request=False) # flag each repo as a gist for downstream processing for item in gists: - item.update({'is_gist': True}) + item.update({"is_gist": True}) repos.extend(gists) if args.include_starred_gists: - starred_gists_template = 'https://{0}/gists/starred'.format(get_github_api_host(args)) - starred_gists = retrieve_data(args, starred_gists_template, single_request=False) + starred_gists_template = "https://{0}/gists/starred".format( + get_github_api_host(args) + ) + starred_gists = retrieve_data( + args, starred_gists_template, single_request=False + ) # flag each repo as a starred gist for downstream processing for item in starred_gists: - item.update({'is_gist': True, - 'is_starred': True}) + item.update({"is_gist": True, "is_starred": True}) repos.extend(starred_gists) return repos def filter_repositories(args, unfiltered_repositories): - log_info('Filtering repositories') + log_info("Filtering repositories") repositories = [] for r in unfiltered_repositories: # gists can be anonymous, so need to safely check owner - if r.get('owner', {}).get('login') == args.user or r.get('is_starred'): + if r.get("owner", {}).get("login") == args.user or r.get("is_starred"): repositories.append(r) name_regex = None @@ -753,27 +891,33 @@ def filter_repositories(args, unfiltered_repositories): languages = 
[x.lower() for x in args.languages] if not args.fork: - repositories = [r for r in repositories if not r.get('fork')] + repositories = [r for r in repositories if not r.get("fork")] if not args.private: - repositories = [r for r in repositories if not r.get('private') or r.get('public')] + repositories = [ + r for r in repositories if not r.get("private") or r.get("public") + ] if languages: - repositories = [r for r in repositories if r.get('language') and r.get('language').lower() in languages] # noqa + repositories = [ + r + for r in repositories + if r.get("language") and r.get("language").lower() in languages + ] # noqa if name_regex: - repositories = [r for r in repositories if name_regex.match(r['name'])] + repositories = [r for r in repositories if name_regex.match(r["name"])] if args.skip_archived: - repositories = [r for r in repositories if not r.get('archived')] + repositories = [r for r in repositories if not r.get("archived")] if args.exclude: - repositories = [r for r in repositories if r['name'] not in args.exclude] + repositories = [r for r in repositories if r["name"] not in args.exclude] return repositories def backup_repositories(args, output_directory, repositories): - log_info('Backing up repositories') - repos_template = 'https://{0}/repos'.format(get_github_api_host(args)) + log_info("Backing up repositories") + repos_template = "https://{0}/repos".format(get_github_api_host(args)) if args.incremental: - last_update_path = os.path.join(output_directory, 'last_update') + last_update_path = os.path.join(output_directory, "last_update") if os.path.exists(last_update_path): args.since = open(last_update_path).read().strip() else: @@ -781,55 +925,70 @@ def backup_repositories(args, output_directory, repositories): else: args.since = None - last_update = '0000-00-00T00:00:00Z' + last_update = "0000-00-00T00:00:00Z" for repository in repositories: - if 'updated_at' in repository and repository['updated_at'] > last_update: - last_update = repository['updated_at'] - elif 'pushed_at' in repository and repository['pushed_at'] > last_update: - last_update = repository['pushed_at'] - - if repository.get('is_gist'): - repo_cwd = os.path.join(output_directory, 'gists', repository['id']) - elif repository.get('is_starred'): + if "updated_at" in repository and repository["updated_at"] > last_update: + last_update = repository["updated_at"] + elif "pushed_at" in repository and repository["pushed_at"] > last_update: + last_update = repository["pushed_at"] + + if repository.get("is_gist"): + repo_cwd = os.path.join(output_directory, "gists", repository["id"]) + elif repository.get("is_starred"): # put starred repos in -o/starred/${owner}/${repo} to prevent collision of # any repositories with the same name - repo_cwd = os.path.join(output_directory, 'starred', repository['owner']['login'], repository['name']) + repo_cwd = os.path.join( + output_directory, + "starred", + repository["owner"]["login"], + repository["name"], + ) else: - repo_cwd = os.path.join(output_directory, 'repositories', repository['name']) + repo_cwd = os.path.join( + output_directory, "repositories", repository["name"] + ) - repo_dir = os.path.join(repo_cwd, 'repository') + repo_dir = os.path.join(repo_cwd, "repository") repo_url = get_github_repo_url(args, repository) - include_gists = (args.include_gists or args.include_starred_gists) - if (args.include_repository or args.include_everything) \ - or (include_gists and repository.get('is_gist')): - repo_name = repository.get('name') if not repository.get('is_gist') 
else repository.get('id') - fetch_repository(repo_name, - repo_url, - repo_dir, - skip_existing=args.skip_existing, - bare_clone=args.bare_clone, - lfs_clone=args.lfs_clone, - no_prune=args.no_prune) + include_gists = args.include_gists or args.include_starred_gists + if (args.include_repository or args.include_everything) or ( + include_gists and repository.get("is_gist") + ): + repo_name = ( + repository.get("name") + if not repository.get("is_gist") + else repository.get("id") + ) + fetch_repository( + repo_name, + repo_url, + repo_dir, + skip_existing=args.skip_existing, + bare_clone=args.bare_clone, + lfs_clone=args.lfs_clone, + no_prune=args.no_prune, + ) - if repository.get('is_gist'): + if repository.get("is_gist"): # dump gist information to a file as well - output_file = '{0}/gist.json'.format(repo_cwd) - with codecs.open(output_file, 'w', encoding='utf-8') as f: + output_file = "{0}/gist.json".format(repo_cwd) + with codecs.open(output_file, "w", encoding="utf-8") as f: json_dump(repository, f) continue # don't try to back anything else for a gist; it doesn't exist - download_wiki = (args.include_wiki or args.include_everything) - if repository['has_wiki'] and download_wiki: - fetch_repository(repository['name'], - repo_url.replace('.git', '.wiki.git'), - os.path.join(repo_cwd, 'wiki'), - skip_existing=args.skip_existing, - bare_clone=args.bare_clone, - lfs_clone=args.lfs_clone, - no_prune=args.no_prune - ) + download_wiki = args.include_wiki or args.include_everything + if repository["has_wiki"] and download_wiki: + fetch_repository( + repository["name"], + repo_url.replace(".git", ".wiki.git"), + os.path.join(repo_cwd, "wiki"), + skip_existing=args.skip_existing, + bare_clone=args.bare_clone, + lfs_clone=args.lfs_clone, + no_prune=args.no_prune, + ) if args.include_issues or args.include_everything: backup_issues(args, repo_cwd, repository, repos_template) @@ -846,188 +1005,169 @@ def backup_repositories(args, output_directory, repositories): backup_hooks(args, repo_cwd, repository, repos_template) if args.include_releases or args.include_everything: - backup_releases(args, repo_cwd, repository, repos_template, - include_assets=args.include_assets or args.include_everything) + backup_releases( + args, + repo_cwd, + repository, + repos_template, + include_assets=args.include_assets or args.include_everything, + ) if args.incremental: - if last_update == '0000-00-00T00:00:00Z': - last_update = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.localtime()) - - open(last_update_path, 'w').write(last_update) + if last_update == "0000-00-00T00:00:00Z": + last_update = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.localtime()) + + open(last_update_path, "w").write(last_update) def backup_issues(args, repo_cwd, repository, repos_template): - has_issues_dir = os.path.isdir('{0}/issues/.git'.format(repo_cwd)) + has_issues_dir = os.path.isdir("{0}/issues/.git".format(repo_cwd)) if args.skip_existing and has_issues_dir: return - log_info('Retrieving {0} issues'.format(repository['full_name'])) - issue_cwd = os.path.join(repo_cwd, 'issues') + log_info("Retrieving {0} issues".format(repository["full_name"])) + issue_cwd = os.path.join(repo_cwd, "issues") mkdir_p(repo_cwd, issue_cwd) issues = {} issues_skipped = 0 - issues_skipped_message = '' - _issue_template = '{0}/{1}/issues'.format(repos_template, - repository['full_name']) + issues_skipped_message = "" + _issue_template = "{0}/{1}/issues".format(repos_template, repository["full_name"]) should_include_pulls = args.include_pulls or 
args.include_everything - issue_states = ['open', 'closed'] + issue_states = ["open", "closed"] for issue_state in issue_states: - query_args = { - 'filter': 'all', - 'state': issue_state - } + query_args = {"filter": "all", "state": issue_state} if args.since: - query_args['since'] = args.since + query_args["since"] = args.since - _issues = retrieve_data(args, - _issue_template, - query_args=query_args) + _issues = retrieve_data(args, _issue_template, query_args=query_args) for issue in _issues: # skip pull requests which are also returned as issues # if retrieving pull requests is requested as well - if 'pull_request' in issue and should_include_pulls: + if "pull_request" in issue and should_include_pulls: issues_skipped += 1 continue - issues[issue['number']] = issue + issues[issue["number"]] = issue if issues_skipped: - issues_skipped_message = ' (skipped {0} pull requests)'.format( - issues_skipped) + issues_skipped_message = " (skipped {0} pull requests)".format(issues_skipped) - log_info('Saving {0} issues to disk{1}'.format( - len(list(issues.keys())), issues_skipped_message)) - comments_template = _issue_template + '/{0}/comments' - events_template = _issue_template + '/{0}/events' + log_info( + "Saving {0} issues to disk{1}".format( + len(list(issues.keys())), issues_skipped_message + ) + ) + comments_template = _issue_template + "/{0}/comments" + events_template = _issue_template + "/{0}/events" for number, issue in list(issues.items()): if args.include_issue_comments or args.include_everything: template = comments_template.format(number) - issues[number]['comment_data'] = retrieve_data(args, template) + issues[number]["comment_data"] = retrieve_data(args, template) if args.include_issue_events or args.include_everything: template = events_template.format(number) - issues[number]['event_data'] = retrieve_data(args, template) + issues[number]["event_data"] = retrieve_data(args, template) - issue_file = '{0}/{1}.json'.format(issue_cwd, number) - with codecs.open(issue_file, 'w', encoding='utf-8') as f: + issue_file = "{0}/{1}.json".format(issue_cwd, number) + with codecs.open(issue_file, "w", encoding="utf-8") as f: json_dump(issue, f) def backup_pulls(args, repo_cwd, repository, repos_template): - has_pulls_dir = os.path.isdir('{0}/pulls/.git'.format(repo_cwd)) + has_pulls_dir = os.path.isdir("{0}/pulls/.git".format(repo_cwd)) if args.skip_existing and has_pulls_dir: return - log_info('Retrieving {0} pull requests'.format(repository['full_name'])) # noqa - pulls_cwd = os.path.join(repo_cwd, 'pulls') + log_info("Retrieving {0} pull requests".format(repository["full_name"])) # noqa + pulls_cwd = os.path.join(repo_cwd, "pulls") mkdir_p(repo_cwd, pulls_cwd) pulls = {} - _pulls_template = '{0}/{1}/pulls'.format(repos_template, - repository['full_name']) - _issue_template = '{0}/{1}/issues'.format(repos_template, - repository['full_name']) + _pulls_template = "{0}/{1}/pulls".format(repos_template, repository["full_name"]) + _issue_template = "{0}/{1}/issues".format(repos_template, repository["full_name"]) query_args = { - 'filter': 'all', - 'state': 'all', - 'sort': 'updated', - 'direction': 'desc', + "filter": "all", + "state": "all", + "sort": "updated", + "direction": "desc", } if not args.include_pull_details: - pull_states = ['open', 'closed'] + pull_states = ["open", "closed"] for pull_state in pull_states: - query_args['state'] = pull_state - _pulls = retrieve_data_gen( - args, - _pulls_template, - query_args=query_args - ) + query_args["state"] = pull_state + _pulls = 
retrieve_data_gen(args, _pulls_template, query_args=query_args) for pull in _pulls: - if args.since and pull['updated_at'] < args.since: + if args.since and pull["updated_at"] < args.since: break - if not args.since or pull['updated_at'] >= args.since: - pulls[pull['number']] = pull + if not args.since or pull["updated_at"] >= args.since: + pulls[pull["number"]] = pull else: - _pulls = retrieve_data_gen( - args, - _pulls_template, - query_args=query_args - ) + _pulls = retrieve_data_gen(args, _pulls_template, query_args=query_args) for pull in _pulls: - if args.since and pull['updated_at'] < args.since: + if args.since and pull["updated_at"] < args.since: break - if not args.since or pull['updated_at'] >= args.since: - pulls[pull['number']] = retrieve_data( + if not args.since or pull["updated_at"] >= args.since: + pulls[pull["number"]] = retrieve_data( args, - _pulls_template + '/{}'.format(pull['number']), - single_request=True + _pulls_template + "/{}".format(pull["number"]), + single_request=True, )[0] - log_info('Saving {0} pull requests to disk'.format( - len(list(pulls.keys())))) + log_info("Saving {0} pull requests to disk".format(len(list(pulls.keys())))) # Comments from pulls API are only _review_ comments # regular comments need to be fetched via issue API. # For backwards compatibility with versions <= 0.41.0 # keep name "comment_data" for review comments - comments_regular_template = _issue_template + '/{0}/comments' - comments_template = _pulls_template + '/{0}/comments' - commits_template = _pulls_template + '/{0}/commits' + comments_regular_template = _issue_template + "/{0}/comments" + comments_template = _pulls_template + "/{0}/comments" + commits_template = _pulls_template + "/{0}/commits" for number, pull in list(pulls.items()): if args.include_pull_comments or args.include_everything: template = comments_regular_template.format(number) - pulls[number]['comment_regular_data'] = retrieve_data(args, template) + pulls[number]["comment_regular_data"] = retrieve_data(args, template) template = comments_template.format(number) - pulls[number]['comment_data'] = retrieve_data(args, template) + pulls[number]["comment_data"] = retrieve_data(args, template) if args.include_pull_commits or args.include_everything: template = commits_template.format(number) - pulls[number]['commit_data'] = retrieve_data(args, template) + pulls[number]["commit_data"] = retrieve_data(args, template) - pull_file = '{0}/{1}.json'.format(pulls_cwd, number) - with codecs.open(pull_file, 'w', encoding='utf-8') as f: + pull_file = "{0}/{1}.json".format(pulls_cwd, number) + with codecs.open(pull_file, "w", encoding="utf-8") as f: json_dump(pull, f) def backup_milestones(args, repo_cwd, repository, repos_template): - milestone_cwd = os.path.join(repo_cwd, 'milestones') + milestone_cwd = os.path.join(repo_cwd, "milestones") if args.skip_existing and os.path.isdir(milestone_cwd): return - log_info('Retrieving {0} milestones'.format(repository['full_name'])) + log_info("Retrieving {0} milestones".format(repository["full_name"])) mkdir_p(repo_cwd, milestone_cwd) - template = '{0}/{1}/milestones'.format(repos_template, - repository['full_name']) + template = "{0}/{1}/milestones".format(repos_template, repository["full_name"]) - query_args = { - 'state': 'all' - } + query_args = {"state": "all"} _milestones = retrieve_data(args, template, query_args=query_args) milestones = {} for milestone in _milestones: - milestones[milestone['number']] = milestone + milestones[milestone["number"]] = milestone - log_info('Saving 
{0} milestones to disk'.format( - len(list(milestones.keys())))) + log_info("Saving {0} milestones to disk".format(len(list(milestones.keys())))) for number, milestone in list(milestones.items()): - milestone_file = '{0}/{1}.json'.format(milestone_cwd, number) - with codecs.open(milestone_file, 'w', encoding='utf-8') as f: + milestone_file = "{0}/{1}.json".format(milestone_cwd, number) + with codecs.open(milestone_file, "w", encoding="utf-8") as f: json_dump(milestone, f) def backup_labels(args, repo_cwd, repository, repos_template): - label_cwd = os.path.join(repo_cwd, 'labels') - output_file = '{0}/labels.json'.format(label_cwd) - template = '{0}/{1}/labels'.format(repos_template, - repository['full_name']) - _backup_data(args, - 'labels', - template, - output_file, - label_cwd) + label_cwd = os.path.join(repo_cwd, "labels") + output_file = "{0}/labels.json".format(label_cwd) + template = "{0}/{1}/labels".format(repos_template, repository["full_name"]) + _backup_data(args, "labels", template, output_file, label_cwd) def backup_hooks(args, repo_cwd, repository, repos_template): @@ -1035,184 +1175,185 @@ def backup_hooks(args, repo_cwd, repository, repos_template): if not auth: log_info("Skipping hooks since no authentication provided") return - hook_cwd = os.path.join(repo_cwd, 'hooks') - output_file = '{0}/hooks.json'.format(hook_cwd) - template = '{0}/{1}/hooks'.format(repos_template, - repository['full_name']) + hook_cwd = os.path.join(repo_cwd, "hooks") + output_file = "{0}/hooks.json".format(hook_cwd) + template = "{0}/{1}/hooks".format(repos_template, repository["full_name"]) try: - _backup_data(args, - 'hooks', - template, - output_file, - hook_cwd) + _backup_data(args, "hooks", template, output_file, hook_cwd) except SystemExit: log_info("Unable to read hooks, skipping") def backup_releases(args, repo_cwd, repository, repos_template, include_assets=False): - repository_fullname = repository['full_name'] + repository_fullname = repository["full_name"] # give release files somewhere to live & log intent - release_cwd = os.path.join(repo_cwd, 'releases') - log_info('Retrieving {0} releases'.format(repository_fullname)) + release_cwd = os.path.join(repo_cwd, "releases") + log_info("Retrieving {0} releases".format(repository_fullname)) mkdir_p(repo_cwd, release_cwd) query_args = {} - release_template = '{0}/{1}/releases'.format(repos_template, repository_fullname) + release_template = "{0}/{1}/releases".format(repos_template, repository_fullname) releases = retrieve_data(args, release_template, query_args=query_args) # for each release, store it - log_info('Saving {0} releases to disk'.format(len(releases))) + log_info("Saving {0} releases to disk".format(len(releases))) for release in releases: - release_name = release['tag_name'] - release_name_safe = release_name.replace('/', '__') - output_filepath = os.path.join(release_cwd, '{0}.json'.format(release_name_safe)) - with codecs.open(output_filepath, 'w+', encoding='utf-8') as f: + release_name = release["tag_name"] + release_name_safe = release_name.replace("/", "__") + output_filepath = os.path.join( + release_cwd, "{0}.json".format(release_name_safe) + ) + with codecs.open(output_filepath, "w+", encoding="utf-8") as f: json_dump(release, f) if include_assets: - assets = retrieve_data(args, release['assets_url']) + assets = retrieve_data(args, release["assets_url"]) if len(assets) > 0: # give release asset files somewhere to live & download them (not including source archives) release_assets_cwd = os.path.join(release_cwd, 
release_name_safe) mkdir_p(release_assets_cwd) for asset in assets: - download_file(asset['url'], os.path.join(release_assets_cwd, asset['name']), get_auth(args)) + download_file( + asset["url"], + os.path.join(release_assets_cwd, asset["name"]), + get_auth(args), + ) -def fetch_repository(name, - remote_url, - local_dir, - skip_existing=False, - bare_clone=False, - lfs_clone=False, - no_prune=False): +def fetch_repository( + name, + remote_url, + local_dir, + skip_existing=False, + bare_clone=False, + lfs_clone=False, + no_prune=False, +): if bare_clone: if os.path.exists(local_dir): - clone_exists = subprocess.check_output(['git', - 'rev-parse', - '--is-bare-repository'], - cwd=local_dir) == b"true\n" + clone_exists = ( + subprocess.check_output( + ["git", "rev-parse", "--is-bare-repository"], cwd=local_dir + ) + == b"true\n" + ) else: clone_exists = False else: - clone_exists = os.path.exists(os.path.join(local_dir, '.git')) + clone_exists = os.path.exists(os.path.join(local_dir, ".git")) if clone_exists and skip_existing: return masked_remote_url = mask_password(remote_url) - initialized = subprocess.call('git ls-remote ' + remote_url, - stdout=FNULL, - stderr=FNULL, - shell=True) + initialized = subprocess.call( + "git ls-remote " + remote_url, stdout=FNULL, stderr=FNULL, shell=True + ) if initialized == 128: - log_info("Skipping {0} ({1}) since it's not initialized".format( - name, masked_remote_url)) + log_info( + "Skipping {0} ({1}) since it's not initialized".format( + name, masked_remote_url + ) + ) return if clone_exists: - log_info('Updating {0} in {1}'.format(name, local_dir)) + log_info("Updating {0} in {1}".format(name, local_dir)) - remotes = subprocess.check_output(['git', 'remote', 'show'], - cwd=local_dir) - remotes = [i.strip() for i in remotes.decode('utf-8').splitlines()] + remotes = subprocess.check_output(["git", "remote", "show"], cwd=local_dir) + remotes = [i.strip() for i in remotes.decode("utf-8").splitlines()] - if 'origin' not in remotes: - git_command = ['git', 'remote', 'rm', 'origin'] + if "origin" not in remotes: + git_command = ["git", "remote", "rm", "origin"] logging_subprocess(git_command, None, cwd=local_dir) - git_command = ['git', 'remote', 'add', 'origin', remote_url] + git_command = ["git", "remote", "add", "origin", remote_url] logging_subprocess(git_command, None, cwd=local_dir) else: - git_command = ['git', 'remote', 'set-url', 'origin', remote_url] + git_command = ["git", "remote", "set-url", "origin", remote_url] logging_subprocess(git_command, None, cwd=local_dir) if lfs_clone: - git_command = ['git', 'lfs', 'fetch', '--all', '--prune'] + git_command = ["git", "lfs", "fetch", "--all", "--prune"] else: - git_command = ['git', 'fetch', '--all', '--force', '--tags', '--prune'] + git_command = ["git", "fetch", "--all", "--force", "--tags", "--prune"] if no_prune: git_command.pop() logging_subprocess(git_command, None, cwd=local_dir) else: - log_info('Cloning {0} repository from {1} to {2}'.format( - name, - masked_remote_url, - local_dir)) + log_info( + "Cloning {0} repository from {1} to {2}".format( + name, masked_remote_url, local_dir + ) + ) if bare_clone: - git_command = ['git', 'clone', '--mirror', remote_url, local_dir] + git_command = ["git", "clone", "--mirror", remote_url, local_dir] logging_subprocess(git_command, None) if lfs_clone: - git_command = ['git', 'lfs', 'fetch', '--all', '--prune'] + git_command = ["git", "lfs", "fetch", "--all", "--prune"] if no_prune: git_command.pop() logging_subprocess(git_command, None, cwd=local_dir) 
else: if lfs_clone: - git_command = ['git', 'lfs', 'clone', remote_url, local_dir] + git_command = ["git", "lfs", "clone", remote_url, local_dir] else: - git_command = ['git', 'clone', remote_url, local_dir] + git_command = ["git", "clone", remote_url, local_dir] logging_subprocess(git_command, None) def backup_account(args, output_directory): - account_cwd = os.path.join(output_directory, 'account') + account_cwd = os.path.join(output_directory, "account") if args.include_starred or args.include_everything: output_file = "{0}/starred.json".format(account_cwd) - template = "https://{0}/users/{1}/starred".format(get_github_api_host(args), args.user) - _backup_data(args, - "starred repositories", - template, - output_file, - account_cwd) + template = "https://{0}/users/{1}/starred".format( + get_github_api_host(args), args.user + ) + _backup_data(args, "starred repositories", template, output_file, account_cwd) if args.include_watched or args.include_everything: output_file = "{0}/watched.json".format(account_cwd) - template = "https://{0}/users/{1}/subscriptions".format(get_github_api_host(args), args.user) - _backup_data(args, - "watched repositories", - template, - output_file, - account_cwd) + template = "https://{0}/users/{1}/subscriptions".format( + get_github_api_host(args), args.user + ) + _backup_data(args, "watched repositories", template, output_file, account_cwd) if args.include_followers or args.include_everything: output_file = "{0}/followers.json".format(account_cwd) - template = "https://{0}/users/{1}/followers".format(get_github_api_host(args), args.user) - _backup_data(args, - "followers", - template, - output_file, - account_cwd) + template = "https://{0}/users/{1}/followers".format( + get_github_api_host(args), args.user + ) + _backup_data(args, "followers", template, output_file, account_cwd) if args.include_following or args.include_everything: output_file = "{0}/following.json".format(account_cwd) - template = "https://{0}/users/{1}/following".format(get_github_api_host(args), args.user) - _backup_data(args, - "following", - template, - output_file, - account_cwd) + template = "https://{0}/users/{1}/following".format( + get_github_api_host(args), args.user + ) + _backup_data(args, "following", template, output_file, account_cwd) def _backup_data(args, name, template, output_file, output_directory): skip_existing = args.skip_existing if not skip_existing or not os.path.exists(output_file): - log_info('Retrieving {0} {1}'.format(args.user, name)) + log_info("Retrieving {0} {1}".format(args.user, name)) mkdir_p(output_directory) data = retrieve_data(args, template) - log_info('Writing {0} {1} to disk'.format(len(data), name)) - with codecs.open(output_file, 'w', encoding='utf-8') as f: + log_info("Writing {0} {1} to disk".format(len(data), name)) + with codecs.open(output_file, "w", encoding="utf-8") as f: json_dump(data, f) def json_dump(data, output_file): - json.dump(data, - output_file, - ensure_ascii=False, - sort_keys=True, - indent=4, - separators=(',', ': ')) + json.dump( + data, + output_file, + ensure_ascii=False, + sort_keys=True, + indent=4, + separators=(",", ": "), + ) diff --git a/setup.py b/setup.py index 3b4df41..898e4fb 100644 --- a/setup.py +++ b/setup.py @@ -5,6 +5,7 @@ from github_backup import __version__ try: from setuptools import setup + setup # workaround for pyflakes issue #13 except ImportError: from distutils.core import setup @@ -15,6 +16,7 @@ except ImportError: # http://www.eby-sarna.com/pipermail/peak/2010-May/003357.html) try: import 
multiprocessing + multiprocessing except ImportError: pass @@ -25,26 +27,26 @@ def open_file(fname): setup( - name='github-backup', + name="github-backup", version=__version__, - author='Jose Diaz-Gonzalez', - author_email='github-backup@josediazgonzalez.com', - packages=['github_backup'], - scripts=['bin/github-backup'], - url='http://github.com/josegonzalez/python-github-backup', - license='MIT', + author="Jose Diaz-Gonzalez", + author_email="github-backup@josediazgonzalez.com", + packages=["github_backup"], + scripts=["bin/github-backup"], + url="http://github.com/josegonzalez/python-github-backup", + license="MIT", classifiers=[ - 'Development Status :: 5 - Production/Stable', - 'Topic :: System :: Archiving :: Backup', - 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', + "Development Status :: 5 - Production/Stable", + "Topic :: System :: Archiving :: Backup", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", ], - description='backup a github user or organization', - long_description=open_file('README.rst').read(), - long_description_content_type='text/x-rst', - install_requires=open_file('requirements.txt').readlines(), + description="backup a github user or organization", + long_description=open_file("README.rst").read(), + long_description_content_type="text/x-rst", + install_requires=open_file("requirements.txt").readlines(), zip_safe=True, )
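
Note (not part of the patch): this commit is formatting-only. Below is a minimal sketch of how the result could be verified locally, assuming black is installed and the script is run from the repository root; the exact black version and configuration used for this commit are not stated in the patch, so the paths and flags are assumptions, and the helper itself is hypothetical.

    # verify_black.py -- hypothetical helper, not included in this patch.
    # Runs black in --check mode so a formatting-only commit can be verified
    # without rewriting the working tree; drop "--check" to apply formatting.
    import subprocess
    import sys

    def black_is_clean(paths=("github_backup", "setup.py")):
        result = subprocess.run(
            ["black", "--check", "--diff", *paths],
            capture_output=True,
            text=True,
        )
        # black exits 0 when nothing would change, 1 when files would be reformatted.
        if result.returncode != 0:
            print(result.stdout, file=sys.stderr)
        return result.returncode == 0

    if __name__ == "__main__":
        sys.exit(0 if black_is_clean() else 1)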