From a06c3e9fd35fc3c32f760ade7cac0ad708582f01 Mon Sep 17 00:00:00 2001
From: ZhymabekRoman
Date: Sun, 25 Jun 2023 10:38:31 +0600
Subject: [PATCH] fix: refactor logging

Based on #195
---
 bin/github-backup              |  34 +++++------
 github_backup/github_backup.py | 102 ++++++++++++----------------------
 2 files changed, 55 insertions(+), 81 deletions(-)

diff --git a/bin/github-backup b/bin/github-backup
index 8d2698b..2fb3a9a 100755
--- a/bin/github-backup
+++ b/bin/github-backup
@@ -2,31 +2,31 @@
 
 import os, sys, logging
 
-from github_backup.github_backup import (
-    backup_account,
-    backup_repositories,
-    check_git_lfs_install,
-    filter_repositories,
-    get_authenticated_user,
-    log_info,
-    log_warning,
-    mkdir_p,
-    parse_args,
-    retrieve_repositories,
-)
-
 logging.basicConfig(
     format='%(asctime)s.%(msecs)03d: %(message)s',
     datefmt='%Y-%m-%dT%H:%M:%S',
     level=logging.INFO
 )
 
+from github_backup.github_backup import (
+    backup_account,
+    backup_repositories,
+    check_git_lfs_install,
+    filter_repositories,
+    get_authenticated_user,
+    logger,
+    mkdir_p,
+    parse_args,
+    retrieve_repositories,
+)
+
 
 def main():
     args = parse_args()
     output_directory = os.path.realpath(args.output_directory)
     if not os.path.isdir(output_directory):
-        log_info('Create output directory {0}'.format(output_directory))
+        logger.info('Create output directory {0}'.format(output_directory))
         mkdir_p(output_directory)
 
     if args.lfs_clone:
@@ -35,10 +35,10 @@ def main():
     if args.log_level:
         log_level = logging.getLevelName(args.log_level.upper())
         if isinstance(log_level, int):
-            logging.root.setLevel(log_level)
+            logger.setLevel(log_level)
 
     if not args.as_app:
-        log_info('Backing up user {0} to {1}'.format(args.user, output_directory))
+        logger.info('Backing up user {0} to {1}'.format(args.user, output_directory))
         authenticated_user = get_authenticated_user(args)
     else:
         authenticated_user = {'login': None}
@@ -53,5 +53,5 @@ if __name__ == '__main__':
     try:
         main()
     except Exception as e:
-        log_warning(str(e))
+        logger.error(str(e))
         sys.exit(1)
diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py
index b9ff345..25202d3 100644
--- a/github_backup/github_backup.py
+++ b/github_backup/github_backup.py
@@ -7,7 +7,6 @@ import argparse
 import base64
 import calendar
 import codecs
-import datetime
 import errno
 import getpass
 import json
@@ -37,32 +36,7 @@ except ImportError:
     VERSION = "unknown"
 
 FNULL = open(os.devnull, "w")
-
-
-def _get_log_date():
-    return datetime.datetime.isoformat(datetime.datetime.now())
-
-
-def log_info(message):
-    """
-    Log message (str) or messages (List[str]) to stdout
-    """
-    if type(message) == str:
-        message = [message]
-
-    for msg in message:
-        logging.info(msg)
-
-
-def log_warning(message):
-    """
-    Log message (str) or messages (List[str]) to stderr
-    """
-    if type(message) == str:
-        message = [message]
-
-    for msg in message:
-        logging.warning(msg)
+logger = logging.getLogger(__name__)
 
 
 def logging_subprocess(
@@ -81,7 +55,7 @@
         popenargs, stdout=subprocess.PIPE, stderr=subprocess.PIPE, **kwargs
     )
     if sys.platform == "win32":
-        log_info(
+        logger.info(
             "Windows operating system detected - no subprocess logging will be returned"
         )
@@ -558,13 +532,13 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False):
         try:
             response = json.loads(r.read().decode("utf-8"))
         except IncompleteRead:
-            log_warning("Incomplete read error detected")
+            logger.warning("Incomplete read error detected")
             read_error = True
         except json.decoder.JSONDecodeError:
-            log_warning("JSON decode error detected")
decode error detected") + logger.warning("JSON decode error detected") read_error = True except TimeoutError: - log_warning("Tiemout error detected") + logger.warning("Tiemout error detected") read_error = True else: read_error = False @@ -572,7 +546,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): # be gentle with API request limit and throttle requests if remaining requests getting low limit_remaining = int(r.headers.get("x-ratelimit-remaining", 0)) if args.throttle_limit and limit_remaining <= args.throttle_limit: - log_info( + logger.info( "API request limit hit: {} requests left, pausing further requests for {}s".format( limit_remaining, args.throttle_pause ) @@ -581,7 +555,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): retries = 0 while retries < 3 and (status_code == 502 or read_error): - log_warning("API request failed. Retrying in 5 seconds") + logger.warning("API request failed. Retrying in 5 seconds") retries += 1 time.sleep(5) request = _construct_request( @@ -594,13 +568,13 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): response = json.loads(r.read().decode("utf-8")) read_error = False except IncompleteRead: - log_warning("Incomplete read error detected") + logger.warning("Incomplete read error detected") read_error = True except json.decoder.JSONDecodeError: - log_warning("JSON decode error detected") + logger.warning("JSON decode error detected") read_error = True except TimeoutError: - log_warning("Tiemout error detected") + logger.warning("Tiemout error detected") read_error = True if status_code != 200: @@ -652,12 +626,12 @@ def _get_response(request, auth, template): errors, should_continue = _request_http_error(exc, auth, errors) # noqa r = exc except URLError as e: - log_warning(e.reason) + logger.warning(e.reason) should_continue, retry_timeout = _request_url_error(template, retry_timeout) if not should_continue: raise except socket.error as e: - log_warning(e.strerror) + logger.warning(e.strerror) should_continue, retry_timeout = _request_url_error(template, retry_timeout) if not should_continue: raise @@ -686,7 +660,7 @@ def _construct_request(per_page, page, query_args, template, auth, as_app=None): request.add_header( "Accept", "application/vnd.github.machine-man-preview+json" ) - log_info("Requesting {}?{}".format(template, querystring)) + logger.info("Requesting {}?{}".format(template, querystring)) return request @@ -710,14 +684,14 @@ def _request_http_error(exc, auth, errors): delta = max(10, reset - gm_now) limit = headers.get("x-ratelimit-limit") - log_warning( + logger.warning( "Exceeded rate limit of {} requests; waiting {} seconds to reset".format( limit, delta ) ) # noqa if auth is None: - log_info("Hint: Authenticate to raise your GitHub rate limit") + logger.info("Hint: Authenticate to raise your GitHub rate limit") time.sleep(delta) should_continue = True @@ -727,7 +701,7 @@ def _request_http_error(exc, auth, errors): def _request_url_error(template, retry_timeout): # In case of a connection timing out, we can retry a few time # But we won't crash and not back-up the rest now - log_info("{} timed out".format(template)) + logger.info("{} timed out".format(template)) retry_timeout -= 1 if retry_timeout >= 0: @@ -774,20 +748,20 @@ def download_file(url, path, auth): f.write(chunk) except HTTPError as exc: # Gracefully handle 404 responses (and others) when downloading from S3 - log_warning( + logger.warning( "Skipping download of asset {0} due to 
HTTPError: {1}".format( url, exc.reason ) ) except URLError as e: # Gracefully handle other URL errors - log_warning( + logger.warning( "Skipping download of asset {0} due to URLError: {1}".format(url, e.reason) ) except socket.error as e: # Gracefully handle socket errors # TODO: Implement retry logic - log_warning( + logger.warning( "Skipping download of asset {0} due to socker error: {1}".format( url, e.strerror ) @@ -809,14 +783,14 @@ def check_git_lfs_install(): def retrieve_repositories(args, authenticated_user): - log_info("Retrieving repositories") + logger.info("Retrieving repositories") single_request = False if args.user == authenticated_user["login"]: # we must use the /user/repos API to be able to access private repos template = "https://{0}/user/repos".format(get_github_api_host(args)) else: if args.private and not args.organization: - log_warning( + logger.warning( "Authenticated user is different from user being backed up, thus private repositories cannot be accessed" ) template = "https://{0}/users/{1}/repos".format( @@ -872,7 +846,7 @@ def retrieve_repositories(args, authenticated_user): def filter_repositories(args, unfiltered_repositories): - log_info("Filtering repositories") + logger.info("Filtering repositories") repositories = [] for r in unfiltered_repositories: @@ -911,7 +885,7 @@ def filter_repositories(args, unfiltered_repositories): def backup_repositories(args, output_directory, repositories): - log_info("Backing up repositories") + logger.info("Backing up repositories") repos_template = "https://{0}/repos".format(get_github_api_host(args)) if args.incremental: @@ -1023,7 +997,7 @@ def backup_issues(args, repo_cwd, repository, repos_template): if args.skip_existing and has_issues_dir: return - log_info("Retrieving {0} issues".format(repository["full_name"])) + logger.info("Retrieving {0} issues".format(repository["full_name"])) issue_cwd = os.path.join(repo_cwd, "issues") mkdir_p(repo_cwd, issue_cwd) @@ -1052,7 +1026,7 @@ def backup_issues(args, repo_cwd, repository, repos_template): if issues_skipped: issues_skipped_message = " (skipped {0} pull requests)".format(issues_skipped) - log_info( + logger.info( "Saving {0} issues to disk{1}".format( len(list(issues.keys())), issues_skipped_message ) @@ -1077,7 +1051,7 @@ def backup_pulls(args, repo_cwd, repository, repos_template): if args.skip_existing and has_pulls_dir: return - log_info("Retrieving {0} pull requests".format(repository["full_name"])) # noqa + logger.info("Retrieving {0} pull requests".format(repository["full_name"])) # noqa pulls_cwd = os.path.join(repo_cwd, "pulls") mkdir_p(repo_cwd, pulls_cwd) @@ -1113,7 +1087,7 @@ def backup_pulls(args, repo_cwd, repository, repos_template): single_request=True, )[0] - log_info("Saving {0} pull requests to disk".format(len(list(pulls.keys())))) + logger.info("Saving {0} pull requests to disk".format(len(list(pulls.keys())))) # Comments from pulls API are only _review_ comments # regular comments need to be fetched via issue API. 
     # For backwards compatibility with versions <= 0.41.0
@@ -1141,7 +1115,7 @@ def backup_milestones(args, repo_cwd, repository, repos_template):
     if args.skip_existing and os.path.isdir(milestone_cwd):
         return
 
-    log_info("Retrieving {0} milestones".format(repository["full_name"]))
+    logger.info("Retrieving {0} milestones".format(repository["full_name"]))
     mkdir_p(repo_cwd, milestone_cwd)
 
     template = "{0}/{1}/milestones".format(repos_template, repository["full_name"])
@@ -1154,7 +1128,7 @@
     for milestone in _milestones:
         milestones[milestone["number"]] = milestone
 
-    log_info("Saving {0} milestones to disk".format(len(list(milestones.keys()))))
+    logger.info("Saving {0} milestones to disk".format(len(list(milestones.keys()))))
     for number, milestone in list(milestones.items()):
         milestone_file = "{0}/{1}.json".format(milestone_cwd, number)
         with codecs.open(milestone_file, "w", encoding="utf-8") as f:
@@ -1171,7 +1145,7 @@ def backup_labels(args, repo_cwd, repository, repos_template):
 def backup_hooks(args, repo_cwd, repository, repos_template):
     auth = get_auth(args)
     if not auth:
-        log_info("Skipping hooks since no authentication provided")
+        logger.info("Skipping hooks since no authentication provided")
         return
     hook_cwd = os.path.join(repo_cwd, "hooks")
     output_file = "{0}/hooks.json".format(hook_cwd)
@@ -1179,7 +1153,7 @@
     try:
         _backup_data(args, "hooks", template, output_file, hook_cwd)
     except SystemExit:
-        log_info("Unable to read hooks, skipping")
+        logger.info("Unable to read hooks, skipping")
@@ -1187,7 +1161,7 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F
 
     # give release files somewhere to live & log intent
     release_cwd = os.path.join(repo_cwd, "releases")
-    log_info("Retrieving {0} releases".format(repository_fullname))
+    logger.info("Retrieving {0} releases".format(repository_fullname))
     mkdir_p(repo_cwd, release_cwd)
 
     query_args = {}
@@ -1196,7 +1170,7 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F
     releases = retrieve_data(args, release_template, query_args=query_args)
 
     # for each release, store it
-    log_info("Saving {0} releases to disk".format(len(releases)))
+    logger.info("Saving {0} releases to disk".format(len(releases)))
     for release in releases:
         release_name = release["tag_name"]
         release_name_safe = release_name.replace("/", "__")
@@ -1251,7 +1225,7 @@ def fetch_repository(
         "git ls-remote " + remote_url, stdout=FNULL, stderr=FNULL, shell=True
     )
    if initialized == 128:
-        log_info(
+        logger.info(
             "Skipping {0} ({1}) since it's not initialized".format(
                 name, masked_remote_url
             )
         )
@@ -1259,7 +1233,7 @@ def fetch_repository(
         return
 
     if clone_exists:
-        log_info("Updating {0} in {1}".format(name, local_dir))
+        logger.info("Updating {0} in {1}".format(name, local_dir))
         remotes = subprocess.check_output(["git", "remote", "show"], cwd=local_dir)
         remotes = [i.strip() for i in remotes.decode("utf-8").splitlines()]
@@ -1281,7 +1255,7 @@ def fetch_repository(
             git_command.pop()
         logging_subprocess(git_command, None, cwd=local_dir)
     else:
-        log_info(
+        logger.info(
             "Cloning {0} repository from {1} to {2}".format(
                 name, masked_remote_url, local_dir
             )
         )
@@ -1337,11 +1311,11 @@ def backup_account(args, output_directory):
 
 def _backup_data(args, name, template, output_file, output_directory):
     skip_existing = args.skip_existing
     if not skip_existing or not os.path.exists(output_file):
-        log_info("Retrieving {0} {1}".format(args.user, name))
+        logger.info("Retrieving {0} {1}".format(args.user, name))
         mkdir_p(output_directory)
         data = retrieve_data(args, template)
 
-        log_info("Writing {0} {1} to disk".format(len(data), name))
+        logger.info("Writing {0} {1} to disk".format(len(data), name))
         with codecs.open(output_file, "w", encoding="utf-8") as f:
             json_dump(data, f)
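
Usage note: with log_info/log_warning replaced by a module-level logger, applications
that embed github_backup as a library can tune its output through the standard logging
hierarchy instead of patching helper functions. A minimal sketch, assuming only the
module path github_backup.github_backup and the logger exported by this patch; the
file name backup.log is a placeholder:

    import logging
    from github_backup.github_backup import logger

    # Attach a handler so the library's INFO messages become visible
    # in a host application that never calls logging.basicConfig().
    handler = logging.FileHandler("backup.log")
    handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
    logger.addHandler(handler)

    # Raise or lower verbosity as needed; this affects only the
    # github_backup logger, not the root logger.
    logger.setLevel(logging.DEBUG)

Because logging.basicConfig() is invoked only in bin/github-backup, library consumers
get no configured handler by default and messages below WARNING are dropped; attaching
a handler as above (or calling logging.basicConfig in the host application) is what
makes the refactored logging output visible.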