fix: refactor logging

Based on #195
ZhymabekRoman committed 2023-06-25 10:38:31 +06:00
parent fe07d5ad09
commit a06c3e9fd3
2 changed files with 55 additions and 81 deletions
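In short: the module's custom log_info / log_warning helpers (which accepted either a string or a list of strings and forwarded each line to the root logger) are removed, and every call site now uses a single module-level logger. A minimal sketch of the pattern the diff moves to, not copied verbatim from the file:

    import logging

    # One module-level logger, as added by this commit.
    logger = logging.getLogger(__name__)

    # Call sites use the standard logging API directly instead of the
    # removed log_info()/log_warning() wrappers.
    logger.info("Retrieving repositories")
    logger.warning("Incomplete read error detected")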


@@ -7,7 +7,6 @@ import argparse
import base64
import calendar
import codecs
-import datetime
import errno
import getpass
import json
@@ -37,32 +36,7 @@ except ImportError:
VERSION = "unknown"
FNULL = open(os.devnull, "w")
-def _get_log_date():
-    return datetime.datetime.isoformat(datetime.datetime.now())
-def log_info(message):
-    """
-    Log message (str) or messages (List[str]) to stdout
-    """
-    if type(message) == str:
-        message = [message]
-    for msg in message:
-        logging.info(msg)
-def log_warning(message):
-    """
-    Log message (str) or messages (List[str]) to stderr
-    """
-    if type(message) == str:
-        message = [message]
-    for msg in message:
-        logging.warning(msg)
+logger = logging.getLogger(__name__)
def logging_subprocess(
@@ -81,7 +55,7 @@ def logging_subprocess(
popenargs, stdout=subprocess.PIPE, stderr=subprocess.PIPE, **kwargs
)
if sys.platform == "win32":
-log_info(
+logger.info(
"Windows operating system detected - no subprocess logging will be returned"
)
@@ -558,13 +532,13 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False):
try:
response = json.loads(r.read().decode("utf-8"))
except IncompleteRead:
-log_warning("Incomplete read error detected")
+logger.warning("Incomplete read error detected")
read_error = True
except json.decoder.JSONDecodeError:
-log_warning("JSON decode error detected")
+logger.warning("JSON decode error detected")
read_error = True
except TimeoutError:
-log_warning("Tiemout error detected")
+logger.warning("Tiemout error detected")
read_error = True
else:
read_error = False
@@ -572,7 +546,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False):
# be gentle with API request limit and throttle requests if remaining requests getting low
limit_remaining = int(r.headers.get("x-ratelimit-remaining", 0))
if args.throttle_limit and limit_remaining <= args.throttle_limit:
-log_info(
+logger.info(
"API request limit hit: {} requests left, pausing further requests for {}s".format(
limit_remaining, args.throttle_pause
)
@@ -581,7 +555,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False):
retries = 0
while retries < 3 and (status_code == 502 or read_error):
-log_warning("API request failed. Retrying in 5 seconds")
+logger.warning("API request failed. Retrying in 5 seconds")
retries += 1
time.sleep(5)
request = _construct_request(
@@ -594,13 +568,13 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False):
response = json.loads(r.read().decode("utf-8"))
read_error = False
except IncompleteRead:
-log_warning("Incomplete read error detected")
+logger.warning("Incomplete read error detected")
read_error = True
except json.decoder.JSONDecodeError:
-log_warning("JSON decode error detected")
+logger.warning("JSON decode error detected")
read_error = True
except TimeoutError:
-log_warning("Tiemout error detected")
+logger.warning("Tiemout error detected")
read_error = True
if status_code != 200:
@@ -652,12 +626,12 @@ def _get_response(request, auth, template):
errors, should_continue = _request_http_error(exc, auth, errors) # noqa
r = exc
except URLError as e:
-log_warning(e.reason)
+logger.warning(e.reason)
should_continue, retry_timeout = _request_url_error(template, retry_timeout)
if not should_continue:
raise
except socket.error as e:
-log_warning(e.strerror)
+logger.warning(e.strerror)
should_continue, retry_timeout = _request_url_error(template, retry_timeout)
if not should_continue:
raise
@@ -686,7 +660,7 @@ def _construct_request(per_page, page, query_args, template, auth, as_app=None):
request.add_header(
"Accept", "application/vnd.github.machine-man-preview+json"
)
-log_info("Requesting {}?{}".format(template, querystring))
+logger.info("Requesting {}?{}".format(template, querystring))
return request
@@ -710,14 +684,14 @@ def _request_http_error(exc, auth, errors):
delta = max(10, reset - gm_now)
limit = headers.get("x-ratelimit-limit")
-log_warning(
+logger.warning(
"Exceeded rate limit of {} requests; waiting {} seconds to reset".format(
limit, delta
)
) # noqa
if auth is None:
-log_info("Hint: Authenticate to raise your GitHub rate limit")
+logger.info("Hint: Authenticate to raise your GitHub rate limit")
time.sleep(delta)
should_continue = True
@@ -727,7 +701,7 @@ def _request_http_error(exc, auth, errors):
def _request_url_error(template, retry_timeout):
# In case of a connection timing out, we can retry a few time
# But we won't crash and not back-up the rest now
-log_info("{} timed out".format(template))
+logger.info("{} timed out".format(template))
retry_timeout -= 1
if retry_timeout >= 0:
@@ -774,20 +748,20 @@ def download_file(url, path, auth):
f.write(chunk)
except HTTPError as exc:
# Gracefully handle 404 responses (and others) when downloading from S3
-log_warning(
+logger.warning(
"Skipping download of asset {0} due to HTTPError: {1}".format(
url, exc.reason
)
)
except URLError as e:
# Gracefully handle other URL errors
-log_warning(
+logger.warning(
"Skipping download of asset {0} due to URLError: {1}".format(url, e.reason)
)
except socket.error as e:
# Gracefully handle socket errors
# TODO: Implement retry logic
-log_warning(
+logger.warning(
"Skipping download of asset {0} due to socker error: {1}".format(
url, e.strerror
)
@@ -809,14 +783,14 @@ def check_git_lfs_install():
def retrieve_repositories(args, authenticated_user):
-log_info("Retrieving repositories")
+logger.info("Retrieving repositories")
single_request = False
if args.user == authenticated_user["login"]:
# we must use the /user/repos API to be able to access private repos
template = "https://{0}/user/repos".format(get_github_api_host(args))
else:
if args.private and not args.organization:
-log_warning(
+logger.warning(
"Authenticated user is different from user being backed up, thus private repositories cannot be accessed"
)
template = "https://{0}/users/{1}/repos".format(
@@ -872,7 +846,7 @@ def retrieve_repositories(args, authenticated_user):
def filter_repositories(args, unfiltered_repositories):
-log_info("Filtering repositories")
+logger.info("Filtering repositories")
repositories = []
for r in unfiltered_repositories:
@@ -911,7 +885,7 @@ def filter_repositories(args, unfiltered_repositories):
def backup_repositories(args, output_directory, repositories):
-log_info("Backing up repositories")
+logger.info("Backing up repositories")
repos_template = "https://{0}/repos".format(get_github_api_host(args))
if args.incremental:
@@ -1023,7 +997,7 @@ def backup_issues(args, repo_cwd, repository, repos_template):
if args.skip_existing and has_issues_dir:
return
-log_info("Retrieving {0} issues".format(repository["full_name"]))
+logger.info("Retrieving {0} issues".format(repository["full_name"]))
issue_cwd = os.path.join(repo_cwd, "issues")
mkdir_p(repo_cwd, issue_cwd)
@@ -1052,7 +1026,7 @@ def backup_issues(args, repo_cwd, repository, repos_template):
if issues_skipped:
issues_skipped_message = " (skipped {0} pull requests)".format(issues_skipped)
-log_info(
+logger.info(
"Saving {0} issues to disk{1}".format(
len(list(issues.keys())), issues_skipped_message
)
@@ -1077,7 +1051,7 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
if args.skip_existing and has_pulls_dir:
return
-log_info("Retrieving {0} pull requests".format(repository["full_name"])) # noqa
+logger.info("Retrieving {0} pull requests".format(repository["full_name"])) # noqa
pulls_cwd = os.path.join(repo_cwd, "pulls")
mkdir_p(repo_cwd, pulls_cwd)
@@ -1113,7 +1087,7 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
single_request=True,
)[0]
-log_info("Saving {0} pull requests to disk".format(len(list(pulls.keys()))))
+logger.info("Saving {0} pull requests to disk".format(len(list(pulls.keys()))))
# Comments from pulls API are only _review_ comments
# regular comments need to be fetched via issue API.
# For backwards compatibility with versions <= 0.41.0
@@ -1141,7 +1115,7 @@ def backup_milestones(args, repo_cwd, repository, repos_template):
if args.skip_existing and os.path.isdir(milestone_cwd):
return
-log_info("Retrieving {0} milestones".format(repository["full_name"]))
+logger.info("Retrieving {0} milestones".format(repository["full_name"]))
mkdir_p(repo_cwd, milestone_cwd)
template = "{0}/{1}/milestones".format(repos_template, repository["full_name"])
@@ -1154,7 +1128,7 @@ def backup_milestones(args, repo_cwd, repository, repos_template):
for milestone in _milestones:
milestones[milestone["number"]] = milestone
-log_info("Saving {0} milestones to disk".format(len(list(milestones.keys()))))
+logger.info("Saving {0} milestones to disk".format(len(list(milestones.keys()))))
for number, milestone in list(milestones.items()):
milestone_file = "{0}/{1}.json".format(milestone_cwd, number)
with codecs.open(milestone_file, "w", encoding="utf-8") as f:
@@ -1171,7 +1145,7 @@ def backup_labels(args, repo_cwd, repository, repos_template):
def backup_hooks(args, repo_cwd, repository, repos_template):
auth = get_auth(args)
if not auth:
-log_info("Skipping hooks since no authentication provided")
+logger.info("Skipping hooks since no authentication provided")
return
hook_cwd = os.path.join(repo_cwd, "hooks")
output_file = "{0}/hooks.json".format(hook_cwd)
@@ -1179,7 +1153,7 @@ def backup_hooks(args, repo_cwd, repository, repos_template):
try:
_backup_data(args, "hooks", template, output_file, hook_cwd)
except SystemExit:
-log_info("Unable to read hooks, skipping")
+logger.info("Unable to read hooks, skipping")
def backup_releases(args, repo_cwd, repository, repos_template, include_assets=False):
@@ -1187,7 +1161,7 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F
# give release files somewhere to live & log intent
release_cwd = os.path.join(repo_cwd, "releases")
-log_info("Retrieving {0} releases".format(repository_fullname))
+logger.info("Retrieving {0} releases".format(repository_fullname))
mkdir_p(repo_cwd, release_cwd)
query_args = {}
@@ -1196,7 +1170,7 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F
releases = retrieve_data(args, release_template, query_args=query_args)
# for each release, store it
-log_info("Saving {0} releases to disk".format(len(releases)))
+logger.info("Saving {0} releases to disk".format(len(releases)))
for release in releases:
release_name = release["tag_name"]
release_name_safe = release_name.replace("/", "__")
@@ -1251,7 +1225,7 @@ def fetch_repository(
"git ls-remote " + remote_url, stdout=FNULL, stderr=FNULL, shell=True
)
if initialized == 128:
-log_info(
+logger.info(
"Skipping {0} ({1}) since it's not initialized".format(
name, masked_remote_url
)
@@ -1259,7 +1233,7 @@ def fetch_repository(
return
if clone_exists:
-log_info("Updating {0} in {1}".format(name, local_dir))
+logger.info("Updating {0} in {1}".format(name, local_dir))
remotes = subprocess.check_output(["git", "remote", "show"], cwd=local_dir)
remotes = [i.strip() for i in remotes.decode("utf-8").splitlines()]
@@ -1281,7 +1255,7 @@ def fetch_repository(
git_command.pop()
logging_subprocess(git_command, None, cwd=local_dir)
else:
-log_info(
+logger.info(
"Cloning {0} repository from {1} to {2}".format(
name, masked_remote_url, local_dir
)
@@ -1337,11 +1311,11 @@ def backup_account(args, output_directory):
def _backup_data(args, name, template, output_file, output_directory):
skip_existing = args.skip_existing
if not skip_existing or not os.path.exists(output_file):
-log_info("Retrieving {0} {1}".format(args.user, name))
+logger.info("Retrieving {0} {1}".format(args.user, name))
mkdir_p(output_directory)
data = retrieve_data(args, template)
-log_info("Writing {0} {1} to disk".format(len(data), name))
+logger.info("Writing {0} {1} to disk".format(len(data), name))
with codecs.open(output_file, "w", encoding="utf-8") as f:
json_dump(data, f)
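For context, and not part of the diff shown: a logger obtained via logging.getLogger(__name__) only emits what the application's handler configuration allows, so the info-level progress messages above rely on the entry point setting logging up. A hypothetical sketch of such a setup, none of it taken from this commit:

    import logging

    # Hypothetical consumer-side configuration (not in this commit):
    # attach a stderr handler to the root logger so the library's
    # logger.info(...) calls become visible, with timestamps.
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
    )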