@@ -11,12 +11,12 @@ import datetime
 import errno
 import getpass
 import json
+import logging
 import os
 import re
 import select
 import subprocess
 import sys
-import logging
 import time
 import platform
 from urllib.parse import urlparse
@@ -27,6 +27,7 @@ from urllib.request import urlopen
 from urllib.request import Request
 from urllib.request import HTTPRedirectHandler
 from urllib.request import build_opener
+from http.client import IncompleteRead

 try:
     from . import __version__
@@ -41,14 +42,6 @@ def _get_log_date():
     return datetime.datetime.isoformat(datetime.datetime.now())


-def log_error(message):
-    """
-    Log message (str) or messages (List[str]) to stderr and exit with status 1
-    """
-    log_warning(message)
-    sys.exit(1)
-
-
 def log_info(message):
     """
     Log message (str) or messages (List[str]) to stdout
@@ -57,7 +50,7 @@ def log_info(message):
         message = [message]

     for msg in message:
-        sys.stdout.write("{0}: {1}\n".format(_get_log_date(), msg))
+        logging.info(msg)


 def log_warning(message):
@@ -68,7 +61,7 @@ def log_warning(message):
         message = [message]

     for msg in message:
-        sys.stderr.write("{0}: {1}\n".format(_get_log_date(), msg))
+        logging.warning(msg)


 def logging_subprocess(popenargs,
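
Since log_info and log_warning now delegate to the logging module instead of writing to stdout/stderr directly, the embedding application decides where output goes. A minimal sketch, using only the standard library, of how a CLI caller can restore the old timestamped console format:

```python
import logging

# Route the module's logging.info/logging.warning calls to the console
# with a timestamp prefix, roughly matching the former _get_log_date() output.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s: %(message)s",
)

logging.info("backing up repositories")
# -> 2024-01-01 12:00:00,000: backing up repositories  (illustrative output)
```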
@@ -140,7 +133,7 @@ def mask_password(url, secret='*****'):
     return url.replace(parsed.password, secret)


-def parse_args():
+def parse_args(args=None):
     parser = argparse.ArgumentParser(description='Backup a github account')
     parser.add_argument('user',
                         metavar='USER',
@@ -246,6 +239,10 @@ def parse_args():
                         action='store_true',
                         dest='bare_clone',
                         help='clone bare repositories')
+    parser.add_argument('--no-prune',
+                        action='store_true',
+                        dest='no_prune',
+                        help='disable prune option for git fetch')
     parser.add_argument('--lfs',
                         action='store_true',
                         dest='lfs_clone',
@@ -331,7 +328,11 @@ def parse_args():
                         type=float,
                         default=30.0,
                         help='wait this amount of seconds when API request throttling is active (default: 30.0, requires --throttle-limit to be set)')
-    return parser.parse_args()
+    parser.add_argument('--exclude',
+                        dest='exclude',
+                        help='names of repositories to exclude',
+                        nargs="*")
+    return parser.parse_args(args)


 def get_auth(args, encode=True, for_git_cli=False):
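
The new args parameter makes the parser callable without touching sys.argv, so github-backup can be driven from tests or other Python code. A small usage sketch (the user name is made up; --no-prune and --exclude are the flags added above):

```python
# Parse an explicit argv list instead of sys.argv[1:].
opts = parse_args(['someuser', '--no-prune', '--exclude', 'scratch', 'sandbox'])

assert opts.no_prune is True
assert opts.exclude == ['scratch', 'sandbox']   # nargs="*" collects a list

# Calling parse_args() with no argument still falls back to sys.argv as before.
```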
@@ -339,10 +340,10 @@ def get_auth(args, encode=True, for_git_cli=False):
     if args.osx_keychain_item_name:
         if not args.osx_keychain_item_account:
-            log_error('You must specify both name and account fields for osx keychain password items')
+            raise Exception('You must specify both name and account fields for osx keychain password items')
         else:
             if platform.system() != 'Darwin':
-                log_error("Keychain arguments are only supported on Mac OSX")
+                raise Exception("Keychain arguments are only supported on Mac OSX")

             try:
                 with open(os.devnull, 'w') as devnull:
                     token = (subprocess.check_output([
@@ -353,9 +354,9 @@ def get_auth(args, encode=True, for_git_cli=False):
                     token = token.decode('utf-8')
                     auth = token + ':' + 'x-oauth-basic'
             except subprocess.SubprocessError:
-                log_error('No password item matching the provided name and account could be found in the osx keychain.')
+                raise Exception('No password item matching the provided name and account could be found in the osx keychain.')
     elif args.osx_keychain_item_account:
-        log_error('You must specify both name and account fields for osx keychain password items')
+        raise Exception('You must specify both name and account fields for osx keychain password items')
     elif args.token:
         _path_specifier = 'file://'
         if args.token.startswith(_path_specifier):
@@ -377,7 +378,7 @@ def get_auth(args, encode=True, for_git_cli=False):
         password = urlquote(args.password)
         auth = args.username + ':' + password
     elif args.password:
-        log_error('You must specify a username for basic auth')
+        raise Exception('You must specify a username for basic auth')

     if not auth:
         return None
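
Replacing log_error (which called sys.exit(1)) with raised exceptions is the core behavioural change here: misconfiguration no longer kills the interpreter, it surfaces as a catchable error. A hedged sketch of what that enables for a caller embedding github-backup as a library (opts stands in for a parsed argument namespace):

```python
try:
    auth = get_auth(opts)
except Exception as exc:
    # Previously this path printed to stderr and exited the whole process.
    print('github-backup auth setup failed: {0}'.format(exc))
    auth = None
```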
@@ -444,6 +445,21 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False):
         r, errors = _get_response(request, auth, template)

         status_code = int(r.getcode())
+        # Check if we got correct data
+        try:
+            response = json.loads(r.read().decode('utf-8'))
+        except IncompleteRead:
+            log_warning("Incomplete read error detected")
+            read_error = True
+        except json.decoder.JSONDecodeError:
+            log_warning("JSON decode error detected")
+            read_error = True
+        except TimeoutError:
+            log_warning("Timeout error detected")
+            read_error = True
+        else:
+            read_error = False
+
         # be gentle with API request limit and throttle requests if remaining requests getting low
         limit_remaining = int(r.headers.get('x-ratelimit-remaining', 0))
         if args.throttle_limit and limit_remaining <= args.throttle_limit:
@@ -454,21 +470,37 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False):
             time.sleep(args.throttle_pause)

         retries = 0
-        while retries < 3 and status_code == 502:
-            log_warning('API request returned HTTP 502: Bad Gateway. Retrying in 5 seconds')
+        while retries < 3 and (status_code == 502 or read_error):
+            log_warning('API request failed. Retrying in 5 seconds')
             retries += 1
             time.sleep(5)
             request = _construct_request(per_page, page, query_args, template, auth, as_app=args.as_app)  # noqa
             r, errors = _get_response(request, auth, template)

             status_code = int(r.getcode())
+            try:
+                response = json.loads(r.read().decode('utf-8'))
+                read_error = False
+            except IncompleteRead:
+                log_warning("Incomplete read error detected")
+                read_error = True
+            except json.decoder.JSONDecodeError:
+                log_warning("JSON decode error detected")
+                read_error = True
+            except TimeoutError:
+                log_warning("Timeout error detected")
+                read_error = True

         if status_code != 200:
             template = 'API request returned HTTP {0}: {1}'
             errors.append(template.format(status_code, r.reason))
-            log_error(errors)
+            raise Exception(', '.join(errors))
+
+        if read_error:
+            template = 'API request problem reading response for {0}'
+            errors.append(template.format(request))
+            raise Exception(', '.join(errors))

-        response = json.loads(r.read().decode('utf-8'))
         if len(errors) == 0:
             if type(response) == list:
                 for resp in response:
@@ -479,7 +511,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False):
                 yield response

         if len(errors) > 0:
-            log_error(errors)
+            raise Exception(', '.join(errors))

         if single_request:
             break
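
The retry loop now treats an unreadable or undecodable body the same as an HTTP 502, re-issuing the request up to three times. A self-contained sketch of that shape (do_request is a hypothetical stand-in for the _construct_request plus _get_response pair):

```python
import json
import time
from http.client import IncompleteRead


def fetch_json_with_retries(do_request, max_retries=3, pause=5):
    """Retry on HTTP 502 or on a body that cannot be read/decoded."""
    retries = 0
    while True:
        r = do_request()
        status_code = int(r.getcode())
        try:
            response = json.loads(r.read().decode('utf-8'))
            read_error = False
        except (IncompleteRead, json.decoder.JSONDecodeError, TimeoutError):
            response = None
            read_error = True
        if (status_code != 502 and not read_error) or retries >= max_retries:
            return status_code, response, read_error
        retries += 1
        time.sleep(pause)
```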
@@ -509,12 +541,12 @@ def _get_response(request, auth, template):
             r = exc
         except URLError as e:
             log_warning(e.reason)
-            should_continue = _request_url_error(template, retry_timeout)
+            should_continue, retry_timeout = _request_url_error(template, retry_timeout)
             if not should_continue:
                 raise
         except socket.error as e:
             log_warning(e.strerror)
-            should_continue = _request_url_error(template, retry_timeout)
+            should_continue, retry_timeout = _request_url_error(template, retry_timeout)
             if not should_continue:
                 raise
@@ -574,16 +606,15 @@ def _request_http_error(exc, auth, errors):


 def _request_url_error(template, retry_timeout):
-    # Incase of a connection timing out, we can retry a few time
+    # In case of a connection timing out, we can retry a few times
     # But we won't crash and not back-up the rest now
     log_info('{} timed out'.format(template))
     retry_timeout -= 1

     if retry_timeout >= 0:
-        return True
+        return True, retry_timeout

-    log_error('{} timed out to much, skipping!')
-    return False
+    raise Exception('{} timed out too much, skipping!')


 class S3HTTPRedirectHandler(HTTPRedirectHandler):
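
_request_url_error now returns a (should_continue, retry_timeout) tuple, so the decremented counter survives between calls; with the old single boolean return, the caller kept passing its original value and the retry cap never actually counted down. A sketch of the corrected call-site contract (make_request and template are illustrative placeholders):

```python
retry_timeout = 3
while True:
    try:
        r = make_request()
        break
    except OSError:
        # Unpack both values so the next failure sees the reduced budget;
        # on exhaustion _request_url_error now raises instead of returning False.
        should_continue, retry_timeout = _request_url_error(template, retry_timeout)
        if not should_continue:
            raise
```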
@@ -640,7 +671,7 @@ def get_authenticated_user(args):
 def check_git_lfs_install():
     exit_code = subprocess.call(['git', 'lfs', 'version'])
     if exit_code != 0:
-        log_error('The argument --lfs requires you to have Git LFS installed.\nYou can get it from https://git-lfs.github.com.')
+        raise Exception('The argument --lfs requires you to have Git LFS installed.\nYou can get it from https://git-lfs.github.com.')


 def retrieve_repositories(args, authenticated_user):
@@ -726,6 +757,8 @@ def filter_repositories(args, unfiltered_repositories):
         repositories = [r for r in repositories if name_regex.match(r['name'])]
     if args.skip_archived:
         repositories = [r for r in repositories if not r.get('archived')]
+    if args.exclude:
+        repositories = [r for r in repositories if r['name'] not in args.exclude]

     return repositories
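
The exclusion is an exact-name membership test against the list collected by --exclude, applied after the regex and archived filters. A quick illustration with abbreviated repository dicts:

```python
repositories = [{'name': 'app'}, {'name': 'scratch'}, {'name': 'docs'}]
exclude = ['scratch']

repositories = [r for r in repositories if r['name'] not in exclude]
# -> [{'name': 'app'}, {'name': 'docs'}]
```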
@@ -766,7 +799,8 @@ def backup_repositories(args, output_directory, repositories):
                              repo_dir,
                              skip_existing=args.skip_existing,
                              bare_clone=args.bare_clone,
-                             lfs_clone=args.lfs_clone)
+                             lfs_clone=args.lfs_clone,
+                             no_prune=args.no_prune)

         if repository.get('is_gist'):
             # dump gist information to a file as well
@@ -783,8 +817,9 @@ def backup_repositories(args, output_directory, repositories):
                              os.path.join(repo_cwd, 'wiki'),
                              skip_existing=args.skip_existing,
                              bare_clone=args.bare_clone,
-                             lfs_clone=args.lfs_clone)
+                             lfs_clone=args.lfs_clone,
+                             no_prune=args.no_prune
+                             )

         if args.include_issues or args.include_everything:
             backup_issues(args, repo_cwd, repository, repos_template)
@@ -878,6 +913,8 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
     pulls = {}
     _pulls_template = '{0}/{1}/pulls'.format(repos_template,
                                              repository['full_name'])
+    _issue_template = '{0}/{1}/issues'.format(repos_template,
+                                              repository['full_name'])
     query_args = {
         'filter': 'all',
         'state': 'all',
@@ -917,10 +954,17 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
     log_info('Saving {0} pull requests to disk'.format(
         len(list(pulls.keys()))))
+    # Comments from pulls API are only _review_ comments
+    # regular comments need to be fetched via issue API.
+    # For backwards compatibility with versions <= 0.41.0
+    # keep name "comment_data" for review comments
+    comments_regular_template = _issue_template + '/{0}/comments'
     comments_template = _pulls_template + '/{0}/comments'
     commits_template = _pulls_template + '/{0}/commits'

     for number, pull in list(pulls.items()):
         if args.include_pull_comments or args.include_everything:
+            template = comments_regular_template.format(number)
+            pulls[number]['comment_regular_data'] = retrieve_data(args, template)
             template = comments_template.format(number)
             pulls[number]['comment_data'] = retrieve_data(args, template)
         if args.include_pull_commits or args.include_everything:
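
In GitHub's API the two endpoints return different things: /issues/{n}/comments yields a pull request's conversation comments, while /pulls/{n}/comments yields only review comments anchored to the diff. A worked expansion of the two templates (the owner/repo name and the number are made up):

```python
repos_template = 'https://api.github.com/repos'
full_name = 'someowner/somerepo'

_issue_template = '{0}/{1}/issues'.format(repos_template, full_name)
_pulls_template = '{0}/{1}/pulls'.format(repos_template, full_name)

number = 17
print((_issue_template + '/{0}/comments').format(number))
# https://api.github.com/repos/someowner/somerepo/issues/17/comments
print((_pulls_template + '/{0}/comments').format(number))
# https://api.github.com/repos/someowner/somerepo/pulls/17/comments
```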
@@ -1009,7 +1053,8 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=False):
     log_info('Saving {0} releases to disk'.format(len(releases)))
     for release in releases:
         release_name = release['tag_name']
-        output_filepath = os.path.join(release_cwd, '{0}.json'.format(release_name))
+        release_name_safe = release_name.replace('/', '__')
+        output_filepath = os.path.join(release_cwd, '{0}.json'.format(release_name_safe))
         with codecs.open(output_filepath, 'w+', encoding='utf-8') as f:
             json_dump(release, f)
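
Git tag names may contain slashes (for example release branches tagged as releases/v1.2), and a slash inside the os.path.join argument would point the JSON file and the asset directory into a non-existent subdirectory. The sanitisation in one line (paths here are illustrative):

```python
import os

release_name = 'releases/v1.2'                       # a plausible slash-containing tag
release_name_safe = release_name.replace('/', '__')

os.path.join('/backup/releases', '{0}.json'.format(release_name_safe))
# -> '/backup/releases/releases__v1.2.json'  (one flat file, no surprise subdirectory)
```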
@@ -1017,7 +1062,7 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=False):
         assets = retrieve_data(args, release['assets_url'])
         if len(assets) > 0:
             # give release asset files somewhere to live & download them (not including source archives)
-            release_assets_cwd = os.path.join(release_cwd, release_name)
+            release_assets_cwd = os.path.join(release_cwd, release_name_safe)
             mkdir_p(release_assets_cwd)
             for asset in assets:
                 download_file(asset['url'], os.path.join(release_assets_cwd, asset['name']), get_auth(args))
@@ -1028,7 +1073,8 @@ def fetch_repository(name,
                      local_dir,
                      skip_existing=False,
                      bare_clone=False,
-                     lfs_clone=False):
+                     lfs_clone=False,
+                     no_prune=False):
     if bare_clone:
         if os.path.exists(local_dir):
             clone_exists = subprocess.check_output(['git',
@@ -1074,6 +1120,8 @@ def fetch_repository(name,
             git_command = ['git', 'lfs', 'fetch', '--all', '--prune']
         else:
             git_command = ['git', 'fetch', '--all', '--force', '--tags', '--prune']
+        if no_prune:
+            git_command.pop()
         logging_subprocess(git_command, None, cwd=local_dir)
     else:
         log_info('Cloning {0} repository from {1} to {2}'.format(
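
Rather than rebuilding the command, --no-prune is implemented by popping the last list element, which relies on '--prune' being the final item of every affected command:

```python
git_command = ['git', 'fetch', '--all', '--force', '--tags', '--prune']
no_prune = True
if no_prune:
    git_command.pop()   # removes the trailing '--prune'
# -> ['git', 'fetch', '--all', '--force', '--tags']
```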
@@ -1081,10 +1129,13 @@
             masked_remote_url,
             local_dir))
         if bare_clone:
             if lfs_clone:
                 git_command = ['git', 'lfs', 'clone', '--mirror', remote_url, local_dir]
             else:
                 git_command = ['git', 'clone', '--mirror', remote_url, local_dir]
             logging_subprocess(git_command, None)
+            if lfs_clone:
+                git_command = ['git', 'lfs', 'fetch', '--all', '--prune']
+                if no_prune:
+                    git_command.pop()
+                logging_subprocess(git_command, None, cwd=local_dir)
         else:
             if lfs_clone:
                 git_command = ['git', 'lfs', 'clone', remote_url, local_dir]