Reduce unnecessary pull request API calls with incremental fetching

This commit is contained in:
Duncan Ogilvie
2026-04-26 15:15:22 +02:00
parent 02e833e40a
commit 6cd0ab3633
4 changed files with 104 additions and 11 deletions

View File

@@ -717,11 +717,12 @@ def calculate_retry_delay(attempt, headers):
return delay + random.uniform(0, delay * 0.1)
def retrieve_data(args, template, query_args=None, paginated=True):
def retrieve_data(args, template, query_args=None, paginated=True, lazy=False):
"""
Fetch the data from GitHub API.
Handle both single requests and pagination with yield of individual dicts.
Handle both single requests and pagination. Returns a list by default, or
a generator when lazy=True so callers can stop before fetching every page.
Handles throttling, retries, read errors, and DMCA takedowns.
"""
query_args = query_args or {}
@@ -851,6 +852,9 @@ def retrieve_data(args, template, query_args=None, paginated=True):
):
break # No more data
if lazy:
return fetch_all()
return list(fetch_all())
@@ -2656,16 +2660,18 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
pull_states = ["open", "closed"]
for pull_state in pull_states:
query_args["state"] = pull_state
_pulls = retrieve_data(args, _pulls_template, query_args=query_args)
for pull in _pulls:
for pull in retrieve_data(
args, _pulls_template, query_args=query_args, lazy=True
):
track_newest_pull_update(pull)
if pulls_since and pull["updated_at"] < pulls_since:
break
if not pulls_since or pull["updated_at"] >= pulls_since:
pulls[pull["number"]] = pull
else:
_pulls = retrieve_data(args, _pulls_template, query_args=query_args)
for pull in _pulls:
for pull in retrieve_data(
args, _pulls_template, query_args=query_args, lazy=True
):
track_newest_pull_update(pull)
if pulls_since and pull["updated_at"] < pulls_since:
break