diff --git a/README.md b/README.md index 562ba60..4b71e51 100644 --- a/README.md +++ b/README.md @@ -190,6 +190,7 @@ session. When you no longer need to use a session you should make sure to close | returnScreenshot | Optional, default false. Captures a screenshot of the final rendered page after all challenges and waits are completed. The screenshot is returned as a Base64-encoded PNG string in the `screenshot` field of the response. | | proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. You must include the proxy schema in the URL: `http://`, `socks4://` or `socks5://`. Authorization (username/password) is not supported. (When the `session` parameter is set, the proxy is ignored; a session specific proxy can be set in `sessions.create`.) | | waitInSeconds | Optional, default none. Length to wait in seconds after solving the challenge, and before returning the results. Useful to allow it to load dynamic content. | +| disableMedia | Optional, defaults to the value of the `DISABLE_MEDIA` environment variable (false unless set). When true, FlareSolverr blocks images, CSS, and fonts from loading to speed up navigation. | > **Warning** > If you want to use Cloudflare clearance cookie in your scripts, make sure you use the FlareSolverr User-Agent too. If they don't match you will see the challenge. @@ -275,6 +276,7 @@ This is the same as `request.get` but it takes one more param: | TZ | UTC | Timezone used in the logs and the web browser. Example: `TZ=Europe/London`. | | LANG | none | Language used in the web browser. Example: `LANG=en_GB`. | | HEADLESS | true | Only for debugging. To run the web browser in headless mode or visible. | +| DISABLE_MEDIA | false | Disables loading of images, CSS, and fonts in the web browser to save network bandwidth. Can be overridden per request with the `disableMedia` parameter. | | TEST_URL | https://www.google.com | FlareSolverr makes a request on start to make sure the web browser is working. You can change that URL if it is blocked in your country. | | PORT | 8191 | Listening port. 
You don't need to change this if you are running on Docker. | | HOST | 0.0.0.0 | Listening interface. You don't need to change this if you are running on Docker. | diff --git a/src/dtos.py b/src/dtos.py index a051232..2a5a82c 100644 --- a/src/dtos.py +++ b/src/dtos.py @@ -46,6 +46,8 @@ class V1RequestBase(object): download: bool = None # deprecated v2.0.0, not used returnRawHtml: bool = None # deprecated v2.0.0, not used waitInSeconds: int = None + # Optional resource blocking flag (blocks images, CSS, and fonts) + disableMedia: bool = None def __init__(self, _dict): self.__dict__.update(_dict) diff --git a/src/flaresolverr_service.py b/src/flaresolverr_service.py index 0f4a510..82132c6 100644 --- a/src/flaresolverr_service.py +++ b/src/flaresolverr_service.py @@ -287,10 +287,36 @@ def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> Challenge res.status = STATUS_OK res.message = "" + # optionally block resources like images/css/fonts using CDP + disable_media = utils.get_config_disable_media() + if req.disableMedia is not None: + disable_media = req.disableMedia + if disable_media: + block_urls = [ + # Images + "*.png", "*.jpg", "*.jpeg", "*.gif", "*.webp", "*.bmp", "*.svg", "*.ico", + "*.PNG", "*.JPG", "*.JPEG", "*.GIF", "*.WEBP", "*.BMP", "*.SVG", "*.ICO", + "*.tiff", "*.tif", "*.jpe", "*.apng", "*.avif", "*.heic", "*.heif", + "*.TIFF", "*.TIF", "*.JPE", "*.APNG", "*.AVIF", "*.HEIC", "*.HEIF", + # Stylesheets + "*.css", + "*.CSS", + # Fonts + "*.woff", "*.woff2", "*.ttf", "*.otf", "*.eot", + "*.WOFF", "*.WOFF2", "*.TTF", "*.OTF", "*.EOT" + ] + try: + logging.debug("Network.setBlockedURLs: %s", block_urls) + driver.execute_cdp_cmd("Network.enable", {}) + driver.execute_cdp_cmd("Network.setBlockedURLs", {"urls": block_urls}) + except Exception: + # if CDP commands are not available or fail, ignore and continue + logging.debug("Network.setBlockedURLs failed or unsupported on this webdriver") # navigate to the page - logging.debug(f'Navigating 
to... {req.url}') - if method == 'POST': + logging.debug(f"Navigating to... {req.url}") + + if method == "POST": _post_request(req, driver) else: driver.get(req.url) diff --git a/src/tests.py b/src/tests.py index 497acbe..af49a68 100644 --- a/src/tests.py +++ b/src/tests.py @@ -92,6 +92,29 @@ class TestFlareSolverr(unittest.TestCase): self.assertGreater(len(solution.cookies), 0) self.assertIn("Chrome/", solution.userAgent) + def test_v1_endpoint_request_get_disable_resources(self): + res = self.app.post_json("/v1", { + "cmd": "request.get", + "url": self.google_url, + "disableMedia": True + }) + self.assertEqual(res.status_code, 200) + + body = V1ResponseBase(res.json) + self.assertEqual(STATUS_OK, body.status) + self.assertEqual("Challenge not detected!", body.message) + self.assertGreater(body.startTimestamp, 10000) + self.assertGreaterEqual(body.endTimestamp, body.startTimestamp) + self.assertEqual(utils.get_flaresolverr_version(), body.version) + + solution = body.solution + self.assertIn(self.google_url, solution.url) + self.assertEqual(solution.status, 200) + self.assertIs(len(solution.headers), 0) + self.assertIn("