Compare commits

...

61 Commits

Author SHA1 Message Date
ilike2burnthing
d21a332519 Hotfix 2 - bad Chromium build, instances failed to terminate (#1072) 2024-02-17 05:53:45 +00:00
ilike2burnthing
3ca6d08f41 Hotfix for Linux build - some Chrome files no longer exist (#1071) 2024-02-17 01:15:32 +00:00
ilike2burnthing
227bd7ac72 Update Chrome downloads (#1070) 2024-02-17 00:50:14 +00:00
ilike2burnthing
e6a08584c0 Update README.md
thanks @kimboslice99
2024-02-16 04:35:37 +00:00
ilike2burnthing
df06d13cf8 Update README.md 2024-01-12 23:38:18 +00:00
ilike2burnthing
993b8c41ac Fix too many open files error. resolves #983 (#1033) 2024-01-07 21:22:14 +00:00
ilike2burnthing
a4d42d7834 Remove unnecessary comment 2023-12-17 00:33:06 +00:00
21hsmw
1c855b8af0 Fix looping challenges and invalid cookies. resolves #1006 (#1010)
Co-authored-by: ilike2burnthing <59480337+ilike2burnthing@users.noreply.github.com>
2023-12-15 22:11:58 +00:00
ilike2burnthing
745c69491f Bump version 3.3.11 (#999) 2023-12-11 20:56:14 +00:00
txtsd
f7e316fd5a updates: UC 3.5.4 & Selenium 4.15.2 (#970)
Co-authored-by: GaspardRuan <1039553124@qq.com>
Co-authored-by: ilike2burnthing <59480337+ilike2burnthing@users.noreply.github.com>
2023-12-11 20:51:16 +00:00
ilike2burnthing
16c8ab5f3d Update README.md 2023-11-14 07:54:09 +00:00
ilike2burnthing
7af311b73c Bump version 3.3.10 (#969) 2023-11-14 04:04:42 +00:00
ilike2burnthing
daec97532d Update README.md 2023-11-14 04:00:01 +00:00
ilike2burnthing
8d7ed48f21 Add LANG ENV. resolves #951 2023-11-14 03:56:57 +00:00
ilike2burnthing
220f2599ae Bump version 3.3.9 (#963) 2023-11-13 07:17:28 +00:00
ilike2burnthing
d772cf3f50 Fix for Docker build, capture TypeError. Fixes #962 2023-11-13 07:14:13 +00:00
ilike2burnthing
ab4365894b Bump version 3.3.8 (#961) 2023-11-13 04:55:49 +00:00
ilike2burnthing
3fa9631559 Fix "OSError: [WinError 6] The handle is invalid" on exit 2023-11-13 04:28:19 +00:00
ilike2burnthing
04858c22fd Support running Chrome 119 from source (#960) 2023-11-13 04:23:06 +00:00
Nabi KaramAliZadeh
5085ca6990 Fix headless=true for Chrome 117+. Fixes #910 (#921) 2023-11-13 04:03:56 +00:00
ilike2burnthing
cd4df1e061 Bump version 3.3.7 (#944) 2023-11-05 14:41:12 +00:00
ilike2burnthing
6c79783f7c Bump version 3.3.6 (#905) 2023-09-15 20:40:56 +01:00
ilike2burnthing
4139e8d47c Update checkbox selector, again 2023-09-15 20:37:26 +01:00
zax2002
1942eb5fdc Typo in README (#901) 2023-09-14 07:41:59 +01:00
ilike2burnthing
401bf5be76 Bump version 3.3.5 (#902) 2023-09-13 10:28:02 +01:00
ilike2burnthing
d8ffdd3061 Change checkbox selector, support language other than English. resolves #891 2023-09-13 10:19:19 +01:00
ilike2burnthing
2d66590b08 Bump version 3.3.4 (#884) 2023-09-02 12:30:21 +01:00
Zachary Hampton
a217510dc7 Update checkbox selector (#882) 2023-09-02 12:24:25 +01:00
ilike2burnthing
553bd8ab4f Bump version 3.3.3 (#879) 2023-08-31 20:02:17 +01:00
ilike2burnthing
1b197c3e53 Update undetected_chromedriver to v3.5.3 (#860) 2023-08-31 19:56:06 +01:00
ngosang
fd308f01be Bump version 3.3.2 2023-08-03 10:00:16 +02:00
ngosang
b5eef32615 Fix URL domain in Prometheus exporter 2023-08-03 09:02:46 +02:00
ngosang
644a843d89 Bump version 3.3.1 2023-08-03 08:13:01 +02:00
ngosang
82e1c94c6f Fix HEADLESS=false in Windows binary 2023-08-03 08:10:14 +02:00
ngosang
fbc71516f5 Fix for Cloudflare verify checkbox 2023-08-03 07:28:58 +02:00
ngosang
40bd1cba4c Fix Prometheus exporter for management and health endpoints 2023-08-03 06:36:31 +02:00
ngosang
d1588c1156 Remove misleading stack trace when a button is not found 2023-08-03 05:45:38 +02:00
ngosang
b4ad583baa Revert "Update base Docker image to Debian Bookworm"
This reverts commit 0edc50e271.
2023-08-03 05:19:56 +02:00
ngosang
5d31e551cc Revert "Install Chromium 115 from Debian testing"
This reverts commit 2aa095ed5d.
2023-08-03 05:19:27 +02:00
ngosang
d92845f34f Bump version 3.3.0 2023-08-02 20:10:35 +02:00
ngosang
5d3b73ea9d Add more traces in build_package.py 2023-08-02 20:05:42 +02:00
ngosang
2aa095ed5d Install Chromium 115 from Debian testing 2023-08-02 19:30:39 +02:00
ngosang
687c8f75ae Update pyinstaller 5.13.0 2023-08-02 19:30:03 +02:00
ngosang
22ed3d324b Fix for new Cloudflare detection. Thanks @cedric-bour for #845 2023-08-02 19:29:44 +02:00
ngosang
5ba9ef03f3 Update Selenium 4.11.2 2023-08-02 19:23:08 +02:00
ngosang
d2e144ea12 Implement Prometheus metrics 2023-07-23 21:52:06 +02:00
ngosang
313fb2c14b Add support for proxy authentication username/password. Thanks @jacobprice808 2023-07-23 19:46:46 +02:00
ngosang
6d69f40b58 Update Chromium 115 in binary packages 2023-07-23 19:46:18 +02:00
ngosang
a1c36f60d2 Fix for Chrome / Chromium version > 114 2023-07-23 19:46:18 +02:00
ngosang
0edc50e271 Update base Docker image to Debian Bookworm 2023-07-23 19:46:18 +02:00
ngosang
f4a4baa57c Update Selenium 4.10.0 2023-07-23 19:46:18 +02:00
ngosang
f7e434c6e3 Simplify 'Verify you are human' resolver. Related #811 2023-07-23 19:46:15 +02:00
Maksim Kurnosenko
7728f2ab31 Update undetected_chromedriver to v3.5.0 (#803)
Co-authored-by: ilike2burnthing <59480337+ilike2burnthing@users.noreply.github.com>
2023-07-17 18:46:43 +01:00
Garfield69
c920bea4ca Update .gitignore 2023-07-17 13:50:11 +12:00
ilike2burnthing
a785f83034 Update CHANGELOG.md 2023-07-16 23:46:31 +01:00
ilike2burnthing
b42c22f5b1 Bump version 3.2.2 2023-07-16 23:46:25 +01:00
Tyler Hagstrom
9c62410a8b Workaround for updated 'verify your are human' check (#816)
Co-authored-by: ilike2burnthing <59480337+ilike2burnthing@users.noreply.github.com>
2023-07-16 23:42:41 +01:00
ngosang
b8768ae17d Bump version 3.2.1 2023-06-10 19:27:55 +02:00
ngosang
9b2c602a1f Kill dead Chrome processes in Windows 2023-06-10 19:26:28 +02:00
ngosang
8316350b98 Fix Chrome GL erros in ASUSTOR NAS 2023-06-10 18:49:09 +02:00
ilike2burnthing
33307ce461 bug_report: add troubleshooting check 2023-06-04 04:08:25 +01:00
18 changed files with 751 additions and 134 deletions

View File

@@ -8,6 +8,13 @@ body:
options: options:
- label: I have checked the README - label: I have checked the README
required: true required: true
- type: checkboxes
attributes:
label: Have you followed our Troubleshooting?
description: Please follow our <a href="https://github.com/FlareSolverr/FlareSolverr/wiki/Troubleshooting">Troubleshooting</a>.
options:
- label: I have followed your Troubleshooting
required: true
- type: checkboxes - type: checkboxes
attributes: attributes:
label: Is there already an issue for your problem? label: Is there already an issue for your problem?

View File

@@ -54,7 +54,7 @@ jobs:
- name: Build artifacts - name: Build artifacts
run: | run: |
python -m pip install -r requirements.txt python -m pip install -r requirements.txt
python -m pip install pyinstaller==5.9.0 python -m pip install pyinstaller==5.13.0
cd src cd src
python build_package.py python build_package.py
@@ -83,7 +83,7 @@ jobs:
- name: Build artifacts - name: Build artifacts
run: | run: |
python -m pip install -r requirements.txt python -m pip install -r requirements.txt
python -m pip install pyinstaller==5.9.0 python -m pip install pyinstaller==5.13.0
cd src cd src
python build_package.py python build_package.py

3
.gitignore vendored
View File

@@ -124,3 +124,6 @@ venv.bak/
.mypy_cache/ .mypy_cache/
.dmypy.json .dmypy.json
dmypy.json dmypy.json
# node
node_modules/

View File

@@ -1,5 +1,98 @@
# Changelog # Changelog
## v3.3.14-hotfix2 (2024/02/17)
* Hotfix 2 - bad Chromium build, instances failed to terminate
## v3.3.14-hotfix (2024/02/17)
* Hotfix for Linux build - some Chrome files no longer exist
## v3.3.14 (2024/02/17)
* Update Chrome downloads. Thanks @opemvbs
## v3.3.13 (2024/01/07)
* Fix too many open files error
## v3.3.12 (2023/12/15)
* Fix looping challenges and invalid cookies
## v3.3.11 (2023/12/11)
* Update UC 3.5.4 & Selenium 4.15.2. Thanks @txtsd
## v3.3.10 (2023/11/14)
* Add LANG ENV - resolves issues with YGGtorrent
## v3.3.9 (2023/11/13)
* Fix for Docker build, capture TypeError
## v3.3.8 (2023/11/13)
* Fix headless=true for Chrome 117+. Thanks @NabiKAZ
* Support running Chrome 119 from source. Thanks @koleg and @Chris7X
* Fix "OSError: [WinError 6] The handle is invalid" on exit. Thanks @enesgorkemgenc
## v3.3.7 (2023/11/05)
* Bump to rebuild. Thanks @JoachimDorchies
## v3.3.6 (2023/09/15)
* Update checkbox selector, again
## v3.3.5 (2023/09/13)
* Change checkbox selector, support languages other than English
## v3.3.4 (2023/09/02)
* Update checkbox selector
## v3.3.3 (2023/08/31)
* Update undetected_chromedriver to v3.5.3
## v3.3.2 (2023/08/03)
* Fix URL domain in Prometheus exporter
## v3.3.1 (2023/08/03)
* Fix for Cloudflare verify checkbox
* Fix HEADLESS=false in Windows binary
* Fix Prometheus exporter for management and health endpoints
* Remove misleading stack trace when the verify checkbox is not found
* Revert "Update base Docker image to Debian Bookworm" #849
* Revert "Install Chromium 115 from Debian testing" #849
## v3.3.0 (2023/08/02)
* Fix for new Cloudflare detection. Thanks @cedric-bour for #845
* Add support for proxy authentication username/password. Thanks @jacobprice808 for #807
* Implement Prometheus metrics
* Fix Chromium Driver for Chrome / Chromium version > 114
* Use Chromium 115 in binary packages (Windows and Linux)
* Install Chromium 115 from Debian testing (Docker)
* Update base Docker image to Debian Bookworm
* Update Selenium 4.11.2
* Update pyinstaller 5.13.0
* Add more traces in build_package.py
## v3.2.2 (2023/07/16)
* Workaround for updated 'verify you are human' check
## v3.2.1 (2023/06/10)
* Kill dead Chrome processes in Windows
* Fix Chrome GL errors in ASUSTOR NAS
## v3.2.0 (2023/05/23) ## v3.2.0 (2023/05/23)
* Support "proxy" param in requests and sessions * Support "proxy" param in requests and sessions

View File

@@ -54,6 +54,7 @@ COPY src .
COPY package.json ../ COPY package.json ../
EXPOSE 8191 EXPOSE 8191
EXPOSE 8192
# dumb-init avoids zombie chromium processes # dumb-init avoids zombie chromium processes
ENTRYPOINT ["/usr/bin/dumb-init", "--"] ENTRYPOINT ["/usr/bin/dumb-init", "--"]
@@ -61,17 +62,17 @@ ENTRYPOINT ["/usr/bin/dumb-init", "--"]
CMD ["/usr/local/bin/python", "-u", "/app/flaresolverr.py"] CMD ["/usr/local/bin/python", "-u", "/app/flaresolverr.py"]
# Local build # Local build
# docker build -t ngosang/flaresolverr:3.2.0 . # docker build -t ngosang/flaresolverr:3.3.14-hotfix2 .
# docker run -p 8191:8191 ngosang/flaresolverr:3.2.0 # docker run -p 8191:8191 ngosang/flaresolverr:3.3.14-hotfix2
# Multi-arch build # Multi-arch build
# docker run --rm --privileged multiarch/qemu-user-static --reset -p yes # docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
# docker buildx create --use # docker buildx create --use
# docker buildx build -t ngosang/flaresolverr:3.2.0 --platform linux/386,linux/amd64,linux/arm/v7,linux/arm64/v8 . # docker buildx build -t ngosang/flaresolverr:3.3.14-hotfix2 --platform linux/386,linux/amd64,linux/arm/v7,linux/arm64/v8 .
# add --push to publish in DockerHub # add --push to publish in DockerHub
# Test multi-arch build # Test multi-arch build
# docker run --rm --privileged multiarch/qemu-user-static --reset -p yes # docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
# docker buildx create --use # docker buildx create --use
# docker buildx build -t ngosang/flaresolverr:3.2.0 --platform linux/arm/v7 --load . # docker buildx build -t ngosang/flaresolverr:3.3.14-hotfix2 --platform linux/arm/v7 --load .
# docker run -p 8191:8191 --platform linux/arm/v7 ngosang/flaresolverr:3.2.0 # docker run -p 8191:8191 --platform linux/arm/v7 ngosang/flaresolverr:3.3.14-hotfix2

View File

@@ -89,7 +89,7 @@ We provide an example Systemd unit file `flaresolverr.service` as reference. You
## Usage ## Usage
Example request: Example Bash request:
```bash ```bash
curl -L -X POST 'http://localhost:8191/v1' \ curl -L -X POST 'http://localhost:8191/v1' \
-H 'Content-Type: application/json' \ -H 'Content-Type: application/json' \
@@ -100,6 +100,32 @@ curl -L -X POST 'http://localhost:8191/v1' \
}' }'
``` ```
Example Python request:
```py
import requests
url = "http://localhost:8191/v1"
headers = {"Content-Type": "application/json"}
data = {
"cmd": "request.get",
"url": "http://www.google.com/",
"maxTimeout": 60000
}
response = requests.post(url, headers=headers, json=data)
print(response.text)
```
Example PowerShell request:
```ps1
$body = @{
cmd = "request.get"
url = "http://www.google.com/"
maxTimeout = 60000
} | ConvertTo-Json
irm -UseBasicParsing 'http://localhost:8191/v1' -Headers @{"Content-Type"="application/json"} -Method Post -Body $body
```
### Commands ### Commands
#### + `sessions.create` #### + `sessions.create`
@@ -110,10 +136,10 @@ cookies for the browser to use.
This also speeds up the requests since it won't have to launch a new browser instance for every request. This also speeds up the requests since it won't have to launch a new browser instance for every request.
| Parameter | Notes | | Parameter | Notes |
|-----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| |-----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| session | Optional. The session ID that you want to be assigned to the instance. If it isn't set, a random UUID will be assigned. | | session | Optional. The session ID that you want to be assigned to the instance. If it isn't set, a random UUID will be assigned. |
| proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. You must include the proxy schema in the URL: `http://`, `socks4://` or `socks5://`. Authorization (username/password) is not supported. | | proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. You must include the proxy schema in the URL: `http://`, `socks4://` or `socks5://`. Authorization (username/password) is supported. Eg: `"proxy": {"url": "http://127.0.0.1:8888", "username": "testuser", "password": "testpass"}` |
#### + `sessions.list` #### + `sessions.list`
@@ -226,23 +252,52 @@ This is the same as `request.get` but it takes one more param:
## Environment variables ## Environment variables
| Name | Default | Notes | | Name | Default | Notes |
|-----------------|------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------| |--------------------|------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------|
| LOG_LEVEL | info | Verbosity of the logging. Use `LOG_LEVEL=debug` for more information. | | LOG_LEVEL | info | Verbosity of the logging. Use `LOG_LEVEL=debug` for more information. |
| LOG_HTML | false | Only for debugging. If `true` all HTML that passes through the proxy will be logged to the console in `debug` level. | | LOG_HTML | false | Only for debugging. If `true` all HTML that passes through the proxy will be logged to the console in `debug` level. |
| CAPTCHA_SOLVER | none | Captcha solving method. It is used when a captcha is encountered. See the Captcha Solvers section. | | CAPTCHA_SOLVER | none | Captcha solving method. It is used when a captcha is encountered. See the Captcha Solvers section. |
| TZ | UTC | Timezone used in the logs and the web browser. Example: `TZ=Europe/London`. | | TZ | UTC | Timezone used in the logs and the web browser. Example: `TZ=Europe/London`. |
| HEADLESS | true | Only for debugging. To run the web browser in headless mode or visible. | | LANG | none | Language used in the web browser. Example: `LANG=en_GB`. |
| BROWSER_TIMEOUT | 40000 | If you are experiencing errors/timeouts because your system is slow, you can try to increase this value. Remember to increase the `maxTimeout` parameter too. | | HEADLESS | true | Only for debugging. To run the web browser in headless mode or visible. |
| TEST_URL | https://www.google.com | FlareSolverr makes a request on start to make sure the web browser is working. You can change that URL if it is blocked in your country. | | BROWSER_TIMEOUT | 40000 | If you are experiencing errors/timeouts because your system is slow, you can try to increase this value. Remember to increase the `maxTimeout` parameter too. |
| PORT | 8191 | Listening port. You don't need to change this if you are running on Docker. | | TEST_URL | https://www.google.com | FlareSolverr makes a request on start to make sure the web browser is working. You can change that URL if it is blocked in your country. |
| HOST | 0.0.0.0 | Listening interface. You don't need to change this if you are running on Docker. | | PORT | 8191 | Listening port. You don't need to change this if you are running on Docker. |
| HOST | 0.0.0.0 | Listening interface. You don't need to change this if you are running on Docker. |
| PROMETHEUS_ENABLED | false | Enable Prometheus exporter. See the Prometheus section below. |
| PROMETHEUS_PORT | 8192 | Listening port for Prometheus exporter. See the Prometheus section below. |
Environment variables are set differently depending on the operating system. Some examples: Environment variables are set differently depending on the operating system. Some examples:
* Docker: Take a look at the Docker section in this document. Environment variables can be set in the `docker-compose.yml` file or in the Docker CLI command. * Docker: Take a look at the Docker section in this document. Environment variables can be set in the `docker-compose.yml` file or in the Docker CLI command.
* Linux: Run `export LOG_LEVEL=debug` and then start FlareSolverr in the same shell. * Linux: Run `export LOG_LEVEL=debug` and then start FlareSolverr in the same shell.
* Windows: Open `cmd.exe`, run `set LOG_LEVEL=debug` and then start FlareSolverr in the same shell. * Windows: Open `cmd.exe`, run `set LOG_LEVEL=debug` and then start FlareSolverr in the same shell.
## Prometheus exporter
The Prometheus exporter for FlareSolverr is disabled by default. It can be enabled with the environment variable `PROMETHEUS_ENABLED`. If you are using Docker make sure you expose the `PROMETHEUS_PORT`.
Example metrics:
```shell
# HELP flaresolverr_request_total Total requests with result
# TYPE flaresolverr_request_total counter
flaresolverr_request_total{domain="nowsecure.nl",result="solved"} 1.0
# HELP flaresolverr_request_created Total requests with result
# TYPE flaresolverr_request_created gauge
flaresolverr_request_created{domain="nowsecure.nl",result="solved"} 1.690141657157109e+09
# HELP flaresolverr_request_duration Request duration in seconds
# TYPE flaresolverr_request_duration histogram
flaresolverr_request_duration_bucket{domain="nowsecure.nl",le="0.0"} 0.0
flaresolverr_request_duration_bucket{domain="nowsecure.nl",le="10.0"} 1.0
flaresolverr_request_duration_bucket{domain="nowsecure.nl",le="25.0"} 1.0
flaresolverr_request_duration_bucket{domain="nowsecure.nl",le="50.0"} 1.0
flaresolverr_request_duration_bucket{domain="nowsecure.nl",le="+Inf"} 1.0
flaresolverr_request_duration_count{domain="nowsecure.nl"} 1.0
flaresolverr_request_duration_sum{domain="nowsecure.nl"} 5.858
# HELP flaresolverr_request_duration_created Request duration in seconds
# TYPE flaresolverr_request_duration_created gauge
flaresolverr_request_duration_created{domain="nowsecure.nl"} 1.6901416571570296e+09
```
## Captcha Solvers ## Captcha Solvers
> **Warning** > **Warning**

View File

@@ -1,6 +1,6 @@
{ {
"name": "flaresolverr", "name": "flaresolverr",
"version": "3.2.0", "version": "3.3.14-hotfix2",
"description": "Proxy server to bypass Cloudflare protection", "description": "Proxy server to bypass Cloudflare protection",
"author": "Diego Heras (ngosang / ngosang@hotmail.es)", "author": "Diego Heras (ngosang / ngosang@hotmail.es)",
"license": "MIT" "license": "MIT"

View File

@@ -1,10 +1,11 @@
bottle==0.12.25 bottle==0.12.25
waitress==2.1.2 waitress==2.1.2
selenium==4.9.1 selenium==4.15.2
func-timeout==4.3.5 func-timeout==4.3.5
prometheus-client==0.17.1
# required by undetected_chromedriver # required by undetected_chromedriver
requests==2.31.0 requests==2.31.0
certifi==2023.5.7 certifi==2023.7.22
websockets==11.0.3 websockets==11.0.3
# only required for linux # only required for linux
xvfbwrapper==0.2.9 xvfbwrapper==0.2.9

View File

@@ -0,0 +1,66 @@
import logging
import os
import urllib.parse
from bottle import request
from dtos import V1RequestBase, V1ResponseBase
from metrics import start_metrics_http_server, REQUEST_COUNTER, REQUEST_DURATION
# The exporter is opt-in: only the value "true" (case-insensitive) enables it.
PROMETHEUS_ENABLED = os.getenv('PROMETHEUS_ENABLED', 'false').lower() == 'true'
# Port for the metrics HTTP server; falls back to 8192 when the variable is unset.
PROMETHEUS_PORT = int(os.getenv('PROMETHEUS_PORT', 8192))
def setup():
    """Start the Prometheus metrics HTTP server if the exporter is enabled."""
    if not PROMETHEUS_ENABLED:
        # exporter disabled: nothing to start
        return
    start_metrics_http_server(PROMETHEUS_PORT)
def prometheus_plugin(callback):
    """
    Bottle plugin to expose Prometheus metrics
    http://bottlepy.org/docs/dev/plugindev.html

    Wraps the route ``callback`` so that, when ``PROMETHEUS_ENABLED`` is true,
    each response it returns is also recorded in ``REQUEST_DURATION`` and
    ``REQUEST_COUNTER``. The callback's response is always returned unchanged;
    any failure while exporting metrics is logged as a warning and swallowed.

    :param callback: the route callback being decorated
    :return: the wrapping callable to be used in place of ``callback``
    """
    def wrapper(*args, **kwargs):
        actual_response = callback(*args, **kwargs)

        if PROMETHEUS_ENABLED:
            try:
                export_metrics(actual_response)
            except Exception as e:
                # metrics are best-effort: never fail the request because of them
                logging.warning("Error exporting metrics: %s", e)

        return actual_response

    def export_metrics(actual_response):
        """Record duration and result metrics for a single response."""
        res = V1ResponseBase(actual_response)

        if res.startTimestamp is None or res.endTimestamp is None:
            # skip management and healthcheck endpoints
            return

        domain = "unknown"
        if res.solution and res.solution.url:
            domain = parse_domain_url(res.solution.url)
        else:
            # timeout error: no solution URL, fall back to the requested URL
            req = V1RequestBase(request.json)
            if req.url:
                domain = parse_domain_url(req.url)

        # NOTE(review): timestamps are presumably in milliseconds (the histogram
        # is documented in seconds) - confirm against the code that sets them
        run_time = (res.endTimestamp - res.startTimestamp) / 1000
        REQUEST_DURATION.labels(domain=domain).observe(run_time)

        # a missing message must not abort the export after the duration was
        # already observed, otherwise the counter would silently lag behind
        message = res.message or ""
        result = "unknown"
        if message == "Challenge solved!":
            result = "solved"
        elif message == "Challenge not detected!":
            result = "not_detected"
        elif message.startswith("Error"):
            result = "error"
        REQUEST_COUNTER.labels(domain=domain, result=result).inc()

    def parse_domain_url(url):
        """Return the hostname of ``url``, or "unknown" when it has none."""
        # urlparse().hostname is None for URLs without a network location;
        # keep the label consistent with the "unknown" default used above
        return urllib.parse.urlparse(url).hostname or "unknown"

    return wrapper

View File

@@ -25,8 +25,8 @@ def clean_files():
def download_chromium(): def download_chromium():
# https://commondatastorage.googleapis.com/chromium-browser-snapshots/index.html?prefix=Linux_x64/ # https://commondatastorage.googleapis.com/chromium-browser-snapshots/index.html?prefix=Linux_x64/
revision = "1090006" if os.name == 'nt' else '1090007' revision = "1260008" if os.name == 'nt' else '1260015'
arch = 'Win' if os.name == 'nt' else 'Linux_x64' arch = 'Win_x64' if os.name == 'nt' else 'Linux_x64'
dl_file = 'chrome-win' if os.name == 'nt' else 'chrome-linux' dl_file = 'chrome-win' if os.name == 'nt' else 'chrome-linux'
dl_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'dist_chrome') dl_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'dist_chrome')
dl_path_folder = os.path.join(dl_path, dl_file) dl_path_folder = os.path.join(dl_path, dl_file)
@@ -59,8 +59,7 @@ def download_chromium():
# Give executable permissions for *nix # Give executable permissions for *nix
# file * | grep executable | cut -d: -f1 # file * | grep executable | cut -d: -f1
print("Giving executable permissions...") print("Giving executable permissions...")
execs = ['chrome', 'chrome_crashpad_handler', 'chrome_sandbox', 'chrome-wrapper', 'nacl_helper', execs = ['chrome', 'chrome_crashpad_handler', 'chrome_sandbox', 'chrome-wrapper', 'xdg-mime', 'xdg-settings']
'nacl_helper_bootstrap', 'nacl_irt_x86_64.nexe', 'xdg-mime', 'xdg-settings']
for exec_file in execs: for exec_file in execs:
exec_path = os.path.join(chrome_path, exec_file) exec_path = os.path.join(chrome_path, exec_file)
os.chmod(exec_path, 0o755) os.chmod(exec_path, 0o755)
@@ -68,12 +67,15 @@ def download_chromium():
def run_pyinstaller(): def run_pyinstaller():
sep = ';' if os.name == 'nt' else ':' sep = ';' if os.name == 'nt' else ':'
subprocess.check_call([sys.executable, "-m", "PyInstaller", result = subprocess.run([sys.executable, "-m", "PyInstaller",
"--icon", "resources/flaresolverr_logo.ico", "--icon", "resources/flaresolverr_logo.ico",
"--add-data", f"package.json{sep}.", "--add-data", f"package.json{sep}.",
"--add-data", f"{os.path.join('dist_chrome', 'chrome')}{sep}chrome", "--add-data", f"{os.path.join('dist_chrome', 'chrome')}{sep}chrome",
os.path.join("src", "flaresolverr.py")], os.path.join("src", "flaresolverr.py")],
cwd=os.pardir) cwd=os.pardir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if result.returncode != 0:
print(result.stderr.decode('utf-8'))
raise Exception("Error running pyInstaller")
def compress_package(): def compress_package():

View File

@@ -8,6 +8,7 @@ from bottle import run, response, Bottle, request, ServerAdapter
from bottle_plugins.error_plugin import error_plugin from bottle_plugins.error_plugin import error_plugin
from bottle_plugins.logger_plugin import logger_plugin from bottle_plugins.logger_plugin import logger_plugin
from bottle_plugins import prometheus_plugin
from dtos import V1RequestBase from dtos import V1RequestBase
import flaresolverr_service import flaresolverr_service
import utils import utils
@@ -24,10 +25,6 @@ class JSONErrorBottle(Bottle):
app = JSONErrorBottle() app = JSONErrorBottle()
# plugin order is important
app.install(logger_plugin)
app.install(error_plugin)
@app.route('/') @app.route('/')
def index(): def index():
@@ -65,6 +62,12 @@ if __name__ == "__main__":
if sys.version_info < (3, 9): if sys.version_info < (3, 9):
raise Exception("The Python version is less than 3.9, a version equal to or higher is required.") raise Exception("The Python version is less than 3.9, a version equal to or higher is required.")
# fix for HEADLESS=false in Windows binary
# https://stackoverflow.com/a/27694505
if os.name == 'nt':
import multiprocessing
multiprocessing.freeze_support()
# fix ssl certificates for compiled binaries # fix ssl certificates for compiled binaries
# https://github.com/pyinstaller/pyinstaller/issues/7229 # https://github.com/pyinstaller/pyinstaller/issues/7229
# https://stackoverflow.com/questions/55736855/how-to-change-the-cafile-argument-in-the-ssl-module-in-python3 # https://stackoverflow.com/questions/55736855/how-to-change-the-cafile-argument-in-the-ssl-module-in-python3
@@ -101,6 +104,13 @@ if __name__ == "__main__":
# test browser installation # test browser installation
flaresolverr_service.test_browser_installation() flaresolverr_service.test_browser_installation()
# start bottle plugins
# plugin order is important
app.install(logger_plugin)
app.install(error_plugin)
prometheus_plugin.setup()
app.install(prometheus_plugin.prometheus_plugin)
# start webserver # start webserver
# default server 'wsgiref' does not support concurrent requests # default server 'wsgiref' does not support concurrent requests
# https://github.com/FlareSolverr/FlareSolverr/issues/680 # https://github.com/FlareSolverr/FlareSolverr/issues/680

View File

@@ -46,7 +46,7 @@ CHALLENGE_SELECTORS = [
# Fairlane / pararius.com # Fairlane / pararius.com
'div.vc div.text-box h2' 'div.vc div.text-box h2'
] ]
SHORT_TIMEOUT = 10 SHORT_TIMEOUT = 1
SESSIONS_STORAGE = SessionsStorage() SESSIONS_STORAGE = SessionsStorage()
@@ -251,26 +251,26 @@ def _resolve_challenge(req: V1RequestBase, method: str) -> ChallengeResolutionT:
def click_verify(driver: WebDriver): def click_verify(driver: WebDriver):
try: try:
logging.debug("Try to find the Cloudflare verify checkbox") logging.debug("Try to find the Cloudflare verify checkbox...")
iframe = driver.find_element(By.XPATH, "//iframe[@title='Widget containing a Cloudflare security challenge']") iframe = driver.find_element(By.XPATH, "//iframe[starts-with(@id, 'cf-chl-widget-')]")
driver.switch_to.frame(iframe) driver.switch_to.frame(iframe)
checkbox = driver.find_element( checkbox = driver.find_element(
by=By.XPATH, by=By.XPATH,
value='//*[@id="cf-stage"]//label[@class="ctp-checkbox-label"]/input', value='//*[@id="challenge-stage"]/div/label/input',
) )
if checkbox: if checkbox:
actions = ActionChains(driver) actions = ActionChains(driver)
actions.move_to_element_with_offset(checkbox, 5, 7) actions.move_to_element_with_offset(checkbox, 5, 7)
actions.click(checkbox) actions.click(checkbox)
actions.perform() actions.perform()
logging.debug("Cloudflare verify checkbox found and clicked") logging.debug("Cloudflare verify checkbox found and clicked!")
except Exception: except Exception:
logging.debug("Cloudflare verify checkbox not found on the page") logging.debug("Cloudflare verify checkbox not found on the page.")
finally: finally:
driver.switch_to.default_content() driver.switch_to.default_content()
try: try:
logging.debug("Try to find the Cloudflare 'Verify you are human' button") logging.debug("Try to find the Cloudflare 'Verify you are human' button...")
button = driver.find_element( button = driver.find_element(
by=By.XPATH, by=By.XPATH,
value="//input[@type='button' and @value='Verify you are human']", value="//input[@type='button' and @value='Verify you are human']",
@@ -280,10 +280,9 @@ def click_verify(driver: WebDriver):
actions.move_to_element_with_offset(button, 5, 7) actions.move_to_element_with_offset(button, 5, 7)
actions.click(button) actions.click(button)
actions.perform() actions.perform()
logging.debug("The Cloudflare 'Verify you are human' button found and clicked") logging.debug("The Cloudflare 'Verify you are human' button found and clicked!")
except Exception as e: except Exception:
logging.debug("The Cloudflare 'Verify you are human' button not found on the page") logging.debug("The Cloudflare 'Verify you are human' button not found on the page.")
# print(e)
time.sleep(2) time.sleep(2)
@@ -295,10 +294,13 @@ def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> Challenge
# navigate to the page # navigate to the page
logging.debug(f'Navigating to... {req.url}') logging.debug(f'Navigating to... {req.url}')
driver.get(req.url)
driver.start_session() # required to bypass Cloudflare
if method == 'POST': if method == 'POST':
_post_request(req, driver) _post_request(req, driver)
else: else:
driver.get(req.url) driver.get(req.url)
driver.start_session() # required to bypass Cloudflare
# set cookies if required # set cookies if required
if req.cookies is not None and len(req.cookies) > 0: if req.cookies is not None and len(req.cookies) > 0:
@@ -311,6 +313,7 @@ def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> Challenge
_post_request(req, driver) _post_request(req, driver)
else: else:
driver.get(req.url) driver.get(req.url)
driver.start_session() # required to bypass Cloudflare
# wait for the page # wait for the page
if utils.get_config_log_html(): if utils.get_config_log_html():
@@ -430,3 +433,4 @@ def _post_request(req: V1RequestBase, driver: WebDriver):
</body> </body>
</html>""" </html>"""
driver.get("data:text/html;charset=utf-8," + html_content) driver.get("data:text/html;charset=utf-8," + html_content)
driver.start_session() # required to bypass Cloudflare

32
src/metrics.py Normal file
View File

@@ -0,0 +1,32 @@
import logging
from prometheus_client import Counter, Histogram, start_http_server
import time
# Counter of handled requests, labelled by target domain and by result.
REQUEST_COUNTER = Counter(
name='flaresolverr_request',
documentation='Total requests with result',
labelnames=['domain', 'result']
)
# Histogram of request durations in seconds, labelled by target domain.
# Explicit bucket upper bounds are 0, 10, 25 and 50.
REQUEST_DURATION = Histogram(
name='flaresolverr_request_duration',
documentation='Request duration in seconds',
labelnames=['domain'],
buckets=[0, 10, 25, 50]
)
)
def serve(port):
    """
    Start the Prometheus HTTP endpoint on ``port`` and then block forever.

    This function never returns: once the server is started, the calling
    thread sleeps in 600-second intervals indefinitely.
    """
    start_http_server(port=port)
    nap_seconds = 600
    while True:
        time.sleep(nap_seconds)
def start_metrics_http_server(prometheus_port: int):
    """
    Launch the Prometheus exporter in a background daemon thread.

    :param prometheus_port: port passed to ``serve`` for the metrics endpoint
    """
    from threading import Thread

    logging.info(f"Serving Prometheus exporter on http://0.0.0.0:{prometheus_port}/metrics")
    # daemon thread: it must not keep the process alive on shutdown
    exporter_thread = Thread(target=serve, args=(prometheus_port,), daemon=True)
    exporter_thread.start()

View File

@@ -335,6 +335,42 @@ class TestFlareSolverr(unittest.TestCase):
self.assertGreater(len(solution.cookies), 0) self.assertGreater(len(solution.cookies), 0)
self.assertIn("Chrome/", solution.userAgent) self.assertIn("Chrome/", solution.userAgent)
def test_v1_endpoint_request_get_proxy_http_param_with_credentials(self):
    """
    Check that ``request.get`` succeeds through an HTTP proxy given a username and password.

    To configure TinyProxy in local:
       * sudo vim /etc/tinyproxy/tinyproxy.conf
          * edit => LogFile "/tmp/tinyproxy.log"
          * edit => Syslog Off
          * add => BasicAuth testuser testpass
       * sudo tinyproxy -d
       * sudo tail -f /tmp/tinyproxy.log
    """
    res = self.app.post_json('/v1', {
        "cmd": "request.get",
        "url": self.google_url,
        "proxy": {
            "url": self.proxy_url,
            "username": "testuser",
            "password": "testpass"
        }
    })
    self.assertEqual(res.status_code, 200)

    # envelope of the response
    body = V1ResponseBase(res.json)
    self.assertEqual(STATUS_OK, body.status)
    self.assertEqual("Challenge not detected!", body.message)
    self.assertGreater(body.startTimestamp, 10000)
    self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
    self.assertEqual(utils.get_flaresolverr_version(), body.version)

    # solved page
    solution = body.solution
    self.assertIn(self.google_url, solution.url)
    self.assertEqual(solution.status, 200)
    # compare by value: an identity check on an int only passes thanks to
    # CPython's small-integer cache, which is an implementation detail
    self.assertEqual(len(solution.headers), 0)
    self.assertIn("<title>Google</title>", solution.response)
    self.assertGreater(len(solution.cookies), 0)
    self.assertIn("Chrome/", solution.userAgent)
def test_v1_endpoint_request_get_proxy_socks_param(self): def test_v1_endpoint_request_get_proxy_socks_param(self):
""" """
To configure Dante in local: To configure Dante in local:

View File

@@ -17,11 +17,12 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
from __future__ import annotations from __future__ import annotations
__version__ = "3.4.6" __version__ = "3.5.4"
import json import json
import logging import logging
import os import os
import pathlib
import re import re
import shutil import shutil
import subprocess import subprocess
@@ -33,7 +34,7 @@ from weakref import finalize
import selenium.webdriver.chrome.service import selenium.webdriver.chrome.service
import selenium.webdriver.chrome.webdriver import selenium.webdriver.chrome.webdriver
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
import selenium.webdriver.common.service import selenium.webdriver.chromium.service
import selenium.webdriver.remote.command import selenium.webdriver.remote.command
import selenium.webdriver.remote.webdriver import selenium.webdriver.remote.webdriver
@@ -109,11 +110,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
browser_executable_path=None, browser_executable_path=None,
port=0, port=0,
enable_cdp_events=False, enable_cdp_events=False,
service_args=None, # service_args=None,
service_creationflags=None, # service_creationflags=None,
desired_capabilities=None, desired_capabilities=None,
advanced_elements=False, advanced_elements=False,
service_log_path=None, # service_log_path=None,
keep_alive=True, keep_alive=True,
log_level=0, log_level=0,
headless=False, headless=False,
@@ -122,8 +123,9 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
suppress_welcome=True, suppress_welcome=True,
use_subprocess=False, use_subprocess=False,
debug=False, debug=False,
no_sandbox=True, no_sandbox=True,
windows_headless=False, windows_headless=False,
user_multi_procs: bool = False,
**kw, **kw,
): ):
""" """
@@ -235,6 +237,14 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
uses the --no-sandbox option, and additionally does suppress the "unsecure option" status bar uses the --no-sandbox option, and additionally does suppress the "unsecure option" status bar
this option has a default of True since many people seem to run this as root (....) , and chrome does not start this option has a default of True since many people seem to run this as root (....) , and chrome does not start
when running as root without using --no-sandbox flag. when running as root without using --no-sandbox flag.
user_multi_procs:
set to true when you are using multithreads/multiprocessing
ensures not all processes are trying to modify a binary which is in use by another.
for this to work. YOU MUST HAVE AT LEAST 1 UNDETECTED_CHROMEDRIVER BINARY IN YOUR ROAMING DATA FOLDER.
this requirement can be easily satisfied, by just running this program "normal" and close/kill it.
""" """
finalize(self, self._ensure_close, self) finalize(self, self._ensure_close, self)
@@ -243,8 +253,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
executable_path=driver_executable_path, executable_path=driver_executable_path,
force=patcher_force_close, force=patcher_force_close,
version_main=version_main, version_main=version_main,
user_multi_procs=user_multi_procs,
) )
# self.patcher.auto(user_multiprocess = user_multi_num_procs)
self.patcher.auto() self.patcher.auto()
# self.patcher = patcher # self.patcher = patcher
if not options: if not options:
options = ChromeOptions() options = ChromeOptions()
@@ -361,6 +374,18 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
browser_executable_path or find_chrome_executable() browser_executable_path or find_chrome_executable()
) )
if not options.binary_location or not \
pathlib.Path(options.binary_location).exists():
raise FileNotFoundError(
"\n---------------------\n"
"Could not determine browser executable."
"\n---------------------\n"
"Make sure your browser is installed in the default location (path).\n"
"If you are sure about the browser executable, you can specify it using\n"
"the `browser_executable_path='{}` parameter.\n\n"
.format("/path/to/browser/executable" if IS_POSIX else "c:/path/to/your/browser.exe")
)
self._delay = 3 self._delay = 3
self.user_data_dir = user_data_dir self.user_data_dir = user_data_dir
@@ -371,11 +396,17 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
if no_sandbox: if no_sandbox:
options.arguments.extend(["--no-sandbox", "--test-type"]) options.arguments.extend(["--no-sandbox", "--test-type"])
if headless or options.headless: if headless or getattr(options, 'headless', None):
v_main = int(self.patcher.version_main) if self.patcher.version_main else 108 #workaround until a better checking is found
if v_main < 108: try:
options.add_argument("--headless=chrome") v_main = int(self.patcher.version_main) if self.patcher.version_main else 108
elif v_main >= 108: if v_main < 108:
options.add_argument("--headless=chrome")
elif v_main >= 108:
options.add_argument("--headless=new")
except:
logger.warning("could not detect version_main."
"therefore, we are assuming it is chrome 108 or higher")
options.add_argument("--headless=new") options.add_argument("--headless=new")
options.add_argument("--window-size=1920,1080") options.add_argument("--window-size=1920,1080")
@@ -419,40 +450,29 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
self.browser_pid = start_detached( self.browser_pid = start_detached(
options.binary_location, *options.arguments options.binary_location, *options.arguments
) )
else: else:
startupinfo = subprocess.STARTUPINFO() startupinfo = subprocess.STARTUPINFO()
if os.name == 'nt' and windows_headless: if os.name == 'nt' and windows_headless:
startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
browser = subprocess.Popen( browser = subprocess.Popen(
[options.binary_location, *options.arguments], [options.binary_location, *options.arguments],
stdin=subprocess.PIPE, stdin=subprocess.PIPE,
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.PIPE, stderr=subprocess.PIPE,
close_fds=IS_POSIX, close_fds=IS_POSIX,
startupinfo=startupinfo startupinfo=startupinfo
) )
self.browser_pid = browser.pid self.browser_pid = browser.pid
if service_creationflags:
service = selenium.webdriver.common.service.Service( service = selenium.webdriver.chromium.service.ChromiumService(
self.patcher.executable_path, port, service_args, service_log_path self.patcher.executable_path
) )
for attr_name in ("creationflags", "creation_flags"):
if hasattr(service, attr_name):
setattr(service, attr_name, service_creationflags)
break
else:
service = None
super(Chrome, self).__init__( super(Chrome, self).__init__(
executable_path=self.patcher.executable_path, service=service,
port=port,
options=options, options=options,
service_args=service_args,
desired_capabilities=desired_capabilities,
service_log_path=service_log_path,
keep_alive=keep_alive, keep_alive=keep_alive,
service=service, # needed or the service will be re-created
) )
self.reactor = None self.reactor = None
@@ -471,7 +491,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
else: else:
self._web_element_cls = WebElement self._web_element_cls = WebElement
if options.headless: if headless or getattr(options, 'headless', None):
self._configure_headless() self._configure_headless()
def _configure_headless(self): def _configure_headless(self):
@@ -708,13 +728,48 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
if not capabilities: if not capabilities:
capabilities = self.options.to_capabilities() capabilities = self.options.to_capabilities()
super(selenium.webdriver.chrome.webdriver.WebDriver, self).start_session( super(selenium.webdriver.chrome.webdriver.WebDriver, self).start_session(
capabilities, browser_profile capabilities
) )
# super(Chrome, self).start_session(capabilities, browser_profile) # super(Chrome, self).start_session(capabilities, browser_profile)
def find_elements_recursive(self, by, value):
    """
    Lazily yield matching elements from the main content and then from each
    iframe located in it.

    This is deliberately a generator: a returned list of elements would be
    stale on arrival, whereas each yielded element is handed out while the
    driver is still switched to the frame it was found in, so it can be used
    directly.

    Args:
        by: By
        value: str

    Returns: Generator[webelement.WebElement]
    """

    def walk(frame=None):
        # select where to search: the given frame, or the main content frame
        if frame:
            self.switch_to.frame(frame)
        else:
            self.switch_to.default_content()
        yield from self.find_elements(by, value)
        # switch back to main content, otherwise we will get StaleElementReferenceException
        self.switch_to.default_content()

    # root frame first
    yield from walk()

    # then every iframe, looked up only once the root search is exhausted
    iframes = self.find_elements('css selector', 'iframe')
    for iframe in iframes:
        yield from walk(iframe)
def quit(self): def quit(self):
try: try:
self.service.process.kill() self.service.process.kill()
self.service.process.wait(5) self.service.process.wait(5)
logger.debug("webdriver process ended") logger.debug("webdriver process ended")
except (AttributeError, RuntimeError, OSError): except (AttributeError, RuntimeError, OSError):
@@ -728,7 +783,16 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
os.kill(self.browser_pid, 15) os.kill(self.browser_pid, 15)
logger.debug("gracefully closed browser") logger.debug("gracefully closed browser")
except Exception as e: # noqa except Exception as e: # noqa
logger.debug(e, exc_info=True) pass
# Force kill Chrome process in Windows
# https://github.com/FlareSolverr/FlareSolverr/issues/772
if os.name == 'nt':
try:
subprocess.call(['taskkill', '/f', '/pid', str(self.browser_pid)],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL)
except Exception:
pass
if ( if (
hasattr(self, "keep_user_data_dir") hasattr(self, "keep_user_data_dir")
and hasattr(self, "user_data_dir") and hasattr(self, "user_data_dir")
@@ -747,7 +811,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
else: else:
logger.debug("successfully removed %s" % self.user_data_dir) logger.debug("successfully removed %s" % self.user_data_dir)
break break
time.sleep(0.1)
try:
time.sleep(0.1)
except OSError:
pass
# dereference patcher, so patcher can start cleaning up as well. # dereference patcher, so patcher can start cleaning up as well.
# this must come last, otherwise it will throw 'in use' errors # this must come last, otherwise it will throw 'in use' errors
@@ -842,10 +910,10 @@ def find_chrome_executable():
if item is not None: if item is not None:
for subitem in ( for subitem in (
"Google/Chrome/Application", "Google/Chrome/Application",
"Google/Chrome Beta/Application",
"Google/Chrome Canary/Application",
): ):
candidates.add(os.sep.join((item, subitem, "chrome.exe"))) candidates.add(os.sep.join((item, subitem, "chrome.exe")))
for candidate in candidates: for candidate in candidates:
logger.debug('checking if %s exists and is executable' % candidate)
if os.path.exists(candidate) and os.access(candidate, os.X_OK): if os.path.exists(candidate) and os.access(candidate, os.X_OK):
logger.debug('found! using %s' % candidate)
return os.path.normpath(candidate) return os.path.normpath(candidate)

View File

@@ -41,6 +41,7 @@ def start_detached(executable, *args):
# close pipes # close pipes
writer.close() writer.close()
reader.close() reader.close()
process.close()
return pid return pid

View File

@@ -3,17 +3,21 @@
from distutils.version import LooseVersion from distutils.version import LooseVersion
import io import io
import json
import logging import logging
import os import os
import pathlib
import platform
import random import random
import re import re
import shutil
import string import string
import sys import sys
import time import time
from urllib.request import urlopen from urllib.request import urlopen
from urllib.request import urlretrieve from urllib.request import urlretrieve
import zipfile import zipfile
from multiprocessing import Lock
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -21,21 +25,10 @@ IS_POSIX = sys.platform.startswith(("darwin", "cygwin", "linux", "linux2"))
class Patcher(object): class Patcher(object):
url_repo = "https://chromedriver.storage.googleapis.com" lock = Lock()
zip_name = "chromedriver_%s.zip"
exe_name = "chromedriver%s" exe_name = "chromedriver%s"
platform = sys.platform platform = sys.platform
if platform.endswith("win32"):
zip_name %= "win32"
exe_name %= ".exe"
if platform.endswith(("linux", "linux2")):
zip_name %= "linux64"
exe_name %= ""
if platform.endswith("darwin"):
zip_name %= "mac64"
exe_name %= ""
if platform.endswith("win32"): if platform.endswith("win32"):
d = "~/appdata/roaming/undetected_chromedriver" d = "~/appdata/roaming/undetected_chromedriver"
elif "LAMBDA_TASK_ROOT" in os.environ: elif "LAMBDA_TASK_ROOT" in os.environ:
@@ -48,7 +41,13 @@ class Patcher(object):
d = "~/.undetected_chromedriver" d = "~/.undetected_chromedriver"
data_path = os.path.abspath(os.path.expanduser(d)) data_path = os.path.abspath(os.path.expanduser(d))
def __init__(self, executable_path=None, force=False, version_main: int = 0): def __init__(
self,
executable_path=None,
force=False,
version_main: int = 0,
user_multi_procs=False,
):
""" """
Args: Args:
executable_path: None = automatic executable_path: None = automatic
@@ -61,6 +60,21 @@ class Patcher(object):
self.force = force self.force = force
self._custom_exe_path = False self._custom_exe_path = False
prefix = "undetected" prefix = "undetected"
self.user_multi_procs = user_multi_procs
try:
# Try to convert version_main into an integer
version_main_int = int(version_main)
# check if version_main_int is less than or equal to e.g 114
self.is_old_chromedriver = version_main and version_main_int <= 114
except (ValueError,TypeError):
# If the conversion fails, print an error message
print("version_main cannot be converted to an integer")
# Set self.is_old_chromedriver to False if the conversion fails
self.is_old_chromedriver = False
# Needs to be called before self.exe_name is accessed
self._set_platform_name()
if not os.path.exists(self.data_path): if not os.path.exists(self.data_path):
os.makedirs(self.data_path, exist_ok=True) os.makedirs(self.data_path, exist_ok=True)
@@ -78,17 +92,64 @@ class Patcher(object):
self.zip_path = os.path.join(self.data_path, prefix) self.zip_path = os.path.join(self.data_path, prefix)
if not executable_path: if not executable_path:
self.executable_path = os.path.abspath( if not self.user_multi_procs:
os.path.join(".", self.executable_path) self.executable_path = os.path.abspath(
) os.path.join(".", self.executable_path)
)
if executable_path: if executable_path:
self._custom_exe_path = True self._custom_exe_path = True
self.executable_path = executable_path self.executable_path = executable_path
# Set the correct repository to download the Chromedriver from
if self.is_old_chromedriver:
self.url_repo = "https://chromedriver.storage.googleapis.com"
else:
self.url_repo = "https://googlechromelabs.github.io/chrome-for-testing"
self.version_main = version_main self.version_main = version_main
self.version_full = None self.version_full = None
def _set_platform_name(self):
    """
    Derive ``platform_name`` and finalize ``exe_name`` from ``self.platform``,
    so that the matching chromedriver can be downloaded.

    ``exe_name`` is a ``%s`` template that gets filled with the executable
    suffix. On darwin the platform name depends on ``is_old_chromedriver``.
    When ``self.platform`` matches none of the known endings nothing is changed.
    """
    current = self.platform

    if current.endswith("win32"):
        name, suffix = "win32", ".exe"
    elif current.endswith(("linux", "linux2")):
        name, suffix = "linux64", ""
    elif current.endswith("darwin"):
        name = "mac64" if self.is_old_chromedriver else "mac-x64"
        suffix = ""
    else:
        return

    self.platform_name = name
    self.exe_name = self.exe_name % suffix
def auto(self, executable_path=None, force=False, version_main=None, _=None):
"""
Args:
executable_path:
force:
version_main:
Returns:
"""
p = pathlib.Path(self.data_path)
if self.user_multi_procs:
with Lock():
files = list(p.rglob("*chromedriver*"))
most_recent = max(files, key=lambda f: f.stat().st_mtime)
files.remove(most_recent)
list(map(lambda f: f.unlink(), files))
if self.is_binary_patched(most_recent):
self.executable_path = str(most_recent)
return True
if executable_path: if executable_path:
self.executable_path = executable_path self.executable_path = executable_path
self._custom_exe_path = True self._custom_exe_path = True
@@ -127,6 +188,53 @@ class Patcher(object):
self.unzip_package(self.fetch_package()) self.unzip_package(self.fetch_package())
return self.patch() return self.patch()
def driver_binary_in_use(self, path: str = None) -> bool:
    """
    Naive test to check if a found chromedriver binary is currently in use.

    The file is opened in append+binary mode and probed with a seek and a
    read. A failure to open or probe it is taken to mean it is in use.

    Args:
        path: a string or PathLike object to the binary to check.
              if not specified, this object's executable_path is used.

    Returns:
        True when the file could not be opened or probed (assumed in use),
        False when it could be opened, seeked and read.

    Raises:
        OSError: when the file does not exist.
    """
    if not path:
        path = self.executable_path
    p = pathlib.Path(path)

    if not p.exists():
        raise OSError("file does not exist: %s" % p)

    try:
        with open(p, mode="a+b") as fs:
            in_use = False
            try:
                fs.seek(0, 0)
            except PermissionError:
                in_use = True
            # since some systems apparently allow seeking,
            # we conduct another test
            try:
                fs.readline()
            except PermissionError:
                in_use = True
            return in_use
    except OSError:
        # the file could not even be opened (or probing failed in another
        # way): ok, safe to assume this is in use. Previously this path
        # fell through and returned None.
        return True
def cleanup_unused_files(self):
    """
    Best-effort removal of every entry in ``self.data_path`` whose name
    contains "undetected".

    Entries that cannot be unlinked (for example directories, or files that
    are locked) are skipped silently.
    """
    p = pathlib.Path(self.data_path)
    # materialize the matches first so deletions do not disturb the listing
    items = list(p.glob("*undetected*"))
    for item in items:
        try:
            item.unlink()
        except OSError:
            # only filesystem errors are ignored; KeyboardInterrupt and
            # SystemExit are no longer swallowed as with a bare except
            continue
def patch(self): def patch(self):
self.patch_exe() self.patch_exe()
return self.is_binary_patched() return self.is_binary_patched()
@@ -137,12 +245,32 @@ class Patcher(object):
:return: version string :return: version string
:rtype: LooseVersion :rtype: LooseVersion
""" """
path = "/latest_release" # Endpoint for old versions of Chromedriver (114 and below)
if self.version_main: if self.is_old_chromedriver:
path += f"_{self.version_main}" path = f"/latest_release_{self.version_main}"
path = path.upper() path = path.upper()
logger.debug("getting release number from %s" % path)
return LooseVersion(urlopen(self.url_repo + path).read().decode())
# Endpoint for new versions of Chromedriver (115+)
if not self.version_main:
# Fetch the latest version
path = "/last-known-good-versions-with-downloads.json"
logger.debug("getting release number from %s" % path)
with urlopen(self.url_repo + path) as conn:
response = conn.read().decode()
last_versions = json.loads(response)
return LooseVersion(last_versions["channels"]["Stable"]["version"])
# Fetch the latest minor version of the major version provided
path = "/latest-versions-per-milestone-with-downloads.json"
logger.debug("getting release number from %s" % path) logger.debug("getting release number from %s" % path)
return LooseVersion(urlopen(self.url_repo + path).read().decode()) with urlopen(self.url_repo + path) as conn:
response = conn.read().decode()
major_versions = json.loads(response)
return LooseVersion(major_versions["milestones"][str(self.version_main)]["version"])
def parse_exe_version(self): def parse_exe_version(self):
with io.open(self.executable_path, "rb") as f: with io.open(self.executable_path, "rb") as f:
@@ -157,10 +285,16 @@ class Patcher(object):
:return: path to downloaded file :return: path to downloaded file
""" """
u = "%s/%s/%s" % (self.url_repo, self.version_full.vstring, self.zip_name) zip_name = f"chromedriver_{self.platform_name}.zip"
logger.debug("downloading from %s" % u) if self.is_old_chromedriver:
# return urlretrieve(u, filename=self.data_path)[0] download_url = "%s/%s/%s" % (self.url_repo, self.version_full.vstring, zip_name)
return urlretrieve(u)[0] else:
zip_name = zip_name.replace("_", "-", 1)
download_url = "https://storage.googleapis.com/chrome-for-testing-public/%s/%s/%s"
download_url %= (self.version_full.vstring, self.platform_name, zip_name)
logger.debug("downloading from %s" % download_url)
return urlretrieve(download_url)[0]
def unzip_package(self, fp): def unzip_package(self, fp):
""" """
@@ -168,6 +302,12 @@ class Patcher(object):
:return: path to unpacked executable :return: path to unpacked executable
""" """
exe_path = self.exe_name
if not self.is_old_chromedriver:
# The new chromedriver unzips into its own folder
zip_name = f"chromedriver-{self.platform_name}"
exe_path = os.path.join(zip_name, self.exe_name)
logger.debug("unzipping %s" % fp) logger.debug("unzipping %s" % fp)
try: try:
os.unlink(self.zip_path) os.unlink(self.zip_path)
@@ -176,10 +316,10 @@ class Patcher(object):
os.makedirs(self.zip_path, mode=0o755, exist_ok=True) os.makedirs(self.zip_path, mode=0o755, exist_ok=True)
with zipfile.ZipFile(fp, mode="r") as zf: with zipfile.ZipFile(fp, mode="r") as zf:
zf.extract(self.exe_name, self.zip_path) zf.extractall(self.zip_path)
os.rename(os.path.join(self.zip_path, self.exe_name), self.executable_path) os.rename(os.path.join(self.zip_path, exe_path), self.executable_path)
os.remove(fp) os.remove(fp)
os.rmdir(self.zip_path) shutil.rmtree
os.chmod(self.executable_path, 0o755) os.chmod(self.executable_path, 0o755)
return self.executable_path return self.executable_path
@@ -255,21 +395,17 @@ class Patcher(object):
else: else:
timeout = 3 # stop trying after this many seconds timeout = 3 # stop trying after this many seconds
t = time.monotonic() t = time.monotonic()
while True: now = lambda: time.monotonic()
now = time.monotonic() while now() - t > timeout:
if now - t > timeout: # we don't want to wait until the end of time
# we don't want to wait until the end of time
logger.debug(
"could not unlink %s in time (%d seconds)"
% (self.executable_path, timeout)
)
break
try: try:
if self.user_multi_procs:
break
os.unlink(self.executable_path) os.unlink(self.executable_path)
logger.debug("successfully unlinked %s" % self.executable_path) logger.debug("successfully unlinked %s" % self.executable_path)
break break
except (OSError, RuntimeError, PermissionError): except (OSError, RuntimeError, PermissionError):
time.sleep(0.1) time.sleep(0.01)
continue continue
except FileNotFoundError: except FileNotFoundError:
break break

View File

@@ -3,6 +3,8 @@ import logging
import os import os
import re import re
import shutil import shutil
import urllib.parse
import tempfile
from selenium.webdriver.chrome.webdriver import WebDriver from selenium.webdriver.chrome.webdriver import WebDriver
import undetected_chromedriver as uc import undetected_chromedriver as uc
@@ -36,8 +38,82 @@ def get_flaresolverr_version() -> str:
return FLARESOLVERR_VERSION return FLARESOLVERR_VERSION
def create_proxy_extension(proxy: dict) -> str:
    """
    Write an unpacked Chrome extension that configures a fixed proxy server
    and answers its authentication prompts.

    The extension consists of a ``manifest.json`` and a ``background.js``
    written into a fresh temporary directory. The caller owns that directory
    and is responsible for deleting it.

    Args:
        proxy: dict with the keys ``url``, ``username`` and ``password``.

    Returns:
        Path of the temporary directory containing the extension.

    Raises:
        KeyError: when one of the required keys is missing from ``proxy``.
    """
    # function-scope import so this block does not depend on the module header
    import json

    parsed_url = urllib.parse.urlparse(proxy['url'])
    single_proxy = {"scheme": parsed_url.scheme, "host": parsed_url.hostname}
    # a proxy URL without an explicit port used to crash the "%d" formatting;
    # the port entry is simply left out in that case
    if parsed_url.port is not None:
        single_proxy["port"] = parsed_url.port

    config = {
        "mode": "fixed_servers",
        "rules": {
            "singleProxy": single_proxy,
            "bypassList": ["localhost"],
        },
    }
    credentials = {"username": proxy['username'], "password": proxy['password']}

    manifest_json = """
    {
        "version": "1.0.0",
        "manifest_version": 2,
        "name": "Chrome Proxy",
        "permissions": [
            "proxy",
            "tabs",
            "unlimitedStorage",
            "storage",
            "<all_urls>",
            "webRequest",
            "webRequestBlocking"
        ],
        "background": {"scripts": ["background.js"]},
        "minimum_chrome_version": "76.0.0"
    }
    """

    # Values are embedded as JSON so that quotes or backslashes in the
    # credentials cannot break out of the JavaScript string literals.
    background_js = """
    var config = %s;

    chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

    function callbackFn(details) {
        return {
            authCredentials: %s
        };
    }

    chrome.webRequest.onAuthRequired.addListener(
        callbackFn,
        { urls: ["<all_urls>"] },
        ['blocking']
    );
    """ % (
        json.dumps(config),
        json.dumps(credentials)
    )

    proxy_extension_dir = tempfile.mkdtemp()

    with open(os.path.join(proxy_extension_dir, "manifest.json"), "w", encoding="utf-8") as f:
        f.write(manifest_json)

    with open(os.path.join(proxy_extension_dir, "background.js"), "w", encoding="utf-8") as f:
        f.write(background_js)

    return proxy_extension_dir
def get_webdriver(proxy: dict = None) -> WebDriver: def get_webdriver(proxy: dict = None) -> WebDriver:
global PATCHED_DRIVER_PATH global PATCHED_DRIVER_PATH, USER_AGENT
logging.debug('Launching web browser...') logging.debug('Launching web browser...')
# undetected_chromedriver # undetected_chromedriver
@@ -54,13 +130,30 @@ def get_webdriver(proxy: dict = None) -> WebDriver:
options.add_argument('--disable-software-rasterizer') options.add_argument('--disable-software-rasterizer')
options.add_argument('--ignore-certificate-errors') options.add_argument('--ignore-certificate-errors')
options.add_argument('--ignore-ssl-errors') options.add_argument('--ignore-ssl-errors')
# fix GL errors in ASUSTOR NAS
# https://github.com/FlareSolverr/FlareSolverr/issues/782
# https://github.com/microsoft/vscode/issues/127800#issuecomment-873342069
# https://peter.sh/experiments/chromium-command-line-switches/#use-gl
options.add_argument('--use-gl=swiftshader')
if proxy and 'url' in proxy: language = os.environ.get('LANG', None)
if language is not None:
options.add_argument('--lang=%s' % language)
# Fix for Chrome 117 | https://github.com/FlareSolverr/FlareSolverr/issues/910
if USER_AGENT is not None:
options.add_argument('--user-agent=%s' % USER_AGENT)
proxy_extension_dir = None
if proxy and all(key in proxy for key in ['url', 'username', 'password']):
proxy_extension_dir = create_proxy_extension(proxy)
options.add_argument("--load-extension=%s" % os.path.abspath(proxy_extension_dir))
elif proxy and 'url' in proxy:
proxy_url = proxy['url'] proxy_url = proxy['url']
logging.debug("Using webdriver proxy: %s", proxy_url) logging.debug("Using webdriver proxy: %s", proxy_url)
options.add_argument('--proxy-server=%s' % proxy_url) options.add_argument('--proxy-server=%s' % proxy_url)
# note: headless mode is detected (options.headless = True) # note: headless mode is detected (headless = True)
# we launch the browser in head-full mode with the window hidden # we launch the browser in head-full mode with the window hidden
windows_headless = False windows_headless = False
if get_config_headless(): if get_config_headless():
@@ -68,6 +161,8 @@ def get_webdriver(proxy: dict = None) -> WebDriver:
windows_headless = True windows_headless = True
else: else:
start_xvfb_display() start_xvfb_display()
# For normal headless mode:
# options.add_argument('--headless')
# if we are inside the Docker container, we avoid downloading the driver # if we are inside the Docker container, we avoid downloading the driver
driver_exe_path = None driver_exe_path = None
@@ -87,12 +182,17 @@ def get_webdriver(proxy: dict = None) -> WebDriver:
# if we don't set driver_executable_path it downloads, patches, and deletes the driver each time # if we don't set driver_executable_path it downloads, patches, and deletes the driver each time
driver = uc.Chrome(options=options, browser_executable_path=browser_executable_path, driver = uc.Chrome(options=options, browser_executable_path=browser_executable_path,
driver_executable_path=driver_exe_path, version_main=version_main, driver_executable_path=driver_exe_path, version_main=version_main,
windows_headless=windows_headless) windows_headless=windows_headless, headless=windows_headless)
# save the patched driver to avoid re-downloads # save the patched driver to avoid re-downloads
if driver_exe_path is None: if driver_exe_path is None:
PATCHED_DRIVER_PATH = os.path.join(driver.patcher.data_path, driver.patcher.exe_name) PATCHED_DRIVER_PATH = os.path.join(driver.patcher.data_path, driver.patcher.exe_name)
shutil.copy(driver.patcher.executable_path, PATCHED_DRIVER_PATH) if PATCHED_DRIVER_PATH != driver.patcher.executable_path:
shutil.copy(driver.patcher.executable_path, PATCHED_DRIVER_PATH)
# clean up proxy extension directory
if proxy_extension_dir is not None:
shutil.rmtree(proxy_extension_dir)
# selenium vanilla # selenium vanilla
# options = webdriver.ChromeOptions() # options = webdriver.ChromeOptions()
@@ -201,6 +301,8 @@ def get_user_agent(driver=None) -> str:
if driver is None: if driver is None:
driver = get_webdriver() driver = get_webdriver()
USER_AGENT = driver.execute_script("return navigator.userAgent") USER_AGENT = driver.execute_script("return navigator.userAgent")
# Fix for Chrome 117 | https://github.com/FlareSolverr/FlareSolverr/issues/910
USER_AGENT = re.sub('HEADLESS', '', USER_AGENT, flags=re.IGNORECASE)
return USER_AGENT return USER_AGENT
except Exception as e: except Exception as e:
raise Exception("Error getting browser User-Agent. " + str(e)) raise Exception("Error getting browser User-Agent. " + str(e))