mirror of
https://github.com/FlareSolverr/FlareSolverr.git
synced 2025-12-05 17:18:19 +01:00
Compare commits
18 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d92845f34f | ||
|
|
5d3b73ea9d | ||
|
|
2aa095ed5d | ||
|
|
687c8f75ae | ||
|
|
22ed3d324b | ||
|
|
5ba9ef03f3 | ||
|
|
d2e144ea12 | ||
|
|
313fb2c14b | ||
|
|
6d69f40b58 | ||
|
|
a1c36f60d2 | ||
|
|
0edc50e271 | ||
|
|
f4a4baa57c | ||
|
|
f7e434c6e3 | ||
|
|
7728f2ab31 | ||
|
|
c920bea4ca | ||
|
|
a785f83034 | ||
|
|
b42c22f5b1 | ||
|
|
9c62410a8b |
4
.github/workflows/release.yml
vendored
4
.github/workflows/release.yml
vendored
@@ -54,7 +54,7 @@ jobs:
|
|||||||
- name: Build artifacts
|
- name: Build artifacts
|
||||||
run: |
|
run: |
|
||||||
python -m pip install -r requirements.txt
|
python -m pip install -r requirements.txt
|
||||||
python -m pip install pyinstaller==5.9.0
|
python -m pip install pyinstaller==5.13.0
|
||||||
cd src
|
cd src
|
||||||
python build_package.py
|
python build_package.py
|
||||||
|
|
||||||
@@ -83,7 +83,7 @@ jobs:
|
|||||||
- name: Build artifacts
|
- name: Build artifacts
|
||||||
run: |
|
run: |
|
||||||
python -m pip install -r requirements.txt
|
python -m pip install -r requirements.txt
|
||||||
python -m pip install pyinstaller==5.9.0
|
python -m pip install pyinstaller==5.13.0
|
||||||
cd src
|
cd src
|
||||||
python build_package.py
|
python build_package.py
|
||||||
|
|
||||||
|
|||||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -124,3 +124,6 @@ venv.bak/
|
|||||||
.mypy_cache/
|
.mypy_cache/
|
||||||
.dmypy.json
|
.dmypy.json
|
||||||
dmypy.json
|
dmypy.json
|
||||||
|
|
||||||
|
# node
|
||||||
|
node_modules/
|
||||||
17
CHANGELOG.md
17
CHANGELOG.md
@@ -1,5 +1,22 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
|
## v3.3.0 (2023/08/02)
|
||||||
|
|
||||||
|
* Fix for new Cloudflare detection. Thanks @cedric-bour for #845
|
||||||
|
* Add support for proxy authentication username/password. Thanks @jacobprice808 for #807
|
||||||
|
* Implement Prometheus metrics
|
||||||
|
* Fix Chromium Driver for Chrome / Chromium version > 114
|
||||||
|
* Use Chromium 115 in binary packages (Windows and Linux)
|
||||||
|
* Install Chromium 115 from Debian testing (Docker)
|
||||||
|
* Update base Docker image to Debian Bookworm
|
||||||
|
* Update Selenium 4.11.2
|
||||||
|
* Update pyinstaller 5.13.0
|
||||||
|
* Add more traces in build_package.py
|
||||||
|
|
||||||
|
## v3.2.2 (2023/07/16)
|
||||||
|
|
||||||
|
* Workaround for updated 'verify you are human' check
|
||||||
|
|
||||||
## v3.2.1 (2023/06/10)
|
## v3.2.1 (2023/06/10)
|
||||||
|
|
||||||
* Kill dead Chrome processes in Windows
|
* Kill dead Chrome processes in Windows
|
||||||
|
|||||||
23
Dockerfile
23
Dockerfile
@@ -1,4 +1,4 @@
|
|||||||
FROM python:3.11-slim-bullseye as builder
|
FROM python:3.11-slim-bookworm as builder
|
||||||
|
|
||||||
# Build dummy packages to skip installing them and their dependencies
|
# Build dummy packages to skip installing them and their dependencies
|
||||||
RUN apt-get update \
|
RUN apt-get update \
|
||||||
@@ -12,7 +12,7 @@ RUN apt-get update \
|
|||||||
&& equivs-build adwaita-icon-theme \
|
&& equivs-build adwaita-icon-theme \
|
||||||
&& mv adwaita-icon-theme_*.deb /adwaita-icon-theme.deb
|
&& mv adwaita-icon-theme_*.deb /adwaita-icon-theme.deb
|
||||||
|
|
||||||
FROM python:3.11-slim-bullseye
|
FROM python:3.11-slim-bookworm
|
||||||
|
|
||||||
# Copy dummy packages
|
# Copy dummy packages
|
||||||
COPY --from=builder /*.deb /
|
COPY --from=builder /*.deb /
|
||||||
@@ -27,12 +27,16 @@ WORKDIR /app
|
|||||||
# Install dummy packages
|
# Install dummy packages
|
||||||
RUN dpkg -i /libgl1-mesa-dri.deb \
|
RUN dpkg -i /libgl1-mesa-dri.deb \
|
||||||
&& dpkg -i /adwaita-icon-theme.deb \
|
&& dpkg -i /adwaita-icon-theme.deb \
|
||||||
|
# Use Testing packages. The latest version of Chromium is not available for ARM
|
||||||
|
&& sed -i 's/bookworm-updates/bookworm-updates testing/g' /etc/apt/sources.list.d/debian.sources \
|
||||||
# Install dependencies
|
# Install dependencies
|
||||||
&& apt-get update \
|
&& apt-get update \
|
||||||
&& apt-get install -y --no-install-recommends chromium chromium-common chromium-driver xvfb dumb-init \
|
&& apt-get install -y --no-install-recommends -t testing chromium chromium-common chromium-driver xvfb dumb-init \
|
||||||
procps curl vim xauth \
|
procps curl vim-tiny xauth \
|
||||||
# Remove temporary files and hardware decoding libraries
|
# Remove temporary files and hardware decoding libraries
|
||||||
&& rm -rf /var/lib/apt/lists/* \
|
&& rm -rf /var/lib/apt/lists/* \
|
||||||
|
&& rm -f /usr/lib/systemd/systemd* \
|
||||||
|
&& rm -f /usr/lib/x86_64-linux-gnu/systemd/* \
|
||||||
&& rm -f /usr/lib/x86_64-linux-gnu/libmfxhw* \
|
&& rm -f /usr/lib/x86_64-linux-gnu/libmfxhw* \
|
||||||
&& rm -f /usr/lib/x86_64-linux-gnu/mfx/* \
|
&& rm -f /usr/lib/x86_64-linux-gnu/mfx/* \
|
||||||
# Create flaresolverr user
|
# Create flaresolverr user
|
||||||
@@ -54,6 +58,7 @@ COPY src .
|
|||||||
COPY package.json ../
|
COPY package.json ../
|
||||||
|
|
||||||
EXPOSE 8191
|
EXPOSE 8191
|
||||||
|
EXPOSE 8192
|
||||||
|
|
||||||
# dumb-init avoids zombie chromium processes
|
# dumb-init avoids zombie chromium processes
|
||||||
ENTRYPOINT ["/usr/bin/dumb-init", "--"]
|
ENTRYPOINT ["/usr/bin/dumb-init", "--"]
|
||||||
@@ -61,17 +66,17 @@ ENTRYPOINT ["/usr/bin/dumb-init", "--"]
|
|||||||
CMD ["/usr/local/bin/python", "-u", "/app/flaresolverr.py"]
|
CMD ["/usr/local/bin/python", "-u", "/app/flaresolverr.py"]
|
||||||
|
|
||||||
# Local build
|
# Local build
|
||||||
# docker build -t ngosang/flaresolverr:3.2.0 .
|
# docker build -t ngosang/flaresolverr:3.3.0 .
|
||||||
# docker run -p 8191:8191 ngosang/flaresolverr:3.2.0
|
# docker run -p 8191:8191 ngosang/flaresolverr:3.3.0
|
||||||
|
|
||||||
# Multi-arch build
|
# Multi-arch build
|
||||||
# docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
|
# docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
|
||||||
# docker buildx create --use
|
# docker buildx create --use
|
||||||
# docker buildx build -t ngosang/flaresolverr:3.2.0 --platform linux/386,linux/amd64,linux/arm/v7,linux/arm64/v8 .
|
# docker buildx build -t ngosang/flaresolverr:3.3.0 --platform linux/386,linux/amd64,linux/arm/v7,linux/arm64/v8 .
|
||||||
# add --push to publish in DockerHub
|
# add --push to publish in DockerHub
|
||||||
|
|
||||||
# Test multi-arch build
|
# Test multi-arch build
|
||||||
# docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
|
# docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
|
||||||
# docker buildx create --use
|
# docker buildx create --use
|
||||||
# docker buildx build -t ngosang/flaresolverr:3.2.0 --platform linux/arm/v7 --load .
|
# docker buildx build -t ngosang/flaresolverr:3.3.0 --platform linux/arm/v7 --load .
|
||||||
# docker run -p 8191:8191 --platform linux/arm/v7 ngosang/flaresolverr:3.2.0
|
# docker run -p 8191:8191 --platform linux/arm/v7 ngosang/flaresolverr:3.3.0
|
||||||
|
|||||||
58
README.md
58
README.md
@@ -110,10 +110,10 @@ cookies for the browser to use.
|
|||||||
|
|
||||||
This also speeds up the requests since it won't have to launch a new browser instance for every request.
|
This also speeds up the requests since it won't have to launch a new browser instance for every request.
|
||||||
|
|
||||||
| Parameter | Notes |
|
| Parameter | Notes |
|
||||||
|-----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
|-----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||||
| session | Optional. The session ID that you want to be assigned to the instance. If it isn't set, a random UUID will be assigned. |
|
| session | Optional. The session ID that you want to be assigned to the instance. If it isn't set, a random UUID will be assigned. |
|
||||||
| proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. You must include the proxy schema in the URL: `http://`, `socks4://` or `socks5://`. Authorization (username/password) is not supported. |
|
| proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. You must include the proxy schema in the URL: `http://`, `socks4://` or `socks5://`. Authorization (username/password) is supported. Eg: `"proxy": {"url": "http://127.0.0.1:8888", "username": "testuser", "password": "testpass"}` |
|
||||||
|
|
||||||
#### + `sessions.list`
|
#### + `sessions.list`
|
||||||
|
|
||||||
@@ -226,23 +226,51 @@ This is the same as `request.get` but it takes one more param:
|
|||||||
|
|
||||||
## Environment variables
|
## Environment variables
|
||||||
|
|
||||||
| Name | Default | Notes |
|
| Name | Default | Notes |
|
||||||
|-----------------|------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
|--------------------|------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||||
| LOG_LEVEL | info | Verbosity of the logging. Use `LOG_LEVEL=debug` for more information. |
|
| LOG_LEVEL | info | Verbosity of the logging. Use `LOG_LEVEL=debug` for more information. |
|
||||||
| LOG_HTML | false | Only for debugging. If `true` all HTML that passes through the proxy will be logged to the console in `debug` level. |
|
| LOG_HTML | false | Only for debugging. If `true` all HTML that passes through the proxy will be logged to the console in `debug` level. |
|
||||||
| CAPTCHA_SOLVER | none | Captcha solving method. It is used when a captcha is encountered. See the Captcha Solvers section. |
|
| CAPTCHA_SOLVER | none | Captcha solving method. It is used when a captcha is encountered. See the Captcha Solvers section. |
|
||||||
| TZ | UTC | Timezone used in the logs and the web browser. Example: `TZ=Europe/London`. |
|
| TZ | UTC | Timezone used in the logs and the web browser. Example: `TZ=Europe/London`. |
|
||||||
| HEADLESS | true | Only for debugging. To run the web browser in headless mode or visible. |
|
| HEADLESS | true | Only for debugging. To run the web browser in headless mode or visible. |
|
||||||
| BROWSER_TIMEOUT | 40000 | If you are experiencing errors/timeouts because your system is slow, you can try to increase this value. Remember to increase the `maxTimeout` parameter too. |
|
| BROWSER_TIMEOUT | 40000 | If you are experiencing errors/timeouts because your system is slow, you can try to increase this value. Remember to increase the `maxTimeout` parameter too. |
|
||||||
| TEST_URL | https://www.google.com | FlareSolverr makes a request on start to make sure the web browser is working. You can change that URL if it is blocked in your country. |
|
| TEST_URL | https://www.google.com | FlareSolverr makes a request on start to make sure the web browser is working. You can change that URL if it is blocked in your country. |
|
||||||
| PORT | 8191 | Listening port. You don't need to change this if you are running on Docker. |
|
| PORT | 8191 | Listening port. You don't need to change this if you are running on Docker. |
|
||||||
| HOST | 0.0.0.0 | Listening interface. You don't need to change this if you are running on Docker. |
|
| HOST | 0.0.0.0 | Listening interface. You don't need to change this if you are running on Docker. |
|
||||||
|
| PROMETHEUS_ENABLED | false | Enable Prometheus exporter. See the Prometheus section below. |
|
||||||
|
| PROMETHEUS_PORT | 8192 | Listening port for Prometheus exporter. See the Prometheus section below. |
|
||||||
|
|
||||||
Environment variables are set differently depending on the operating system. Some examples:
|
Environment variables are set differently depending on the operating system. Some examples:
|
||||||
* Docker: Take a look at the Docker section in this document. Environment variables can be set in the `docker-compose.yml` file or in the Docker CLI command.
|
* Docker: Take a look at the Docker section in this document. Environment variables can be set in the `docker-compose.yml` file or in the Docker CLI command.
|
||||||
* Linux: Run `export LOG_LEVEL=debug` and then start FlareSolverr in the same shell.
|
* Linux: Run `export LOG_LEVEL=debug` and then start FlareSolverr in the same shell.
|
||||||
* Windows: Open `cmd.exe`, run `set LOG_LEVEL=debug` and then start FlareSolverr in the same shell.
|
* Windows: Open `cmd.exe`, run `set LOG_LEVEL=debug` and then start FlareSolverr in the same shell.
|
||||||
|
|
||||||
|
## Prometheus exporter
|
||||||
|
|
||||||
|
The Prometheus exporter for FlareSolverr is disabled by default. It can be enabled with the environment variable `PROMETHEUS_ENABLED`. If you are using Docker make sure you expose the `PROMETHEUS_PORT`.
|
||||||
|
|
||||||
|
Example metrics:
|
||||||
|
```shell
|
||||||
|
# HELP flaresolverr_request_total Total requests with result
|
||||||
|
# TYPE flaresolverr_request_total counter
|
||||||
|
flaresolverr_request_total{domain="nowsecure.nl",result="solved"} 1.0
|
||||||
|
# HELP flaresolverr_request_created Total requests with result
|
||||||
|
# TYPE flaresolverr_request_created gauge
|
||||||
|
flaresolverr_request_created{domain="nowsecure.nl",result="solved"} 1.690141657157109e+09
|
||||||
|
# HELP flaresolverr_request_duration Request duration in seconds
|
||||||
|
# TYPE flaresolverr_request_duration histogram
|
||||||
|
flaresolverr_request_duration_bucket{domain="nowsecure.nl",le="0.0"} 0.0
|
||||||
|
flaresolverr_request_duration_bucket{domain="nowsecure.nl",le="10.0"} 1.0
|
||||||
|
flaresolverr_request_duration_bucket{domain="nowsecure.nl",le="25.0"} 1.0
|
||||||
|
flaresolverr_request_duration_bucket{domain="nowsecure.nl",le="50.0"} 1.0
|
||||||
|
flaresolverr_request_duration_bucket{domain="nowsecure.nl",le="+Inf"} 1.0
|
||||||
|
flaresolverr_request_duration_count{domain="nowsecure.nl"} 1.0
|
||||||
|
flaresolverr_request_duration_sum{domain="nowsecure.nl"} 5.858
|
||||||
|
# HELP flaresolverr_request_duration_created Request duration in seconds
|
||||||
|
# TYPE flaresolverr_request_duration_created gauge
|
||||||
|
flaresolverr_request_duration_created{domain="nowsecure.nl"} 1.6901416571570296e+09
|
||||||
|
```
|
||||||
|
|
||||||
## Captcha Solvers
|
## Captcha Solvers
|
||||||
|
|
||||||
> **Warning**
|
> **Warning**
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "flaresolverr",
|
"name": "flaresolverr",
|
||||||
"version": "3.2.1",
|
"version": "3.3.0",
|
||||||
"description": "Proxy server to bypass Cloudflare protection",
|
"description": "Proxy server to bypass Cloudflare protection",
|
||||||
"author": "Diego Heras (ngosang / ngosang@hotmail.es)",
|
"author": "Diego Heras (ngosang / ngosang@hotmail.es)",
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
|
|||||||
@@ -1,10 +1,11 @@
|
|||||||
bottle==0.12.25
|
bottle==0.12.25
|
||||||
waitress==2.1.2
|
waitress==2.1.2
|
||||||
selenium==4.9.1
|
selenium==4.11.2
|
||||||
func-timeout==4.3.5
|
func-timeout==4.3.5
|
||||||
|
prometheus-client==0.17.1
|
||||||
# required by undetected_chromedriver
|
# required by undetected_chromedriver
|
||||||
requests==2.31.0
|
requests==2.31.0
|
||||||
certifi==2023.5.7
|
certifi==2023.7.22
|
||||||
websockets==11.0.3
|
websockets==11.0.3
|
||||||
# only required for linux
|
# only required for linux
|
||||||
xvfbwrapper==0.2.9
|
xvfbwrapper==0.2.9
|
||||||
|
|||||||
53
src/bottle_plugins/prometheus_plugin.py
Normal file
53
src/bottle_plugins/prometheus_plugin.py
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
|
from dtos import V1ResponseBase
|
||||||
|
from metrics import start_metrics_http_server, REQUEST_COUNTER, REQUEST_DURATION
|
||||||
|
|
||||||
|
# Prometheus exporter settings, read once from the environment at import time.
# The exporter is enabled only when PROMETHEUS_ENABLED is "true" (any letter case).
PROMETHEUS_ENABLED = os.getenv('PROMETHEUS_ENABLED', 'false').lower() == 'true'
# Port handed to the metrics HTTP server; a non-numeric value raises ValueError here.
PROMETHEUS_PORT = int(os.getenv('PROMETHEUS_PORT', 8192))
|
||||||
|
|
||||||
|
|
||||||
|
def setup():
    """Start the Prometheus metrics HTTP server, but only when the exporter is enabled."""
    if not PROMETHEUS_ENABLED:
        return
    start_metrics_http_server(PROMETHEUS_PORT)
|
||||||
|
|
||||||
|
|
||||||
|
def prometheus_plugin(callback):
    """
    Bottle plugin to expose Prometheus metrics
    http://bottlepy.org/docs/dev/plugindev.html

    The wrapped callback always runs and its response is always returned
    unchanged. When PROMETHEUS_ENABLED is true, the response is additionally
    recorded in REQUEST_DURATION and REQUEST_COUNTER. Any error raised while
    recording is logged as a warning and never reaches the caller.

    :param callback: the route callback to wrap
    :return: the wrapping function
    """
    def wrapper(*args, **kwargs):
        actual_response = callback(*args, **kwargs)

        if PROMETHEUS_ENABLED:
            try:
                export_metrics(actual_response)
            except Exception as e:
                # metrics are best effort: never break the real response
                logging.warning("Error exporting metrics: %s", e)

        return actual_response

    def export_metrics(actual_response):
        """Update the duration histogram and the result counter from one response."""
        res = V1ResponseBase(actual_response)

        domain = "unknown"
        if res.solution and res.solution.url:
            parsed_url = urllib.parse.urlparse(res.solution.url)
            # hostname is None when the URL has no network location
            # (e.g. "data:" URLs); keep the "unknown" label in that case
            domain = parsed_url.hostname or "unknown"

        # Classify the result BEFORE touching any metric, so a failure here
        # cannot leave the histogram updated while the counter is not.
        # A missing / non-string message is reported as "unknown".
        message = res.message if isinstance(res.message, str) else ""
        result = "unknown"
        if message == "Challenge solved!":
            result = "solved"
        elif message == "Challenge not detected!":
            result = "not_detected"
        elif message.startswith("Error"):
            result = "error"

        # NOTE(review): timestamps look like milliseconds (the histogram is
        # documented in seconds, hence the division) - confirm against dtos
        run_time = (res.endTimestamp - res.startTimestamp) / 1000
        REQUEST_DURATION.labels(domain=domain).observe(run_time)
        REQUEST_COUNTER.labels(domain=domain, result=result).inc()

    return wrapper
|
||||||
@@ -25,8 +25,8 @@ def clean_files():
|
|||||||
|
|
||||||
def download_chromium():
|
def download_chromium():
|
||||||
# https://commondatastorage.googleapis.com/chromium-browser-snapshots/index.html?prefix=Linux_x64/
|
# https://commondatastorage.googleapis.com/chromium-browser-snapshots/index.html?prefix=Linux_x64/
|
||||||
revision = "1090006" if os.name == 'nt' else '1090007'
|
revision = "1140001" if os.name == 'nt' else '1140000'
|
||||||
arch = 'Win' if os.name == 'nt' else 'Linux_x64'
|
arch = 'Win_x64' if os.name == 'nt' else 'Linux_x64'
|
||||||
dl_file = 'chrome-win' if os.name == 'nt' else 'chrome-linux'
|
dl_file = 'chrome-win' if os.name == 'nt' else 'chrome-linux'
|
||||||
dl_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'dist_chrome')
|
dl_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'dist_chrome')
|
||||||
dl_path_folder = os.path.join(dl_path, dl_file)
|
dl_path_folder = os.path.join(dl_path, dl_file)
|
||||||
@@ -68,12 +68,15 @@ def download_chromium():
|
|||||||
|
|
||||||
def run_pyinstaller():
|
def run_pyinstaller():
|
||||||
sep = ';' if os.name == 'nt' else ':'
|
sep = ';' if os.name == 'nt' else ':'
|
||||||
subprocess.check_call([sys.executable, "-m", "PyInstaller",
|
result = subprocess.run([sys.executable, "-m", "PyInstaller",
|
||||||
"--icon", "resources/flaresolverr_logo.ico",
|
"--icon", "resources/flaresolverr_logo.ico",
|
||||||
"--add-data", f"package.json{sep}.",
|
"--add-data", f"package.json{sep}.",
|
||||||
"--add-data", f"{os.path.join('dist_chrome', 'chrome')}{sep}chrome",
|
"--add-data", f"{os.path.join('dist_chrome', 'chrome')}{sep}chrome",
|
||||||
os.path.join("src", "flaresolverr.py")],
|
os.path.join("src", "flaresolverr.py")],
|
||||||
cwd=os.pardir)
|
cwd=os.pardir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||||
|
if result.returncode != 0:
|
||||||
|
print(result.stderr.decode('utf-8'))
|
||||||
|
raise Exception("Error running pyInstaller")
|
||||||
|
|
||||||
|
|
||||||
def compress_package():
|
def compress_package():
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ from bottle import run, response, Bottle, request, ServerAdapter
|
|||||||
|
|
||||||
from bottle_plugins.error_plugin import error_plugin
|
from bottle_plugins.error_plugin import error_plugin
|
||||||
from bottle_plugins.logger_plugin import logger_plugin
|
from bottle_plugins.logger_plugin import logger_plugin
|
||||||
|
from bottle_plugins import prometheus_plugin
|
||||||
from dtos import V1RequestBase
|
from dtos import V1RequestBase
|
||||||
import flaresolverr_service
|
import flaresolverr_service
|
||||||
import utils
|
import utils
|
||||||
@@ -24,10 +25,6 @@ class JSONErrorBottle(Bottle):
|
|||||||
|
|
||||||
app = JSONErrorBottle()
|
app = JSONErrorBottle()
|
||||||
|
|
||||||
# plugin order is important
|
|
||||||
app.install(logger_plugin)
|
|
||||||
app.install(error_plugin)
|
|
||||||
|
|
||||||
|
|
||||||
@app.route('/')
|
@app.route('/')
|
||||||
def index():
|
def index():
|
||||||
@@ -101,6 +98,13 @@ if __name__ == "__main__":
|
|||||||
# test browser installation
|
# test browser installation
|
||||||
flaresolverr_service.test_browser_installation()
|
flaresolverr_service.test_browser_installation()
|
||||||
|
|
||||||
|
# start bottle plugins
|
||||||
|
# plugin order is important
|
||||||
|
app.install(logger_plugin)
|
||||||
|
app.install(error_plugin)
|
||||||
|
prometheus_plugin.setup()
|
||||||
|
app.install(prometheus_plugin.prometheus_plugin)
|
||||||
|
|
||||||
# start webserver
|
# start webserver
|
||||||
# default server 'wsgiref' does not support concurrent requests
|
# default server 'wsgiref' does not support concurrent requests
|
||||||
# https://github.com/FlareSolverr/FlareSolverr/issues/680
|
# https://github.com/FlareSolverr/FlareSolverr/issues/680
|
||||||
|
|||||||
@@ -251,7 +251,7 @@ def _resolve_challenge(req: V1RequestBase, method: str) -> ChallengeResolutionT:
|
|||||||
|
|
||||||
def click_verify(driver: WebDriver):
|
def click_verify(driver: WebDriver):
|
||||||
try:
|
try:
|
||||||
logging.debug("Try to find the Cloudflare verify checkbox")
|
logging.debug("Try to find the Cloudflare verify checkbox...")
|
||||||
iframe = driver.find_element(By.XPATH, "//iframe[@title='Widget containing a Cloudflare security challenge']")
|
iframe = driver.find_element(By.XPATH, "//iframe[@title='Widget containing a Cloudflare security challenge']")
|
||||||
driver.switch_to.frame(iframe)
|
driver.switch_to.frame(iframe)
|
||||||
checkbox = driver.find_element(
|
checkbox = driver.find_element(
|
||||||
@@ -263,14 +263,14 @@ def click_verify(driver: WebDriver):
|
|||||||
actions.move_to_element_with_offset(checkbox, 5, 7)
|
actions.move_to_element_with_offset(checkbox, 5, 7)
|
||||||
actions.click(checkbox)
|
actions.click(checkbox)
|
||||||
actions.perform()
|
actions.perform()
|
||||||
logging.debug("Cloudflare verify checkbox found and clicked")
|
logging.debug("Cloudflare verify checkbox found and clicked!")
|
||||||
except Exception:
|
except Exception as e:
|
||||||
logging.debug("Cloudflare verify checkbox not found on the page")
|
logging.debug("Cloudflare verify checkbox not found on the page. Error: " + str(e))
|
||||||
finally:
|
finally:
|
||||||
driver.switch_to.default_content()
|
driver.switch_to.default_content()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
logging.debug("Try to find the Cloudflare 'Verify you are human' button")
|
logging.debug("Try to find the Cloudflare 'Verify you are human' button...")
|
||||||
button = driver.find_element(
|
button = driver.find_element(
|
||||||
by=By.XPATH,
|
by=By.XPATH,
|
||||||
value="//input[@type='button' and @value='Verify you are human']",
|
value="//input[@type='button' and @value='Verify you are human']",
|
||||||
@@ -280,10 +280,9 @@ def click_verify(driver: WebDriver):
|
|||||||
actions.move_to_element_with_offset(button, 5, 7)
|
actions.move_to_element_with_offset(button, 5, 7)
|
||||||
actions.click(button)
|
actions.click(button)
|
||||||
actions.perform()
|
actions.perform()
|
||||||
logging.debug("The Cloudflare 'Verify you are human' button found and clicked")
|
logging.debug("The Cloudflare 'Verify you are human' button found and clicked!")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.debug("The Cloudflare 'Verify you are human' button not found on the page")
|
logging.debug("The Cloudflare 'Verify you are human' button not found on the page. Error: " + str(e))
|
||||||
# print(e)
|
|
||||||
|
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
|
|
||||||
@@ -298,7 +297,8 @@ def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> Challenge
|
|||||||
if method == 'POST':
|
if method == 'POST':
|
||||||
_post_request(req, driver)
|
_post_request(req, driver)
|
||||||
else:
|
else:
|
||||||
driver.get(req.url)
|
with driver:
|
||||||
|
driver.get(req.url)
|
||||||
|
|
||||||
# set cookies if required
|
# set cookies if required
|
||||||
if req.cookies is not None and len(req.cookies) > 0:
|
if req.cookies is not None and len(req.cookies) > 0:
|
||||||
@@ -310,7 +310,8 @@ def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> Challenge
|
|||||||
if method == 'POST':
|
if method == 'POST':
|
||||||
_post_request(req, driver)
|
_post_request(req, driver)
|
||||||
else:
|
else:
|
||||||
driver.get(req.url)
|
with driver:
|
||||||
|
driver.get(req.url)
|
||||||
|
|
||||||
# wait for the page
|
# wait for the page
|
||||||
if utils.get_config_log_html():
|
if utils.get_config_log_html():
|
||||||
@@ -429,4 +430,5 @@ def _post_request(req: V1RequestBase, driver: WebDriver):
|
|||||||
<script>document.getElementById('hackForm').submit();</script>
|
<script>document.getElementById('hackForm').submit();</script>
|
||||||
</body>
|
</body>
|
||||||
</html>"""
|
</html>"""
|
||||||
driver.get("data:text/html;charset=utf-8," + html_content)
|
with driver:
|
||||||
|
driver.get("data:text/html;charset=utf-8," + html_content)
|
||||||
|
|||||||
32
src/metrics.py
Normal file
32
src/metrics.py
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
import logging
|
||||||
|
|
||||||
|
from prometheus_client import Counter, Histogram, start_http_server
|
||||||
|
import time
|
||||||
|
|
||||||
|
# Counts handled requests, labelled by target domain and by outcome.
REQUEST_COUNTER = Counter(
    'flaresolverr_request',
    'Total requests with result',
    labelnames=['domain', 'result'],
)

# Distribution of request durations (seconds), labelled by target domain.
REQUEST_DURATION = Histogram(
    'flaresolverr_request_duration',
    'Request duration in seconds',
    labelnames=['domain'],
    buckets=[0, 10, 25, 50],
)
|
||||||
|
|
||||||
|
|
||||||
|
def serve(port):
    """
    Start the Prometheus HTTP server on the given port and then block forever.

    This function never returns: after starting the server it only sleeps in
    an endless loop, so it is meant to be used as a thread target.
    """
    start_http_server(port=port)
    idle_seconds = 600
    while True:
        time.sleep(idle_seconds)
|
||||||
|
|
||||||
|
|
||||||
|
def start_metrics_http_server(prometheus_port: int):
    """
    Launch the Prometheus exporter in a background daemon thread.

    The thread runs serve() with the given port; being a daemon thread it
    does not keep the process alive on shutdown.
    """
    from threading import Thread

    logging.info(f"Serving Prometheus exporter on http://0.0.0.0:{prometheus_port}/metrics")
    exporter_thread = Thread(target=serve, kwargs={"port": prometheus_port}, daemon=True)
    exporter_thread.start()
|
||||||
36
src/tests.py
36
src/tests.py
@@ -335,6 +335,42 @@ class TestFlareSolverr(unittest.TestCase):
|
|||||||
self.assertGreater(len(solution.cookies), 0)
|
self.assertGreater(len(solution.cookies), 0)
|
||||||
self.assertIn("Chrome/", solution.userAgent)
|
self.assertIn("Chrome/", solution.userAgent)
|
||||||
|
|
||||||
|
def test_v1_endpoint_request_get_proxy_http_param_with_credentials(self):
    """
    request.get through an HTTP proxy that requires username / password.

    To configure TinyProxy in local:
       * sudo vim /etc/tinyproxy/tinyproxy.conf
          * edit => LogFile "/tmp/tinyproxy.log"
          * edit => Syslog Off
          * add => BasicAuth testuser testpass
       * sudo tinyproxy -d
       * sudo tail -f /tmp/tinyproxy.log
    """
    res = self.app.post_json('/v1', {
        "cmd": "request.get",
        "url": self.google_url,
        "proxy": {
            "url": self.proxy_url,
            "username": "testuser",
            "password": "testpass"
        }
    })
    self.assertEqual(res.status_code, 200)

    body = V1ResponseBase(res.json)
    self.assertEqual(STATUS_OK, body.status)
    self.assertEqual("Challenge not detected!", body.message)
    self.assertGreater(body.startTimestamp, 10000)
    self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
    self.assertEqual(utils.get_flaresolverr_version(), body.version)

    solution = body.solution
    self.assertIn(self.google_url, solution.url)
    self.assertEqual(solution.status, 200)
    # compare by value: assertIs on an int only works by accident of
    # CPython's small-integer caching
    self.assertEqual(len(solution.headers), 0)
    self.assertIn("<title>Google</title>", solution.response)
    self.assertGreater(len(solution.cookies), 0)
    self.assertIn("Chrome/", solution.userAgent)
|
||||||
|
|
||||||
def test_v1_endpoint_request_get_proxy_socks_param(self):
|
def test_v1_endpoint_request_get_proxy_socks_param(self):
|
||||||
"""
|
"""
|
||||||
To configure Dante in local:
|
To configure Dante in local:
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
|
||||||
__version__ = "3.4.6"
|
__version__ = "3.5.0"
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
@@ -33,7 +33,7 @@ from weakref import finalize
|
|||||||
import selenium.webdriver.chrome.service
|
import selenium.webdriver.chrome.service
|
||||||
import selenium.webdriver.chrome.webdriver
|
import selenium.webdriver.chrome.webdriver
|
||||||
from selenium.webdriver.common.by import By
|
from selenium.webdriver.common.by import By
|
||||||
import selenium.webdriver.common.service
|
import selenium.webdriver.chromium.service
|
||||||
import selenium.webdriver.remote.command
|
import selenium.webdriver.remote.command
|
||||||
import selenium.webdriver.remote.webdriver
|
import selenium.webdriver.remote.webdriver
|
||||||
|
|
||||||
@@ -109,11 +109,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
browser_executable_path=None,
|
browser_executable_path=None,
|
||||||
port=0,
|
port=0,
|
||||||
enable_cdp_events=False,
|
enable_cdp_events=False,
|
||||||
service_args=None,
|
# service_args=None,
|
||||||
service_creationflags=None,
|
# service_creationflags=None,
|
||||||
desired_capabilities=None,
|
desired_capabilities=None,
|
||||||
advanced_elements=False,
|
advanced_elements=False,
|
||||||
service_log_path=None,
|
# service_log_path=None,
|
||||||
keep_alive=True,
|
keep_alive=True,
|
||||||
log_level=0,
|
log_level=0,
|
||||||
headless=False,
|
headless=False,
|
||||||
@@ -122,8 +122,9 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
suppress_welcome=True,
|
suppress_welcome=True,
|
||||||
use_subprocess=False,
|
use_subprocess=False,
|
||||||
debug=False,
|
debug=False,
|
||||||
no_sandbox=True,
|
no_sandbox=True,
|
||||||
windows_headless=False,
|
windows_headless=False,
|
||||||
|
user_multi_procs: bool = False,
|
||||||
**kw,
|
**kw,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
@@ -235,6 +236,14 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
uses the --no-sandbox option, and additionally does suppress the "unsecure option" status bar
|
uses the --no-sandbox option, and additionally does suppress the "unsecure option" status bar
|
||||||
this option has a default of True since many people seem to run this as root (....) , and chrome does not start
|
this option has a default of True since many people seem to run this as root (....) , and chrome does not start
|
||||||
when running as root without using --no-sandbox flag.
|
when running as root without using --no-sandbox flag.
|
||||||
|
|
||||||
|
user_multi_procs:
|
||||||
|
set to true when you are using multithreads/multiprocessing
|
||||||
|
ensures not all processes are trying to modify a binary which is in use by another.
|
||||||
|
for this to work. YOU MUST HAVE AT LEAST 1 UNDETECTED_CHROMEDRIVER BINARY IN YOUR ROAMING DATA FOLDER.
|
||||||
|
this requirement can be easily satisfied, by just running this program "normal" and close/kill it.
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
finalize(self, self._ensure_close, self)
|
finalize(self, self._ensure_close, self)
|
||||||
@@ -243,8 +252,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
executable_path=driver_executable_path,
|
executable_path=driver_executable_path,
|
||||||
force=patcher_force_close,
|
force=patcher_force_close,
|
||||||
version_main=version_main,
|
version_main=version_main,
|
||||||
|
user_multi_procs=user_multi_procs,
|
||||||
)
|
)
|
||||||
|
# self.patcher.auto(user_multiprocess = user_multi_num_procs)
|
||||||
self.patcher.auto()
|
self.patcher.auto()
|
||||||
|
|
||||||
# self.patcher = patcher
|
# self.patcher = patcher
|
||||||
if not options:
|
if not options:
|
||||||
options = ChromeOptions()
|
options = ChromeOptions()
|
||||||
@@ -372,10 +384,16 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
options.arguments.extend(["--no-sandbox", "--test-type"])
|
options.arguments.extend(["--no-sandbox", "--test-type"])
|
||||||
|
|
||||||
if headless or options.headless:
|
if headless or options.headless:
|
||||||
v_main = int(self.patcher.version_main) if self.patcher.version_main else 108
|
#workaround until a better checking is found
|
||||||
if v_main < 108:
|
try:
|
||||||
options.add_argument("--headless=chrome")
|
v_main = int(self.patcher.version_main) if self.patcher.version_main else 108
|
||||||
elif v_main >= 108:
|
if v_main < 108:
|
||||||
|
options.add_argument("--headless=chrome")
|
||||||
|
elif v_main >= 108:
|
||||||
|
options.add_argument("--headless=new")
|
||||||
|
except:
|
||||||
|
logger.warning("could not detect version_main."
|
||||||
|
"therefore, we are assuming it is chrome 108 or higher")
|
||||||
options.add_argument("--headless=new")
|
options.add_argument("--headless=new")
|
||||||
|
|
||||||
options.add_argument("--window-size=1920,1080")
|
options.add_argument("--window-size=1920,1080")
|
||||||
@@ -419,40 +437,31 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
self.browser_pid = start_detached(
|
self.browser_pid = start_detached(
|
||||||
options.binary_location, *options.arguments
|
options.binary_location, *options.arguments
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
startupinfo = subprocess.STARTUPINFO()
|
startupinfo = subprocess.STARTUPINFO()
|
||||||
if os.name == 'nt' and windows_headless:
|
if os.name == 'nt' and windows_headless:
|
||||||
startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
|
startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
|
||||||
browser = subprocess.Popen(
|
browser = subprocess.Popen(
|
||||||
[options.binary_location, *options.arguments],
|
[options.binary_location, *options.arguments],
|
||||||
stdin=subprocess.PIPE,
|
stdin=subprocess.PIPE,
|
||||||
stdout=subprocess.PIPE,
|
stdout=subprocess.PIPE,
|
||||||
stderr=subprocess.PIPE,
|
stderr=subprocess.PIPE,
|
||||||
close_fds=IS_POSIX,
|
close_fds=IS_POSIX,
|
||||||
startupinfo=startupinfo
|
startupinfo=startupinfo
|
||||||
)
|
)
|
||||||
self.browser_pid = browser.pid
|
self.browser_pid = browser.pid
|
||||||
|
|
||||||
if service_creationflags:
|
# Fix for Chrome 115
|
||||||
service = selenium.webdriver.common.service.Service(
|
# https://github.com/seleniumbase/SeleniumBase/pull/1967
|
||||||
self.patcher.executable_path, port, service_args, service_log_path
|
service = selenium.webdriver.chromium.service.ChromiumService(
|
||||||
)
|
executable_path=self.patcher.executable_path,
|
||||||
for attr_name in ("creationflags", "creation_flags"):
|
service_args=["--disable-build-check"]
|
||||||
if hasattr(service, attr_name):
|
)
|
||||||
setattr(service, attr_name, service_creationflags)
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
service = None
|
|
||||||
|
|
||||||
super(Chrome, self).__init__(
|
super(Chrome, self).__init__(
|
||||||
executable_path=self.patcher.executable_path,
|
service=service,
|
||||||
port=port,
|
|
||||||
options=options,
|
options=options,
|
||||||
service_args=service_args,
|
|
||||||
desired_capabilities=desired_capabilities,
|
|
||||||
service_log_path=service_log_path,
|
|
||||||
keep_alive=keep_alive,
|
keep_alive=keep_alive,
|
||||||
service=service, # needed or the service will be re-created
|
|
||||||
)
|
)
|
||||||
|
|
||||||
self.reactor = None
|
self.reactor = None
|
||||||
@@ -708,13 +717,48 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
if not capabilities:
|
if not capabilities:
|
||||||
capabilities = self.options.to_capabilities()
|
capabilities = self.options.to_capabilities()
|
||||||
super(selenium.webdriver.chrome.webdriver.WebDriver, self).start_session(
|
super(selenium.webdriver.chrome.webdriver.WebDriver, self).start_session(
|
||||||
capabilities, browser_profile
|
capabilities
|
||||||
)
|
)
|
||||||
# super(Chrome, self).start_session(capabilities, browser_profile)
|
# super(Chrome, self).start_session(capabilities, browser_profile)
|
||||||
|
|
||||||
|
def find_elements_recursive(self, by, value):
    """
    Lazily locate matching elements in the main document and in every iframe.

    Implemented as a generator on purpose: a returned list would contain
    elements that are already stale, because the driver has to leave a frame
    before searching the next one. Each element is yielded while the driver
    is still switched to the frame that owns it, so it can be used directly.

    Args:
        by: By
        value: str

    Returns: Generator[webelement.WebElement]
    """

    def _walk(frame=None):
        # select the search context: a given iframe, or the main content
        if frame:
            self.switch_to.frame(frame)
        else:
            self.switch_to.default_content()
        yield from self.find_elements(by, value)
        # go back to main content, otherwise the next lookup raises
        # StaleElementReferenceException
        self.switch_to.default_content()

    # root document first
    yield from _walk()

    # then every iframe found in the root document
    for frame in self.find_elements('css selector', 'iframe'):
        yield from _walk(frame)
|
||||||
|
|
||||||
def quit(self):
|
def quit(self):
|
||||||
try:
|
try:
|
||||||
self.service.process.kill()
|
self.service.process.kill()
|
||||||
self.service.process.wait(5)
|
self.service.process.wait(5)
|
||||||
logger.debug("webdriver process ended")
|
logger.debug("webdriver process ended")
|
||||||
except (AttributeError, RuntimeError, OSError):
|
except (AttributeError, RuntimeError, OSError):
|
||||||
@@ -728,7 +772,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
os.kill(self.browser_pid, 15)
|
os.kill(self.browser_pid, 15)
|
||||||
logger.debug("gracefully closed browser")
|
logger.debug("gracefully closed browser")
|
||||||
except Exception as e: # noqa
|
except Exception as e: # noqa
|
||||||
logger.debug(e, exc_info=True)
|
pass
|
||||||
# Force kill Chrome process in Windows
|
# Force kill Chrome process in Windows
|
||||||
# https://github.com/FlareSolverr/FlareSolverr/issues/772
|
# https://github.com/FlareSolverr/FlareSolverr/issues/772
|
||||||
if os.name == 'nt':
|
if os.name == 'nt':
|
||||||
@@ -856,5 +900,7 @@ def find_chrome_executable():
|
|||||||
):
|
):
|
||||||
candidates.add(os.sep.join((item, subitem, "chrome.exe")))
|
candidates.add(os.sep.join((item, subitem, "chrome.exe")))
|
||||||
for candidate in candidates:
|
for candidate in candidates:
|
||||||
|
logger.debug('checking if %s exists and is executable' % candidate)
|
||||||
if os.path.exists(candidate) and os.access(candidate, os.X_OK):
|
if os.path.exists(candidate) and os.access(candidate, os.X_OK):
|
||||||
|
logger.debug('found! using %s' % candidate)
|
||||||
return os.path.normpath(candidate)
|
return os.path.normpath(candidate)
|
||||||
|
|||||||
@@ -5,15 +5,17 @@ from distutils.version import LooseVersion
|
|||||||
import io
|
import io
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import pathlib
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
|
import shutil
|
||||||
import string
|
import string
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
from urllib.request import urlopen
|
from urllib.request import urlopen
|
||||||
from urllib.request import urlretrieve
|
from urllib.request import urlretrieve
|
||||||
import zipfile
|
import zipfile
|
||||||
|
from multiprocessing import Lock
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -21,6 +23,7 @@ IS_POSIX = sys.platform.startswith(("darwin", "cygwin", "linux", "linux2"))
|
|||||||
|
|
||||||
|
|
||||||
class Patcher(object):
|
class Patcher(object):
|
||||||
|
lock = Lock()
|
||||||
url_repo = "https://chromedriver.storage.googleapis.com"
|
url_repo = "https://chromedriver.storage.googleapis.com"
|
||||||
zip_name = "chromedriver_%s.zip"
|
zip_name = "chromedriver_%s.zip"
|
||||||
exe_name = "chromedriver%s"
|
exe_name = "chromedriver%s"
|
||||||
@@ -48,7 +51,13 @@ class Patcher(object):
|
|||||||
d = "~/.undetected_chromedriver"
|
d = "~/.undetected_chromedriver"
|
||||||
data_path = os.path.abspath(os.path.expanduser(d))
|
data_path = os.path.abspath(os.path.expanduser(d))
|
||||||
|
|
||||||
def __init__(self, executable_path=None, force=False, version_main: int = 0):
|
def __init__(
|
||||||
|
self,
|
||||||
|
executable_path=None,
|
||||||
|
force=False,
|
||||||
|
version_main: int = 0,
|
||||||
|
user_multi_procs=False,
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
Args:
|
Args:
|
||||||
executable_path: None = automatic
|
executable_path: None = automatic
|
||||||
@@ -61,6 +70,7 @@ class Patcher(object):
|
|||||||
self.force = force
|
self.force = force
|
||||||
self._custom_exe_path = False
|
self._custom_exe_path = False
|
||||||
prefix = "undetected"
|
prefix = "undetected"
|
||||||
|
self.user_multi_procs = user_multi_procs
|
||||||
|
|
||||||
if not os.path.exists(self.data_path):
|
if not os.path.exists(self.data_path):
|
||||||
os.makedirs(self.data_path, exist_ok=True)
|
os.makedirs(self.data_path, exist_ok=True)
|
||||||
@@ -78,17 +88,41 @@ class Patcher(object):
|
|||||||
self.zip_path = os.path.join(self.data_path, prefix)
|
self.zip_path = os.path.join(self.data_path, prefix)
|
||||||
|
|
||||||
if not executable_path:
|
if not executable_path:
|
||||||
self.executable_path = os.path.abspath(
|
if not self.user_multi_procs:
|
||||||
os.path.join(".", self.executable_path)
|
self.executable_path = os.path.abspath(
|
||||||
)
|
os.path.join(".", self.executable_path)
|
||||||
|
)
|
||||||
|
|
||||||
if executable_path:
|
if executable_path:
|
||||||
self._custom_exe_path = True
|
self._custom_exe_path = True
|
||||||
self.executable_path = executable_path
|
self.executable_path = executable_path
|
||||||
|
|
||||||
self.version_main = version_main
|
self.version_main = version_main
|
||||||
self.version_full = None
|
self.version_full = None
|
||||||
|
|
||||||
def auto(self, executable_path=None, force=False, version_main=None):
|
def auto(self, executable_path=None, force=False, version_main=None, _=None):
|
||||||
|
"""
|
||||||
|
|
||||||
|
Args:
|
||||||
|
executable_path:
|
||||||
|
force:
|
||||||
|
version_main:
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
# if self.user_multi_procs and \
|
||||||
|
# self.user_multi_procs != -1:
|
||||||
|
# # -1 being a skip value used later in this block
|
||||||
|
#
|
||||||
|
p = pathlib.Path(self.data_path)
|
||||||
|
with Lock():
|
||||||
|
files = list(p.rglob("*chromedriver*?"))
|
||||||
|
for file in files:
|
||||||
|
if self.is_binary_patched(file):
|
||||||
|
self.executable_path = str(file)
|
||||||
|
return True
|
||||||
|
|
||||||
if executable_path:
|
if executable_path:
|
||||||
self.executable_path = executable_path
|
self.executable_path = executable_path
|
||||||
self._custom_exe_path = True
|
self._custom_exe_path = True
|
||||||
@@ -127,6 +161,49 @@ class Patcher(object):
|
|||||||
self.unzip_package(self.fetch_package())
|
self.unzip_package(self.fetch_package())
|
||||||
return self.patch()
|
return self.patch()
|
||||||
|
|
||||||
|
def driver_binary_in_use(self, path: str = None) -> bool:
    """
    naive test to check if a found chromedriver binary is
    currently in use

    The file is opened in append mode, then seeked and read. If any of these
    steps fails with an OSError (PermissionError included) the binary is
    reported as in use.

    Args:
        path: a string or PathLike object to the binary to check.
              if not specified, this object's executable_path is used

    Returns:
        True when the binary could not be opened, seeked or read,
        False when all three succeeded.

    Raises:
        OSError: when the file does not exist.
    """
    target = pathlib.Path(path or self.executable_path)

    if not target.exists():
        raise OSError("file does not exist: %s" % target)

    try:
        with open(target, mode="a+b") as fs:
            # some systems apparently allow seeking on a file that is held by
            # another process, so a read is attempted as a second probe
            fs.seek(0, 0)
            fs.readline()
    except OSError:
        # not being able to open/seek/read is taken as "in use"; always
        # answer with a real bool instead of falling through to None
        return True
    return False
|
||||||
|
|
||||||
|
def cleanup_unused_files(self):
    """
    Collect the entries in data_path whose name contains "undetected".

    NOTE(review): despite the name, nothing is deleted here; the matches are
    only reported. They are logged at debug level (instead of being printed
    to stdout) and returned to the caller.

    Returns:
        list of pathlib.Path objects matching "*undetected*" in data_path
    """
    items = list(pathlib.Path(self.data_path).glob("*undetected*"))
    # getLogger(__name__) resolves to this module's logger
    logging.getLogger(__name__).debug("undetected files in data path: %s", items)
    return items
|
||||||
|
|
||||||
def patch(self):
|
def patch(self):
|
||||||
self.patch_exe()
|
self.patch_exe()
|
||||||
return self.is_binary_patched()
|
return self.is_binary_patched()
|
||||||
@@ -255,21 +332,17 @@ class Patcher(object):
|
|||||||
else:
|
else:
|
||||||
timeout = 3 # stop trying after this many seconds
|
timeout = 3 # stop trying after this many seconds
|
||||||
t = time.monotonic()
|
t = time.monotonic()
|
||||||
while True:
|
now = lambda: time.monotonic()
|
||||||
now = time.monotonic()
|
while now() - t > timeout:
|
||||||
if now - t > timeout:
|
# we don't want to wait until the end of time
|
||||||
# we don't want to wait until the end of time
|
|
||||||
logger.debug(
|
|
||||||
"could not unlink %s in time (%d seconds)"
|
|
||||||
% (self.executable_path, timeout)
|
|
||||||
)
|
|
||||||
break
|
|
||||||
try:
|
try:
|
||||||
|
if self.user_multi_procs:
|
||||||
|
break
|
||||||
os.unlink(self.executable_path)
|
os.unlink(self.executable_path)
|
||||||
logger.debug("successfully unlinked %s" % self.executable_path)
|
logger.debug("successfully unlinked %s" % self.executable_path)
|
||||||
break
|
break
|
||||||
except (OSError, RuntimeError, PermissionError):
|
except (OSError, RuntimeError, PermissionError):
|
||||||
time.sleep(0.1)
|
time.sleep(0.01)
|
||||||
continue
|
continue
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
break
|
break
|
||||||
|
|||||||
97
src/utils.py
97
src/utils.py
@@ -3,6 +3,8 @@ import logging
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
|
import urllib.parse
|
||||||
|
import tempfile
|
||||||
|
|
||||||
from selenium.webdriver.chrome.webdriver import WebDriver
|
from selenium.webdriver.chrome.webdriver import WebDriver
|
||||||
import undetected_chromedriver as uc
|
import undetected_chromedriver as uc
|
||||||
@@ -36,6 +38,80 @@ def get_flaresolverr_version() -> str:
|
|||||||
return FLARESOLVERR_VERSION
|
return FLARESOLVERR_VERSION
|
||||||
|
|
||||||
|
|
||||||
|
def create_proxy_extension(proxy: dict) -> str:
    """
    Write an unpacked Chrome extension that configures an authenticated proxy.

    The extension sets a fixed proxy server through chrome.proxy and answers
    proxy authentication challenges with the given credentials.

    Args:
        proxy: dict with the keys 'url' (e.g. "http://host:8080"),
               'username' and 'password'.

    Returns:
        path of a new temporary directory holding manifest.json and
        background.js. The directory is not removed here; the caller is
        expected to delete it.

    Raises:
        KeyError: when 'url', 'username' or 'password' is missing.
    """
    import json  # local import so the function does not rely on a module-level one

    parsed_url = urllib.parse.urlparse(proxy['url'])
    single_proxy = {"scheme": parsed_url.scheme, "host": parsed_url.hostname}
    # the port is left out when the URL has none (a "%d" placeholder would
    # raise TypeError on None); presumably Chrome then falls back to the
    # default port of the scheme - verify against the chrome.proxy docs
    if parsed_url.port is not None:
        single_proxy["port"] = parsed_url.port

    manifest_json = """
    {
        "version": "1.0.0",
        "manifest_version": 2,
        "name": "Chrome Proxy",
        "permissions": [
            "proxy",
            "tabs",
            "unlimitedStorage",
            "storage",
            "<all_urls>",
            "webRequest",
            "webRequestBlocking"
        ],
        "background": {"scripts": ["background.js"]},
        "minimum_chrome_version": "76.0.0"
    }
    """

    # every value is serialized with json.dumps: a JSON literal is also a valid
    # JavaScript literal, so quotes or backslashes inside the credentials can
    # neither break the script nor inject code into it
    background_js = """
    var config = {
        mode: "fixed_servers",
        rules: {
            singleProxy: %s,
            bypassList: ["localhost"]
        }
    };

    chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

    function callbackFn(details) {
        return {
            authCredentials: {
                username: %s,
                password: %s
            }
        };
    }

    chrome.webRequest.onAuthRequired.addListener(
        callbackFn,
        { urls: ["<all_urls>"] },
        ['blocking']
    );
    """ % (
        json.dumps(single_proxy),
        json.dumps(proxy['username']),
        json.dumps(proxy['password']),
    )

    proxy_extension_dir = tempfile.mkdtemp()

    with open(os.path.join(proxy_extension_dir, "manifest.json"), "w", encoding="utf-8") as f:
        f.write(manifest_json)

    with open(os.path.join(proxy_extension_dir, "background.js"), "w", encoding="utf-8") as f:
        f.write(background_js)

    return proxy_extension_dir
|
||||||
|
|
||||||
|
|
||||||
def get_webdriver(proxy: dict = None) -> WebDriver:
|
def get_webdriver(proxy: dict = None) -> WebDriver:
|
||||||
global PATCHED_DRIVER_PATH
|
global PATCHED_DRIVER_PATH
|
||||||
logging.debug('Launching web browser...')
|
logging.debug('Launching web browser...')
|
||||||
@@ -54,13 +130,17 @@ def get_webdriver(proxy: dict = None) -> WebDriver:
|
|||||||
options.add_argument('--disable-software-rasterizer')
|
options.add_argument('--disable-software-rasterizer')
|
||||||
options.add_argument('--ignore-certificate-errors')
|
options.add_argument('--ignore-certificate-errors')
|
||||||
options.add_argument('--ignore-ssl-errors')
|
options.add_argument('--ignore-ssl-errors')
|
||||||
# fix GL erros in ASUSTOR NAS
|
# fix GL errors in ASUSTOR NAS
|
||||||
# https://github.com/FlareSolverr/FlareSolverr/issues/782
|
# https://github.com/FlareSolverr/FlareSolverr/issues/782
|
||||||
# https://github.com/microsoft/vscode/issues/127800#issuecomment-873342069
|
# https://github.com/microsoft/vscode/issues/127800#issuecomment-873342069
|
||||||
# https://peter.sh/experiments/chromium-command-line-switches/#use-gl
|
# https://peter.sh/experiments/chromium-command-line-switches/#use-gl
|
||||||
options.add_argument('--use-gl=swiftshader')
|
options.add_argument('--use-gl=swiftshader')
|
||||||
|
|
||||||
if proxy and 'url' in proxy:
|
proxy_extension_dir = None
|
||||||
|
if proxy and all(key in proxy for key in ['url', 'username', 'password']):
|
||||||
|
proxy_extension_dir = create_proxy_extension(proxy)
|
||||||
|
options.add_argument("--load-extension=%s" % os.path.abspath(proxy_extension_dir))
|
||||||
|
elif proxy and 'url' in proxy:
|
||||||
proxy_url = proxy['url']
|
proxy_url = proxy['url']
|
||||||
logging.debug("Using webdriver proxy: %s", proxy_url)
|
logging.debug("Using webdriver proxy: %s", proxy_url)
|
||||||
options.add_argument('--proxy-server=%s' % proxy_url)
|
options.add_argument('--proxy-server=%s' % proxy_url)
|
||||||
@@ -82,6 +162,10 @@ def get_webdriver(proxy: dict = None) -> WebDriver:
|
|||||||
driver_exe_path = "/app/chromedriver"
|
driver_exe_path = "/app/chromedriver"
|
||||||
else:
|
else:
|
||||||
version_main = get_chrome_major_version()
|
version_main = get_chrome_major_version()
|
||||||
|
# Fix for Chrome 115
|
||||||
|
# https://github.com/seleniumbase/SeleniumBase/pull/1967
|
||||||
|
if int(version_main) > 114:
|
||||||
|
version_main = 114
|
||||||
if PATCHED_DRIVER_PATH is not None:
|
if PATCHED_DRIVER_PATH is not None:
|
||||||
driver_exe_path = PATCHED_DRIVER_PATH
|
driver_exe_path = PATCHED_DRIVER_PATH
|
||||||
|
|
||||||
@@ -92,12 +176,17 @@ def get_webdriver(proxy: dict = None) -> WebDriver:
|
|||||||
# if we don't set driver_executable_path it downloads, patches, and deletes the driver each time
|
# if we don't set driver_executable_path it downloads, patches, and deletes the driver each time
|
||||||
driver = uc.Chrome(options=options, browser_executable_path=browser_executable_path,
|
driver = uc.Chrome(options=options, browser_executable_path=browser_executable_path,
|
||||||
driver_executable_path=driver_exe_path, version_main=version_main,
|
driver_executable_path=driver_exe_path, version_main=version_main,
|
||||||
windows_headless=windows_headless)
|
windows_headless=windows_headless, headless=windows_headless)
|
||||||
|
|
||||||
# save the patched driver to avoid re-downloads
|
# save the patched driver to avoid re-downloads
|
||||||
if driver_exe_path is None:
|
if driver_exe_path is None:
|
||||||
PATCHED_DRIVER_PATH = os.path.join(driver.patcher.data_path, driver.patcher.exe_name)
|
PATCHED_DRIVER_PATH = os.path.join(driver.patcher.data_path, driver.patcher.exe_name)
|
||||||
shutil.copy(driver.patcher.executable_path, PATCHED_DRIVER_PATH)
|
if PATCHED_DRIVER_PATH != driver.patcher.executable_path:
|
||||||
|
shutil.copy(driver.patcher.executable_path, PATCHED_DRIVER_PATH)
|
||||||
|
|
||||||
|
# clean up proxy extension directory
|
||||||
|
if proxy_extension_dir is not None:
|
||||||
|
shutil.rmtree(proxy_extension_dir)
|
||||||
|
|
||||||
# selenium vanilla
|
# selenium vanilla
|
||||||
# options = webdriver.ChromeOptions()
|
# options = webdriver.ChromeOptions()
|
||||||
|
|||||||
Reference in New Issue
Block a user