mirror of
https://github.com/FlareSolverr/FlareSolverr.git
synced 2025-12-05 17:18:19 +01:00
Compare commits
145 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0fe9958afe | ||
|
|
9f8c71131f | ||
|
|
2405c00521 | ||
|
|
ff65b7cc68 | ||
|
|
409e0844a7 | ||
|
|
368d5d4e05 | ||
|
|
c7505e3cbf | ||
|
|
5a27090abe | ||
|
|
e505ea4fe4 | ||
|
|
63b6fc53e3 | ||
|
|
8d72617219 | ||
|
|
8a8b9415c3 | ||
|
|
16722ef963 | ||
|
|
bbc24e9d86 | ||
|
|
7dfdfc5e33 | ||
|
|
136422c85c | ||
|
|
05a72f2709 | ||
|
|
da810830da | ||
|
|
d27f57c27c | ||
|
|
a916d93779 | ||
|
|
0d889cb0b2 | ||
|
|
d430404de8 | ||
|
|
d3b1ba6e88 | ||
|
|
75e5b190d6 | ||
|
|
cdc3db3c21 | ||
|
|
2dbb0442e0 | ||
|
|
6faab19533 | ||
|
|
af0a7af757 | ||
|
|
ff74b50b60 | ||
|
|
3e51ac1188 | ||
|
|
6627de4fa6 | ||
|
|
fe649255f2 | ||
|
|
3e338fce2e | ||
|
|
3dd3e7559d | ||
|
|
f21c1d51bc | ||
|
|
957347f73a | ||
|
|
c55080b0ec | ||
|
|
639bfca020 | ||
|
|
237694df76 | ||
|
|
6e5d6f1795 | ||
|
|
30804a86e5 | ||
|
|
e0bdaf7745 | ||
|
|
795365dbe4 | ||
|
|
ce5369dd41 | ||
|
|
600b09d498 | ||
|
|
d1f19405a1 | ||
|
|
82a1366d34 | ||
|
|
a2fe9e7776 | ||
|
|
6cc628df9e | ||
|
|
8b1851eeb1 | ||
|
|
54668a11e7 | ||
|
|
701d8fb4ff | ||
|
|
39a265ccb8 | ||
|
|
e32b247014 | ||
|
|
0d8fe8fe50 | ||
|
|
718da3a36f | ||
|
|
a798561338 | ||
|
|
eb680efc90 | ||
|
|
0f8f0bec25 | ||
|
|
3d9bc5627b | ||
|
|
dd7eaee2e3 | ||
|
|
031177bbdb | ||
|
|
a8644532a1 | ||
|
|
e96161c873 | ||
|
|
5a1f25cd52 | ||
|
|
a2c0e4348e | ||
|
|
2ecf88895b | ||
|
|
984368edb5 | ||
|
|
6c1d78cb84 | ||
|
|
5a2c61601e | ||
|
|
c304da2964 | ||
|
|
b811412699 | ||
|
|
0bb8de144f | ||
|
|
38166dfaa0 | ||
|
|
8dea0ed017 | ||
|
|
20cd2944a7 | ||
|
|
fd773e5909 | ||
|
|
35c7bff3c8 | ||
|
|
afdc1c7a8e | ||
|
|
0bc7a4498c | ||
|
|
c5a5f6d65e | ||
|
|
aaf29be8e1 | ||
|
|
800866d033 | ||
|
|
043f18b231 | ||
|
|
d21a332519 | ||
|
|
3ca6d08f41 | ||
|
|
227bd7ac72 | ||
|
|
e6a08584c0 | ||
|
|
df06d13cf8 | ||
|
|
993b8c41ac | ||
|
|
a4d42d7834 | ||
|
|
1c855b8af0 | ||
|
|
745c69491f | ||
|
|
f7e316fd5a | ||
|
|
16c8ab5f3d | ||
|
|
7af311b73c | ||
|
|
daec97532d | ||
|
|
8d7ed48f21 | ||
|
|
220f2599ae | ||
|
|
d772cf3f50 | ||
|
|
ab4365894b | ||
|
|
3fa9631559 | ||
|
|
04858c22fd | ||
|
|
5085ca6990 | ||
|
|
cd4df1e061 | ||
|
|
6c79783f7c | ||
|
|
4139e8d47c | ||
|
|
1942eb5fdc | ||
|
|
401bf5be76 | ||
|
|
d8ffdd3061 | ||
|
|
2d66590b08 | ||
|
|
a217510dc7 | ||
|
|
553bd8ab4f | ||
|
|
1b197c3e53 | ||
|
|
fd308f01be | ||
|
|
b5eef32615 | ||
|
|
644a843d89 | ||
|
|
82e1c94c6f | ||
|
|
fbc71516f5 | ||
|
|
40bd1cba4c | ||
|
|
d1588c1156 | ||
|
|
b4ad583baa | ||
|
|
5d31e551cc | ||
|
|
d92845f34f | ||
|
|
5d3b73ea9d | ||
|
|
2aa095ed5d | ||
|
|
687c8f75ae | ||
|
|
22ed3d324b | ||
|
|
5ba9ef03f3 | ||
|
|
d2e144ea12 | ||
|
|
313fb2c14b | ||
|
|
6d69f40b58 | ||
|
|
a1c36f60d2 | ||
|
|
0edc50e271 | ||
|
|
f4a4baa57c | ||
|
|
f7e434c6e3 | ||
|
|
7728f2ab31 | ||
|
|
c920bea4ca | ||
|
|
a785f83034 | ||
|
|
b42c22f5b1 | ||
|
|
9c62410a8b | ||
|
|
b8768ae17d | ||
|
|
9b2c602a1f | ||
|
|
8316350b98 | ||
|
|
33307ce461 |
14
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
14
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
@@ -8,6 +8,13 @@ body:
|
||||
options:
|
||||
- label: I have checked the README
|
||||
required: true
|
||||
- type: checkboxes
|
||||
attributes:
|
||||
label: Have you followed our Troubleshooting?
|
||||
description: Please follow our <a href="https://github.com/FlareSolverr/FlareSolverr/wiki/Troubleshooting">Troubleshooting</a>.
|
||||
options:
|
||||
- label: I have followed your Troubleshooting
|
||||
required: true
|
||||
- type: checkboxes
|
||||
attributes:
|
||||
label: Is there already an issue for your problem?
|
||||
@@ -22,6 +29,13 @@ body:
|
||||
options:
|
||||
- label: I have read the Discussions
|
||||
required: true
|
||||
- type: input
|
||||
attributes:
|
||||
label: Have you ACTUALLY checked all these?
|
||||
description: Please do not waste our time and yours; these checks are there for a reason, it is not just so you can tick boxes for fun. If you type <b>YES</b> and it is clear you did not or have put in no effort, your issue will be closed and locked without comment. If you type <b>NO</b> but still open this issue, you will be permanently blocked for timewasting.
|
||||
placeholder: YES or NO
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Environment
|
||||
|
||||
11
.github/workflows/autotag.yml
vendored
11
.github/workflows/autotag.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: autotag
|
||||
name: Autotag
|
||||
|
||||
on:
|
||||
push:
|
||||
@@ -9,11 +9,10 @@ jobs:
|
||||
tag-release:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
-
|
||||
name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
-
|
||||
name: Auto Tag
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Auto Tag
|
||||
uses: Klemensas/action-autotag@stable
|
||||
with:
|
||||
GITHUB_TOKEN: "${{ secrets.GH_PAT }}"
|
||||
|
||||
72
.github/workflows/release-docker.yml
vendored
72
.github/workflows/release-docker.yml
vendored
@@ -1,53 +1,67 @@
|
||||
name: release-docker
|
||||
name: Docker release
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- 'v*.*.*'
|
||||
- "v*.*.*"
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
build-docker-images:
|
||||
runs-on: ubuntu-22.04
|
||||
if: ${{ !github.event.pull_request.head.repo.fork }}
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
-
|
||||
name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
-
|
||||
name: Downcase repo
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Downcase repo
|
||||
run: echo REPOSITORY=$(echo ${{ github.repository }} | tr '[:upper:]' '[:lower:]') >> $GITHUB_ENV
|
||||
-
|
||||
name: Docker meta
|
||||
|
||||
- name: Docker meta
|
||||
id: docker_meta
|
||||
uses: crazy-max/ghaction-docker-meta@v3
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ env.REPOSITORY }},ghcr.io/${{ env.REPOSITORY }}
|
||||
tag-sha: false
|
||||
-
|
||||
name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v2
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v2
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v2
|
||||
images: |
|
||||
${{ env.REPOSITORY }},enable=${{ github.event_name != 'pull_request' }}
|
||||
ghcr.io/${{ env.REPOSITORY }}
|
||||
tags: |
|
||||
type=semver,pattern={{version}},prefix=v
|
||||
type=ref,event=pr
|
||||
flavor: |
|
||||
latest=auto
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to DockerHub
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
-
|
||||
name: Login to GitHub Container Registry
|
||||
uses: docker/login-action@v2
|
||||
|
||||
- name: Login to GitHub Container Registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.repository_owner }}
|
||||
password: ${{ secrets.GH_PAT }}
|
||||
-
|
||||
name: Build and push
|
||||
uses: docker/build-push-action@v3
|
||||
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
file: ./Dockerfile
|
||||
platforms: linux/386,linux/amd64,linux/arm/v7,linux/arm64/v8
|
||||
push: ${{ github.event_name != 'pull_request' }}
|
||||
push: true
|
||||
tags: ${{ steps.docker_meta.outputs.tags }}
|
||||
labels: ${{ steps.docker_meta.outputs.labels }}
|
||||
|
||||
86
.github/workflows/release.yml
vendored
86
.github/workflows/release.yml
vendored
@@ -1,95 +1,63 @@
|
||||
name: release
|
||||
name: Release
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- 'v*.*.*'
|
||||
- "v*.*.*"
|
||||
|
||||
jobs:
|
||||
create-release:
|
||||
name: Create release
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v5
|
||||
with:
|
||||
fetch-depth: 0 # get all commits, branches and tags (required for the changelog)
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Build changelog
|
||||
id: github_changelog
|
||||
run: |
|
||||
changelog=$(git log $(git tag | tail -2 | head -1)..HEAD --no-merges --oneline)
|
||||
changelog="${changelog//'%'/'%25'}"
|
||||
changelog="${changelog//$'\n'/'%0A'}"
|
||||
changelog="${changelog//$'\r'/'%0D'}"
|
||||
echo "##[set-output name=changelog;]${changelog}"
|
||||
echo "changelog<<EOF" >> $GITHUB_OUTPUT
|
||||
echo "$changelog" >> $GITHUB_OUTPUT
|
||||
echo "EOF" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Create release
|
||||
id: create_release
|
||||
uses: actions/create-release@v1
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GH_PAT }}
|
||||
uses: softprops/action-gh-release@v2
|
||||
with:
|
||||
tag_name: ${{ github.ref }}
|
||||
release_name: ${{ github.ref }}
|
||||
body: ${{ steps.github_changelog.outputs.changelog }}
|
||||
draft: false
|
||||
prerelease: false
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GH_PAT }}
|
||||
|
||||
build-linux-package:
|
||||
name: Build Linux binary
|
||||
build-package:
|
||||
name: Build binaries
|
||||
needs: create-release
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-latest, windows-latest]
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v5
|
||||
with:
|
||||
fetch-depth: 0 # get all commits, branches and tags (required for the changelog)
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v4
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: '3.11'
|
||||
python-version: "3.13"
|
||||
|
||||
- name: Build artifacts
|
||||
run: |
|
||||
python -m pip install -r requirements.txt
|
||||
python -m pip install pyinstaller==5.9.0
|
||||
python -m pip install pyinstaller==6.16.0
|
||||
cd src
|
||||
python build_package.py
|
||||
|
||||
- name: Upload release artifacts
|
||||
uses: alexellis/upload-assets@0.4.0
|
||||
uses: softprops/action-gh-release@v2
|
||||
with:
|
||||
files: ./dist/flaresolverr_*
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GH_PAT }}
|
||||
with:
|
||||
asset_paths: '["./dist/flaresolverr_*"]'
|
||||
|
||||
build-windows-package:
|
||||
name: Build Windows binary
|
||||
needs: create-release
|
||||
runs-on: windows-2022
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
fetch-depth: 0 # get all commits, branches and tags (required for the changelog)
|
||||
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Build artifacts
|
||||
run: |
|
||||
python -m pip install -r requirements.txt
|
||||
python -m pip install pyinstaller==5.9.0
|
||||
cd src
|
||||
python build_package.py
|
||||
|
||||
- name: Upload release artifacts
|
||||
uses: alexellis/upload-assets@0.4.0
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GH_PAT }}
|
||||
with:
|
||||
asset_paths: '["./dist/flaresolverr_*"]'
|
||||
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -124,3 +124,6 @@ venv.bak/
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# node
|
||||
node_modules/
|
||||
172
CHANGELOG.md
172
CHANGELOG.md
@@ -1,5 +1,177 @@
|
||||
# Changelog
|
||||
|
||||
## v3.4.6 (2025/11/29)
|
||||
* Add disable image, css, fonts option with CDP. Thanks @Ananto30
|
||||
|
||||
## v3.4.5 (2025/11/11)
|
||||
* Revert to Python v3.13
|
||||
|
||||
## v3.4.4 (2025/11/04)
|
||||
* Bump dependencies, Chromium, and some other general fixes. Thanks @flowerey
|
||||
|
||||
## v3.4.3 (2025/10/28)
|
||||
* Update proxy extension
|
||||
|
||||
## v3.4.2 (2025/10/09)
|
||||
* Bump dependencies & CI actions. Thanks @flowerey
|
||||
* Add optional wait time after resolving the challenge before returning. Thanks @kennedyoliveira
|
||||
* Add proxy ENVs. Thanks @Robokishan
|
||||
* Handle empty string and keys without value in postData. Thanks @eZ4RK0
|
||||
* Add quote protection for password containing it. Thanks @warrenberberd
|
||||
* Add returnScreenshot parameter to screenshot the final web page. Thanks @estebanthi
|
||||
* Add log file support. Thanks @acg5159
|
||||
|
||||
## v3.4.1 (2025/09/15)
|
||||
* Fix regex pattern syntax in utils.py
|
||||
* Change access denied title check to use startswith
|
||||
|
||||
## v3.4.0 (2025/08/25)
|
||||
* Modernize and upgrade application. Thanks @TheCrazyLex
|
||||
* Remove disable software rasterizer option for ARM builds. Thanks @smrodman83
|
||||
|
||||
## v3.3.25 (2025/06/14)
|
||||
* Remove `use-gl` argument. Thanks @qwerty12
|
||||
* u_c: remove apparent c&p typo. Thanks @ok3721
|
||||
* Bump requirements
|
||||
|
||||
## v3.3.24 (2025/06/04)
|
||||
* Remove hidden character
|
||||
|
||||
## v3.3.23 (2025/06/04)
|
||||
* Update base image to bookworm. Thanks @rwjack
|
||||
|
||||
## v3.3.22 (2025/06/03)
|
||||
* Disable search engine choice screen
|
||||
* Fix headless=false stalling. Thanks @MAKMED1337
|
||||
* Change from click to keys. Thanks @sh4dowb
|
||||
* Don't open devtools
|
||||
* Bump Chromium to v137 for build
|
||||
* Bump requirements
|
||||
|
||||
## v3.3.21 (2024/06/26)
|
||||
* Add challenge selector to catch reloading page on non-English systems
|
||||
* Escape values for generated form used in request.post. Thanks @mynameisbogdan
|
||||
|
||||
## v3.3.20 (2024/06/21)
|
||||
* maxTimeout should always be int
|
||||
* Check not running in Docker before logging version_main error
|
||||
* Update Cloudflare challenge and checkbox selectors. Thanks @tenettow & @21hsmw
|
||||
|
||||
## v3.3.19 (2024/05/23)
|
||||
* Fix occasional headless issue on Linux when set to "false". Thanks @21hsmw
|
||||
|
||||
## v3.3.18 (2024/05/20)
|
||||
|
||||
* Fix LANG ENV for Linux
|
||||
* Fix Chrome v124+ not closing on Windows. Thanks @RileyXX
|
||||
|
||||
## v3.3.17 (2024/04/09)
|
||||
|
||||
* Fix file descriptor leak in service on quit(). Thanks @zkulis
|
||||
|
||||
## v3.3.16 (2024/02/28)
|
||||
|
||||
* Fix of the subprocess.STARTUPINFO() call. Thanks @ceconelo
|
||||
* Add FreeBSD support. Thanks @Asthowen
|
||||
* Use headless configuration properly. Thanks @hashworks
|
||||
|
||||
## v3.3.15 (2024/02/20)
|
||||
|
||||
* Fix looping challenges
|
||||
|
||||
## v3.3.14-hotfix2 (2024/02/17)
|
||||
|
||||
* Hotfix 2 - bad Chromium build, instances failed to terminate
|
||||
|
||||
## v3.3.14-hotfix (2024/02/17)
|
||||
|
||||
* Hotfix for Linux build - some Chrome files no longer exist
|
||||
|
||||
## v3.3.14 (2024/02/17)
|
||||
|
||||
* Update Chrome downloads. Thanks @opemvbs
|
||||
|
||||
## v3.3.13 (2024/01/07)
|
||||
|
||||
* Fix too many open files error
|
||||
|
||||
## v3.3.12 (2023/12/15)
|
||||
|
||||
* Fix looping challenges and invalid cookies
|
||||
|
||||
## v3.3.11 (2023/12/11)
|
||||
|
||||
* Update UC 3.5.4 & Selenium 4.15.2. Thanks @txtsd
|
||||
|
||||
## v3.3.10 (2023/11/14)
|
||||
|
||||
* Add LANG ENV - resolves issues with YGGtorrent
|
||||
|
||||
## v3.3.9 (2023/11/13)
|
||||
|
||||
* Fix for Docker build, capture TypeError
|
||||
|
||||
## v3.3.8 (2023/11/13)
|
||||
|
||||
* Fix headless=true for Chrome 117+. Thanks @NabiKAZ
|
||||
* Support running Chrome 119 from source. Thanks @koleg and @Chris7X
|
||||
* Fix "OSError: [WinError 6] The handle is invalid" on exit. Thanks @enesgorkemgenc
|
||||
|
||||
## v3.3.7 (2023/11/05)
|
||||
|
||||
* Bump to rebuild. Thanks @JoachimDorchies
|
||||
|
||||
## v3.3.6 (2023/09/15)
|
||||
|
||||
* Update checkbox selector, again
|
||||
|
||||
## v3.3.5 (2023/09/13)
|
||||
|
||||
* Change checkbox selector, support languages other than English
|
||||
|
||||
## v3.3.4 (2023/09/02)
|
||||
|
||||
* Update checkbox selector
|
||||
|
||||
## v3.3.3 (2023/08/31)
|
||||
|
||||
* Update undetected_chromedriver to v3.5.3
|
||||
|
||||
## v3.3.2 (2023/08/03)
|
||||
|
||||
* Fix URL domain in Prometheus exporter
|
||||
|
||||
## v3.3.1 (2023/08/03)
|
||||
|
||||
* Fix for Cloudflare verify checkbox
|
||||
* Fix HEADLESS=false in Windows binary
|
||||
* Fix Prometheus exporter for management and health endpoints
|
||||
* Remove misleading stack trace when the verify checkbox is not found
|
||||
* Revert "Update base Docker image to Debian Bookworm" #849
|
||||
* Revert "Install Chromium 115 from Debian testing" #849
|
||||
|
||||
## v3.3.0 (2023/08/02)
|
||||
|
||||
* Fix for new Cloudflare detection. Thanks @cedric-bour for #845
|
||||
* Add support for proxy authentication username/password. Thanks @jacobprice808 for #807
|
||||
* Implement Prometheus metrics
|
||||
* Fix Chromium Driver for Chrome / Chromium version > 114
|
||||
* Use Chromium 115 in binary packages (Windows and Linux)
|
||||
* Install Chromium 115 from Debian testing (Docker)
|
||||
* Update base Docker image to Debian Bookworm
|
||||
* Update Selenium 4.11.2
|
||||
* Update pyinstaller 5.13.0
|
||||
* Add more traces in build_package.py
|
||||
|
||||
## v3.2.2 (2023/07/16)
|
||||
|
||||
* Workaround for updated 'verify you are human' check
|
||||
|
||||
## v3.2.1 (2023/06/10)
|
||||
|
||||
* Kill dead Chrome processes in Windows
|
||||
* Fix Chrome GL errors in ASUSTOR NAS
|
||||
|
||||
## v3.2.0 (2023/05/23)
|
||||
|
||||
* Support "proxy" param in requests and sessions
|
||||
|
||||
22
Dockerfile
22
Dockerfile
@@ -1,4 +1,4 @@
|
||||
FROM python:3.11-slim-bullseye as builder
|
||||
FROM python:3.13-slim-bookworm AS builder
|
||||
|
||||
# Build dummy packages to skip installing them and their dependencies
|
||||
RUN apt-get update \
|
||||
@@ -12,7 +12,7 @@ RUN apt-get update \
|
||||
&& equivs-build adwaita-icon-theme \
|
||||
&& mv adwaita-icon-theme_*.deb /adwaita-icon-theme.deb
|
||||
|
||||
FROM python:3.11-slim-bullseye
|
||||
FROM python:3.13-slim-bookworm
|
||||
|
||||
# Copy dummy packages
|
||||
COPY --from=builder /*.deb /
|
||||
@@ -38,7 +38,12 @@ RUN dpkg -i /libgl1-mesa-dri.deb \
|
||||
# Create flaresolverr user
|
||||
&& useradd --home-dir /app --shell /bin/sh flaresolverr \
|
||||
&& mv /usr/bin/chromedriver chromedriver \
|
||||
&& chown -R flaresolverr:flaresolverr .
|
||||
&& chown -R flaresolverr:flaresolverr . \
|
||||
# Create config dir
|
||||
&& mkdir /config \
|
||||
&& chown flaresolverr:flaresolverr /config
|
||||
|
||||
VOLUME /config
|
||||
|
||||
# Install Python dependencies
|
||||
COPY requirements.txt .
|
||||
@@ -54,6 +59,7 @@ COPY src .
|
||||
COPY package.json ../
|
||||
|
||||
EXPOSE 8191
|
||||
EXPOSE 8192
|
||||
|
||||
# dumb-init avoids zombie chromium processes
|
||||
ENTRYPOINT ["/usr/bin/dumb-init", "--"]
|
||||
@@ -61,17 +67,17 @@ ENTRYPOINT ["/usr/bin/dumb-init", "--"]
|
||||
CMD ["/usr/local/bin/python", "-u", "/app/flaresolverr.py"]
|
||||
|
||||
# Local build
|
||||
# docker build -t ngosang/flaresolverr:3.2.0 .
|
||||
# docker run -p 8191:8191 ngosang/flaresolverr:3.2.0
|
||||
# docker build -t ngosang/flaresolverr:3.4.6 .
|
||||
# docker run -p 8191:8191 ngosang/flaresolverr:3.4.6
|
||||
|
||||
# Multi-arch build
|
||||
# docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
|
||||
# docker buildx create --use
|
||||
# docker buildx build -t ngosang/flaresolverr:3.2.0 --platform linux/386,linux/amd64,linux/arm/v7,linux/arm64/v8 .
|
||||
# docker buildx build -t ngosang/flaresolverr:3.4.6 --platform linux/386,linux/amd64,linux/arm/v7,linux/arm64/v8 .
|
||||
# add --push to publish in DockerHub
|
||||
|
||||
# Test multi-arch build
|
||||
# docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
|
||||
# docker buildx create --use
|
||||
# docker buildx build -t ngosang/flaresolverr:3.2.0 --platform linux/arm/v7 --load .
|
||||
# docker run -p 8191:8191 --platform linux/arm/v7 ngosang/flaresolverr:3.2.0
|
||||
# docker buildx build -t ngosang/flaresolverr:3.4.6 --platform linux/arm/v7 --load .
|
||||
# docker run -p 8191:8191 --platform linux/arm/v7 ngosang/flaresolverr:3.4.6
|
||||
|
||||
2
LICENSE
2
LICENSE
@@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2023 Diego Heras (ngosang / ngosang@hotmail.es)
|
||||
Copyright (c) 2025 Diego Heras (ngosang / ngosang@hotmail.es)
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
||||
268
README.md
268
README.md
@@ -33,22 +33,25 @@ It is recommended to install using a Docker container because the project depend
|
||||
already included within the image.
|
||||
|
||||
Docker images are available in:
|
||||
* GitHub Registry => https://github.com/orgs/FlareSolverr/packages/container/package/flaresolverr
|
||||
* DockerHub => https://hub.docker.com/r/flaresolverr/flaresolverr
|
||||
|
||||
- GitHub Registry => https://github.com/orgs/FlareSolverr/packages/container/package/flaresolverr
|
||||
- DockerHub => https://hub.docker.com/r/flaresolverr/flaresolverr
|
||||
|
||||
Supported architectures are:
|
||||
|
||||
| Architecture | Tag |
|
||||
|--------------|--------------|
|
||||
| ------------ | ------------ |
|
||||
| x86 | linux/386 |
|
||||
| x86-64 | linux/amd64 |
|
||||
| ARM32 | linux/arm/v7 |
|
||||
| ARM64 | linux/arm64 |
|
||||
|
||||
We provide a `docker-compose.yml` configuration file. Clone this repository and execute `docker-compose up -d` to start
|
||||
We provide a `docker-compose.yml` configuration file. Clone this repository and execute
|
||||
`docker-compose up -d` _(Compose V1)_ or `docker compose up -d` _(Compose V2)_ to start
|
||||
the container.
|
||||
|
||||
If you prefer the `docker cli` execute the following command.
|
||||
|
||||
```bash
|
||||
docker run -d \
|
||||
--name=flaresolverr \
|
||||
@@ -58,7 +61,7 @@ docker run -d \
|
||||
ghcr.io/flaresolverr/flaresolverr:latest
|
||||
```
|
||||
|
||||
If your host OS is Debian, make sure `libseccomp2` version is 2.5.x. You can check the version with `sudo apt-cache policy libseccomp2`
|
||||
If your host OS is Debian, make sure `libseccomp2` version is 2.5.x. You can check the version with `sudo apt-cache policy libseccomp2`
|
||||
and update the package with `sudo apt install libseccomp2=2.5.1-1~bpo10+1` or `sudo apt install libseccomp2=2.5.1-1+deb11u1`.
|
||||
Remember to restart the Docker daemon and the container after the update.
|
||||
|
||||
@@ -68,20 +71,29 @@ Remember to restart the Docker daemon and the container after the update.
|
||||
> Precompiled binaries are only available for x64 architecture. For other architectures see Docker images.
|
||||
|
||||
This is the recommended way for Windows users.
|
||||
* Download the [FlareSolverr executable](https://github.com/FlareSolverr/FlareSolverr/releases) from the release's page. It is available for Windows x64 and Linux x64.
|
||||
* Execute FlareSolverr binary. In the environment variables section you can find how to change the configuration.
|
||||
|
||||
- Download the [FlareSolverr executable](https://github.com/FlareSolverr/FlareSolverr/releases) from the release's page. It is available for Windows x64 and Linux x64.
|
||||
- Execute FlareSolverr binary. In the environment variables section you can find how to change the configuration.
|
||||
|
||||
### From source code
|
||||
|
||||
> **Warning**
|
||||
> Installing from source code only works for x64 architecture. For other architectures see Docker images.
|
||||
|
||||
* Install [Python 3.11](https://www.python.org/downloads/).
|
||||
* Install [Chrome](https://www.google.com/intl/en_us/chrome/) (all OS) or [Chromium](https://www.chromium.org/getting-involved/download-chromium/) (just Linux, it doesn't work in Windows) web browser.
|
||||
* (Only in Linux / macOS) Install [Xvfb](https://en.wikipedia.org/wiki/Xvfb) package.
|
||||
* Clone this repository and open a shell in that path.
|
||||
* Run `pip install -r requirements.txt` command to install FlareSolverr dependencies.
|
||||
* Run `python src/flaresolverr.py` command to start FlareSolverr.
|
||||
- Install [Python 3.13](https://www.python.org/downloads/).
|
||||
- Install [Chrome](https://www.google.com/intl/en_us/chrome/) (all OS) or [Chromium](https://www.chromium.org/getting-involved/download-chromium/) (just Linux, it doesn't work in Windows) web browser.
|
||||
- (Only in Linux) Install [Xvfb](https://en.wikipedia.org/wiki/Xvfb) package.
|
||||
- (Only in macOS) Install [XQuartz](https://www.xquartz.org/) package.
|
||||
- Clone this repository and open a shell in that path.
|
||||
- Run `pip install -r requirements.txt` command to install FlareSolverr dependencies.
|
||||
- Run `python src/flaresolverr.py` command to start FlareSolverr.
|
||||
|
||||
### From source code (FreeBSD/TrueNAS CORE)
|
||||
|
||||
- Run `pkg install chromium python313 py313-pip xorg-vfbserver` command to install the required dependencies.
|
||||
- Clone this repository and open a shell in that path.
|
||||
- Run `python3.13 -m pip install -r requirements.txt` command to install FlareSolverr dependencies.
|
||||
- Run `python3.13 src/flaresolverr.py` command to start FlareSolverr.
|
||||
|
||||
### Systemd service
|
||||
|
||||
@@ -89,17 +101,46 @@ We provide an example Systemd unit file `flaresolverr.service` as reference. You
|
||||
|
||||
## Usage
|
||||
|
||||
Example request:
|
||||
Example Bash request:
|
||||
|
||||
```bash
|
||||
curl -L -X POST 'http://localhost:8191/v1' \
|
||||
-H 'Content-Type: application/json' \
|
||||
--data-raw '{
|
||||
"cmd": "request.get",
|
||||
"url":"http://www.google.com/",
|
||||
"url": "http://www.google.com/",
|
||||
"maxTimeout": 60000
|
||||
}'
|
||||
```
|
||||
|
||||
Example Python request:
|
||||
|
||||
```py
|
||||
import requests
|
||||
|
||||
url = "http://localhost:8191/v1"
|
||||
headers = {"Content-Type": "application/json"}
|
||||
data = {
|
||||
"cmd": "request.get",
|
||||
"url": "http://www.google.com/",
|
||||
"maxTimeout": 60000
|
||||
}
|
||||
response = requests.post(url, headers=headers, json=data)
|
||||
print(response.text)
|
||||
```
|
||||
|
||||
Example PowerShell request:
|
||||
|
||||
```ps1
|
||||
$body = @{
|
||||
cmd = "request.get"
|
||||
url = "http://www.google.com/"
|
||||
maxTimeout = 60000
|
||||
} | ConvertTo-Json
|
||||
|
||||
irm -UseBasicParsing 'http://localhost:8191/v1' -Headers @{"Content-Type"="application/json"} -Method Post -Body $body
|
||||
```
|
||||
|
||||
### Commands
|
||||
|
||||
#### + `sessions.create`
|
||||
@@ -110,10 +151,10 @@ cookies for the browser to use.
|
||||
|
||||
This also speeds up the requests since it won't have to launch a new browser instance for every request.
|
||||
|
||||
| Parameter | Notes |
|
||||
|-----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| session | Optional. The session ID that you want to be assigned to the instance. If it isn't set, a random UUID will be assigned. |
|
||||
| proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. You must include the proxy schema in the URL: `http://`, `socks4://` or `socks5://`. Authorization (username/password) is not supported. |
|
||||
| Parameter | Notes |
|
||||
| --------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| session | Optional. The session ID that you want to be assigned to the instance. If it isn't set, a random UUID will be assigned. |
|
||||
| proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. You must include the proxy schema in the URL: `http://`, `socks4://` or `socks5://`. Authorization (username/password) is supported. Eg: `"proxy": {"url": "http://127.0.0.1:8888", "username": "testuser", "password": "testpass"}` |
|
||||
|
||||
#### + `sessions.list`
|
||||
|
||||
@@ -125,11 +166,7 @@ Example response:
|
||||
|
||||
```json
|
||||
{
|
||||
"sessions": [
|
||||
"session_id_1",
|
||||
"session_id_2",
|
||||
"session_id_3..."
|
||||
]
|
||||
"sessions": ["session_id_1", "session_id_2", "session_id_3..."]
|
||||
}
|
||||
```
|
||||
|
||||
@@ -139,20 +176,24 @@ This will properly shutdown a browser instance and remove all files associated w
|
||||
session. When you no longer need to use a session you should make sure to close it.
|
||||
|
||||
| Parameter | Notes |
|
||||
|-----------|-----------------------------------------------|
|
||||
| --------- | --------------------------------------------- |
|
||||
| session | The session ID that you want to be destroyed. |
|
||||
|
||||
#### + `request.get`
|
||||
|
||||
| Parameter | Notes |
|
||||
|---------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| ------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| url | Mandatory |
|
||||
| session | Optional. Will send the request from an existing browser instance. If one is not sent it will create a temporary instance that will be destroyed immediately after the request is completed. |
|
||||
| session_ttl_minutes | Optional. FlareSolverr will automatically rotate expired sessions based on the TTL provided in minutes. |
|
||||
| maxTimeout | Optional, default value 60000. Max timeout to solve the challenge in milliseconds. |
|
||||
| cookies | Optional. Will be used by the headless browser. Eg: `"cookies": [{"name": "cookie1", "value": "value1"}, {"name": "cookie2", "value": "value2"}]`. |
|
||||
| returnOnlyCookies | Optional, default false. Only returns the cookies. Response data, headers and other parts of the response are removed. |
|
||||
| returnScreenshot | Optional, default false. Captures a screenshot of the final rendered page after all challenges and waits are completed. The screenshot is returned as a Base64-encoded PNG string in the `screenshot` field of the response. |
|
||||
| proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. You must include the proxy schema in the URL: `http://`, `socks4://` or `socks5://`. Authorization (username/password) is not supported. (When the `session` parameter is set, the proxy is ignored; a session specific proxy can be set in `sessions.create`.) |
|
||||
| waitInSeconds | Optional, default none. Length to wait in seconds after solving the challenge, and before returning the results. Useful to allow it to load dynamic content. |
|
||||
| disableMedia | Optional, default false. When true FlareSolverr will prevent media resources (images, CSS, and fonts) from being loaded to speed up navigation. |
|
||||
| tabs_till_verify | Optional, default none. Number of times the `Tab` button is needed to be pressed to end up on the turnstile captcha, in order to verify it. After verifying the captcha, the result will be stored in the solution under `turnstile_token`. |
|
||||
|
||||
> **Warning**
|
||||
> If you want to use Cloudflare clearance cookie in your scripts, make sure you use the FlareSolverr User-Agent too. If they don't match you will see the challenge.
|
||||
@@ -161,87 +202,123 @@ Example response from running the `curl` above:
|
||||
|
||||
```json
|
||||
{
|
||||
"solution": {
|
||||
"url": "https://www.google.com/?gws_rd=ssl",
|
||||
"status": 200,
|
||||
"headers": {
|
||||
"status": "200",
|
||||
"date": "Thu, 16 Jul 2020 04:15:49 GMT",
|
||||
"expires": "-1",
|
||||
"cache-control": "private, max-age=0",
|
||||
"content-type": "text/html; charset=UTF-8",
|
||||
"strict-transport-security": "max-age=31536000",
|
||||
"p3p": "CP=\"This is not a P3P policy! See g.co/p3phelp for more info.\"",
|
||||
"content-encoding": "br",
|
||||
"server": "gws",
|
||||
"content-length": "61587",
|
||||
"x-xss-protection": "0",
|
||||
"x-frame-options": "SAMEORIGIN",
|
||||
"set-cookie": "1P_JAR=2020-07-16-04; expires=Sat..."
|
||||
},
|
||||
"response":"<!DOCTYPE html>...",
|
||||
"cookies": [
|
||||
{
|
||||
"name": "NID",
|
||||
"value": "204=QE3Ocq15XalczqjuDy52HeseG3zAZuJzID3R57...",
|
||||
"domain": ".google.com",
|
||||
"path": "/",
|
||||
"expires": 1610684149.307722,
|
||||
"size": 178,
|
||||
"httpOnly": true,
|
||||
"secure": true,
|
||||
"session": false,
|
||||
"sameSite": "None"
|
||||
},
|
||||
{
|
||||
"name": "1P_JAR",
|
||||
"value": "2020-07-16-04",
|
||||
"domain": ".google.com",
|
||||
"path": "/",
|
||||
"expires": 1597464949.307626,
|
||||
"size": 19,
|
||||
"httpOnly": false,
|
||||
"secure": true,
|
||||
"session": false,
|
||||
"sameSite": "None"
|
||||
}
|
||||
],
|
||||
"userAgent": "Windows NT 10.0; Win64; x64) AppleWebKit/5..."
|
||||
"solution": {
|
||||
"url": "https://www.google.com/?gws_rd=ssl",
|
||||
"status": 200,
|
||||
"headers": {
|
||||
"status": "200",
|
||||
"date": "Thu, 16 Jul 2020 04:15:49 GMT",
|
||||
"expires": "-1",
|
||||
"cache-control": "private, max-age=0",
|
||||
"content-type": "text/html; charset=UTF-8",
|
||||
"strict-transport-security": "max-age=31536000",
|
||||
"p3p": "CP=\"This is not a P3P policy! See g.co/p3phelp for more info.\"",
|
||||
"content-encoding": "br",
|
||||
"server": "gws",
|
||||
"content-length": "61587",
|
||||
"x-xss-protection": "0",
|
||||
"x-frame-options": "SAMEORIGIN",
|
||||
"set-cookie": "1P_JAR=2020-07-16-04; expires=Sat..."
|
||||
},
|
||||
"status": "ok",
|
||||
"message": "",
|
||||
"startTimestamp": 1594872947467,
|
||||
"endTimestamp": 1594872949617,
|
||||
"version": "1.0.0"
|
||||
"response": "<!DOCTYPE html>...",
|
||||
"cookies": [
|
||||
{
|
||||
"name": "NID",
|
||||
"value": "204=QE3Ocq15XalczqjuDy52HeseG3zAZuJzID3R57...",
|
||||
"domain": ".google.com",
|
||||
"path": "/",
|
||||
"expires": 1610684149.307722,
|
||||
"size": 178,
|
||||
"httpOnly": true,
|
||||
"secure": true,
|
||||
"session": false,
|
||||
"sameSite": "None"
|
||||
},
|
||||
{
|
||||
"name": "1P_JAR",
|
||||
"value": "2020-07-16-04",
|
||||
"domain": ".google.com",
|
||||
"path": "/",
|
||||
"expires": 1597464949.307626,
|
||||
"size": 19,
|
||||
"httpOnly": false,
|
||||
"secure": true,
|
||||
"session": false,
|
||||
"sameSite": "None"
|
||||
}
|
||||
],
|
||||
"userAgent": "Windows NT 10.0; Win64; x64) AppleWebKit/5...",
|
||||
"turnstile_token": "03AGdBq24k3lK7JH2v8uN1T5F..."
|
||||
},
|
||||
"status": "ok",
|
||||
"message": "",
|
||||
"startTimestamp": 1594872947467,
|
||||
"endTimestamp": 1594872949617,
|
||||
"version": "1.0.0"
|
||||
}
|
||||
```
|
||||
|
||||
### + `request.post`
|
||||
|
||||
This is the same as `request.get` but it takes one more param:
|
||||
This works like `request.get`, but it takes one extra parameter, `postData`. Note that `tabs_till_verify` is currently supported only for GET requests.
|
||||
|
||||
| Parameter | Notes |
|
||||
|-----------|--------------------------------------------------------------------------|
|
||||
| --------- | ------------------------------------------------------------------------ |
|
||||
| postData | Must be a string with `application/x-www-form-urlencoded`. Eg: `a=b&c=d` |
|
||||
|
||||
## Environment variables
|
||||
|
||||
| Name | Default | Notes |
|
||||
|-----------------|------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| LOG_LEVEL | info | Verbosity of the logging. Use `LOG_LEVEL=debug` for more information. |
|
||||
| LOG_HTML | false | Only for debugging. If `true` all HTML that passes through the proxy will be logged to the console in `debug` level. |
|
||||
| CAPTCHA_SOLVER | none | Captcha solving method. It is used when a captcha is encountered. See the Captcha Solvers section. |
|
||||
| TZ | UTC | Timezone used in the logs and the web browser. Example: `TZ=Europe/London`. |
|
||||
| HEADLESS | true | Only for debugging. To run the web browser in headless mode or visible. |
|
||||
| BROWSER_TIMEOUT | 40000 | If you are experiencing errors/timeouts because your system is slow, you can try to increase this value. Remember to increase the `maxTimeout` parameter too. |
|
||||
| TEST_URL | https://www.google.com | FlareSolverr makes a request on start to make sure the web browser is working. You can change that URL if it is blocked in your country. |
|
||||
| PORT | 8191 | Listening port. You don't need to change this if you are running on Docker. |
|
||||
| HOST | 0.0.0.0 | Listening interface. You don't need to change this if you are running on Docker. |
|
||||
| Name | Default | Notes |
|
||||
| ------------------ | ---------------------- | ---------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| LOG_LEVEL | info | Verbosity of the logging. Use `LOG_LEVEL=debug` for more information. |
|
||||
| LOG_FILE | none | Path of the file to write the log to. Example: `/config/flaresolverr.log`. |
|
||||
| LOG_HTML | false | Only for debugging. If `true` all HTML that passes through the proxy will be logged to the console in `debug` level. |
|
||||
| PROXY_URL | none | URL for proxy. Will be overwritten by `request` or `sessions` proxy, if used. Example: `http://127.0.0.1:8080`. |
|
||||
| PROXY_USERNAME | none | Username for proxy. Will be overwritten by `request` or `sessions` proxy, if used. Example: `testuser`. |
|
||||
| PROXY_PASSWORD | none | Password for proxy. Will be overwritten by `request` or `sessions` proxy, if used. Example: `testpass`. |
|
||||
| CAPTCHA_SOLVER | none | Captcha solving method. It is used when a captcha is encountered. See the Captcha Solvers section. |
|
||||
| TZ | UTC | Timezone used in the logs and the web browser. Example: `TZ=Europe/London`. |
|
||||
| LANG | none | Language used in the web browser. Example: `LANG=en_GB`. |
|
||||
| HEADLESS | true | Only for debugging. To run the web browser in headless mode or visible. |
|
||||
| DISABLE_MEDIA | false | Set to `true` to disable loading images, CSS, and other media in the web browser, saving network bandwidth. |
|
||||
| TEST_URL | https://www.google.com | FlareSolverr makes a request on start to make sure the web browser is working. You can change that URL if it is blocked in your country. |
|
||||
| PORT | 8191 | Listening port. You don't need to change this if you are running on Docker. |
|
||||
| HOST | 0.0.0.0 | Listening interface. You don't need to change this if you are running on Docker. |
|
||||
| PROMETHEUS_ENABLED | false | Enable Prometheus exporter. See the Prometheus section below. |
|
||||
| PROMETHEUS_PORT | 8192 | Listening port for Prometheus exporter. See the Prometheus section below. |
|
||||
|
||||
Environment variables are set differently depending on the operating system. Some examples:
|
||||
* Docker: Take a look at the Docker section in this document. Environment variables can be set in the `docker-compose.yml` file or in the Docker CLI command.
|
||||
* Linux: Run `export LOG_LEVEL=debug` and then start FlareSolverr in the same shell.
|
||||
* Windows: Open `cmd.exe`, run `set LOG_LEVEL=debug` and then start FlareSolverr in the same shell.
|
||||
|
||||
- Docker: Take a look at the Docker section in this document. Environment variables can be set in the `docker-compose.yml` file or in the Docker CLI command.
|
||||
- Linux: Run `export LOG_LEVEL=debug` and then run `flaresolverr` in the same shell.
|
||||
- Windows: Open `cmd.exe`, run `set LOG_LEVEL=debug` and then run `flaresolverr.exe` in the same shell.
|
||||
|
||||
## Prometheus exporter
|
||||
|
||||
The Prometheus exporter for FlareSolverr is disabled by default. It can be enabled with the environment variable `PROMETHEUS_ENABLED`. If you are using Docker make sure you expose the `PROMETHEUS_PORT`.
|
||||
|
||||
Example metrics:
|
||||
|
||||
```shell
|
||||
# HELP flaresolverr_request_total Total requests with result
|
||||
# TYPE flaresolverr_request_total counter
|
||||
flaresolverr_request_total{domain="nowsecure.nl",result="solved"} 1.0
|
||||
# HELP flaresolverr_request_created Total requests with result
|
||||
# TYPE flaresolverr_request_created gauge
|
||||
flaresolverr_request_created{domain="nowsecure.nl",result="solved"} 1.690141657157109e+09
|
||||
# HELP flaresolverr_request_duration Request duration in seconds
|
||||
# TYPE flaresolverr_request_duration histogram
|
||||
flaresolverr_request_duration_bucket{domain="nowsecure.nl",le="0.0"} 0.0
|
||||
flaresolverr_request_duration_bucket{domain="nowsecure.nl",le="10.0"} 1.0
|
||||
flaresolverr_request_duration_bucket{domain="nowsecure.nl",le="25.0"} 1.0
|
||||
flaresolverr_request_duration_bucket{domain="nowsecure.nl",le="50.0"} 1.0
|
||||
flaresolverr_request_duration_bucket{domain="nowsecure.nl",le="+Inf"} 1.0
|
||||
flaresolverr_request_duration_count{domain="nowsecure.nl"} 1.0
|
||||
flaresolverr_request_duration_sum{domain="nowsecure.nl"} 5.858
|
||||
# HELP flaresolverr_request_duration_created Request duration in seconds
|
||||
# TYPE flaresolverr_request_duration_created gauge
|
||||
flaresolverr_request_duration_created{domain="nowsecure.nl"} 1.6901416571570296e+09
|
||||
```
|
||||
|
||||
## Captcha Solvers
|
||||
|
||||
@@ -253,8 +330,9 @@ solve a captcha.
|
||||
If this is the case, FlareSolverr will return the error `Captcha detected but no automatic solver is configured.`
|
||||
|
||||
FlareSolverr can be customized to solve the CAPTCHA automatically by setting the environment variable `CAPTCHA_SOLVER`
|
||||
to the file name of one of the adapters inside the [/captcha](src/captcha) directory.
|
||||
to the file name of one of the adapters inside the `/captcha` directory.
|
||||
|
||||
## Related projects
|
||||
|
||||
* C# implementation => https://github.com/FlareSolverr/FlareSolverrSharp
|
||||
- C# implementation => https://github.com/FlareSolverr/FlareSolverrSharp
|
||||
|
||||
|
||||
@@ -7,9 +7,12 @@ services:
|
||||
container_name: flaresolverr
|
||||
environment:
|
||||
- LOG_LEVEL=${LOG_LEVEL:-info}
|
||||
- LOG_FILE=${LOG_FILE:-none}
|
||||
- LOG_HTML=${LOG_HTML:-false}
|
||||
- CAPTCHA_SOLVER=${CAPTCHA_SOLVER:-none}
|
||||
- TZ=Europe/London
|
||||
ports:
|
||||
- "${PORT:-8191}:8191"
|
||||
volumes:
|
||||
- /var/lib/flaresolver:/config
|
||||
restart: unless-stopped
|
||||
|
||||
19
flaresolverr.service
Normal file
19
flaresolverr.service
Normal file
@@ -0,0 +1,19 @@
|
||||
[Unit]
|
||||
Description=FlareSolverr
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
SyslogIdentifier=flaresolverr
|
||||
Restart=always
|
||||
RestartSec=5
|
||||
Type=simple
|
||||
User=flaresolverr
|
||||
Group=flaresolverr
|
||||
Environment="LOG_LEVEL=info"
|
||||
Environment="CAPTCHA_SOLVER=none"
|
||||
WorkingDirectory=/opt/flaresolverr
|
||||
ExecStart=/opt/flaresolverr/flaresolverr
|
||||
TimeoutStopSec=30
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "flaresolverr",
|
||||
"version": "3.2.0",
|
||||
"version": "3.4.6",
|
||||
"description": "Proxy server to bypass Cloudflare protection",
|
||||
"author": "Diego Heras (ngosang / ngosang@hotmail.es)",
|
||||
"license": "MIT"
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
bottle==0.12.25
|
||||
waitress==2.1.2
|
||||
selenium==4.9.1
|
||||
bottle==0.13.4
|
||||
waitress==3.0.2
|
||||
selenium==4.38.0
|
||||
func-timeout==4.3.5
|
||||
# required by undetected_chromedriver
|
||||
requests==2.31.0
|
||||
certifi==2023.5.7
|
||||
websockets==11.0.3
|
||||
# only required for linux
|
||||
xvfbwrapper==0.2.9
|
||||
# only required for windows
|
||||
pefile==2023.2.7
|
||||
prometheus-client==0.23.1
|
||||
# Required by undetected_chromedriver
|
||||
requests==2.32.5
|
||||
certifi==2025.10.5
|
||||
websockets==15.0.1
|
||||
packaging==25.0
|
||||
# Only required for Linux and macOS
|
||||
xvfbwrapper==0.2.15; platform_system != "Windows"
|
||||
# Only required for Windows
|
||||
pefile==2024.8.26; platform_system == "Windows"
|
||||
|
||||
@@ -5,7 +5,7 @@ import logging
|
||||
def logger_plugin(callback):
|
||||
"""
|
||||
Bottle plugin to use logging module
|
||||
http://bottlepy.org/docs/dev/plugindev.html
|
||||
https://bottlepy.org/docs/dev/plugindev.html
|
||||
|
||||
Wrap a Bottle request so that a log line is emitted after it's handled.
|
||||
(This decorator can be extended to take the desired logger as a param.)
|
||||
|
||||
66
src/bottle_plugins/prometheus_plugin.py
Normal file
66
src/bottle_plugins/prometheus_plugin.py
Normal file
@@ -0,0 +1,66 @@
|
||||
import logging
|
||||
import os
|
||||
import urllib.parse
|
||||
|
||||
from bottle import request
|
||||
from dtos import V1RequestBase, V1ResponseBase
|
||||
from metrics import start_metrics_http_server, REQUEST_COUNTER, REQUEST_DURATION
|
||||
|
||||
PROMETHEUS_ENABLED = os.environ.get('PROMETHEUS_ENABLED', 'false').lower() == 'true'
|
||||
PROMETHEUS_PORT = int(os.environ.get('PROMETHEUS_PORT', 8192))
|
||||
|
||||
|
||||
def setup():
|
||||
if PROMETHEUS_ENABLED:
|
||||
start_metrics_http_server(PROMETHEUS_PORT)
|
||||
|
||||
|
||||
def prometheus_plugin(callback):
|
||||
"""
|
||||
Bottle plugin to expose Prometheus metrics
|
||||
https://bottlepy.org/docs/dev/plugindev.html
|
||||
"""
|
||||
def wrapper(*args, **kwargs):
|
||||
actual_response = callback(*args, **kwargs)
|
||||
|
||||
if PROMETHEUS_ENABLED:
|
||||
try:
|
||||
export_metrics(actual_response)
|
||||
except Exception as e:
|
||||
logging.warning("Error exporting metrics: " + str(e))
|
||||
|
||||
return actual_response
|
||||
|
||||
def export_metrics(actual_response):
|
||||
res = V1ResponseBase(actual_response)
|
||||
|
||||
if res.startTimestamp is None or res.endTimestamp is None:
|
||||
# skip management and healthcheck endpoints
|
||||
return
|
||||
|
||||
domain = "unknown"
|
||||
if res.solution and res.solution.url:
|
||||
domain = parse_domain_url(res.solution.url)
|
||||
else:
|
||||
# timeout error
|
||||
req = V1RequestBase(request.json)
|
||||
if req.url:
|
||||
domain = parse_domain_url(req.url)
|
||||
|
||||
run_time = (res.endTimestamp - res.startTimestamp) / 1000
|
||||
REQUEST_DURATION.labels(domain=domain).observe(run_time)
|
||||
|
||||
result = "unknown"
|
||||
if res.message == "Challenge solved!":
|
||||
result = "solved"
|
||||
elif res.message == "Challenge not detected!":
|
||||
result = "not_detected"
|
||||
elif res.message.startswith("Error"):
|
||||
result = "error"
|
||||
REQUEST_COUNTER.labels(domain=domain, result=result).inc()
|
||||
|
||||
def parse_domain_url(url):
|
||||
parsed_url = urllib.parse.urlparse(url)
|
||||
return parsed_url.hostname
|
||||
|
||||
return wrapper
|
||||
@@ -25,8 +25,8 @@ def clean_files():
|
||||
|
||||
def download_chromium():
|
||||
# https://commondatastorage.googleapis.com/chromium-browser-snapshots/index.html?prefix=Linux_x64/
|
||||
revision = "1090006" if os.name == 'nt' else '1090007'
|
||||
arch = 'Win' if os.name == 'nt' else 'Linux_x64'
|
||||
revision = "1522586" if os.name == 'nt' else '1522586'
|
||||
arch = 'Win_x64' if os.name == 'nt' else 'Linux_x64'
|
||||
dl_file = 'chrome-win' if os.name == 'nt' else 'chrome-linux'
|
||||
dl_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'dist_chrome')
|
||||
dl_path_folder = os.path.join(dl_path, dl_file)
|
||||
@@ -59,8 +59,7 @@ def download_chromium():
|
||||
# Give executable permissions for *nix
|
||||
# file * | grep executable | cut -d: -f1
|
||||
print("Giving executable permissions...")
|
||||
execs = ['chrome', 'chrome_crashpad_handler', 'chrome_sandbox', 'chrome-wrapper', 'nacl_helper',
|
||||
'nacl_helper_bootstrap', 'nacl_irt_x86_64.nexe', 'xdg-mime', 'xdg-settings']
|
||||
execs = ['chrome', 'chrome_crashpad_handler', 'chrome_sandbox', 'chrome-wrapper', 'xdg-mime', 'xdg-settings']
|
||||
for exec_file in execs:
|
||||
exec_path = os.path.join(chrome_path, exec_file)
|
||||
os.chmod(exec_path, 0o755)
|
||||
@@ -68,12 +67,15 @@ def download_chromium():
|
||||
|
||||
def run_pyinstaller():
|
||||
sep = ';' if os.name == 'nt' else ':'
|
||||
subprocess.check_call([sys.executable, "-m", "PyInstaller",
|
||||
"--icon", "resources/flaresolverr_logo.ico",
|
||||
"--add-data", f"package.json{sep}.",
|
||||
"--add-data", f"{os.path.join('dist_chrome', 'chrome')}{sep}chrome",
|
||||
os.path.join("src", "flaresolverr.py")],
|
||||
cwd=os.pardir)
|
||||
result = subprocess.run([sys.executable, "-m", "PyInstaller",
|
||||
"--icon", "resources/flaresolverr_logo.ico",
|
||||
"--add-data", f"package.json{sep}.",
|
||||
"--add-data", f"{os.path.join('dist_chrome', 'chrome')}{sep}chrome",
|
||||
os.path.join("src", "flaresolverr.py")],
|
||||
cwd=os.pardir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
if result.returncode != 0:
|
||||
print(result.stderr.decode('utf-8'))
|
||||
raise Exception("Error running pyInstaller")
|
||||
|
||||
|
||||
def compress_package():
|
||||
|
||||
@@ -10,6 +10,8 @@ class ChallengeResolutionResultT:
|
||||
response: str = None
|
||||
cookies: list = None
|
||||
userAgent: str = None
|
||||
screenshot: str | None = None
|
||||
turnstile_token: str = None
|
||||
|
||||
def __init__(self, _dict):
|
||||
self.__dict__.update(_dict)
|
||||
@@ -41,8 +43,14 @@ class V1RequestBase(object):
|
||||
url: str = None
|
||||
postData: str = None
|
||||
returnOnlyCookies: bool = None
|
||||
returnScreenshot: bool = None
|
||||
download: bool = None # deprecated v2.0.0, not used
|
||||
returnRawHtml: bool = None # deprecated v2.0.0, not used
|
||||
waitInSeconds: int = None
|
||||
# Optional resource blocking flag (blocks images, CSS, and fonts)
|
||||
disableMedia: bool = None
|
||||
# Optional when you've got a turnstile captcha that needs to be clicked after X number of Tab presses
|
||||
tabs_till_verify : int = None
|
||||
|
||||
def __init__(self, _dict):
|
||||
self.__dict__.update(_dict)
|
||||
|
||||
@@ -8,10 +8,15 @@ from bottle import run, response, Bottle, request, ServerAdapter
|
||||
|
||||
from bottle_plugins.error_plugin import error_plugin
|
||||
from bottle_plugins.logger_plugin import logger_plugin
|
||||
from bottle_plugins import prometheus_plugin
|
||||
from dtos import V1RequestBase
|
||||
import flaresolverr_service
|
||||
import utils
|
||||
|
||||
env_proxy_url = os.environ.get('PROXY_URL', None)
|
||||
env_proxy_username = os.environ.get('PROXY_USERNAME', None)
|
||||
env_proxy_password = os.environ.get('PROXY_PASSWORD', None)
|
||||
|
||||
|
||||
class JSONErrorBottle(Bottle):
|
||||
"""
|
||||
@@ -24,10 +29,6 @@ class JSONErrorBottle(Bottle):
|
||||
|
||||
app = JSONErrorBottle()
|
||||
|
||||
# plugin order is important
|
||||
app.install(logger_plugin)
|
||||
app.install(error_plugin)
|
||||
|
||||
|
||||
@app.route('/')
|
||||
def index():
|
||||
@@ -53,7 +54,14 @@ def controller_v1():
|
||||
"""
|
||||
Controller v1
|
||||
"""
|
||||
req = V1RequestBase(request.json)
|
||||
data = request.json or {}
|
||||
if (('proxy' not in data or not data.get('proxy')) and env_proxy_url is not None and (env_proxy_username is None and env_proxy_password is None)):
|
||||
logging.info('Using proxy URL ENV')
|
||||
data['proxy'] = {"url": env_proxy_url}
|
||||
if (('proxy' not in data or not data.get('proxy')) and env_proxy_url is not None and (env_proxy_username is not None or env_proxy_password is not None)):
|
||||
logging.info('Using proxy URL, username & password ENVs')
|
||||
data['proxy'] = {"url": env_proxy_url, "username": env_proxy_username, "password": env_proxy_password}
|
||||
req = V1RequestBase(data)
|
||||
res = flaresolverr_service.controller_v1_endpoint(req)
|
||||
if res.__error_500__:
|
||||
response.status = 500
|
||||
@@ -65,14 +73,21 @@ if __name__ == "__main__":
|
||||
if sys.version_info < (3, 9):
|
||||
raise Exception("The Python version is less than 3.9, a version equal to or higher is required.")
|
||||
|
||||
# fix for HEADLESS=false in Windows binary
|
||||
# https://stackoverflow.com/a/27694505
|
||||
if os.name == 'nt':
|
||||
import multiprocessing
|
||||
multiprocessing.freeze_support()
|
||||
|
||||
# fix ssl certificates for compiled binaries
|
||||
# https://github.com/pyinstaller/pyinstaller/issues/7229
|
||||
# https://stackoverflow.com/questions/55736855/how-to-change-the-cafile-argument-in-the-ssl-module-in-python3
|
||||
# https://stackoverflow.com/q/55736855
|
||||
os.environ["REQUESTS_CA_BUNDLE"] = certifi.where()
|
||||
os.environ["SSL_CERT_FILE"] = certifi.where()
|
||||
|
||||
# validate configuration
|
||||
log_level = os.environ.get('LOG_LEVEL', 'info').upper()
|
||||
log_file = os.environ.get('LOG_FILE', None)
|
||||
log_html = utils.get_config_log_html()
|
||||
headless = utils.get_config_headless()
|
||||
server_host = os.environ.get('HOST', '0.0.0.0')
|
||||
@@ -82,14 +97,29 @@ if __name__ == "__main__":
|
||||
logger_format = '%(asctime)s %(levelname)-8s %(message)s'
|
||||
if log_level == 'DEBUG':
|
||||
logger_format = '%(asctime)s %(levelname)-8s ReqId %(thread)s %(message)s'
|
||||
logging.basicConfig(
|
||||
format=logger_format,
|
||||
level=log_level,
|
||||
datefmt='%Y-%m-%d %H:%M:%S',
|
||||
handlers=[
|
||||
logging.StreamHandler(sys.stdout)
|
||||
]
|
||||
)
|
||||
if log_file:
|
||||
log_file = os.path.realpath(log_file)
|
||||
log_path = os.path.dirname(log_file)
|
||||
os.makedirs(log_path, exist_ok=True)
|
||||
logging.basicConfig(
|
||||
format=logger_format,
|
||||
level=log_level,
|
||||
datefmt='%Y-%m-%d %H:%M:%S',
|
||||
handlers=[
|
||||
logging.StreamHandler(sys.stdout),
|
||||
logging.FileHandler(log_file)
|
||||
]
|
||||
)
|
||||
else:
|
||||
logging.basicConfig(
|
||||
format=logger_format,
|
||||
level=log_level,
|
||||
datefmt='%Y-%m-%d %H:%M:%S',
|
||||
handlers=[
|
||||
logging.StreamHandler(sys.stdout)
|
||||
]
|
||||
)
|
||||
|
||||
# disable warning traces from urllib3
|
||||
logging.getLogger('urllib3').setLevel(logging.ERROR)
|
||||
logging.getLogger('selenium.webdriver.remote.remote_connection').setLevel(logging.WARNING)
|
||||
@@ -98,9 +128,19 @@ if __name__ == "__main__":
|
||||
logging.info(f'FlareSolverr {utils.get_flaresolverr_version()}')
|
||||
logging.debug('Debug log enabled')
|
||||
|
||||
# Get current OS for global variable
|
||||
utils.get_current_platform()
|
||||
|
||||
# test browser installation
|
||||
flaresolverr_service.test_browser_installation()
|
||||
|
||||
# start bottle plugins
|
||||
# plugin order is important
|
||||
app.install(logger_plugin)
|
||||
app.install(error_plugin)
|
||||
prometheus_plugin.setup()
|
||||
app.install(prometheus_plugin.prometheus_plugin)
|
||||
|
||||
# start webserver
|
||||
# default server 'wsgiref' does not support concurrent requests
|
||||
# https://github.com/FlareSolverr/FlareSolverr/issues/680
|
||||
|
||||
@@ -3,12 +3,14 @@ import platform
|
||||
import sys
|
||||
import time
|
||||
from datetime import timedelta
|
||||
from urllib.parse import unquote
|
||||
from html import escape
|
||||
from urllib.parse import unquote, quote
|
||||
|
||||
from func_timeout import FunctionTimedOut, func_timeout
|
||||
from selenium.common import TimeoutException
|
||||
from selenium.webdriver.chrome.webdriver import WebDriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.common.keys import Keys
|
||||
from selenium.webdriver.support.expected_conditions import (
|
||||
presence_of_element_located, staleness_of, title_is)
|
||||
from selenium.webdriver.common.action_chains import ActionChains
|
||||
@@ -40,13 +42,18 @@ CHALLENGE_TITLES = [
|
||||
]
|
||||
CHALLENGE_SELECTORS = [
|
||||
# Cloudflare
|
||||
'#cf-challenge-running', '.ray_id', '.attack-box', '#cf-please-wait', '#challenge-spinner', '#trk_jschal_js',
|
||||
'#cf-challenge-running', '.ray_id', '.attack-box', '#cf-please-wait', '#challenge-spinner', '#trk_jschal_js', '#turnstile-wrapper', '.lds-ring',
|
||||
# Custom CloudFlare for EbookParadijs, Film-Paleis, MuziekFabriek and Puur-Hollands
|
||||
'td.info #js_info',
|
||||
# Fairlane / pararius.com
|
||||
'div.vc div.text-box h2'
|
||||
]
|
||||
SHORT_TIMEOUT = 10
|
||||
|
||||
TURNSTILE_SELECTORS = [
|
||||
"input[name='cf-turnstile-response']"
|
||||
]
|
||||
|
||||
SHORT_TIMEOUT = 1
|
||||
SESSIONS_STORAGE = SessionsStorage()
|
||||
|
||||
|
||||
@@ -119,7 +126,7 @@ def _controller_v1_handler(req: V1RequestBase) -> V1ResponseBase:
|
||||
logging.warning("Request parameter 'userAgent' was removed in FlareSolverr v2.")
|
||||
|
||||
# set default values
|
||||
if req.maxTimeout is None or req.maxTimeout < 1:
|
||||
if req.maxTimeout is None or int(req.maxTimeout) < 1:
|
||||
req.maxTimeout = 60000
|
||||
|
||||
# execute the command
|
||||
@@ -220,7 +227,7 @@ def _cmd_sessions_destroy(req: V1RequestBase) -> V1ResponseBase:
|
||||
|
||||
|
||||
def _resolve_challenge(req: V1RequestBase, method: str) -> ChallengeResolutionT:
|
||||
timeout = req.maxTimeout / 1000
|
||||
timeout = int(req.maxTimeout) / 1000
|
||||
driver = None
|
||||
try:
|
||||
if req.session:
|
||||
@@ -245,32 +252,30 @@ def _resolve_challenge(req: V1RequestBase, method: str) -> ChallengeResolutionT:
|
||||
raise Exception('Error solving the challenge. ' + str(e).replace('\n', '\\n'))
|
||||
finally:
|
||||
if not req.session and driver is not None:
|
||||
if utils.PLATFORM_VERSION == "nt":
|
||||
driver.close()
|
||||
driver.quit()
|
||||
logging.debug('A used instance of webdriver has been destroyed')
|
||||
|
||||
|
||||
def click_verify(driver: WebDriver):
|
||||
def click_verify(driver: WebDriver, num_tabs: int = 1):
|
||||
try:
|
||||
logging.debug("Try to find the Cloudflare verify checkbox")
|
||||
iframe = driver.find_element(By.XPATH, "//iframe[@title='Widget containing a Cloudflare security challenge']")
|
||||
driver.switch_to.frame(iframe)
|
||||
checkbox = driver.find_element(
|
||||
by=By.XPATH,
|
||||
value='//*[@id="cf-stage"]//label[@class="ctp-checkbox-label"]/input',
|
||||
)
|
||||
if checkbox:
|
||||
actions = ActionChains(driver)
|
||||
actions.move_to_element_with_offset(checkbox, 5, 7)
|
||||
actions.click(checkbox)
|
||||
actions.perform()
|
||||
logging.debug("Cloudflare verify checkbox found and clicked")
|
||||
logging.debug("Try to find the Cloudflare verify checkbox...")
|
||||
actions = ActionChains(driver)
|
||||
actions.pause(5)
|
||||
for _ in range(num_tabs):
|
||||
actions.send_keys(Keys.TAB).pause(0.1)
|
||||
actions.pause(1)
|
||||
actions.send_keys(Keys.SPACE).perform()
|
||||
|
||||
logging.debug(f"Cloudflare verify checkbox clicked after {num_tabs} tabs!")
|
||||
except Exception:
|
||||
logging.debug("Cloudflare verify checkbox not found on the page")
|
||||
logging.debug("Cloudflare verify checkbox not found on the page.")
|
||||
finally:
|
||||
driver.switch_to.default_content()
|
||||
|
||||
try:
|
||||
logging.debug("Try to find the Cloudflare 'Verify you are human' button")
|
||||
logging.debug("Try to find the Cloudflare 'Verify you are human' button...")
|
||||
button = driver.find_element(
|
||||
by=By.XPATH,
|
||||
value="//input[@type='button' and @value='Verify you are human']",
|
||||
@@ -280,25 +285,96 @@ def click_verify(driver: WebDriver):
|
||||
actions.move_to_element_with_offset(button, 5, 7)
|
||||
actions.click(button)
|
||||
actions.perform()
|
||||
logging.debug("The Cloudflare 'Verify you are human' button found and clicked")
|
||||
except Exception as e:
|
||||
logging.debug("The Cloudflare 'Verify you are human' button not found on the page")
|
||||
# print(e)
|
||||
logging.debug("The Cloudflare 'Verify you are human' button found and clicked!")
|
||||
except Exception:
|
||||
logging.debug("The Cloudflare 'Verify you are human' button not found on the page.")
|
||||
|
||||
time.sleep(2)
|
||||
|
||||
def _get_turnstile_token(driver: WebDriver, tabs: int):
|
||||
token_input = driver.find_element(By.CSS_SELECTOR, "input[name='cf-turnstile-response']")
|
||||
current_value = token_input.get_attribute("value")
|
||||
while True:
|
||||
click_verify(driver, num_tabs=tabs)
|
||||
turnstile_token = token_input.get_attribute("value")
|
||||
if turnstile_token:
|
||||
if turnstile_token != current_value:
|
||||
logging.info(f"Turnstile token: {turnstile_token}")
|
||||
return turnstile_token
|
||||
logging.debug(f"Failed to extract token possibly click failed")
|
||||
|
||||
# reset focus
|
||||
driver.execute_script("""
|
||||
let el = document.createElement('button');
|
||||
el.style.position='fixed';
|
||||
el.style.top='0';
|
||||
el.style.left='0';
|
||||
document.body.prepend(el);
|
||||
el.focus();
|
||||
""")
|
||||
time.sleep(1)
|
||||
|
||||
def _resolve_turnstile_captcha(req: V1RequestBase, driver: WebDriver):
|
||||
turnstile_token = None
|
||||
if req.tabs_till_verify is not None:
|
||||
logging.debug(f'Navigating to... {req.url} in order to pass the turnstile challenge')
|
||||
driver.get(req.url)
|
||||
|
||||
turnstile_challenge_found = False
|
||||
for selector in TURNSTILE_SELECTORS:
|
||||
found_elements = driver.find_elements(By.CSS_SELECTOR, selector)
|
||||
if len(found_elements) > 0:
|
||||
turnstile_challenge_found = True
|
||||
logging.info("Turnstile challenge detected. Selector found: " + selector)
|
||||
break
|
||||
if turnstile_challenge_found:
|
||||
turnstile_token = _get_turnstile_token(driver=driver, tabs=req.tabs_till_verify)
|
||||
else:
|
||||
logging.debug(f'Turnstile challenge not found')
|
||||
return turnstile_token
|
||||
|
||||
def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> ChallengeResolutionT:
|
||||
res = ChallengeResolutionT({})
|
||||
res.status = STATUS_OK
|
||||
res.message = ""
|
||||
|
||||
# optionally block resources like images/css/fonts using CDP
|
||||
disable_media = utils.get_config_disable_media()
|
||||
if req.disableMedia is not None:
|
||||
disable_media = req.disableMedia
|
||||
if disable_media:
|
||||
block_urls = [
|
||||
# Images
|
||||
"*.png", "*.jpg", "*.jpeg", "*.gif", "*.webp", "*.bmp", "*.svg", "*.ico",
|
||||
"*.PNG", "*.JPG", "*.JPEG", "*.GIF", "*.WEBP", "*.BMP", "*.SVG", "*.ICO",
|
||||
"*.tiff", "*.tif", "*.jpe", "*.apng", "*.avif", "*.heic", "*.heif",
|
||||
"*.TIFF", "*.TIF", "*.JPE", "*.APNG", "*.AVIF", "*.HEIC", "*.HEIF",
|
||||
# Stylesheets
|
||||
"*.css",
|
||||
"*.CSS",
|
||||
# Fonts
|
||||
"*.woff", "*.woff2", "*.ttf", "*.otf", "*.eot",
|
||||
"*.WOFF", "*.WOFF2", "*.TTF", "*.OTF", "*.EOT"
|
||||
]
|
||||
try:
|
||||
logging.debug("Network.setBlockedURLs: %s", block_urls)
|
||||
driver.execute_cdp_cmd("Network.enable", {})
|
||||
driver.execute_cdp_cmd("Network.setBlockedURLs", {"urls": block_urls})
|
||||
except Exception:
|
||||
# if CDP commands are not available or fail, ignore and continue
|
||||
logging.debug("Network.setBlockedURLs failed or unsupported on this webdriver")
|
||||
|
||||
# navigate to the page
|
||||
logging.debug(f'Navigating to... {req.url}')
|
||||
if method == 'POST':
|
||||
logging.debug(f"Navigating to... {req.url}")
|
||||
turnstile_token = None
|
||||
|
||||
if method == "POST":
|
||||
_post_request(req, driver)
|
||||
else:
|
||||
driver.get(req.url)
|
||||
if req.tabs_till_verify is None:
|
||||
driver.get(req.url)
|
||||
else:
|
||||
turnstile_token = _resolve_turnstile_captcha(req, driver)
|
||||
|
||||
# set cookies if required
|
||||
if req.cookies is not None and len(req.cookies) > 0:
|
||||
@@ -320,7 +396,7 @@ def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> Challenge
|
||||
|
||||
# find access denied titles
|
||||
for title in ACCESS_DENIED_TITLES:
|
||||
if title == page_title:
|
||||
if page_title.startswith(title):
|
||||
raise Exception('Cloudflare has blocked this request. '
|
||||
'Probably your IP is banned for this site, check in your web browser.')
|
||||
# find access denied selectors
|
||||
@@ -392,21 +468,30 @@ def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> Challenge
|
||||
challenge_res.status = 200 # todo: fix, selenium not provides this info
|
||||
challenge_res.cookies = driver.get_cookies()
|
||||
challenge_res.userAgent = utils.get_user_agent(driver)
|
||||
challenge_res.turnstile_token = turnstile_token
|
||||
|
||||
if not req.returnOnlyCookies:
|
||||
challenge_res.headers = {} # todo: fix, selenium not provides this info
|
||||
|
||||
if req.waitInSeconds and req.waitInSeconds > 0:
|
||||
logging.info("Waiting " + str(req.waitInSeconds) + " seconds before returning the response...")
|
||||
time.sleep(req.waitInSeconds)
|
||||
|
||||
challenge_res.response = driver.page_source
|
||||
|
||||
if req.returnScreenshot:
|
||||
challenge_res.screenshot = driver.get_screenshot_as_base64()
|
||||
|
||||
res.result = challenge_res
|
||||
return res
|
||||
|
||||
|
||||
def _post_request(req: V1RequestBase, driver: WebDriver):
|
||||
post_form = f'<form id="hackForm" action="{req.url}" method="POST">'
|
||||
query_string = req.postData if req.postData[0] != '?' else req.postData[1:]
|
||||
query_string = req.postData if req.postData and req.postData[0] != '?' else req.postData[1:] if req.postData else ''
|
||||
pairs = query_string.split('&')
|
||||
for pair in pairs:
|
||||
parts = pair.split('=')
|
||||
parts = pair.split('=', 1)
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
name = unquote(parts[0])
|
||||
@@ -416,10 +501,12 @@ def _post_request(req: V1RequestBase, driver: WebDriver):
|
||||
continue
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
value = unquote(parts[1])
|
||||
value = unquote(parts[1]) if len(parts) > 1 else ''
|
||||
except Exception:
|
||||
value = parts[1]
|
||||
post_form += f'<input type="text" name="{name}" value="{value}"><br>'
|
||||
value = parts[1] if len(parts) > 1 else ''
|
||||
# Protection of " character, for syntax
|
||||
value=value.replace('"','"')
|
||||
post_form += f'<input type="text" name="{escape(quote(name))}" value="{escape(quote(value))}"><br>'
|
||||
post_form += '</form>'
|
||||
html_content = f"""
|
||||
<!DOCTYPE html>
|
||||
@@ -429,4 +516,4 @@ def _post_request(req: V1RequestBase, driver: WebDriver):
|
||||
<script>document.getElementById('hackForm').submit();</script>
|
||||
</body>
|
||||
</html>"""
|
||||
driver.get("data:text/html;charset=utf-8," + html_content)
|
||||
driver.get("data:text/html;charset=utf-8,{html_content}".format(html_content=html_content))
|
||||
|
||||
32
src/metrics.py
Normal file
32
src/metrics.py
Normal file
@@ -0,0 +1,32 @@
|
||||
import logging
|
||||
|
||||
from prometheus_client import Counter, Histogram, start_http_server
|
||||
import time
|
||||
|
||||
REQUEST_COUNTER = Counter(
|
||||
name='flaresolverr_request',
|
||||
documentation='Total requests with result',
|
||||
labelnames=['domain', 'result']
|
||||
)
|
||||
REQUEST_DURATION = Histogram(
|
||||
name='flaresolverr_request_duration',
|
||||
documentation='Request duration in seconds',
|
||||
labelnames=['domain'],
|
||||
buckets=[0, 10, 25, 50]
|
||||
)
|
||||
|
||||
|
||||
def serve(port):
    """
    Start the Prometheus exporter HTTP server on *port* and then block forever.

    Intended to be used as a thread target: after the server has been
    started the function only sleeps in 600 second intervals and never
    returns.
    """
    start_http_server(port=port)
    idle_seconds = 600
    while True:
        time.sleep(idle_seconds)
|
||||
|
||||
|
||||
def start_metrics_http_server(prometheus_port: int):
    """
    Launch the Prometheus exporter in a background daemon thread.

    Args:
        prometheus_port: TCP port handed to ``serve`` for the exporter.
    """
    from threading import Thread

    logging.info(f"Serving Prometheus exporter on http://0.0.0.0:{prometheus_port}/metrics")
    # daemon thread, so the exporter never keeps the interpreter alive on exit
    exporter_thread = Thread(
        target=serve,
        kwargs={"port": prometheus_port},
        daemon=True,
    )
    exporter_thread.start()
|
||||
@@ -66,6 +66,8 @@ class SessionsStorage:
|
||||
return False
|
||||
|
||||
session = self.sessions.pop(session_id)
|
||||
if utils.PLATFORM_VERSION == "nt":
|
||||
session.driver.close()
|
||||
session.driver.quit()
|
||||
return True
|
||||
|
||||
|
||||
67
src/tests.py
67
src/tests.py
@@ -21,11 +21,11 @@ class TestFlareSolverr(unittest.TestCase):
|
||||
proxy_socks_url = "socks5://127.0.0.1:1080"
|
||||
google_url = "https://www.google.com"
|
||||
post_url = "https://httpbin.org/post"
|
||||
cloudflare_url = "https://nowsecure.nl"
|
||||
cloudflare_url = "https://nowsecure.nl/"
|
||||
cloudflare_url_2 = "https://idope.se/torrent-list/harry/"
|
||||
ddos_guard_url = "https://anidex.info/"
|
||||
ddos_guard_url = "https://www.litres.ru/"
|
||||
fairlane_url = "https://www.pararius.com/apartments/amsterdam"
|
||||
custom_cloudflare_url = "https://www.muziekfabriek.org"
|
||||
custom_cloudflare_url = "https://www.muziekfabriek.org/"
|
||||
cloudflare_blocked_url = "https://cpasbiens3.fr/index.php?do=search&subaction=search"
|
||||
|
||||
app = TestApp(flaresolverr.app)
|
||||
@@ -92,6 +92,29 @@ class TestFlareSolverr(unittest.TestCase):
|
||||
self.assertGreater(len(solution.cookies), 0)
|
||||
self.assertIn("Chrome/", solution.userAgent)
|
||||
|
||||
def test_v1_endpoint_request_get_disable_resources(self):
    # A plain GET with "disableMedia" enabled must still yield a regular,
    # challenge-free solution.
    res = self.app.post_json("/v1", {
        "cmd": "request.get",
        "url": self.google_url,
        "disableMedia": True
    })
    self.assertEqual(res.status_code, 200)

    body = V1ResponseBase(res.json)
    self.assertEqual(STATUS_OK, body.status)
    self.assertEqual("Challenge not detected!", body.message)
    self.assertGreater(body.startTimestamp, 10000)
    self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
    self.assertEqual(utils.get_flaresolverr_version(), body.version)

    solution = body.solution
    self.assertIn(self.google_url, solution.url)
    self.assertEqual(solution.status, 200)
    # compare by value: identity of int objects is an interpreter detail
    self.assertEqual(len(solution.headers), 0)
    self.assertIn("<title>Google</title>", solution.response)
    self.assertGreater(len(solution.cookies), 0)
    self.assertIn("Chrome/", solution.userAgent)
|
||||
|
||||
def test_v1_endpoint_request_get_cloudflare_js_1(self):
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "request.get",
|
||||
@@ -162,7 +185,7 @@ class TestFlareSolverr(unittest.TestCase):
|
||||
self.assertIn(self.ddos_guard_url, solution.url)
|
||||
self.assertEqual(solution.status, 200)
|
||||
self.assertIs(len(solution.headers), 0)
|
||||
self.assertIn("<title>AniDex</title>", solution.response)
|
||||
self.assertIn("<title>Литрес", solution.response)
|
||||
self.assertGreater(len(solution.cookies), 0)
|
||||
self.assertIn("Chrome/", solution.userAgent)
|
||||
|
||||
@@ -335,6 +358,42 @@ class TestFlareSolverr(unittest.TestCase):
|
||||
self.assertGreater(len(solution.cookies), 0)
|
||||
self.assertIn("Chrome/", solution.userAgent)
|
||||
|
||||
def test_v1_endpoint_request_get_proxy_http_param_with_credentials(self):
    """
    To configure TinyProxy in local:
       * sudo vim /etc/tinyproxy/tinyproxy.conf
          * edit => LogFile "/tmp/tinyproxy.log"
          * edit => Syslog Off
          * add => BasicAuth testuser testpass
       * sudo tinyproxy -d
       * sudo tail -f /tmp/tinyproxy.log
    """
    res = self.app.post_json('/v1', {
        "cmd": "request.get",
        "url": self.google_url,
        "proxy": {
            "url": self.proxy_url,
            "username": "testuser",
            "password": "testpass"
        }
    })
    self.assertEqual(res.status_code, 200)

    body = V1ResponseBase(res.json)
    self.assertEqual(STATUS_OK, body.status)
    self.assertEqual("Challenge not detected!", body.message)
    self.assertGreater(body.startTimestamp, 10000)
    self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
    self.assertEqual(utils.get_flaresolverr_version(), body.version)

    solution = body.solution
    self.assertIn(self.google_url, solution.url)
    self.assertEqual(solution.status, 200)
    # compare by value: identity of int objects is an interpreter detail
    self.assertEqual(len(solution.headers), 0)
    self.assertIn("<title>Google</title>", solution.response)
    self.assertGreater(len(solution.cookies), 0)
    self.assertIn("Chrome/", solution.userAgent)
|
||||
|
||||
def test_v1_endpoint_request_get_proxy_socks_param(self):
|
||||
"""
|
||||
To configure Dante in local:
|
||||
|
||||
@@ -17,11 +17,12 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
__version__ = "3.4.6"
|
||||
__version__ = "3.5.5"
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import pathlib
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
@@ -33,7 +34,7 @@ from weakref import finalize
|
||||
import selenium.webdriver.chrome.service
|
||||
import selenium.webdriver.chrome.webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
import selenium.webdriver.common.service
|
||||
import selenium.webdriver.chromium.service
|
||||
import selenium.webdriver.remote.command
|
||||
import selenium.webdriver.remote.webdriver
|
||||
|
||||
@@ -109,11 +110,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
||||
browser_executable_path=None,
|
||||
port=0,
|
||||
enable_cdp_events=False,
|
||||
service_args=None,
|
||||
service_creationflags=None,
|
||||
# service_args=None,
|
||||
# service_creationflags=None,
|
||||
desired_capabilities=None,
|
||||
advanced_elements=False,
|
||||
service_log_path=None,
|
||||
# service_log_path=None,
|
||||
keep_alive=True,
|
||||
log_level=0,
|
||||
headless=False,
|
||||
@@ -122,8 +123,9 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
||||
suppress_welcome=True,
|
||||
use_subprocess=False,
|
||||
debug=False,
|
||||
no_sandbox=True,
|
||||
no_sandbox=True,
|
||||
windows_headless=False,
|
||||
user_multi_procs: bool = False,
|
||||
**kw,
|
||||
):
|
||||
"""
|
||||
@@ -235,6 +237,14 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
||||
uses the --no-sandbox option, and additionally does suppress the "unsecure option" status bar
|
||||
this option has a default of True since many people seem to run this as root (....) , and chrome does not start
|
||||
when running as root without using --no-sandbox flag.
|
||||
|
||||
user_multi_procs:
|
||||
set to true when you are using multithreads/multiprocessing
|
||||
ensures not all processes are trying to modify a binary which is in use by another.
|
||||
for this to work. YOU MUST HAVE AT LEAST 1 UNDETECTED_CHROMEDRIVER BINARY IN YOUR ROAMING DATA FOLDER.
|
||||
this requirement can be easily satisfied, by just running this program "normal" and close/kill it.
|
||||
|
||||
|
||||
"""
|
||||
|
||||
finalize(self, self._ensure_close, self)
|
||||
@@ -243,8 +253,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
||||
executable_path=driver_executable_path,
|
||||
force=patcher_force_close,
|
||||
version_main=version_main,
|
||||
user_multi_procs=user_multi_procs,
|
||||
)
|
||||
# self.patcher.auto(user_multiprocess = user_multi_num_procs)
|
||||
self.patcher.auto()
|
||||
|
||||
# self.patcher = patcher
|
||||
if not options:
|
||||
options = ChromeOptions()
|
||||
@@ -361,6 +374,18 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
||||
browser_executable_path or find_chrome_executable()
|
||||
)
|
||||
|
||||
if not options.binary_location or not \
|
||||
pathlib.Path(options.binary_location).exists():
|
||||
raise FileNotFoundError(
|
||||
"\n---------------------\n"
|
||||
"Could not determine browser executable."
|
||||
"\n---------------------\n"
|
||||
"Make sure your browser is installed in the default location (path).\n"
|
||||
"If you are sure about the browser executable, you can specify it using\n"
|
||||
"the `browser_executable_path='{}` parameter.\n\n"
|
||||
.format("/path/to/browser/executable" if IS_POSIX else "c:/path/to/your/browser.exe")
|
||||
)
|
||||
|
||||
self._delay = 3
|
||||
|
||||
self.user_data_dir = user_data_dir
|
||||
@@ -371,11 +396,17 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
||||
if no_sandbox:
|
||||
options.arguments.extend(["--no-sandbox", "--test-type"])
|
||||
|
||||
if headless or options.headless:
|
||||
v_main = int(self.patcher.version_main) if self.patcher.version_main else 108
|
||||
if v_main < 108:
|
||||
options.add_argument("--headless=chrome")
|
||||
elif v_main >= 108:
|
||||
if headless or getattr(options, 'headless', None):
|
||||
#workaround until a better checking is found
|
||||
try:
|
||||
v_main = int(self.patcher.version_main) if self.patcher.version_main else 108
|
||||
if v_main < 108:
|
||||
options.add_argument("--headless=chrome")
|
||||
elif v_main >= 108:
|
||||
options.add_argument("--headless=new")
|
||||
except:
|
||||
logger.warning("could not detect version_main."
|
||||
"therefore, we are assuming it is chrome 108 or higher")
|
||||
options.add_argument("--headless=new")
|
||||
|
||||
options.add_argument("--window-size=1920,1080")
|
||||
@@ -419,40 +450,31 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
||||
self.browser_pid = start_detached(
|
||||
options.binary_location, *options.arguments
|
||||
)
|
||||
else:
|
||||
startupinfo = subprocess.STARTUPINFO()
|
||||
if os.name == 'nt' and windows_headless:
|
||||
else:
|
||||
startupinfo = None
|
||||
if os.name == 'nt' and windows_headless:
|
||||
# STARTUPINFO() is Windows only
|
||||
startupinfo = subprocess.STARTUPINFO()
|
||||
startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
|
||||
browser = subprocess.Popen(
|
||||
[options.binary_location, *options.arguments],
|
||||
stdin=subprocess.PIPE,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
close_fds=IS_POSIX,
|
||||
close_fds=IS_POSIX,
|
||||
startupinfo=startupinfo
|
||||
)
|
||||
self.browser_pid = browser.pid
|
||||
|
||||
if service_creationflags:
|
||||
service = selenium.webdriver.common.service.Service(
|
||||
self.patcher.executable_path, port, service_args, service_log_path
|
||||
)
|
||||
for attr_name in ("creationflags", "creation_flags"):
|
||||
if hasattr(service, attr_name):
|
||||
setattr(service, attr_name, service_creationflags)
|
||||
break
|
||||
else:
|
||||
service = None
|
||||
|
||||
super(Chrome, self).__init__(
|
||||
executable_path=self.patcher.executable_path,
|
||||
port=port,
|
||||
service = selenium.webdriver.chromium.service.ChromiumService(
|
||||
self.patcher.executable_path
|
||||
)
|
||||
|
||||
super().__init__(
|
||||
service=service,
|
||||
options=options,
|
||||
service_args=service_args,
|
||||
desired_capabilities=desired_capabilities,
|
||||
service_log_path=service_log_path,
|
||||
keep_alive=keep_alive,
|
||||
service=service, # needed or the service will be re-created
|
||||
)
|
||||
|
||||
self.reactor = None
|
||||
@@ -471,7 +493,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
||||
else:
|
||||
self._web_element_cls = WebElement
|
||||
|
||||
if options.headless:
|
||||
if headless or getattr(options, 'headless', None):
|
||||
self._configure_headless()
|
||||
|
||||
def _configure_headless(self):
|
||||
@@ -485,8 +507,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
||||
"Page.addScriptToEvaluateOnNewDocument",
|
||||
{
|
||||
"source": """
|
||||
|
||||
Object.defineProperty(window, "navigator", {
|
||||
Object.defineProperty(window, "navigator", {
|
||||
value: new Proxy(navigator, {
|
||||
has: (target, key) => (key === "webdriver" ? false : key in target),
|
||||
@@ -707,14 +727,49 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
||||
def start_session(self, capabilities=None, browser_profile=None):
|
||||
if not capabilities:
|
||||
capabilities = self.options.to_capabilities()
|
||||
super(selenium.webdriver.chrome.webdriver.WebDriver, self).start_session(
|
||||
capabilities, browser_profile
|
||||
)
|
||||
# super(Chrome, self).start_session(capabilities, browser_profile)
|
||||
super().start_session(capabilities)
|
||||
# super(Chrome, self).start_session(capabilities, browser_profile) # Original explicit call commented out
|
||||
|
||||
def find_elements_recursive(self, by, value):
    """
    Yield matching elements from the main document and from each iframe
    found in the main document.

    This is a generator on purpose: a returned list would contain elements
    that are stale on arrival, whereas a yielded element is handed out
    while the driver is still switched to the frame that owns it.

    Args:
        by: By
        value: str

    Returns: Generator[webelement.WebElement]
    """

    def _matches_in(frame=None):
        # enter the requested frame, or make sure we are on the main content
        if frame:
            self.switch_to.frame(frame)
        else:
            self.switch_to.default_content()
        yield from self.find_elements(by, value)
        # switch back to main content, otherwise we will get
        # StaleElementReferenceException
        self.switch_to.default_content()

    # root document first
    yield from _matches_in()

    # then every iframe located in the root document
    for iframe in self.find_elements('css selector', 'iframe'):
        yield from _matches_in(iframe)
|
||||
|
||||
def quit(self):
|
||||
try:
|
||||
self.service.process.kill()
|
||||
self.service.stop()
|
||||
self.service.process.kill()
|
||||
self.command_executor.close()
|
||||
self.service.process.wait(5)
|
||||
logger.debug("webdriver process ended")
|
||||
except (AttributeError, RuntimeError, OSError):
|
||||
@@ -728,7 +783,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
||||
os.kill(self.browser_pid, 15)
|
||||
logger.debug("gracefully closed browser")
|
||||
except Exception as e: # noqa
|
||||
logger.debug(e, exc_info=True)
|
||||
pass
|
||||
if (
|
||||
hasattr(self, "keep_user_data_dir")
|
||||
and hasattr(self, "user_data_dir")
|
||||
@@ -747,7 +802,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
||||
else:
|
||||
logger.debug("successfully removed %s" % self.user_data_dir)
|
||||
break
|
||||
time.sleep(0.1)
|
||||
|
||||
try:
|
||||
time.sleep(0.1)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
# dereference patcher, so patcher can start cleaning up as well.
|
||||
# this must come last, otherwise it will throw 'in use' errors
|
||||
@@ -842,10 +901,10 @@ def find_chrome_executable():
|
||||
if item is not None:
|
||||
for subitem in (
|
||||
"Google/Chrome/Application",
|
||||
"Google/Chrome Beta/Application",
|
||||
"Google/Chrome Canary/Application",
|
||||
):
|
||||
candidates.add(os.sep.join((item, subitem, "chrome.exe")))
|
||||
for candidate in candidates:
|
||||
logger.debug('checking if %s exists and is executable' % candidate)
|
||||
if os.path.exists(candidate) and os.access(candidate, os.X_OK):
|
||||
logger.debug('found! using %s' % candidate)
|
||||
return os.path.normpath(candidate)
|
||||
|
||||
@@ -2,6 +2,7 @@ import asyncio
|
||||
from collections.abc import Mapping
|
||||
from collections.abc import Sequence
|
||||
from functools import wraps
|
||||
import os
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
@@ -187,4 +188,6 @@ def test():
|
||||
|
||||
time.sleep(10)
|
||||
|
||||
if os.name == "nt":
|
||||
driver.close()
|
||||
driver.quit()
|
||||
|
||||
@@ -41,6 +41,7 @@ def start_detached(executable, *args):
|
||||
# close pipes
|
||||
writer.close()
|
||||
reader.close()
|
||||
process.close()
|
||||
|
||||
return pid
|
||||
|
||||
|
||||
@@ -1,41 +1,35 @@
|
||||
#!/usr/bin/env python3
|
||||
# this module is part of undetected_chromedriver
|
||||
|
||||
from distutils.version import LooseVersion
|
||||
from packaging.version import Version as LooseVersion
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import pathlib
|
||||
import platform
|
||||
import random
|
||||
import re
|
||||
import shutil
|
||||
import string
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from urllib.request import urlopen
|
||||
from urllib.request import urlretrieve
|
||||
import zipfile
|
||||
|
||||
from multiprocessing import Lock
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
IS_POSIX = sys.platform.startswith(("darwin", "cygwin", "linux", "linux2"))
|
||||
IS_POSIX = sys.platform.startswith(("darwin", "cygwin", "linux", "linux2", "freebsd"))
|
||||
|
||||
|
||||
class Patcher(object):
|
||||
url_repo = "https://chromedriver.storage.googleapis.com"
|
||||
zip_name = "chromedriver_%s.zip"
|
||||
lock = Lock()
|
||||
exe_name = "chromedriver%s"
|
||||
|
||||
platform = sys.platform
|
||||
if platform.endswith("win32"):
|
||||
zip_name %= "win32"
|
||||
exe_name %= ".exe"
|
||||
if platform.endswith(("linux", "linux2")):
|
||||
zip_name %= "linux64"
|
||||
exe_name %= ""
|
||||
if platform.endswith("darwin"):
|
||||
zip_name %= "mac64"
|
||||
exe_name %= ""
|
||||
|
||||
if platform.endswith("win32"):
|
||||
d = "~/appdata/roaming/undetected_chromedriver"
|
||||
elif "LAMBDA_TASK_ROOT" in os.environ:
|
||||
@@ -48,7 +42,13 @@ class Patcher(object):
|
||||
d = "~/.undetected_chromedriver"
|
||||
data_path = os.path.abspath(os.path.expanduser(d))
|
||||
|
||||
def __init__(self, executable_path=None, force=False, version_main: int = 0):
|
||||
def __init__(
|
||||
self,
|
||||
executable_path=None,
|
||||
force=False,
|
||||
version_main: int = 0,
|
||||
user_multi_procs=False,
|
||||
):
|
||||
"""
|
||||
Args:
|
||||
executable_path: None = automatic
|
||||
@@ -61,14 +61,36 @@ class Patcher(object):
|
||||
self.force = force
|
||||
self._custom_exe_path = False
|
||||
prefix = "undetected"
|
||||
self.user_multi_procs = user_multi_procs
|
||||
|
||||
try:
|
||||
# Try to convert version_main into an integer
|
||||
version_main_int = int(version_main)
|
||||
# check if version_main_int is less than or equal to e.g 114
|
||||
self.is_old_chromedriver = version_main and version_main_int <= 114
|
||||
except (ValueError,TypeError):
|
||||
# Check not running inside Docker
|
||||
if not os.path.exists("/app/chromedriver"):
|
||||
# If the conversion fails, log an error message
|
||||
logging.info("version_main cannot be converted to an integer")
|
||||
# Set self.is_old_chromedriver to False if the conversion fails
|
||||
self.is_old_chromedriver = False
|
||||
|
||||
# Needs to be called before self.exe_name is accessed
|
||||
self._set_platform_name()
|
||||
|
||||
if not os.path.exists(self.data_path):
|
||||
os.makedirs(self.data_path, exist_ok=True)
|
||||
|
||||
if not executable_path:
|
||||
self.executable_path = os.path.join(
|
||||
self.data_path, "_".join([prefix, self.exe_name])
|
||||
)
|
||||
if sys.platform.startswith("freebsd"):
|
||||
self.executable_path = os.path.join(
|
||||
self.data_path, self.exe_name
|
||||
)
|
||||
else:
|
||||
self.executable_path = os.path.join(
|
||||
self.data_path, "_".join([prefix, self.exe_name])
|
||||
)
|
||||
|
||||
if not IS_POSIX:
|
||||
if executable_path:
|
||||
@@ -78,17 +100,67 @@ class Patcher(object):
|
||||
self.zip_path = os.path.join(self.data_path, prefix)
|
||||
|
||||
if not executable_path:
|
||||
self.executable_path = os.path.abspath(
|
||||
os.path.join(".", self.executable_path)
|
||||
)
|
||||
if not self.user_multi_procs:
|
||||
self.executable_path = os.path.abspath(
|
||||
os.path.join(".", self.executable_path)
|
||||
)
|
||||
|
||||
if executable_path:
|
||||
self._custom_exe_path = True
|
||||
self.executable_path = executable_path
|
||||
|
||||
# Set the correct repository to download the Chromedriver from
|
||||
if self.is_old_chromedriver:
|
||||
self.url_repo = "https://chromedriver.storage.googleapis.com"
|
||||
else:
|
||||
self.url_repo = "https://googlechromelabs.github.io/chrome-for-testing"
|
||||
|
||||
self.version_main = version_main
|
||||
self.version_full = None
|
||||
|
||||
def auto(self, executable_path=None, force=False, version_main=None):
|
||||
def _set_platform_name(self):
    """
    Set the platform and exe name based on the platform undetected_chromedriver is running on
    in order to download the correct chromedriver.

    Reads ``self.platform`` and ``self.is_old_chromedriver``; writes
    ``self.platform_name`` and fills the ``%s`` placeholder of
    ``self.exe_name`` with the executable suffix. When the platform is not
    one of the recognised ones, nothing is modified.

    Calling this more than once is safe: the placeholder is only
    substituted while it is still present in ``self.exe_name``.
    """
    platform = self.platform
    if platform.endswith("win32"):
        platform_name, exe_suffix = "win32", ".exe"
    elif platform.endswith(("linux", "linux2")):
        platform_name, exe_suffix = "linux64", ""
    elif platform.endswith("darwin"):
        # the directory name for mac builds differs between the old and the new repository
        platform_name = "mac64" if self.is_old_chromedriver else "mac-x64"
        exe_suffix = ""
    elif platform.startswith("freebsd"):
        platform_name, exe_suffix = "freebsd", ""
    else:
        return

    self.platform_name = platform_name
    # formatting an already formatted name would raise TypeError
    if "%s" in self.exe_name:
        self.exe_name %= exe_suffix
|
||||
|
||||
def auto(self, executable_path=None, force=False, version_main=None, _=None):
|
||||
"""
|
||||
|
||||
Args:
|
||||
executable_path:
|
||||
force:
|
||||
version_main:
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
p = pathlib.Path(self.data_path)
|
||||
if self.user_multi_procs:
|
||||
with Lock():
|
||||
files = list(p.rglob("*chromedriver*"))
|
||||
most_recent = max(files, key=lambda f: f.stat().st_mtime)
|
||||
files.remove(most_recent)
|
||||
list(map(lambda f: f.unlink(), files))
|
||||
if self.is_binary_patched(most_recent):
|
||||
self.executable_path = str(most_recent)
|
||||
return True
|
||||
|
||||
if executable_path:
|
||||
self.executable_path = executable_path
|
||||
self._custom_exe_path = True
|
||||
@@ -105,27 +177,104 @@ class Patcher(object):
|
||||
if force is True:
|
||||
self.force = force
|
||||
|
||||
try:
|
||||
os.unlink(self.executable_path)
|
||||
except PermissionError:
|
||||
if self.force:
|
||||
self.force_kill_instances(self.executable_path)
|
||||
return self.auto(force=not self.force)
|
||||
|
||||
if self.platform_name == "freebsd":
|
||||
chromedriver_path = shutil.which("chromedriver")
|
||||
|
||||
if not os.path.isfile(chromedriver_path) or not os.access(chromedriver_path, os.X_OK):
|
||||
logging.error("Chromedriver not installed!")
|
||||
return
|
||||
|
||||
version_path = os.path.join(os.path.dirname(self.executable_path), "version.txt")
|
||||
|
||||
process = os.popen(f'"{chromedriver_path}" --version')
|
||||
chromedriver_version = process.read().split(' ')[1].split(' ')[0]
|
||||
process.close()
|
||||
|
||||
current_version = None
|
||||
if os.path.isfile(version_path) or os.access(version_path, os.X_OK):
|
||||
with open(version_path, 'r') as f:
|
||||
current_version = f.read()
|
||||
|
||||
if current_version != chromedriver_version:
|
||||
logging.info("Copying chromedriver executable...")
|
||||
shutil.copy(chromedriver_path, self.executable_path)
|
||||
os.chmod(self.executable_path, 0o755)
|
||||
|
||||
with open(version_path, 'w') as f:
|
||||
f.write(chromedriver_version)
|
||||
|
||||
logging.info("Chromedriver executable copied!")
|
||||
else:
|
||||
try:
|
||||
if self.is_binary_patched():
|
||||
# assumes already running AND patched
|
||||
return True
|
||||
os.unlink(self.executable_path)
|
||||
except PermissionError:
|
||||
if self.force:
|
||||
self.force_kill_instances(self.executable_path)
|
||||
return self.auto(force=not self.force)
|
||||
try:
|
||||
if self.is_binary_patched():
|
||||
# assumes already running AND patched
|
||||
return True
|
||||
except PermissionError:
|
||||
pass
|
||||
# return False
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
# return False
|
||||
except FileNotFoundError:
|
||||
|
||||
release = self.fetch_release_number()
|
||||
self.version_main = release.major
|
||||
self.version_full = release
|
||||
self.unzip_package(self.fetch_package())
|
||||
|
||||
return self.patch()
|
||||
|
||||
def driver_binary_in_use(self, path: str = None) -> bool:
    """
    Naive test to check if a found chromedriver binary is currently in use.

    The file is opened for appending and probed with a seek and a read.
    If opening fails with an OSError, or either probe fails with a
    PermissionError, the binary is assumed to be in use.

    Args:
        path: a string or PathLike object to the binary to check.
              If not specified, this object's executable_path is used.

    Returns:
        True when the binary is assumed to be in use, False otherwise.

    Raises:
        OSError: if the file does not exist.
    """
    if not path:
        path = self.executable_path
    p = pathlib.Path(path)

    if not p.exists():
        raise OSError("file does not exist: %s" % p)
    try:
        with open(p, mode="a+b") as fs:
            denied = False
            try:
                fs.seek(0, 0)
            except PermissionError:
                denied = True
            # some systems apparently allow seeking, so a read is
            # conducted as a second test
            try:
                fs.readline()
            except PermissionError:
                denied = True
            return denied
    except OSError:
        # the file could not be opened or probed at all:
        # safe to assume it is in use
        return True
|
||||
|
||||
release = self.fetch_release_number()
|
||||
self.version_main = release.version[0]
|
||||
self.version_full = release
|
||||
self.unzip_package(self.fetch_package())
|
||||
return self.patch()
|
||||
def cleanup_unused_files(self):
    """
    Best-effort removal of every entry directly inside ``self.data_path``
    whose name contains "undetected".

    Entries that cannot be unlinked (for example because they are still in
    use, or are directories) are skipped; the failure is only logged at
    debug level.
    """
    data_dir = pathlib.Path(self.data_path)
    # materialise the listing first, the directory is modified while looping
    for item in list(data_dir.glob("*undetected*")):
        try:
            item.unlink()
        except OSError as e:
            logging.getLogger(__name__).debug("could not remove %s: %s", item, e)
|
||||
|
||||
def patch(self):
|
||||
self.patch_exe()
|
||||
@@ -137,12 +286,32 @@ class Patcher(object):
|
||||
:return: version string
|
||||
:rtype: LooseVersion
|
||||
"""
|
||||
path = "/latest_release"
|
||||
if self.version_main:
|
||||
path += f"_{self.version_main}"
|
||||
path = path.upper()
|
||||
# Endpoint for old versions of Chromedriver (114 and below)
|
||||
if self.is_old_chromedriver:
|
||||
path = f"/latest_release_{self.version_main}"
|
||||
path = path.upper()
|
||||
logger.debug("getting release number from %s" % path)
|
||||
return LooseVersion(urlopen(self.url_repo + path).read().decode())
|
||||
|
||||
# Endpoint for new versions of Chromedriver (115+)
|
||||
if not self.version_main:
|
||||
# Fetch the latest version
|
||||
path = "/last-known-good-versions-with-downloads.json"
|
||||
logger.debug("getting release number from %s" % path)
|
||||
with urlopen(self.url_repo + path) as conn:
|
||||
response = conn.read().decode()
|
||||
|
||||
last_versions = json.loads(response)
|
||||
return LooseVersion(last_versions["channels"]["Stable"]["version"])
|
||||
|
||||
# Fetch the latest minor version of the major version provided
|
||||
path = "/latest-versions-per-milestone-with-downloads.json"
|
||||
logger.debug("getting release number from %s" % path)
|
||||
return LooseVersion(urlopen(self.url_repo + path).read().decode())
|
||||
with urlopen(self.url_repo + path) as conn:
|
||||
response = conn.read().decode()
|
||||
|
||||
major_versions = json.loads(response)
|
||||
return LooseVersion(major_versions["milestones"][str(self.version_main)]["version"])
|
||||
|
||||
def parse_exe_version(self):
|
||||
with io.open(self.executable_path, "rb") as f:
|
||||
@@ -157,10 +326,16 @@ class Patcher(object):
|
||||
|
||||
:return: path to downloaded file
|
||||
"""
|
||||
u = "%s/%s/%s" % (self.url_repo, self.version_full.vstring, self.zip_name)
|
||||
logger.debug("downloading from %s" % u)
|
||||
# return urlretrieve(u, filename=self.data_path)[0]
|
||||
return urlretrieve(u)[0]
|
||||
zip_name = f"chromedriver_{self.platform_name}.zip"
|
||||
if self.is_old_chromedriver:
|
||||
download_url = "%s/%s/%s" % (self.url_repo, str(self.version_full), zip_name)
|
||||
else:
|
||||
zip_name = zip_name.replace("_", "-", 1)
|
||||
download_url = "https://storage.googleapis.com/chrome-for-testing-public/%s/%s/%s"
|
||||
download_url %= (str(self.version_full), self.platform_name, zip_name)
|
||||
|
||||
logger.debug("downloading from %s" % download_url)
|
||||
return urlretrieve(download_url)[0]
|
||||
|
||||
def unzip_package(self, fp):
|
||||
"""
|
||||
@@ -168,6 +343,12 @@ class Patcher(object):
|
||||
|
||||
:return: path to unpacked executable
|
||||
"""
|
||||
exe_path = self.exe_name
|
||||
if not self.is_old_chromedriver:
|
||||
# The new chromedriver unzips into its own folder
|
||||
zip_name = f"chromedriver-{self.platform_name}"
|
||||
exe_path = os.path.join(zip_name, self.exe_name)
|
||||
|
||||
logger.debug("unzipping %s" % fp)
|
||||
try:
|
||||
os.unlink(self.zip_path)
|
||||
@@ -176,10 +357,10 @@ class Patcher(object):
|
||||
|
||||
os.makedirs(self.zip_path, mode=0o755, exist_ok=True)
|
||||
with zipfile.ZipFile(fp, mode="r") as zf:
|
||||
zf.extract(self.exe_name, self.zip_path)
|
||||
os.rename(os.path.join(self.zip_path, self.exe_name), self.executable_path)
|
||||
zf.extractall(self.zip_path)
|
||||
os.rename(os.path.join(self.zip_path, exe_path), self.executable_path)
|
||||
os.remove(fp)
|
||||
os.rmdir(self.zip_path)
|
||||
shutil.rmtree
|
||||
os.chmod(self.executable_path, 0o755)
|
||||
return self.executable_path
|
||||
|
||||
@@ -193,10 +374,31 @@ class Patcher(object):
|
||||
"""
|
||||
exe_name = os.path.basename(exe_name)
|
||||
if IS_POSIX:
|
||||
r = os.system("kill -f -9 $(pidof %s)" % exe_name)
|
||||
# Using shell=True for pidof, consider a more robust pid finding method if issues arise.
|
||||
# pgrep can be an alternative: ["pgrep", "-f", exe_name]
|
||||
# Or psutil if adding a dependency is acceptable.
|
||||
command = f"pidof {exe_name}"
|
||||
try:
|
||||
result = subprocess.run(command, shell=True, capture_output=True, text=True, check=True)
|
||||
pids = result.stdout.strip().split()
|
||||
if pids:
|
||||
subprocess.run(["kill", "-9"] + pids, check=False) # Changed from -f -9 to -9 as -f is not standard for kill
|
||||
return True
|
||||
return False # No PIDs found
|
||||
except subprocess.CalledProcessError: # pidof returns 1 if no process found
|
||||
return False # No process found
|
||||
except Exception as e:
|
||||
logger.debug(f"Error killing process on POSIX: {e}")
|
||||
return False
|
||||
else:
|
||||
r = os.system("taskkill /f /im %s" % exe_name)
|
||||
return not r
|
||||
try:
|
||||
# TASKKILL /F /IM chromedriver.exe
|
||||
result = subprocess.run(["taskkill", "/f", "/im", exe_name], check=False, capture_output=True)
|
||||
# taskkill returns 0 if process was killed, 128 if not found.
|
||||
return result.returncode == 0
|
||||
except Exception as e:
|
||||
logger.debug(f"Error killing process on Windows: {e}")
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def gen_random_cdc():
|
||||
@@ -255,21 +457,17 @@ class Patcher(object):
|
||||
else:
|
||||
timeout = 3 # stop trying after this many seconds
|
||||
t = time.monotonic()
|
||||
while True:
|
||||
now = time.monotonic()
|
||||
if now - t > timeout:
|
||||
# we don't want to wait until the end of time
|
||||
logger.debug(
|
||||
"could not unlink %s in time (%d seconds)"
|
||||
% (self.executable_path, timeout)
|
||||
)
|
||||
break
|
||||
now = lambda: time.monotonic()
|
||||
while now() - t > timeout:
|
||||
# we don't want to wait until the end of time
|
||||
try:
|
||||
if self.user_multi_procs:
|
||||
break
|
||||
os.unlink(self.executable_path)
|
||||
logger.debug("successfully unlinked %s" % self.executable_path)
|
||||
break
|
||||
except (OSError, RuntimeError, PermissionError):
|
||||
time.sleep(0.1)
|
||||
time.sleep(0.01)
|
||||
continue
|
||||
except FileNotFoundError:
|
||||
break
|
||||
|
||||
144
src/utils.py
144
src/utils.py
@@ -1,13 +1,18 @@
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
import shutil
|
||||
import sys
|
||||
import tempfile
|
||||
import urllib.parse
|
||||
|
||||
from selenium.webdriver.chrome.webdriver import WebDriver
|
||||
import undetected_chromedriver as uc
|
||||
|
||||
FLARESOLVERR_VERSION = None
|
||||
PLATFORM_VERSION = None
|
||||
CHROME_EXE_PATH = None
|
||||
CHROME_MAJOR_VERSION = None
|
||||
USER_AGENT = None
|
||||
@@ -23,6 +28,10 @@ def get_config_headless() -> bool:
|
||||
return os.environ.get('HEADLESS', 'true').lower() == 'true'
|
||||
|
||||
|
||||
def get_config_disable_media() -> bool:
    """Report whether the ``DISABLE_MEDIA`` environment variable is enabled.

    The variable defaults to ``'false'`` when unset; only the value
    ``'true'`` (compared case-insensitively) enables the flag.
    """
    raw_value = os.environ.get('DISABLE_MEDIA', 'false')
    return raw_value.lower() == 'true'
|
||||
|
||||
|
||||
def get_flaresolverr_version() -> str:
|
||||
global FLARESOLVERR_VERSION
|
||||
if FLARESOLVERR_VERSION is not None:
|
||||
@@ -35,32 +44,131 @@ def get_flaresolverr_version() -> str:
|
||||
FLARESOLVERR_VERSION = json.loads(f.read())['version']
|
||||
return FLARESOLVERR_VERSION
|
||||
|
||||
def get_current_platform() -> str:
    """Return ``os.name``, memoized in the module-level ``PLATFORM_VERSION``.

    The global is filled in on the first call and returned unchanged on
    every later call.
    """
    global PLATFORM_VERSION
    if PLATFORM_VERSION is None:
        PLATFORM_VERSION = os.name
    return PLATFORM_VERSION
|
||||
|
||||
|
||||
def create_proxy_extension(proxy: dict) -> str:
    """Write a throw-away Chrome extension that configures an authenticated proxy.

    The extension consists of a Manifest V3 ``manifest.json`` and a
    ``background.js`` service worker that (1) sets a fixed proxy server via
    ``chrome.proxy.settings`` and (2) answers ``onAuthRequired`` challenges
    with the supplied credentials.

    :param proxy: mapping with the keys ``'url'`` (proxy URL, e.g.
        ``http://host:8080``), ``'username'`` and ``'password'``.
    :return: path of a freshly created temporary directory holding the
        extension files. This function does not delete the directory; the
        caller is expected to remove it when it is no longer needed.
    :raises KeyError: if one of the required keys is missing from ``proxy``.
    """
    parsed_url = urllib.parse.urlparse(proxy['url'])

    single_proxy = {
        "scheme": parsed_url.scheme,
        "host": parsed_url.hostname,
    }
    # urlparse() reports port=None when the URL carries no explicit port.
    # Formatting None with "%d" raises TypeError, so the key is omitted and
    # Chrome falls back to the default port for the scheme.
    if parsed_url.port is not None:
        single_proxy["port"] = parsed_url.port

    proxy_config = {
        "mode": "fixed_servers",
        "rules": {
            "singleProxy": single_proxy,
            "bypassList": ["localhost"],
        },
    }
    auth_response = {
        "authCredentials": {
            "username": proxy['username'],
            "password": proxy['password'],
        }
    }

    manifest_json = """
{
    "version": "1.0.0",
    "manifest_version": 3,
    "name": "Chrome Proxy",
    "permissions": [
        "proxy",
        "tabs",
        "storage",
        "webRequest",
        "webRequestAuthProvider"
    ],
    "host_permissions": [
        "<all_urls>"
    ],
    "background": {
        "service_worker": "background.js"
    },
    "minimum_chrome_version": "76.0.0"
}
"""

    # The values are serialized with json.dumps instead of being pasted into
    # hand-written JS string literals: JSON output is a valid JavaScript
    # object literal, and quotes/backslashes in credentials are escaped so
    # they can neither break the script nor inject code into it.
    background_js = """
var config = %s;

chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

function callbackFn(details) {
    return %s;
}

chrome.webRequest.onAuthRequired.addListener(
    callbackFn,
    { urls: ["<all_urls>"] },
    ['blocking']
);
""" % (
        json.dumps(proxy_config, indent=4),
        json.dumps(auth_response, indent=4),
    )

    proxy_extension_dir = tempfile.mkdtemp()

    with open(os.path.join(proxy_extension_dir, "manifest.json"), "w", encoding="utf-8") as f:
        f.write(manifest_json)

    with open(os.path.join(proxy_extension_dir, "background.js"), "w", encoding="utf-8") as f:
        f.write(background_js)

    return proxy_extension_dir
|
||||
|
||||
|
||||
def get_webdriver(proxy: dict = None) -> WebDriver:
|
||||
global PATCHED_DRIVER_PATH
|
||||
global PATCHED_DRIVER_PATH, USER_AGENT
|
||||
logging.debug('Launching web browser...')
|
||||
|
||||
# undetected_chromedriver
|
||||
options = uc.ChromeOptions()
|
||||
options.add_argument('--no-sandbox')
|
||||
options.add_argument('--window-size=1920,1080')
|
||||
options.add_argument('--disable-search-engine-choice-screen')
|
||||
# todo: this param shows a warning in chrome head-full
|
||||
options.add_argument('--disable-setuid-sandbox')
|
||||
options.add_argument('--disable-dev-shm-usage')
|
||||
# this option removes the zygote sandbox (it seems that the resolution is a bit faster)
|
||||
options.add_argument('--no-zygote')
|
||||
# attempt to fix Docker ARM32 build
|
||||
options.add_argument('--disable-gpu-sandbox')
|
||||
options.add_argument('--disable-software-rasterizer')
|
||||
IS_ARMARCH = platform.machine().startswith(('arm', 'aarch'))
|
||||
if IS_ARMARCH:
|
||||
options.add_argument('--disable-gpu-sandbox')
|
||||
options.add_argument('--ignore-certificate-errors')
|
||||
options.add_argument('--ignore-ssl-errors')
|
||||
|
||||
if proxy and 'url' in proxy:
|
||||
language = os.environ.get('LANG', None)
|
||||
if language is not None:
|
||||
options.add_argument('--accept-lang=%s' % language)
|
||||
|
||||
# Fix for Chrome 117 | https://github.com/FlareSolverr/FlareSolverr/issues/910
|
||||
if USER_AGENT is not None:
|
||||
options.add_argument('--user-agent=%s' % USER_AGENT)
|
||||
|
||||
proxy_extension_dir = None
|
||||
if proxy and all(key in proxy for key in ['url', 'username', 'password']):
|
||||
proxy_extension_dir = create_proxy_extension(proxy)
|
||||
options.add_argument("--disable-features=DisableLoadExtensionCommandLineSwitch")
|
||||
options.add_argument("--load-extension=%s" % os.path.abspath(proxy_extension_dir))
|
||||
elif proxy and 'url' in proxy:
|
||||
proxy_url = proxy['url']
|
||||
logging.debug("Using webdriver proxy: %s", proxy_url)
|
||||
options.add_argument('--proxy-server=%s' % proxy_url)
|
||||
|
||||
# note: headless mode is detected (options.headless = True)
|
||||
# note: headless mode is detected (headless = True)
|
||||
# we launch the browser in head-full mode with the window hidden
|
||||
windows_headless = False
|
||||
if get_config_headless():
|
||||
@@ -68,6 +176,8 @@ def get_webdriver(proxy: dict = None) -> WebDriver:
|
||||
windows_headless = True
|
||||
else:
|
||||
start_xvfb_display()
|
||||
# For normal headless mode:
|
||||
# options.add_argument('--headless')
|
||||
|
||||
# if we are inside the Docker container, we avoid downloading the driver
|
||||
driver_exe_path = None
|
||||
@@ -85,14 +195,24 @@ def get_webdriver(proxy: dict = None) -> WebDriver:
|
||||
|
||||
# downloads and patches the chromedriver
|
||||
# if we don't set driver_executable_path it downloads, patches, and deletes the driver each time
|
||||
driver = uc.Chrome(options=options, browser_executable_path=browser_executable_path,
|
||||
driver_executable_path=driver_exe_path, version_main=version_main,
|
||||
windows_headless=windows_headless)
|
||||
try:
|
||||
driver = uc.Chrome(options=options, browser_executable_path=browser_executable_path,
|
||||
driver_executable_path=driver_exe_path, version_main=version_main,
|
||||
windows_headless=windows_headless, headless=get_config_headless())
|
||||
except Exception as e:
|
||||
logging.error("Error starting Chrome: %s" % e)
|
||||
# No point in continuing if we cannot retrieve the driver
|
||||
raise e
|
||||
|
||||
# save the patched driver to avoid re-downloads
|
||||
if driver_exe_path is None:
|
||||
PATCHED_DRIVER_PATH = os.path.join(driver.patcher.data_path, driver.patcher.exe_name)
|
||||
shutil.copy(driver.patcher.executable_path, PATCHED_DRIVER_PATH)
|
||||
if PATCHED_DRIVER_PATH != driver.patcher.executable_path:
|
||||
shutil.copy(driver.patcher.executable_path, PATCHED_DRIVER_PATH)
|
||||
|
||||
# clean up proxy extension directory
|
||||
if proxy_extension_dir is not None:
|
||||
shutil.rmtree(proxy_extension_dir)
|
||||
|
||||
# selenium vanilla
|
||||
# options = webdriver.ChromeOptions()
|
||||
@@ -184,7 +304,7 @@ def extract_version_nt_folder() -> str:
|
||||
paths = [f.path for f in os.scandir(path) if f.is_dir()]
|
||||
for path in paths:
|
||||
filename = os.path.basename(path)
|
||||
pattern = '\d+\.\d+\.\d+\.\d+'
|
||||
pattern = r'\d+\.\d+\.\d+\.\d+'
|
||||
match = re.search(pattern, filename)
|
||||
if match and match.group():
|
||||
# Found a Chrome version.
|
||||
@@ -201,11 +321,15 @@ def get_user_agent(driver=None) -> str:
|
||||
if driver is None:
|
||||
driver = get_webdriver()
|
||||
USER_AGENT = driver.execute_script("return navigator.userAgent")
|
||||
# Fix for Chrome 117 | https://github.com/FlareSolverr/FlareSolverr/issues/910
|
||||
USER_AGENT = re.sub('HEADLESS', '', USER_AGENT, flags=re.IGNORECASE)
|
||||
return USER_AGENT
|
||||
except Exception as e:
|
||||
raise Exception("Error getting browser User-Agent. " + str(e))
|
||||
finally:
|
||||
if driver is not None:
|
||||
if PLATFORM_VERSION == "nt":
|
||||
driver.close()
|
||||
driver.quit()
|
||||
|
||||
|
||||
|
||||
@@ -1 +1 @@
|
||||
WebTest==3.0.0
|
||||
WebTest==3.0.7
|
||||
|
||||
Reference in New Issue
Block a user