mirror of
https://github.com/FlareSolverr/FlareSolverr.git
synced 2025-12-05 17:18:19 +01:00
Compare commits
193 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0fe9958afe | ||
|
|
9f8c71131f | ||
|
|
2405c00521 | ||
|
|
ff65b7cc68 | ||
|
|
409e0844a7 | ||
|
|
368d5d4e05 | ||
|
|
c7505e3cbf | ||
|
|
5a27090abe | ||
|
|
e505ea4fe4 | ||
|
|
63b6fc53e3 | ||
|
|
8d72617219 | ||
|
|
8a8b9415c3 | ||
|
|
16722ef963 | ||
|
|
bbc24e9d86 | ||
|
|
7dfdfc5e33 | ||
|
|
136422c85c | ||
|
|
05a72f2709 | ||
|
|
da810830da | ||
|
|
d27f57c27c | ||
|
|
a916d93779 | ||
|
|
0d889cb0b2 | ||
|
|
d430404de8 | ||
|
|
d3b1ba6e88 | ||
|
|
75e5b190d6 | ||
|
|
cdc3db3c21 | ||
|
|
2dbb0442e0 | ||
|
|
6faab19533 | ||
|
|
af0a7af757 | ||
|
|
ff74b50b60 | ||
|
|
3e51ac1188 | ||
|
|
6627de4fa6 | ||
|
|
fe649255f2 | ||
|
|
3e338fce2e | ||
|
|
3dd3e7559d | ||
|
|
f21c1d51bc | ||
|
|
957347f73a | ||
|
|
c55080b0ec | ||
|
|
639bfca020 | ||
|
|
237694df76 | ||
|
|
6e5d6f1795 | ||
|
|
30804a86e5 | ||
|
|
e0bdaf7745 | ||
|
|
795365dbe4 | ||
|
|
ce5369dd41 | ||
|
|
600b09d498 | ||
|
|
d1f19405a1 | ||
|
|
82a1366d34 | ||
|
|
a2fe9e7776 | ||
|
|
6cc628df9e | ||
|
|
8b1851eeb1 | ||
|
|
54668a11e7 | ||
|
|
701d8fb4ff | ||
|
|
39a265ccb8 | ||
|
|
e32b247014 | ||
|
|
0d8fe8fe50 | ||
|
|
718da3a36f | ||
|
|
a798561338 | ||
|
|
eb680efc90 | ||
|
|
0f8f0bec25 | ||
|
|
3d9bc5627b | ||
|
|
dd7eaee2e3 | ||
|
|
031177bbdb | ||
|
|
a8644532a1 | ||
|
|
e96161c873 | ||
|
|
5a1f25cd52 | ||
|
|
a2c0e4348e | ||
|
|
2ecf88895b | ||
|
|
984368edb5 | ||
|
|
6c1d78cb84 | ||
|
|
5a2c61601e | ||
|
|
c304da2964 | ||
|
|
b811412699 | ||
|
|
0bb8de144f | ||
|
|
38166dfaa0 | ||
|
|
8dea0ed017 | ||
|
|
20cd2944a7 | ||
|
|
fd773e5909 | ||
|
|
35c7bff3c8 | ||
|
|
afdc1c7a8e | ||
|
|
0bc7a4498c | ||
|
|
c5a5f6d65e | ||
|
|
aaf29be8e1 | ||
|
|
800866d033 | ||
|
|
043f18b231 | ||
|
|
d21a332519 | ||
|
|
3ca6d08f41 | ||
|
|
227bd7ac72 | ||
|
|
e6a08584c0 | ||
|
|
df06d13cf8 | ||
|
|
993b8c41ac | ||
|
|
a4d42d7834 | ||
|
|
1c855b8af0 | ||
|
|
745c69491f | ||
|
|
f7e316fd5a | ||
|
|
16c8ab5f3d | ||
|
|
7af311b73c | ||
|
|
daec97532d | ||
|
|
8d7ed48f21 | ||
|
|
220f2599ae | ||
|
|
d772cf3f50 | ||
|
|
ab4365894b | ||
|
|
3fa9631559 | ||
|
|
04858c22fd | ||
|
|
5085ca6990 | ||
|
|
cd4df1e061 | ||
|
|
6c79783f7c | ||
|
|
4139e8d47c | ||
|
|
1942eb5fdc | ||
|
|
401bf5be76 | ||
|
|
d8ffdd3061 | ||
|
|
2d66590b08 | ||
|
|
a217510dc7 | ||
|
|
553bd8ab4f | ||
|
|
1b197c3e53 | ||
|
|
fd308f01be | ||
|
|
b5eef32615 | ||
|
|
644a843d89 | ||
|
|
82e1c94c6f | ||
|
|
fbc71516f5 | ||
|
|
40bd1cba4c | ||
|
|
d1588c1156 | ||
|
|
b4ad583baa | ||
|
|
5d31e551cc | ||
|
|
d92845f34f | ||
|
|
5d3b73ea9d | ||
|
|
2aa095ed5d | ||
|
|
687c8f75ae | ||
|
|
22ed3d324b | ||
|
|
5ba9ef03f3 | ||
|
|
d2e144ea12 | ||
|
|
313fb2c14b | ||
|
|
6d69f40b58 | ||
|
|
a1c36f60d2 | ||
|
|
0edc50e271 | ||
|
|
f4a4baa57c | ||
|
|
f7e434c6e3 | ||
|
|
7728f2ab31 | ||
|
|
c920bea4ca | ||
|
|
a785f83034 | ||
|
|
b42c22f5b1 | ||
|
|
9c62410a8b | ||
|
|
b8768ae17d | ||
|
|
9b2c602a1f | ||
|
|
8316350b98 | ||
|
|
33307ce461 | ||
|
|
cedb7bc54e | ||
|
|
6ecaf2362c | ||
|
|
3c97c9603a | ||
|
|
efaa5f31b6 | ||
|
|
4db85a2d0f | ||
|
|
66b9db21e5 | ||
|
|
ab0fe58d4a | ||
|
|
f68ddb7573 | ||
|
|
ac77110578 | ||
|
|
a9d1a2de2d | ||
|
|
ab5f14d6c3 | ||
|
|
e0bf02fb8b | ||
|
|
82a1cd835a | ||
|
|
7017715e21 | ||
|
|
ae18559db1 | ||
|
|
2680521008 | ||
|
|
2297bab185 | ||
|
|
8d9bac9dd4 | ||
|
|
30ccf18e85 | ||
|
|
a15d041a0c | ||
|
|
c6c74e7c9d | ||
|
|
49fd1aacfc | ||
|
|
f6879c70de | ||
|
|
24f59a39cb | ||
|
|
4d16105176 | ||
|
|
5957b7b3bc | ||
|
|
8de16058d0 | ||
|
|
5fc4f966a5 | ||
|
|
b903a5dd84 | ||
|
|
7e9d5f424f | ||
|
|
fc6d2d9095 | ||
|
|
aef9b2d4d6 | ||
|
|
6dc279a9d3 | ||
|
|
96fcd21174 | ||
|
|
3a6e8e0f92 | ||
|
|
2d97f88276 | ||
|
|
ac5c64319e | ||
|
|
c93834e2f0 | ||
|
|
e3b4200d94 | ||
|
|
0941861f80 | ||
|
|
8a10eb27a6 | ||
|
|
e9c08c84ef | ||
|
|
2aa1744476 | ||
|
|
a89679a52d | ||
|
|
410ee7981f | ||
|
|
e163019f28 | ||
|
|
7d84f1b663 | ||
|
|
4807e9dbe2 |
17
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
17
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
@@ -8,6 +8,13 @@ body:
|
|||||||
options:
|
options:
|
||||||
- label: I have checked the README
|
- label: I have checked the README
|
||||||
required: true
|
required: true
|
||||||
|
- type: checkboxes
|
||||||
|
attributes:
|
||||||
|
label: Have you followed our Troubleshooting?
|
||||||
|
description: Please follow our <a href="https://github.com/FlareSolverr/FlareSolverr/wiki/Troubleshooting">Troubleshooting</a>.
|
||||||
|
options:
|
||||||
|
- label: I have followed your Troubleshooting
|
||||||
|
required: true
|
||||||
- type: checkboxes
|
- type: checkboxes
|
||||||
attributes:
|
attributes:
|
||||||
label: Is there already an issue for your problem?
|
label: Is there already an issue for your problem?
|
||||||
@@ -22,6 +29,13 @@ body:
|
|||||||
options:
|
options:
|
||||||
- label: I have read the Discussions
|
- label: I have read the Discussions
|
||||||
required: true
|
required: true
|
||||||
|
- type: input
|
||||||
|
attributes:
|
||||||
|
label: Have you ACTUALLY checked all these?
|
||||||
|
description: Please do not waste our time and yours; these checks are there for a reason, it is not just so you can tick boxes for fun. If you type <b>YES</b> and it is clear you did not or have put in no effort, your issue will be closed and locked without comment. If you type <b>NO</b> but still open this issue, you will be permanently blocked for timewasting.
|
||||||
|
placeholder: YES or NO
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
- type: textarea
|
- type: textarea
|
||||||
attributes:
|
attributes:
|
||||||
label: Environment
|
label: Environment
|
||||||
@@ -32,7 +46,8 @@ body:
|
|||||||
- Operating system:
|
- Operating system:
|
||||||
- Are you using Docker: [yes/no]
|
- Are you using Docker: [yes/no]
|
||||||
- FlareSolverr User-Agent (see log traces or / endpoint):
|
- FlareSolverr User-Agent (see log traces or / endpoint):
|
||||||
- Are you using a proxy or VPN: [yes/no]
|
- Are you using a VPN: [yes/no]
|
||||||
|
- Are you using a Proxy: [yes/no]
|
||||||
- Are you using Captcha Solver: [yes/no]
|
- Are you using Captcha Solver: [yes/no]
|
||||||
- If using captcha solver, which one:
|
- If using captcha solver, which one:
|
||||||
- URL to test this issue:
|
- URL to test this issue:
|
||||||
|
|||||||
13
.github/workflows/autotag.yml
vendored
13
.github/workflows/autotag.yml
vendored
@@ -1,4 +1,4 @@
|
|||||||
name: autotag
|
name: Autotag
|
||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
@@ -6,14 +6,13 @@ on:
|
|||||||
- "master"
|
- "master"
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build:
|
tag-release:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
-
|
- name: Checkout repository
|
||||||
name: Checkout
|
uses: actions/checkout@v5
|
||||||
uses: actions/checkout@v2
|
|
||||||
-
|
- name: Auto Tag
|
||||||
name: Auto Tag
|
|
||||||
uses: Klemensas/action-autotag@stable
|
uses: Klemensas/action-autotag@stable
|
||||||
with:
|
with:
|
||||||
GITHUB_TOKEN: "${{ secrets.GH_PAT }}"
|
GITHUB_TOKEN: "${{ secrets.GH_PAT }}"
|
||||||
|
|||||||
72
.github/workflows/release-docker.yml
vendored
72
.github/workflows/release-docker.yml
vendored
@@ -1,53 +1,67 @@
|
|||||||
name: release-docker
|
name: Docker release
|
||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
tags:
|
tags:
|
||||||
- 'v*.*.*'
|
- "v*.*.*"
|
||||||
|
pull_request:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build:
|
build-docker-images:
|
||||||
|
if: ${{ !github.event.pull_request.head.repo.fork }}
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
-
|
- name: Checkout repository
|
||||||
name: Checkout
|
uses: actions/checkout@v5
|
||||||
uses: actions/checkout@v2
|
|
||||||
-
|
- name: Downcase repo
|
||||||
name: Downcase repo
|
|
||||||
run: echo REPOSITORY=$(echo ${{ github.repository }} | tr '[:upper:]' '[:lower:]') >> $GITHUB_ENV
|
run: echo REPOSITORY=$(echo ${{ github.repository }} | tr '[:upper:]' '[:lower:]') >> $GITHUB_ENV
|
||||||
-
|
|
||||||
name: Docker meta
|
- name: Docker meta
|
||||||
id: docker_meta
|
id: docker_meta
|
||||||
uses: crazy-max/ghaction-docker-meta@v1
|
uses: docker/metadata-action@v5
|
||||||
with:
|
with:
|
||||||
images: ${{ env.REPOSITORY }},ghcr.io/${{ env.REPOSITORY }}
|
images: |
|
||||||
tag-sha: false
|
${{ env.REPOSITORY }},enable=${{ github.event_name != 'pull_request' }}
|
||||||
-
|
ghcr.io/${{ env.REPOSITORY }}
|
||||||
name: Set up QEMU
|
tags: |
|
||||||
uses: docker/setup-qemu-action@v1.0.1
|
type=semver,pattern={{version}},prefix=v
|
||||||
-
|
type=ref,event=pr
|
||||||
name: Set up Docker Buildx
|
flavor: |
|
||||||
uses: docker/setup-buildx-action@v1
|
latest=auto
|
||||||
-
|
|
||||||
name: Login to DockerHub
|
- name: Set up QEMU
|
||||||
uses: docker/login-action@v1
|
uses: docker/setup-qemu-action@v3
|
||||||
|
|
||||||
|
- name: Set up Docker Buildx
|
||||||
|
uses: docker/setup-buildx-action@v3
|
||||||
|
|
||||||
|
- name: Login to DockerHub
|
||||||
|
if: github.event_name != 'pull_request'
|
||||||
|
uses: docker/login-action@v3
|
||||||
with:
|
with:
|
||||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||||
-
|
|
||||||
name: Login to GitHub Container Registry
|
- name: Login to GitHub Container Registry
|
||||||
uses: docker/login-action@v1
|
uses: docker/login-action@v3
|
||||||
with:
|
with:
|
||||||
registry: ghcr.io
|
registry: ghcr.io
|
||||||
username: ${{ github.repository_owner }}
|
username: ${{ github.repository_owner }}
|
||||||
password: ${{ secrets.GH_PAT }}
|
password: ${{ secrets.GH_PAT }}
|
||||||
-
|
|
||||||
name: Build and push
|
- name: Build and push
|
||||||
uses: docker/build-push-action@v2
|
uses: docker/build-push-action@v6
|
||||||
with:
|
with:
|
||||||
context: .
|
context: .
|
||||||
file: ./Dockerfile
|
file: ./Dockerfile
|
||||||
platforms: linux/386,linux/amd64,linux/arm/v7,linux/arm64/v8
|
platforms: linux/386,linux/amd64,linux/arm/v7,linux/arm64/v8
|
||||||
push: ${{ github.event_name != 'pull_request' }}
|
push: true
|
||||||
tags: ${{ steps.docker_meta.outputs.tags }}
|
tags: ${{ steps.docker_meta.outputs.tags }}
|
||||||
labels: ${{ steps.docker_meta.outputs.labels }}
|
labels: ${{ steps.docker_meta.outputs.labels }}
|
||||||
|
|||||||
70
.github/workflows/release.yml
vendored
70
.github/workflows/release.yml
vendored
@@ -1,55 +1,63 @@
|
|||||||
name: release
|
name: Release
|
||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
tags:
|
tags:
|
||||||
- 'v*.*.*'
|
- "v*.*.*"
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build:
|
create-release:
|
||||||
name: Create release
|
name: Create release
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v2
|
uses: actions/checkout@v5
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0 # get all commits, branches and tags (required for the changelog)
|
fetch-depth: 0
|
||||||
|
|
||||||
- name: Setup Node
|
|
||||||
uses: actions/setup-node@v2
|
|
||||||
with:
|
|
||||||
node-version: '16'
|
|
||||||
|
|
||||||
- name: Build artifacts
|
|
||||||
run: |
|
|
||||||
npm install
|
|
||||||
npm run build
|
|
||||||
npm run package
|
|
||||||
|
|
||||||
- name: Build changelog
|
- name: Build changelog
|
||||||
id: github_changelog
|
id: github_changelog
|
||||||
run: |
|
run: |
|
||||||
changelog=$(git log $(git tag | tail -2 | head -1)..HEAD --no-merges --oneline)
|
changelog=$(git log $(git tag | tail -2 | head -1)..HEAD --no-merges --oneline)
|
||||||
changelog="${changelog//'%'/'%25'}"
|
echo "changelog<<EOF" >> $GITHUB_OUTPUT
|
||||||
changelog="${changelog//$'\n'/'%0A'}"
|
echo "$changelog" >> $GITHUB_OUTPUT
|
||||||
changelog="${changelog//$'\r'/'%0D'}"
|
echo "EOF" >> $GITHUB_OUTPUT
|
||||||
echo "##[set-output name=changelog;]${changelog}"
|
|
||||||
|
|
||||||
- name: Create release
|
- name: Create release
|
||||||
id: create_release
|
uses: softprops/action-gh-release@v2
|
||||||
uses: actions/create-release@v1
|
with:
|
||||||
|
body: ${{ steps.github_changelog.outputs.changelog }}
|
||||||
env:
|
env:
|
||||||
GITHUB_TOKEN: ${{ secrets.GH_PAT }}
|
GITHUB_TOKEN: ${{ secrets.GH_PAT }}
|
||||||
|
|
||||||
|
build-package:
|
||||||
|
name: Build binaries
|
||||||
|
needs: create-release
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
os: [ubuntu-latest, windows-latest]
|
||||||
|
steps:
|
||||||
|
- name: Checkout repository
|
||||||
|
uses: actions/checkout@v5
|
||||||
with:
|
with:
|
||||||
tag_name: ${{ github.ref }}
|
fetch-depth: 0
|
||||||
release_name: ${{ github.ref }}
|
|
||||||
body: ${{ steps.github_changelog.outputs.changelog }}
|
- name: Setup Python
|
||||||
draft: false
|
uses: actions/setup-python@v6
|
||||||
prerelease: false
|
with:
|
||||||
|
python-version: "3.13"
|
||||||
|
|
||||||
|
- name: Build artifacts
|
||||||
|
run: |
|
||||||
|
python -m pip install -r requirements.txt
|
||||||
|
python -m pip install pyinstaller==6.16.0
|
||||||
|
cd src
|
||||||
|
python build_package.py
|
||||||
|
|
||||||
- name: Upload release artifacts
|
- name: Upload release artifacts
|
||||||
uses: alexellis/upload-assets@0.2.2
|
uses: softprops/action-gh-release@v2
|
||||||
|
with:
|
||||||
|
files: ./dist/flaresolverr_*
|
||||||
env:
|
env:
|
||||||
GITHUB_TOKEN: ${{ secrets.GH_PAT }}
|
GITHUB_TOKEN: ${{ secrets.GH_PAT }}
|
||||||
with:
|
|
||||||
asset_paths: '["./bin/*.zip"]'
|
|
||||||
|
|||||||
4
.gitignore
vendored
4
.gitignore
vendored
@@ -25,6 +25,7 @@ __pycache__/
|
|||||||
build/
|
build/
|
||||||
develop-eggs/
|
develop-eggs/
|
||||||
dist/
|
dist/
|
||||||
|
dist_chrome/
|
||||||
downloads/
|
downloads/
|
||||||
eggs/
|
eggs/
|
||||||
.eggs/
|
.eggs/
|
||||||
@@ -123,3 +124,6 @@ venv.bak/
|
|||||||
.mypy_cache/
|
.mypy_cache/
|
||||||
.dmypy.json
|
.dmypy.json
|
||||||
dmypy.json
|
dmypy.json
|
||||||
|
|
||||||
|
# node
|
||||||
|
node_modules/
|
||||||
225
CHANGELOG.md
225
CHANGELOG.md
@@ -1,5 +1,230 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
|
## v3.4.6 (2025/11/29)
|
||||||
|
* Add disable image, css, fonts option with CDP. Thanks @Ananto30
|
||||||
|
|
||||||
|
## v3.4.5 (2025/11/11)
|
||||||
|
* Revert to Python v3.13
|
||||||
|
|
||||||
|
## v3.4.4 (2025/11/04)
|
||||||
|
* Bump dependencies, Chromium, and some other general fixes. Thanks @flowerey
|
||||||
|
|
||||||
|
## v3.4.3 (2025/10/28)
|
||||||
|
* Update proxy extension
|
||||||
|
|
||||||
|
## v3.4.2 (2025/10/09)
|
||||||
|
* Bump dependencies & CI actions. Thanks @flowerey
|
||||||
|
* Add optional wait time after resolving the challenge before returning. Thanks @kennedyoliveira
|
||||||
|
* Add proxy ENVs. Thanks @Robokishan
|
||||||
|
* Handle empty string and keys without value in postData. Thanks @eZ4RK0
|
||||||
|
* Add quote protection for password containing it. Thanks @warrenberberd
|
||||||
|
* Add returnScreenshot parameter to screenshot the final web page. Thanks @estebanthi
|
||||||
|
* Add log file support. Thanks @acg5159
|
||||||
|
|
||||||
|
## v3.4.1 (2025/09/15)
|
||||||
|
* Fix regex pattern syntax in utils.py
|
||||||
|
* Change access denied title check to use startswith
|
||||||
|
|
||||||
|
## v3.4.0 (2025/08/25)
|
||||||
|
* Modernize and upgrade application. Thanks @TheCrazyLex
|
||||||
|
* Remove disable software rasterizer option for ARM builds. Thanks @smrodman83
|
||||||
|
|
||||||
|
## v3.3.25 (2025/06/14)
|
||||||
|
* Remove `use-gl` argument. Thanks @qwerty12
|
||||||
|
* u_c: remove apparent c&p typo. Thanks @ok3721
|
||||||
|
* Bump requirements
|
||||||
|
|
||||||
|
## v3.3.24 (2025/06/04)
|
||||||
|
* Remove hidden character
|
||||||
|
|
||||||
|
## v3.3.23 (2025/06/04)
|
||||||
|
* Update base image to bookworm. Thanks @rwjack
|
||||||
|
|
||||||
|
## v3.3.22 (2025/06/03)
|
||||||
|
* Disable search engine choice screen
|
||||||
|
* Fix headless=false stalling. Thanks @MAKMED1337
|
||||||
|
* Change from click to keys. Thanks @sh4dowb
|
||||||
|
* Don't open devtools
|
||||||
|
* Bump Chromium to v137 for build
|
||||||
|
* Bump requirements
|
||||||
|
|
||||||
|
## v3.3.21 (2024/06/26)
|
||||||
|
* Add challenge selector to catch reloading page on non-English systems
|
||||||
|
* Escape values for generated form used in request.post. Thanks @mynameisbogdan
|
||||||
|
|
||||||
|
## v3.3.20 (2024/06/21)
|
||||||
|
* maxTimeout should always be int
|
||||||
|
* Check not running in Docker before logging version_main error
|
||||||
|
* Update Cloudflare challenge and checkbox selectors. Thanks @tenettow & @21hsmw
|
||||||
|
|
||||||
|
## v3.3.19 (2024/05/23)
|
||||||
|
* Fix occasional headless issue on Linux when set to "false". Thanks @21hsmw
|
||||||
|
|
||||||
|
## v3.3.18 (2024/05/20)
|
||||||
|
|
||||||
|
* Fix LANG ENV for Linux
|
||||||
|
* Fix Chrome v124+ not closing on Windows. Thanks @RileyXX
|
||||||
|
|
||||||
|
## v3.3.17 (2024/04/09)
|
||||||
|
|
||||||
|
* Fix file descriptor leak in service on quit(). Thanks @zkulis
|
||||||
|
|
||||||
|
## v3.3.16 (2024/02/28)
|
||||||
|
|
||||||
|
* Fix of the subprocess.STARTUPINFO() call. Thanks @ceconelo
|
||||||
|
* Add FreeBSD support. Thanks @Asthowen
|
||||||
|
* Use headless configuration properly. Thanks @hashworks
|
||||||
|
|
||||||
|
## v3.3.15 (2024/02/20)
|
||||||
|
|
||||||
|
* Fix looping challenges
|
||||||
|
|
||||||
|
## v3.3.14-hotfix2 (2024/02/17)
|
||||||
|
|
||||||
|
* Hotfix 2 - bad Chromium build, instances failed to terminate
|
||||||
|
|
||||||
|
## v3.3.14-hotfix (2024/02/17)
|
||||||
|
|
||||||
|
* Hotfix for Linux build - some Chrome files no longer exist
|
||||||
|
|
||||||
|
## v3.3.14 (2024/02/17)
|
||||||
|
|
||||||
|
* Update Chrome downloads. Thanks @opemvbs
|
||||||
|
|
||||||
|
## v3.3.13 (2024/01/07)
|
||||||
|
|
||||||
|
* Fix too many open files error
|
||||||
|
|
||||||
|
## v3.3.12 (2023/12/15)
|
||||||
|
|
||||||
|
* Fix looping challenges and invalid cookies
|
||||||
|
|
||||||
|
## v3.3.11 (2023/12/11)
|
||||||
|
|
||||||
|
* Update UC 3.5.4 & Selenium 4.15.2. Thanks @txtsd
|
||||||
|
|
||||||
|
## v3.3.10 (2023/11/14)
|
||||||
|
|
||||||
|
* Add LANG ENV - resolves issues with YGGtorrent
|
||||||
|
|
||||||
|
## v3.3.9 (2023/11/13)
|
||||||
|
|
||||||
|
* Fix for Docker build, capture TypeError
|
||||||
|
|
||||||
|
## v3.3.8 (2023/11/13)
|
||||||
|
|
||||||
|
* Fix headless=true for Chrome 117+. Thanks @NabiKAZ
|
||||||
|
* Support running Chrome 119 from source. Thanks @koleg and @Chris7X
|
||||||
|
* Fix "OSError: [WinError 6] The handle is invalid" on exit. Thanks @enesgorkemgenc
|
||||||
|
|
||||||
|
## v3.3.7 (2023/11/05)
|
||||||
|
|
||||||
|
* Bump to rebuild. Thanks @JoachimDorchies
|
||||||
|
|
||||||
|
## v3.3.6 (2023/09/15)
|
||||||
|
|
||||||
|
* Update checkbox selector, again
|
||||||
|
|
||||||
|
## v3.3.5 (2023/09/13)
|
||||||
|
|
||||||
|
* Change checkbox selector, support languages other than English
|
||||||
|
|
||||||
|
## v3.3.4 (2023/09/02)
|
||||||
|
|
||||||
|
* Update checkbox selector
|
||||||
|
|
||||||
|
## v3.3.3 (2023/08/31)
|
||||||
|
|
||||||
|
* Update undetected_chromedriver to v3.5.3
|
||||||
|
|
||||||
|
## v3.3.2 (2023/08/03)
|
||||||
|
|
||||||
|
* Fix URL domain in Prometheus exporter
|
||||||
|
|
||||||
|
## v3.3.1 (2023/08/03)
|
||||||
|
|
||||||
|
* Fix for Cloudflare verify checkbox
|
||||||
|
* Fix HEADLESS=false in Windows binary
|
||||||
|
* Fix Prometheus exporter for management and health endpoints
|
||||||
|
* Remove misleading stack trace when the verify checkbox is not found
|
||||||
|
* Revert "Update base Docker image to Debian Bookworm" #849
|
||||||
|
* Revert "Install Chromium 115 from Debian testing" #849
|
||||||
|
|
||||||
|
## v3.3.0 (2023/08/02)
|
||||||
|
|
||||||
|
* Fix for new Cloudflare detection. Thanks @cedric-bour for #845
|
||||||
|
* Add support for proxy authentication username/password. Thanks @jacobprice808 for #807
|
||||||
|
* Implement Prometheus metrics
|
||||||
|
* Fix Chromium Driver for Chrome / Chromium version > 114
|
||||||
|
* Use Chromium 115 in binary packages (Windows and Linux)
|
||||||
|
* Install Chromium 115 from Debian testing (Docker)
|
||||||
|
* Update base Docker image to Debian Bookworm
|
||||||
|
* Update Selenium 4.11.2
|
||||||
|
* Update pyinstaller 5.13.0
|
||||||
|
* Add more traces in build_package.py
|
||||||
|
|
||||||
|
## v3.2.2 (2023/07/16)
|
||||||
|
|
||||||
|
* Workaround for updated 'verify you are human' check
|
||||||
|
|
||||||
|
## v3.2.1 (2023/06/10)
|
||||||
|
|
||||||
|
* Kill dead Chrome processes in Windows
|
||||||
|
* Fix Chrome GL erros in ASUSTOR NAS
|
||||||
|
|
||||||
|
## v3.2.0 (2023/05/23)
|
||||||
|
|
||||||
|
* Support "proxy" param in requests and sessions
|
||||||
|
* Support "cookies" param in requests
|
||||||
|
* Fix Chromium exec permissions in Linux package
|
||||||
|
* Update Python dependencies
|
||||||
|
|
||||||
|
## v3.1.2 (2023/04/02)
|
||||||
|
|
||||||
|
* Fix headless mode in macOS
|
||||||
|
* Remove redundant artifact from Windows binary package
|
||||||
|
* Bump Selenium dependency
|
||||||
|
|
||||||
|
## v3.1.1 (2023/03/25)
|
||||||
|
|
||||||
|
* Distribute binary executables in compressed package
|
||||||
|
* Add icon for binary executable
|
||||||
|
* Include information about supported architectures in the readme
|
||||||
|
* Check Python version on start
|
||||||
|
|
||||||
|
## v3.1.0 (2023/03/20)
|
||||||
|
|
||||||
|
* Build binaries for Linux x64 and Windows x64
|
||||||
|
* Sessions with auto-creation on fetch request and TTL
|
||||||
|
* Fix error trace: Crash Reports/pending No such file or directory
|
||||||
|
* Fix Waitress server error with asyncore_use_poll=true
|
||||||
|
* Attempt to fix Docker ARM32 build
|
||||||
|
* Print platform information on start up
|
||||||
|
* Add Fairlane challenge selector
|
||||||
|
* Update DDOS-GUARD title
|
||||||
|
* Update dependencies
|
||||||
|
|
||||||
|
## v3.0.4 (2023/03/07)
|
||||||
|
|
||||||
|
* Click on the Cloudflare's 'Verify you are human' button if necessary
|
||||||
|
|
||||||
|
## v3.0.3 (2023/03/06)
|
||||||
|
|
||||||
|
* Update undetected_chromedriver version to 3.4.6
|
||||||
|
|
||||||
|
## v3.0.2 (2023/01/08)
|
||||||
|
|
||||||
|
* Detect Cloudflare blocked access
|
||||||
|
* Check Chrome / Chromium web browser is installed correctly
|
||||||
|
|
||||||
|
## v3.0.1 (2023/01/06)
|
||||||
|
|
||||||
|
* Kill Chromium processes properly to avoid defunct/zombie processes
|
||||||
|
* Update undetected-chromedriver
|
||||||
|
* Disable Zygote sandbox in Chromium browser
|
||||||
|
* Add more selectors to detect blocked access
|
||||||
|
* Include procps (ps), curl and vim packages in the Docker image
|
||||||
|
|
||||||
## v3.0.0 (2023/01/04)
|
## v3.0.0 (2023/01/04)
|
||||||
|
|
||||||
* This is the first release of FlareSolverr v3. There are some breaking changes
|
* This is the first release of FlareSolverr v3. There are some breaking changes
|
||||||
|
|||||||
32
Dockerfile
32
Dockerfile
@@ -1,4 +1,4 @@
|
|||||||
FROM python:3.11-slim-bullseye as builder
|
FROM python:3.13-slim-bookworm AS builder
|
||||||
|
|
||||||
# Build dummy packages to skip installing them and their dependencies
|
# Build dummy packages to skip installing them and their dependencies
|
||||||
RUN apt-get update \
|
RUN apt-get update \
|
||||||
@@ -12,7 +12,7 @@ RUN apt-get update \
|
|||||||
&& equivs-build adwaita-icon-theme \
|
&& equivs-build adwaita-icon-theme \
|
||||||
&& mv adwaita-icon-theme_*.deb /adwaita-icon-theme.deb
|
&& mv adwaita-icon-theme_*.deb /adwaita-icon-theme.deb
|
||||||
|
|
||||||
FROM python:3.11-slim-bullseye
|
FROM python:3.13-slim-bookworm
|
||||||
|
|
||||||
# Copy dummy packages
|
# Copy dummy packages
|
||||||
COPY --from=builder /*.deb /
|
COPY --from=builder /*.deb /
|
||||||
@@ -29,7 +29,8 @@ RUN dpkg -i /libgl1-mesa-dri.deb \
|
|||||||
&& dpkg -i /adwaita-icon-theme.deb \
|
&& dpkg -i /adwaita-icon-theme.deb \
|
||||||
# Install dependencies
|
# Install dependencies
|
||||||
&& apt-get update \
|
&& apt-get update \
|
||||||
&& apt-get install -y --no-install-recommends chromium chromium-common chromium-driver xvfb \
|
&& apt-get install -y --no-install-recommends chromium chromium-common chromium-driver xvfb dumb-init \
|
||||||
|
procps curl vim xauth \
|
||||||
# Remove temporary files and hardware decoding libraries
|
# Remove temporary files and hardware decoding libraries
|
||||||
&& rm -rf /var/lib/apt/lists/* \
|
&& rm -rf /var/lib/apt/lists/* \
|
||||||
&& rm -f /usr/lib/x86_64-linux-gnu/libmfxhw* \
|
&& rm -f /usr/lib/x86_64-linux-gnu/libmfxhw* \
|
||||||
@@ -37,7 +38,12 @@ RUN dpkg -i /libgl1-mesa-dri.deb \
|
|||||||
# Create flaresolverr user
|
# Create flaresolverr user
|
||||||
&& useradd --home-dir /app --shell /bin/sh flaresolverr \
|
&& useradd --home-dir /app --shell /bin/sh flaresolverr \
|
||||||
&& mv /usr/bin/chromedriver chromedriver \
|
&& mv /usr/bin/chromedriver chromedriver \
|
||||||
&& chown -R flaresolverr:flaresolverr .
|
&& chown -R flaresolverr:flaresolverr . \
|
||||||
|
# Create config dir
|
||||||
|
&& mkdir /config \
|
||||||
|
&& chown flaresolverr:flaresolverr /config
|
||||||
|
|
||||||
|
VOLUME /config
|
||||||
|
|
||||||
# Install Python dependencies
|
# Install Python dependencies
|
||||||
COPY requirements.txt .
|
COPY requirements.txt .
|
||||||
@@ -47,19 +53,31 @@ RUN pip install -r requirements.txt \
|
|||||||
|
|
||||||
USER flaresolverr
|
USER flaresolverr
|
||||||
|
|
||||||
|
RUN mkdir -p "/app/.config/chromium/Crash Reports/pending"
|
||||||
|
|
||||||
COPY src .
|
COPY src .
|
||||||
COPY package.json ../
|
COPY package.json ../
|
||||||
|
|
||||||
EXPOSE 8191
|
EXPOSE 8191
|
||||||
|
EXPOSE 8192
|
||||||
|
|
||||||
|
# dumb-init avoids zombie chromium processes
|
||||||
|
ENTRYPOINT ["/usr/bin/dumb-init", "--"]
|
||||||
|
|
||||||
CMD ["/usr/local/bin/python", "-u", "/app/flaresolverr.py"]
|
CMD ["/usr/local/bin/python", "-u", "/app/flaresolverr.py"]
|
||||||
|
|
||||||
# Local build
|
# Local build
|
||||||
# docker build -t ngosang/flaresolverr:3.0.0 .
|
# docker build -t ngosang/flaresolverr:3.4.6 .
|
||||||
# docker run -p 8191:8191 ngosang/flaresolverr:3.0.0
|
# docker run -p 8191:8191 ngosang/flaresolverr:3.4.6
|
||||||
|
|
||||||
# Multi-arch build
|
# Multi-arch build
|
||||||
# docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
|
# docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
|
||||||
# docker buildx create --use
|
# docker buildx create --use
|
||||||
# docker buildx build -t ngosang/flaresolverr:3.0.0 --platform linux/386,linux/amd64,linux/arm/v7,linux/arm64/v8 .
|
# docker buildx build -t ngosang/flaresolverr:3.4.6 --platform linux/386,linux/amd64,linux/arm/v7,linux/arm64/v8 .
|
||||||
# add --push to publish in DockerHub
|
# add --push to publish in DockerHub
|
||||||
|
|
||||||
|
# Test multi-arch build
|
||||||
|
# docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
|
||||||
|
# docker buildx create --use
|
||||||
|
# docker buildx build -t ngosang/flaresolverr:3.4.6 --platform linux/arm/v7 --load .
|
||||||
|
# docker run -p 8191:8191 --platform linux/arm/v7 ngosang/flaresolverr:3.4.6
|
||||||
|
|||||||
2
LICENSE
2
LICENSE
@@ -1,6 +1,6 @@
|
|||||||
MIT License
|
MIT License
|
||||||
|
|
||||||
Copyright (c) 2023 Diego Heras (ngosang / ngosang@hotmail.es)
|
Copyright (c) 2025 Diego Heras (ngosang / ngosang@hotmail.es)
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
|||||||
299
README.md
299
README.md
@@ -33,22 +33,25 @@ It is recommended to install using a Docker container because the project depend
|
|||||||
already included within the image.
|
already included within the image.
|
||||||
|
|
||||||
Docker images are available in:
|
Docker images are available in:
|
||||||
* GitHub Registry => https://github.com/orgs/FlareSolverr/packages/container/package/flaresolverr
|
|
||||||
* DockerHub => https://hub.docker.com/r/flaresolverr/flaresolverr
|
- GitHub Registry => https://github.com/orgs/FlareSolverr/packages/container/package/flaresolverr
|
||||||
|
- DockerHub => https://hub.docker.com/r/flaresolverr/flaresolverr
|
||||||
|
|
||||||
Supported architectures are:
|
Supported architectures are:
|
||||||
|
|
||||||
| Architecture | Tag |
|
| Architecture | Tag |
|
||||||
|--------------|--------------|
|
| ------------ | ------------ |
|
||||||
| x86 | linux/386 |
|
| x86 | linux/386 |
|
||||||
| x86-64 | linux/amd64 |
|
| x86-64 | linux/amd64 |
|
||||||
| ARM32 | linux/arm/v7 |
|
| ARM32 | linux/arm/v7 |
|
||||||
| ARM64 | linux/arm64 |
|
| ARM64 | linux/arm64 |
|
||||||
|
|
||||||
We provide a `docker-compose.yml` configuration file. Clone this repository and execute `docker-compose up -d` to start
|
We provide a `docker-compose.yml` configuration file. Clone this repository and execute
|
||||||
|
`docker-compose up -d` _(Compose V1)_ or `docker compose up -d` _(Compose V2)_ to start
|
||||||
the container.
|
the container.
|
||||||
|
|
||||||
If you prefer the `docker cli` execute the following command.
|
If you prefer the `docker cli` execute the following command.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker run -d \
|
docker run -d \
|
||||||
--name=flaresolverr \
|
--name=flaresolverr \
|
||||||
@@ -58,26 +61,39 @@ docker run -d \
|
|||||||
ghcr.io/flaresolverr/flaresolverr:latest
|
ghcr.io/flaresolverr/flaresolverr:latest
|
||||||
```
|
```
|
||||||
|
|
||||||
If your host OS is Debian, make sure `libseccomp2` version is 2.5.x. You can check the version with `sudo apt-cache policy libseccomp2`
|
If your host OS is Debian, make sure `libseccomp2` version is 2.5.x. You can check the version with `sudo apt-cache policy libseccomp2`
|
||||||
and update the package with `sudo apt install libseccomp2=2.5.1-1~bpo10+1` or `sudo apt install libseccomp2=2.5.1-1+deb11u1`.
|
and update the package with `sudo apt install libseccomp2=2.5.1-1~bpo10+1` or `sudo apt install libseccomp2=2.5.1-1+deb11u1`.
|
||||||
Remember to restart the Docker daemon and the container after the update.
|
Remember to restart the Docker daemon and the container after the update.
|
||||||
|
|
||||||
### Precompiled binaries
|
### Precompiled binaries
|
||||||
|
|
||||||
|
> **Warning**
|
||||||
|
> Precompiled binaries are only available for x64 architecture. For other architectures see Docker images.
|
||||||
|
|
||||||
This is the recommended way for Windows users.
|
This is the recommended way for Windows users.
|
||||||
* Download the [FlareSolverr zip](https://github.com/FlareSolverr/FlareSolverr/releases) from the release's assets. It is available for Windows and Linux.
|
|
||||||
* Extract the zip file. FlareSolverr executable and firefox folder must be in the same directory.
|
- Download the [FlareSolverr executable](https://github.com/FlareSolverr/FlareSolverr/releases) from the release's page. It is available for Windows x64 and Linux x64.
|
||||||
* Execute FlareSolverr binary. In the environment variables section you can find how to change the configuration.
|
- Execute FlareSolverr binary. In the environment variables section you can find how to change the configuration.
|
||||||
|
|
||||||
### From source code
|
### From source code
|
||||||
|
|
||||||
This is the recommended way for macOS users and for developers.
|
> **Warning**
|
||||||
* Install [Python 3.10](https://www.python.org/downloads/).
|
> Installing from source code only works for x64 architecture. For other architectures see Docker images.
|
||||||
* Install [Chrome](https://www.google.com/intl/en_us/chrome/) or [Chromium](https://www.chromium.org/getting-involved/download-chromium/) web browser.
|
|
||||||
* (Only in Linux / macOS) Install [Xvfb](https://en.wikipedia.org/wiki/Xvfb) package.
|
- Install [Python 3.13](https://www.python.org/downloads/).
|
||||||
* Clone this repository and open a shell in that path.
|
- Install [Chrome](https://www.google.com/intl/en_us/chrome/) (all OS) or [Chromium](https://www.chromium.org/getting-involved/download-chromium/) (just Linux, it doesn't work in Windows) web browser.
|
||||||
* Run `pip install -r requirements.txt` command to install FlareSolverr dependencies.
|
- (Only in Linux) Install [Xvfb](https://en.wikipedia.org/wiki/Xvfb) package.
|
||||||
* Run `python src/flaresolverr.py` command to start FlareSolverr.
|
- (Only in macOS) Install [XQuartz](https://www.xquartz.org/) package.
|
||||||
|
- Clone this repository and open a shell in that path.
|
||||||
|
- Run `pip install -r requirements.txt` command to install FlareSolverr dependencies.
|
||||||
|
- Run `python src/flaresolverr.py` command to start FlareSolverr.
|
||||||
|
|
||||||
|
### From source code (FreeBSD/TrueNAS CORE)
|
||||||
|
|
||||||
|
- Run `pkg install chromium python313 py313-pip xorg-vfbserver` command to install the required dependencies.
|
||||||
|
- Clone this repository and open a shell in that path.
|
||||||
|
- Run `python3.13 -m pip install -r requirements.txt` command to install FlareSolverr dependencies.
|
||||||
|
- Run `python3.13 src/flaresolverr.py` command to start FlareSolverr.
|
||||||
|
|
||||||
### Systemd service
|
### Systemd service
|
||||||
|
|
||||||
@@ -85,17 +101,46 @@ We provide an example Systemd unit file `flaresolverr.service` as reference. You
|
|||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
Example request:
|
Example Bash request:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl -L -X POST 'http://localhost:8191/v1' \
|
curl -L -X POST 'http://localhost:8191/v1' \
|
||||||
-H 'Content-Type: application/json' \
|
-H 'Content-Type: application/json' \
|
||||||
--data-raw '{
|
--data-raw '{
|
||||||
"cmd": "request.get",
|
"cmd": "request.get",
|
||||||
"url":"http://www.google.com/",
|
"url": "http://www.google.com/",
|
||||||
"maxTimeout": 60000
|
"maxTimeout": 60000
|
||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Example Python request:
|
||||||
|
|
||||||
|
```py
|
||||||
|
import requests
|
||||||
|
|
||||||
|
url = "http://localhost:8191/v1"
|
||||||
|
headers = {"Content-Type": "application/json"}
|
||||||
|
data = {
|
||||||
|
"cmd": "request.get",
|
||||||
|
"url": "http://www.google.com/",
|
||||||
|
"maxTimeout": 60000
|
||||||
|
}
|
||||||
|
response = requests.post(url, headers=headers, json=data)
|
||||||
|
print(response.text)
|
||||||
|
```
|
||||||
|
|
||||||
|
Example PowerShell request:
|
||||||
|
|
||||||
|
```ps1
|
||||||
|
$body = @{
|
||||||
|
cmd = "request.get"
|
||||||
|
url = "http://www.google.com/"
|
||||||
|
maxTimeout = 60000
|
||||||
|
} | ConvertTo-Json
|
||||||
|
|
||||||
|
irm -UseBasicParsing 'http://localhost:8191/v1' -Headers @{"Content-Type"="application/json"} -Method Post -Body $body
|
||||||
|
```
|
||||||
|
|
||||||
### Commands
|
### Commands
|
||||||
|
|
||||||
#### + `sessions.create`
|
#### + `sessions.create`
|
||||||
@@ -106,10 +151,10 @@ cookies for the browser to use.
|
|||||||
|
|
||||||
This also speeds up the requests since it won't have to launch a new browser instance for every request.
|
This also speeds up the requests since it won't have to launch a new browser instance for every request.
|
||||||
|
|
||||||
| Parameter | Notes |
|
| Parameter | Notes |
|
||||||
|-----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
| --------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| session | Optional. The session ID that you want to be assigned to the instance. If isn't set a random UUID will be assigned. |
|
| session | Optional. The session ID that you want to be assigned to the instance. If isn't set a random UUID will be assigned. |
|
||||||
| proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. You must include the proxy schema in the URL: `http://`, `socks4://` or `socks5://`. Authorization (username/password) is not supported. |
|
| proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. You must include the proxy schema in the URL: `http://`, `socks4://` or `socks5://`. Authorization (username/password) is supported. Eg: `"proxy": {"url": "http://127.0.0.1:8888", "username": "testuser", "password": "testpass"}` |
|
||||||
|
|
||||||
#### + `sessions.list`
|
#### + `sessions.list`
|
||||||
|
|
||||||
@@ -121,11 +166,7 @@ Example response:
|
|||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"sessions": [
|
"sessions": ["session_id_1", "session_id_2", "session_id_3..."]
|
||||||
"session_id_1",
|
|
||||||
"session_id_2",
|
|
||||||
"session_id_3..."
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -135,119 +176,163 @@ This will properly shutdown a browser instance and remove all files associated w
|
|||||||
session. When you no longer need to use a session you should make sure to close it.
|
session. When you no longer need to use a session you should make sure to close it.
|
||||||
|
|
||||||
| Parameter | Notes |
|
| Parameter | Notes |
|
||||||
|-----------|-----------------------------------------------|
|
| --------- | --------------------------------------------- |
|
||||||
| session | The session ID that you want to be destroyed. |
|
| session | The session ID that you want to be destroyed. |
|
||||||
|
|
||||||
#### + `request.get`
|
#### + `request.get`
|
||||||
|
|
||||||
| Parameter | Notes |
|
| Parameter | Notes |
|
||||||
|-------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
| ------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| url | Mandatory |
|
| url | Mandatory |
|
||||||
| session | Optional. Will send the request from and existing browser instance. If one is not sent it will create a temporary instance that will be destroyed immediately after the request is completed. |
|
| session | Optional. Will send the request from and existing browser instance. If one is not sent it will create a temporary instance that will be destroyed immediately after the request is completed. |
|
||||||
| maxTimeout | Optional, default value 60000. Max timeout to solve the challenge in milliseconds. |
|
| session_ttl_minutes | Optional. FlareSolverr will automatically rotate expired sessions based on the TTL provided in minutes. |
|
||||||
| cookies | Optional. Will be used by the headless browser. Follow [this](https://github.com/puppeteer/puppeteer/blob/v3.3.0/docs/api.md#pagesetcookiecookies) format. |
|
| maxTimeout | Optional, default value 60000. Max timeout to solve the challenge in milliseconds. |
|
||||||
| returnOnlyCookies | Optional, default false. Only returns the cookies. Response data, headers and other parts of the response are removed. |
|
| cookies | Optional. Will be used by the headless browser. Eg: `"cookies": [{"name": "cookie1", "value": "value1"}, {"name": "cookie2", "value": "value2"}]`. |
|
||||||
| proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. You must include the proxy schema in the URL: `http://`, `socks4://` or `socks5://`. Authorization (username/password) is not supported. (When the `session` parameter is set, the proxy is ignored; a session specific proxy can be set in `sessions.create`.) |
|
| returnOnlyCookies | Optional, default false. Only returns the cookies. Response data, headers and other parts of the response are removed. |
|
||||||
|
| returnScreenshot | Optional, default false. Captures a screenshot of the final rendered page after all challenges and waits are completed. The screenshot is returned as a Base64-encoded PNG string in the `screenshot` field of the response. |
|
||||||
|
| proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. You must include the proxy schema in the URL: `http://`, `socks4://` or `socks5://`. Authorization (username/password) is not supported. (When the `session` parameter is set, the proxy is ignored; a session specific proxy can be set in `sessions.create`.) |
|
||||||
|
| waitInSeconds | Optional, default none. Length to wait in seconds after solving the challenge, and before returning the results. Useful to allow it to load dynamic content. |
|
||||||
|
| disableMedia | Optional, default false. When true FlareSolverr will prevent media resources (images, CSS, and fonts) from being loaded to speed up navigation. |
|
||||||
|
| tabs_till_verify | Optional, default none. Number of times the `Tab` button is needed to be pressed to end up on the turnstile captcha, in order to verify it. After verifying the captcha, the result will be stored in the solution under `turnstile_token`. |
|
||||||
|
|
||||||
:warning: If you want to use Cloudflare clearance cookie in your scripts, make sure you use the FlareSolverr User-Agent too. If they don't match you will see the challenge.
|
> **Warning**
|
||||||
|
> If you want to use Cloudflare clearance cookie in your scripts, make sure you use the FlareSolverr User-Agent too. If they don't match you will see the challenge.
|
||||||
|
|
||||||
Example response from running the `curl` above:
|
Example response from running the `curl` above:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"solution": {
|
"solution": {
|
||||||
"url": "https://www.google.com/?gws_rd=ssl",
|
"url": "https://www.google.com/?gws_rd=ssl",
|
||||||
"status": 200,
|
"status": 200,
|
||||||
"headers": {
|
"headers": {
|
||||||
"status": "200",
|
"status": "200",
|
||||||
"date": "Thu, 16 Jul 2020 04:15:49 GMT",
|
"date": "Thu, 16 Jul 2020 04:15:49 GMT",
|
||||||
"expires": "-1",
|
"expires": "-1",
|
||||||
"cache-control": "private, max-age=0",
|
"cache-control": "private, max-age=0",
|
||||||
"content-type": "text/html; charset=UTF-8",
|
"content-type": "text/html; charset=UTF-8",
|
||||||
"strict-transport-security": "max-age=31536000",
|
"strict-transport-security": "max-age=31536000",
|
||||||
"p3p": "CP=\"This is not a P3P policy! See g.co/p3phelp for more info.\"",
|
"p3p": "CP=\"This is not a P3P policy! See g.co/p3phelp for more info.\"",
|
||||||
"content-encoding": "br",
|
"content-encoding": "br",
|
||||||
"server": "gws",
|
"server": "gws",
|
||||||
"content-length": "61587",
|
"content-length": "61587",
|
||||||
"x-xss-protection": "0",
|
"x-xss-protection": "0",
|
||||||
"x-frame-options": "SAMEORIGIN",
|
"x-frame-options": "SAMEORIGIN",
|
||||||
"set-cookie": "1P_JAR=2020-07-16-04; expires=Sat..."
|
"set-cookie": "1P_JAR=2020-07-16-04; expires=Sat..."
|
||||||
},
|
|
||||||
"response":"<!DOCTYPE html>...",
|
|
||||||
"cookies": [
|
|
||||||
{
|
|
||||||
"name": "NID",
|
|
||||||
"value": "204=QE3Ocq15XalczqjuDy52HeseG3zAZuJzID3R57...",
|
|
||||||
"domain": ".google.com",
|
|
||||||
"path": "/",
|
|
||||||
"expires": 1610684149.307722,
|
|
||||||
"size": 178,
|
|
||||||
"httpOnly": true,
|
|
||||||
"secure": true,
|
|
||||||
"session": false,
|
|
||||||
"sameSite": "None"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "1P_JAR",
|
|
||||||
"value": "2020-07-16-04",
|
|
||||||
"domain": ".google.com",
|
|
||||||
"path": "/",
|
|
||||||
"expires": 1597464949.307626,
|
|
||||||
"size": 19,
|
|
||||||
"httpOnly": false,
|
|
||||||
"secure": true,
|
|
||||||
"session": false,
|
|
||||||
"sameSite": "None"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"userAgent": "Windows NT 10.0; Win64; x64) AppleWebKit/5..."
|
|
||||||
},
|
},
|
||||||
"status": "ok",
|
"response": "<!DOCTYPE html>...",
|
||||||
"message": "",
|
"cookies": [
|
||||||
"startTimestamp": 1594872947467,
|
{
|
||||||
"endTimestamp": 1594872949617,
|
"name": "NID",
|
||||||
"version": "1.0.0"
|
"value": "204=QE3Ocq15XalczqjuDy52HeseG3zAZuJzID3R57...",
|
||||||
|
"domain": ".google.com",
|
||||||
|
"path": "/",
|
||||||
|
"expires": 1610684149.307722,
|
||||||
|
"size": 178,
|
||||||
|
"httpOnly": true,
|
||||||
|
"secure": true,
|
||||||
|
"session": false,
|
||||||
|
"sameSite": "None"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "1P_JAR",
|
||||||
|
"value": "2020-07-16-04",
|
||||||
|
"domain": ".google.com",
|
||||||
|
"path": "/",
|
||||||
|
"expires": 1597464949.307626,
|
||||||
|
"size": 19,
|
||||||
|
"httpOnly": false,
|
||||||
|
"secure": true,
|
||||||
|
"session": false,
|
||||||
|
"sameSite": "None"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"userAgent": "Windows NT 10.0; Win64; x64) AppleWebKit/5...",
|
||||||
|
"turnstile_token": "03AGdBq24k3lK7JH2v8uN1T5F..."
|
||||||
|
},
|
||||||
|
"status": "ok",
|
||||||
|
"message": "",
|
||||||
|
"startTimestamp": 1594872947467,
|
||||||
|
"endTimestamp": 1594872949617,
|
||||||
|
"version": "1.0.0"
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
### + `request.post`
|
### + `request.post`
|
||||||
|
|
||||||
This is the same as `request.get` but it takes one more param:
|
This works like `request.get`, with the addition of the postData parameter. Note that `tabs_till_verify` is currently supported only for GET requests and requires one extra argument.
|
||||||
|
|
||||||
| Parameter | Notes |
|
| Parameter | Notes |
|
||||||
|-----------|--------------------------------------------------------------------------|
|
| --------- | ------------------------------------------------------------------------ |
|
||||||
| postData | Must be a string with `application/x-www-form-urlencoded`. Eg: `a=b&c=d` |
|
| postData | Must be a string with `application/x-www-form-urlencoded`. Eg: `a=b&c=d` |
|
||||||
|
|
||||||
## Environment variables
|
## Environment variables
|
||||||
|
|
||||||
| Name | Default | Notes |
|
| Name | Default | Notes |
|
||||||
|-----------------|------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
| ------------------ | ---------------------- | ---------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| LOG_LEVEL | info | Verbosity of the logging. Use `LOG_LEVEL=debug` for more information. |
|
| LOG_LEVEL | info | Verbosity of the logging. Use `LOG_LEVEL=debug` for more information. |
|
||||||
| LOG_HTML | false | Only for debugging. If `true` all HTML that passes through the proxy will be logged to the console in `debug` level. |
|
| LOG_FILE | none | Path to capture log to file. Example: `/config/flaresolverr.log`. |
|
||||||
| CAPTCHA_SOLVER | none | Captcha solving method. It is used when a captcha is encountered. See the Captcha Solvers section. |
|
| LOG_HTML | false | Only for debugging. If `true` all HTML that passes through the proxy will be logged to the console in `debug` level. |
|
||||||
| TZ | UTC | Timezone used in the logs and the web browser. Example: `TZ=Europe/London`. |
|
| PROXY_URL | none | URL for proxy. Will be overwritten by `request` or `sessions` proxy, if used. Example: `http://127.0.0.1:8080`. |
|
||||||
| HEADLESS | true | Only for debugging. To run the web browser in headless mode or visible. |
|
| PROXY_USERNAME | none | Username for proxy. Will be overwritten by `request` or `sessions` proxy, if used. Example: `testuser`. |
|
||||||
| BROWSER_TIMEOUT | 40000 | If you are experiencing errors/timeouts because your system is slow, you can try to increase this value. Remember to increase the `maxTimeout` parameter too. |
|
| PROXY_PASSWORD | none | Password for proxy. Will be overwritten by `request` or `sessions` proxy, if used. Example: `testpass`. |
|
||||||
| TEST_URL | https://www.google.com | FlareSolverr makes a request on start to make sure the web browser is working. You can change that URL if it is blocked in your country. |
|
| CAPTCHA_SOLVER | none | Captcha solving method. It is used when a captcha is encountered. See the Captcha Solvers section. |
|
||||||
| PORT | 8191 | Listening port. You don't need to change this if you are running on Docker. |
|
| TZ | UTC | Timezone used in the logs and the web browser. Example: `TZ=Europe/London`. |
|
||||||
| HOST | 0.0.0.0 | Listening interface. You don't need to change this if you are running on Docker. |
|
| LANG | none | Language used in the web browser. Example: `LANG=en_GB`. |
|
||||||
|
| HEADLESS | true | Only for debugging. To run the web browser in headless mode or visible. |
|
||||||
|
| DISABLE_MEDIA | false | To disable loading images, CSS, and other media in the web browser to save network bandwidth. |
|
||||||
|
| TEST_URL | https://www.google.com | FlareSolverr makes a request on start to make sure the web browser is working. You can change that URL if it is blocked in your country. |
|
||||||
|
| PORT | 8191 | Listening port. You don't need to change this if you are running on Docker. |
|
||||||
|
| HOST | 0.0.0.0 | Listening interface. You don't need to change this if you are running on Docker. |
|
||||||
|
| PROMETHEUS_ENABLED | false | Enable Prometheus exporter. See the Prometheus section below. |
|
||||||
|
| PROMETHEUS_PORT | 8192 | Listening port for Prometheus exporter. See the Prometheus section below. |
|
||||||
|
|
||||||
Environment variables are set differently depending on the operating system. Some examples:
|
Environment variables are set differently depending on the operating system. Some examples:
|
||||||
* Docker: Take a look at the Docker section in this document. Environment variables can be set in the `docker-compose.yml` file or in the Docker CLI command.
|
|
||||||
* Linux: Run `export LOG_LEVEL=debug` and then start FlareSolverr in the same shell.
|
- Docker: Take a look at the Docker section in this document. Environment variables can be set in the `docker-compose.yml` file or in the Docker CLI command.
|
||||||
* Windows: Open `cmd.exe`, run `set LOG_LEVEL=debug` and then start FlareSolverr in the same shell.
|
- Linux: Run `export LOG_LEVEL=debug` and then run `flaresolverr` in the same shell.
|
||||||
|
- Windows: Open `cmd.exe`, run `set LOG_LEVEL=debug` and then run `flaresolverr.exe` in the same shell.
|
||||||
|
|
||||||
|
## Prometheus exporter
|
||||||
|
|
||||||
|
The Prometheus exporter for FlareSolverr is disabled by default. It can be enabled with the environment variable `PROMETHEUS_ENABLED`. If you are using Docker make sure you expose the `PROMETHEUS_PORT`.
|
||||||
|
|
||||||
|
Example metrics:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
# HELP flaresolverr_request_total Total requests with result
|
||||||
|
# TYPE flaresolverr_request_total counter
|
||||||
|
flaresolverr_request_total{domain="nowsecure.nl",result="solved"} 1.0
|
||||||
|
# HELP flaresolverr_request_created Total requests with result
|
||||||
|
# TYPE flaresolverr_request_created gauge
|
||||||
|
flaresolverr_request_created{domain="nowsecure.nl",result="solved"} 1.690141657157109e+09
|
||||||
|
# HELP flaresolverr_request_duration Request duration in seconds
|
||||||
|
# TYPE flaresolverr_request_duration histogram
|
||||||
|
flaresolverr_request_duration_bucket{domain="nowsecure.nl",le="0.0"} 0.0
|
||||||
|
flaresolverr_request_duration_bucket{domain="nowsecure.nl",le="10.0"} 1.0
|
||||||
|
flaresolverr_request_duration_bucket{domain="nowsecure.nl",le="25.0"} 1.0
|
||||||
|
flaresolverr_request_duration_bucket{domain="nowsecure.nl",le="50.0"} 1.0
|
||||||
|
flaresolverr_request_duration_bucket{domain="nowsecure.nl",le="+Inf"} 1.0
|
||||||
|
flaresolverr_request_duration_count{domain="nowsecure.nl"} 1.0
|
||||||
|
flaresolverr_request_duration_sum{domain="nowsecure.nl"} 5.858
|
||||||
|
# HELP flaresolverr_request_duration_created Request duration in seconds
|
||||||
|
# TYPE flaresolverr_request_duration_created gauge
|
||||||
|
flaresolverr_request_duration_created{domain="nowsecure.nl"} 1.6901416571570296e+09
|
||||||
|
```
|
||||||
|
|
||||||
## Captcha Solvers
|
## Captcha Solvers
|
||||||
|
|
||||||
:warning: At this time none of the captcha solvers work. You can check the status in the open issues. Any help is welcome.
|
> **Warning**
|
||||||
|
> At this time none of the captcha solvers work. You can check the status in the open issues. Any help is welcome.
|
||||||
|
|
||||||
Sometimes CloudFlare not only gives mathematical computations and browser tests, sometimes they also require the user to
|
Sometimes CloudFlare not only gives mathematical computations and browser tests, sometimes they also require the user to
|
||||||
solve a captcha.
|
solve a captcha.
|
||||||
If this is the case, FlareSolverr will return the error `Captcha detected but no automatic solver is configured.`
|
If this is the case, FlareSolverr will return the error `Captcha detected but no automatic solver is configured.`
|
||||||
|
|
||||||
FlareSolverr can be customized to solve the captchas automatically by setting the environment variable `CAPTCHA_SOLVER`
|
FlareSolverr can be customized to solve the CAPTCHA automatically by setting the environment variable `CAPTCHA_SOLVER`
|
||||||
to the file name of one of the adapters inside the [/captcha](src/captcha) directory.
|
to the file name of one of the adapters inside the `/captcha` directory.
|
||||||
|
|
||||||
## Related projects
|
## Related projects
|
||||||
|
|
||||||
* C# implementation => https://github.com/FlareSolverr/FlareSolverrSharp
|
- C# implementation => https://github.com/FlareSolverr/FlareSolverrSharp
|
||||||
|
|
||||||
|
|||||||
@@ -7,9 +7,12 @@ services:
|
|||||||
container_name: flaresolverr
|
container_name: flaresolverr
|
||||||
environment:
|
environment:
|
||||||
- LOG_LEVEL=${LOG_LEVEL:-info}
|
- LOG_LEVEL=${LOG_LEVEL:-info}
|
||||||
|
- LOG_FILE=${LOG_FILE:-none}
|
||||||
- LOG_HTML=${LOG_HTML:-false}
|
- LOG_HTML=${LOG_HTML:-false}
|
||||||
- CAPTCHA_SOLVER=${CAPTCHA_SOLVER:-none}
|
- CAPTCHA_SOLVER=${CAPTCHA_SOLVER:-none}
|
||||||
- TZ=Europe/London
|
- TZ=Europe/London
|
||||||
ports:
|
ports:
|
||||||
- "${PORT:-8191}:8191"
|
- "${PORT:-8191}:8191"
|
||||||
|
volumes:
|
||||||
|
- /var/lib/flaresolver:/config
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|||||||
19
flaresolverr.service
Normal file
19
flaresolverr.service
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=FlareSolverr
|
||||||
|
After=network.target
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
SyslogIdentifier=flaresolverr
|
||||||
|
Restart=always
|
||||||
|
RestartSec=5
|
||||||
|
Type=simple
|
||||||
|
User=flaresolverr
|
||||||
|
Group=flaresolverr
|
||||||
|
Environment="LOG_LEVEL=info"
|
||||||
|
Environment="CAPTCHA_SOLVER=none"
|
||||||
|
WorkingDirectory=/opt/flaresolverr
|
||||||
|
ExecStart=/opt/flaresolverr/flaresolverr
|
||||||
|
TimeoutStopSec=30
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"name": "flaresolverr",
|
"name": "flaresolverr",
|
||||||
"version": "3.0.0",
|
"version": "3.4.6",
|
||||||
"description": "Proxy server to bypass Cloudflare protection",
|
"description": "Proxy server to bypass Cloudflare protection",
|
||||||
"author": "Diego Heras (ngosang / ngosang@hotmail.es)",
|
"author": "Diego Heras (ngosang / ngosang@hotmail.es)",
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
}
|
}
|
||||||
|
@@ -1,9 +1,14 @@
-bottle==0.12.23
-waitress==2.1.2
-selenium==4.4.3
+bottle==0.13.4
+waitress==3.0.2
+selenium==4.38.0
 func-timeout==4.3.5
-# required by undetected_chromedriver
-requests==2.28.1
-websockets==10.3
-# only required for linux
-xvfbwrapper==0.2.9
+prometheus-client==0.23.1
+# Required by undetected_chromedriver
+requests==2.32.5
+certifi==2025.10.5
+websockets==15.0.1
+packaging==25.0
+# Only required for Linux and macOS
+xvfbwrapper==0.2.15; platform_system != "Windows"
+# Only required for Windows
+pefile==2024.8.26; platform_system == "Windows"
resources/flaresolverr_logo.ico (new binary file, 8.8 KiB — binary file not shown)
@@ -5,7 +5,7 @@ import logging
 def logger_plugin(callback):
     """
     Bottle plugin to use logging module
-    http://bottlepy.org/docs/dev/plugindev.html
+    https://bottlepy.org/docs/dev/plugindev.html
 
     Wrap a Bottle request so that a log line is emitted after it's handled.
     (This decorator can be extended to take the desired logger as a param.)
src/bottle_plugins/prometheus_plugin.py (new file, 66 lines)
@@ -0,0 +1,66 @@
+import logging
+import os
+import urllib.parse
+
+from bottle import request
+from dtos import V1RequestBase, V1ResponseBase
+from metrics import start_metrics_http_server, REQUEST_COUNTER, REQUEST_DURATION
+
+PROMETHEUS_ENABLED = os.environ.get('PROMETHEUS_ENABLED', 'false').lower() == 'true'
+PROMETHEUS_PORT = int(os.environ.get('PROMETHEUS_PORT', 8192))
+
+
+def setup():
+    if PROMETHEUS_ENABLED:
+        start_metrics_http_server(PROMETHEUS_PORT)
+
+
+def prometheus_plugin(callback):
+    """
+    Bottle plugin to expose Prometheus metrics
+    https://bottlepy.org/docs/dev/plugindev.html
+    """
+    def wrapper(*args, **kwargs):
+        actual_response = callback(*args, **kwargs)
+
+        if PROMETHEUS_ENABLED:
+            try:
+                export_metrics(actual_response)
+            except Exception as e:
+                logging.warning("Error exporting metrics: " + str(e))
+
+        return actual_response
+
+    def export_metrics(actual_response):
+        res = V1ResponseBase(actual_response)
+
+        if res.startTimestamp is None or res.endTimestamp is None:
+            # skip management and healthcheck endpoints
+            return
+
+        domain = "unknown"
+        if res.solution and res.solution.url:
+            domain = parse_domain_url(res.solution.url)
+        else:
+            # timeout error
+            req = V1RequestBase(request.json)
+            if req.url:
+                domain = parse_domain_url(req.url)
+
+        run_time = (res.endTimestamp - res.startTimestamp) / 1000
+        REQUEST_DURATION.labels(domain=domain).observe(run_time)
+
+        result = "unknown"
+        if res.message == "Challenge solved!":
+            result = "solved"
+        elif res.message == "Challenge not detected!":
+            result = "not_detected"
+        elif res.message.startswith("Error"):
+            result = "error"
+        REQUEST_COUNTER.labels(domain=domain, result=result).inc()
+
+    def parse_domain_url(url):
+        parsed_url = urllib.parse.urlparse(url)
+        return parsed_url.hostname
+
+    return wrapper
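The plugin above only exports metrics when `PROMETHEUS_ENABLED` is set before FlareSolverr starts, and serves them on `PROMETHEUS_PORT` (8192 by default). A minimal sketch of checking the exporter from a client, assuming a local instance with the exporter enabled:

```python
import requests

# Assumes FlareSolverr was started with PROMETHEUS_ENABLED=true and the
# default PROMETHEUS_PORT=8192; host and port are placeholders otherwise.
metrics_text = requests.get("http://localhost:8192/metrics", timeout=10).text

# Print only the FlareSolverr counters/histograms defined in src/metrics.py.
for line in metrics_text.splitlines():
    if line.startswith("flaresolverr_request"):
        print(line)
```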
src/build_package.py (new file, 110 lines)
@@ -0,0 +1,110 @@
+import os
+import platform
+import shutil
+import subprocess
+import sys
+import zipfile
+
+import requests
+
+
+def clean_files():
+    try:
+        shutil.rmtree(os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'build'))
+    except Exception:
+        pass
+    try:
+        shutil.rmtree(os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'dist'))
+    except Exception:
+        pass
+    try:
+        shutil.rmtree(os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'dist_chrome'))
+    except Exception:
+        pass
+
+
+def download_chromium():
+    # https://commondatastorage.googleapis.com/chromium-browser-snapshots/index.html?prefix=Linux_x64/
+    revision = "1522586" if os.name == 'nt' else '1522586'
+    arch = 'Win_x64' if os.name == 'nt' else 'Linux_x64'
+    dl_file = 'chrome-win' if os.name == 'nt' else 'chrome-linux'
+    dl_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'dist_chrome')
+    dl_path_folder = os.path.join(dl_path, dl_file)
+    dl_path_zip = dl_path_folder + '.zip'
+
+    # response = requests.get(
+    #     f'https://commondatastorage.googleapis.com/chromium-browser-snapshots/{arch}/LAST_CHANGE',
+    #     timeout=30)
+    # revision = response.text.strip()
+    print("Downloading revision: " + revision)
+
+    os.mkdir(dl_path)
+    with requests.get(
+            f'https://commondatastorage.googleapis.com/chromium-browser-snapshots/{arch}/{revision}/{dl_file}.zip',
+            stream=True) as r:
+        r.raise_for_status()
+        with open(dl_path_zip, 'wb') as f:
+            for chunk in r.iter_content(chunk_size=8192):
+                f.write(chunk)
+    print("File downloaded: " + dl_path_zip)
+    with zipfile.ZipFile(dl_path_zip, 'r') as zip_ref:
+        zip_ref.extractall(dl_path)
+    os.remove(dl_path_zip)
+
+    chrome_path = os.path.join(dl_path, "chrome")
+    shutil.move(dl_path_folder, chrome_path)
+    print("Extracted in: " + chrome_path)
+
+    if os.name != 'nt':
+        # Give executable permissions for *nix
+        # file * | grep executable | cut -d: -f1
+        print("Giving executable permissions...")
+        execs = ['chrome', 'chrome_crashpad_handler', 'chrome_sandbox', 'chrome-wrapper', 'xdg-mime', 'xdg-settings']
+        for exec_file in execs:
+            exec_path = os.path.join(chrome_path, exec_file)
+            os.chmod(exec_path, 0o755)
+
+
+def run_pyinstaller():
+    sep = ';' if os.name == 'nt' else ':'
+    result = subprocess.run([sys.executable, "-m", "PyInstaller",
+                             "--icon", "resources/flaresolverr_logo.ico",
+                             "--add-data", f"package.json{sep}.",
+                             "--add-data", f"{os.path.join('dist_chrome', 'chrome')}{sep}chrome",
+                             os.path.join("src", "flaresolverr.py")],
+                            cwd=os.pardir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    if result.returncode != 0:
+        print(result.stderr.decode('utf-8'))
+        raise Exception("Error running pyInstaller")
+
+
+def compress_package():
+    dist_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'dist')
+    package_folder = os.path.join(dist_folder, 'package')
+    shutil.move(os.path.join(dist_folder, 'flaresolverr'), os.path.join(package_folder, 'flaresolverr'))
+    print("Package folder: " + package_folder)
+
+    compr_format = 'zip' if os.name == 'nt' else 'gztar'
+    compr_file_name = 'flaresolverr_windows_x64' if os.name == 'nt' else 'flaresolverr_linux_x64'
+    compr_file_path = os.path.join(dist_folder, compr_file_name)
+    shutil.make_archive(compr_file_path, compr_format, package_folder)
+    print("Compressed file path: " + compr_file_path)
+
+
+if __name__ == "__main__":
+    print("Building package...")
+    print("Platform: " + platform.platform())
+
+    print("Cleaning previous build...")
+    clean_files()
+
+    print("Downloading Chromium...")
+    download_chromium()
+
+    print("Building pyinstaller executable... ")
+    run_pyinstaller()
+
+    print("Compressing package... ")
+    compress_package()
+
+    # NOTE: python -m pip install pyinstaller
src/dtos.py (11 changed lines)
@@ -10,6 +10,8 @@ class ChallengeResolutionResultT:
     response: str = None
     cookies: list = None
     userAgent: str = None
+    screenshot: str | None = None
+    turnstile_token: str = None
 
     def __init__(self, _dict):
         self.__dict__.update(_dict)
@@ -33,6 +35,7 @@ class V1RequestBase(object):
     maxTimeout: int = None
     proxy: dict = None
     session: str = None
+    session_ttl_minutes: int = None
     headers: list = None  # deprecated v2.0.0, not used
     userAgent: str = None  # deprecated v2.0.0, not used
 
@@ -40,8 +43,14 @@ class V1RequestBase(object):
     url: str = None
     postData: str = None
     returnOnlyCookies: bool = None
+    returnScreenshot: bool = None
     download: bool = None  # deprecated v2.0.0, not used
     returnRawHtml: bool = None  # deprecated v2.0.0, not used
+    waitInSeconds: int = None
+    # Optional resource blocking flag (blocks images, CSS, and fonts)
+    disableMedia: bool = None
+    # Optional when you've got a turnstile captcha that needs to be clicked after X number of Tab presses
+    tabs_till_verify: int = None
 
     def __init__(self, _dict):
         self.__dict__.update(_dict)
@@ -51,6 +60,8 @@ class V1ResponseBase(object):
     # V1ResponseBase
     status: str = None
     message: str = None
+    session: str = None
+    sessions: list[str] = None
     startTimestamp: int = None
     endTimestamp: int = None
     version: str = None
@@ -3,14 +3,20 @@ import logging
 import os
 import sys
 
-from bottle import run, response, Bottle, request
+import certifi
+from bottle import run, response, Bottle, request, ServerAdapter
+
 from bottle_plugins.error_plugin import error_plugin
 from bottle_plugins.logger_plugin import logger_plugin
-from dtos import IndexResponse, V1RequestBase
+from bottle_plugins import prometheus_plugin
+from dtos import V1RequestBase
 import flaresolverr_service
 import utils
 
+env_proxy_url = os.environ.get('PROXY_URL', None)
+env_proxy_username = os.environ.get('PROXY_USERNAME', None)
+env_proxy_password = os.environ.get('PROXY_PASSWORD', None)
+
+
 class JSONErrorBottle(Bottle):
     """
@@ -23,10 +29,6 @@ class JSONErrorBottle(Bottle):
 
 app = JSONErrorBottle()
 
-# plugin order is important
-app.install(logger_plugin)
-app.install(error_plugin)
-
 
 @app.route('/')
 def index():
@@ -52,7 +54,14 @@ def controller_v1():
     """
     Controller v1
     """
-    req = V1RequestBase(request.json)
+    data = request.json or {}
+    if (('proxy' not in data or not data.get('proxy')) and env_proxy_url is not None and (env_proxy_username is None and env_proxy_password is None)):
+        logging.info('Using proxy URL ENV')
+        data['proxy'] = {"url": env_proxy_url}
+    if (('proxy' not in data or not data.get('proxy')) and env_proxy_url is not None and (env_proxy_username is not None or env_proxy_password is not None)):
+        logging.info('Using proxy URL, username & password ENVs')
+        data['proxy'] = {"url": env_proxy_url, "username": env_proxy_username, "password": env_proxy_password}
+    req = V1RequestBase(data)
     res = flaresolverr_service.controller_v1_endpoint(req)
     if res.__error_500__:
         response.status = 500
@@ -60,8 +69,25 @@ def controller_v1():
 
 
 if __name__ == "__main__":
+    # check python version
+    if sys.version_info < (3, 9):
+        raise Exception("The Python version is less than 3.9, a version equal to or higher is required.")
+
+    # fix for HEADLESS=false in Windows binary
+    # https://stackoverflow.com/a/27694505
+    if os.name == 'nt':
+        import multiprocessing
+        multiprocessing.freeze_support()
+
+    # fix ssl certificates for compiled binaries
+    # https://github.com/pyinstaller/pyinstaller/issues/7229
+    # https://stackoverflow.com/q/55736855
+    os.environ["REQUESTS_CA_BUNDLE"] = certifi.where()
+    os.environ["SSL_CERT_FILE"] = certifi.where()
+
     # validate configuration
     log_level = os.environ.get('LOG_LEVEL', 'info').upper()
+    log_file = os.environ.get('LOG_FILE', None)
     log_html = utils.get_config_log_html()
     headless = utils.get_config_headless()
     server_host = os.environ.get('HOST', '0.0.0.0')
@@ -71,14 +97,29 @@ if __name__ == "__main__":
     logger_format = '%(asctime)s %(levelname)-8s %(message)s'
     if log_level == 'DEBUG':
         logger_format = '%(asctime)s %(levelname)-8s ReqId %(thread)s %(message)s'
-    logging.basicConfig(
-        format=logger_format,
-        level=log_level,
-        datefmt='%Y-%m-%d %H:%M:%S',
-        handlers=[
-            logging.StreamHandler(sys.stdout)
-        ]
-    )
+    if log_file:
+        log_file = os.path.realpath(log_file)
+        log_path = os.path.dirname(log_file)
+        os.makedirs(log_path, exist_ok=True)
+        logging.basicConfig(
+            format=logger_format,
+            level=log_level,
+            datefmt='%Y-%m-%d %H:%M:%S',
+            handlers=[
+                logging.StreamHandler(sys.stdout),
+                logging.FileHandler(log_file)
+            ]
+        )
+    else:
+        logging.basicConfig(
+            format=logger_format,
+            level=log_level,
+            datefmt='%Y-%m-%d %H:%M:%S',
+            handlers=[
+                logging.StreamHandler(sys.stdout)
+            ]
+        )
 
     # disable warning traces from urllib3
     logging.getLogger('urllib3').setLevel(logging.ERROR)
     logging.getLogger('selenium.webdriver.remote.remote_connection').setLevel(logging.WARNING)
@@ -87,9 +128,25 @@ if __name__ == "__main__":
     logging.info(f'FlareSolverr {utils.get_flaresolverr_version()}')
     logging.debug('Debug log enabled')
 
+    # Get current OS for global variable
+    utils.get_current_platform()
+
     # test browser installation
     flaresolverr_service.test_browser_installation()
 
+    # start bootle plugins
+    # plugin order is important
+    app.install(logger_plugin)
+    app.install(error_plugin)
+    prometheus_plugin.setup()
+    app.install(prometheus_plugin.prometheus_plugin)
+
     # start webserver
     # default server 'wsgiref' does not support concurrent requests
-    run(app, host=server_host, port=server_port, quiet=True, server='waitress')
+    # https://github.com/FlareSolverr/FlareSolverr/issues/680
+    # https://github.com/Pylons/waitress/issues/31
+    class WaitressServerPoll(ServerAdapter):
+        def run(self, handler):
+            from waitress import serve
+            serve(handler, host=self.host, port=self.port, asyncore_use_poll=True)
+    run(app, host=server_host, port=server_port, quiet=True, server=WaitressServerPoll)
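With the controller change above, a request that omits `proxy` falls back to the `PROXY_URL` / `PROXY_USERNAME` / `PROXY_PASSWORD` environment variables set on the server. A client-side sketch, assuming those variables were exported before FlareSolverr was launched (host and target URL are placeholders):

```python
import requests

# No "proxy" key in the body: the server injects {"url": PROXY_URL, ...} itself
# when the PROXY_* environment variables are defined at startup.
response = requests.post(
    "http://localhost:8191/v1",
    json={"cmd": "request.get", "url": "https://example.com", "maxTimeout": 60000},
    timeout=120,
)
print(response.json()["status"])
```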
@@ -1,42 +1,84 @@
 import logging
+import platform
+import sys
 import time
-from urllib.parse import unquote
+from datetime import timedelta
+from html import escape
+from urllib.parse import unquote, quote
+
-from func_timeout import func_timeout, FunctionTimedOut
+from func_timeout import FunctionTimedOut, func_timeout
 from selenium.common import TimeoutException
 from selenium.webdriver.chrome.webdriver import WebDriver
 from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.support.expected_conditions import (
+    presence_of_element_located, staleness_of, title_is)
+from selenium.webdriver.common.action_chains import ActionChains
 from selenium.webdriver.support.wait import WebDriverWait
-from selenium.webdriver.support.expected_conditions import presence_of_element_located, staleness_of, title_is
-
-from dtos import V1RequestBase, V1ResponseBase, ChallengeResolutionT, ChallengeResolutionResultT, IndexResponse, \
-    HealthResponse, STATUS_OK, STATUS_ERROR
 import utils
+from dtos import (STATUS_ERROR, STATUS_OK, ChallengeResolutionResultT,
+                  ChallengeResolutionT, HealthResponse, IndexResponse,
+                  V1RequestBase, V1ResponseBase)
+from sessions import SessionsStorage
+
+ACCESS_DENIED_TITLES = [
+    # Cloudflare
+    'Access denied',
+    # Cloudflare http://bitturk.net/ Firefox
+    'Attention Required! | Cloudflare'
+]
 ACCESS_DENIED_SELECTORS = [
     # Cloudflare
-    'div.cf-error-title span.cf-code-label span'
+    'div.cf-error-title span.cf-code-label span',
+    # Cloudflare http://bitturk.net/ Firefox
+    '#cf-error-details div.cf-error-overview h1'
 ]
-CHALLENGE_TITLE = [
+CHALLENGE_TITLES = [
     # Cloudflare
     'Just a moment...',
     # DDoS-GUARD
-    'DDOS-GUARD',
+    'DDoS-Guard'
 ]
 CHALLENGE_SELECTORS = [
     # Cloudflare
-    '#cf-challenge-running', '.ray_id', '.attack-box', '#cf-please-wait', '#challenge-spinner', '#trk_jschal_js',
+    '#cf-challenge-running', '.ray_id', '.attack-box', '#cf-please-wait', '#challenge-spinner', '#trk_jschal_js', '#turnstile-wrapper', '.lds-ring',
     # Custom CloudFlare for EbookParadijs, Film-Paleis, MuziekFabriek and Puur-Hollands
-    'td.info #js_info'
+    'td.info #js_info',
+    # Fairlane / pararius.com
+    'div.vc div.text-box h2'
 ]
-SHORT_TIMEOUT = 10
+TURNSTILE_SELECTORS = [
+    "input[name='cf-turnstile-response']"
+]
+
+SHORT_TIMEOUT = 1
+SESSIONS_STORAGE = SessionsStorage()
 
 
 def test_browser_installation():
     logging.info("Testing web browser installation...")
+    logging.info("Platform: " + platform.platform())
+
+    chrome_exe_path = utils.get_chrome_exe_path()
+    if chrome_exe_path is None:
+        logging.error("Chrome / Chromium web browser not installed!")
+        sys.exit(1)
+    else:
+        logging.info("Chrome / Chromium path: " + chrome_exe_path)
+
+    chrome_major_version = utils.get_chrome_major_version()
+    if chrome_major_version == '':
+        logging.error("Chrome / Chromium version not detected!")
+        sys.exit(1)
+    else:
+        logging.info("Chrome / Chromium major version: " + chrome_major_version)
+
+    logging.info("Launching web browser...")
     user_agent = utils.get_user_agent()
     logging.info("FlareSolverr User-Agent: " + user_agent)
-    logging.info("Test successful")
+    logging.info("Test successful!")
 
 
 def index_endpoint() -> IndexResponse:
@@ -84,17 +126,17 @@ def _controller_v1_handler(req: V1RequestBase) -> V1ResponseBase:
         logging.warning("Request parameter 'userAgent' was removed in FlareSolverr v2.")
 
     # set default values
-    if req.maxTimeout is None or req.maxTimeout < 1:
+    if req.maxTimeout is None or int(req.maxTimeout) < 1:
         req.maxTimeout = 60000
 
     # execute the command
     res: V1ResponseBase
     if req.cmd == 'sessions.create':
-        raise Exception("Not implemented yet.")
+        res = _cmd_sessions_create(req)
     elif req.cmd == 'sessions.list':
-        raise Exception("Not implemented yet.")
+        res = _cmd_sessions_list(req)
     elif req.cmd == 'sessions.destroy':
-        raise Exception("Not implemented yet.")
+        res = _cmd_sessions_destroy(req)
     elif req.cmd == 'request.get':
         res = _cmd_request_get(req)
     elif req.cmd == 'request.post':
@@ -141,38 +183,222 @@ def _cmd_request_post(req: V1RequestBase) -> V1ResponseBase:
     return res
 
 
+def _cmd_sessions_create(req: V1RequestBase) -> V1ResponseBase:
+    logging.debug("Creating new session...")
+
+    session, fresh = SESSIONS_STORAGE.create(session_id=req.session, proxy=req.proxy)
+    session_id = session.session_id
+
+    if not fresh:
+        return V1ResponseBase({
+            "status": STATUS_OK,
+            "message": "Session already exists.",
+            "session": session_id
+        })
+
+    return V1ResponseBase({
+        "status": STATUS_OK,
+        "message": "Session created successfully.",
+        "session": session_id
+    })
+
+
+def _cmd_sessions_list(req: V1RequestBase) -> V1ResponseBase:
+    session_ids = SESSIONS_STORAGE.session_ids()
+
+    return V1ResponseBase({
+        "status": STATUS_OK,
+        "message": "",
+        "sessions": session_ids
+    })
+
+
+def _cmd_sessions_destroy(req: V1RequestBase) -> V1ResponseBase:
+    session_id = req.session
+    existed = SESSIONS_STORAGE.destroy(session_id)
+
+    if not existed:
+        raise Exception("The session doesn't exist.")
+
+    return V1ResponseBase({
+        "status": STATUS_OK,
+        "message": "The session has been removed."
+    })
+
+
 def _resolve_challenge(req: V1RequestBase, method: str) -> ChallengeResolutionT:
-    timeout = req.maxTimeout / 1000
+    timeout = int(req.maxTimeout) / 1000
     driver = None
     try:
-        driver = utils.get_webdriver()
+        if req.session:
+            session_id = req.session
+            ttl = timedelta(minutes=req.session_ttl_minutes) if req.session_ttl_minutes else None
+            session, fresh = SESSIONS_STORAGE.get(session_id, ttl)
+
+            if fresh:
+                logging.debug(f"new session created to perform the request (session_id={session_id})")
+            else:
+                logging.debug(f"existing session is used to perform the request (session_id={session_id}, "
+                              f"lifetime={str(session.lifetime())}, ttl={str(ttl)})")
+
+            driver = session.driver
+        else:
+            driver = utils.get_webdriver(req.proxy)
+            logging.debug('New instance of webdriver has been created to perform the request')
         return func_timeout(timeout, _evil_logic, (req, driver, method))
     except FunctionTimedOut:
         raise Exception(f'Error solving the challenge. Timeout after {timeout} seconds.')
     except Exception as e:
-        raise Exception('Error solving the challenge. ' + str(e))
+        raise Exception('Error solving the challenge. ' + str(e).replace('\n', '\\n'))
     finally:
-        if driver is not None:
+        if not req.session and driver is not None:
+            if utils.PLATFORM_VERSION == "nt":
+                driver.close()
             driver.quit()
+            logging.debug('A used instance of webdriver has been destroyed')
+
+
+def click_verify(driver: WebDriver, num_tabs: int = 1):
+    try:
+        logging.debug("Try to find the Cloudflare verify checkbox...")
+        actions = ActionChains(driver)
+        actions.pause(5)
+        for _ in range(num_tabs):
+            actions.send_keys(Keys.TAB).pause(0.1)
+        actions.pause(1)
+        actions.send_keys(Keys.SPACE).perform()
+
+        logging.debug(f"Cloudflare verify checkbox clicked after {num_tabs} tabs!")
+    except Exception:
+        logging.debug("Cloudflare verify checkbox not found on the page.")
+    finally:
+        driver.switch_to.default_content()
+
+    try:
+        logging.debug("Try to find the Cloudflare 'Verify you are human' button...")
+        button = driver.find_element(
+            by=By.XPATH,
+            value="//input[@type='button' and @value='Verify you are human']",
+        )
+        if button:
+            actions = ActionChains(driver)
+            actions.move_to_element_with_offset(button, 5, 7)
+            actions.click(button)
+            actions.perform()
+            logging.debug("The Cloudflare 'Verify you are human' button found and clicked!")
+    except Exception:
+        logging.debug("The Cloudflare 'Verify you are human' button not found on the page.")
+
+    time.sleep(2)
+
+
+def _get_turnstile_token(driver: WebDriver, tabs: int):
+    token_input = driver.find_element(By.CSS_SELECTOR, "input[name='cf-turnstile-response']")
+    current_value = token_input.get_attribute("value")
+    while True:
+        click_verify(driver, num_tabs=tabs)
+        turnstile_token = token_input.get_attribute("value")
+        if turnstile_token:
+            if turnstile_token != current_value:
+                logging.info(f"Turnstile token: {turnstile_token}")
+                return turnstile_token
+        logging.debug(f"Failed to extract token possibly click failed")
+
+        # reset focus
+        driver.execute_script("""
+            let el = document.createElement('button');
+            el.style.position='fixed';
+            el.style.top='0';
+            el.style.left='0';
+            document.body.prepend(el);
+            el.focus();
+        """)
+        time.sleep(1)
+
+
+def _resolve_turnstile_captcha(req: V1RequestBase, driver: WebDriver):
+    turnstile_token = None
+    if req.tabs_till_verify is not None:
+        logging.debug(f'Navigating to... {req.url} in order to pass the turnstile challenge')
+        driver.get(req.url)
+
+        turnstile_challenge_found = False
+        for selector in TURNSTILE_SELECTORS:
+            found_elements = driver.find_elements(By.CSS_SELECTOR, selector)
+            if len(found_elements) > 0:
+                turnstile_challenge_found = True
+                logging.info("Turnstile challenge detected. Selector found: " + selector)
+                break
+        if turnstile_challenge_found:
+            turnstile_token = _get_turnstile_token(driver=driver, tabs=req.tabs_till_verify)
+        else:
+            logging.debug(f'Turnstile challenge not found')
+    return turnstile_token
+
+
 def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> ChallengeResolutionT:
     res = ChallengeResolutionT({})
     res.status = STATUS_OK
     res.message = ""
+
+    # optionally block resources like images/css/fonts using CDP
+    disable_media = utils.get_config_disable_media()
+    if req.disableMedia is not None:
+        disable_media = req.disableMedia
+    if disable_media:
+        block_urls = [
+            # Images
+            "*.png", "*.jpg", "*.jpeg", "*.gif", "*.webp", "*.bmp", "*.svg", "*.ico",
+            "*.PNG", "*.JPG", "*.JPEG", "*.GIF", "*.WEBP", "*.BMP", "*.SVG", "*.ICO",
+            "*.tiff", "*.tif", "*.jpe", "*.apng", "*.avif", "*.heic", "*.heif",
+            "*.TIFF", "*.TIF", "*.JPE", "*.APNG", "*.AVIF", "*.HEIC", "*.HEIF",
+            # Stylesheets
+            "*.css",
+            "*.CSS",
+            # Fonts
+            "*.woff", "*.woff2", "*.ttf", "*.otf", "*.eot",
+            "*.WOFF", "*.WOFF2", "*.TTF", "*.OTF", "*.EOT"
+        ]
+        try:
+            logging.debug("Network.setBlockedURLs: %s", block_urls)
+            driver.execute_cdp_cmd("Network.enable", {})
+            driver.execute_cdp_cmd("Network.setBlockedURLs", {"urls": block_urls})
+        except Exception:
+            # if CDP commands are not available or fail, ignore and continue
+            logging.debug("Network.setBlockedURLs failed or unsupported on this webdriver")
+
     # navigate to the page
-    logging.debug(f'Navigating to... {req.url}')
-    if method == 'POST':
+    logging.debug(f"Navigating to... {req.url}")
+    turnstile_token = None
+
+    if method == "POST":
         _post_request(req, driver)
     else:
-        driver.get(req.url)
-    if utils.get_config_log_html():
-        logging.debug(f"Response HTML:\n{driver.page_source}")
+        if req.tabs_till_verify is None:
+            driver.get(req.url)
+        else:
+            turnstile_token = _resolve_turnstile_captcha(req, driver)
+
+    # set cookies if required
+    if req.cookies is not None and len(req.cookies) > 0:
+        logging.debug(f'Setting cookies...')
+        for cookie in req.cookies:
+            driver.delete_cookie(cookie['name'])
+            driver.add_cookie(cookie)
+        # reload the page
+        if method == 'POST':
+            _post_request(req, driver)
+        else:
+            driver.get(req.url)
+
     # wait for the page
+    if utils.get_config_log_html():
+        logging.debug(f"Response HTML:\n{driver.page_source}")
     html_element = driver.find_element(By.TAG_NAME, "html")
+    page_title = driver.title
+
+    # find access denied titles
+    for title in ACCESS_DENIED_TITLES:
+        if page_title.startswith(title):
+            raise Exception('Cloudflare has blocked this request. '
+                            'Probably your IP is banned for this site, check in your web browser.')
     # find access denied selectors
     for selector in ACCESS_DENIED_SELECTORS:
         found_elements = driver.find_elements(By.CSS_SELECTOR, selector)
@@ -182,11 +408,10 @@ def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> Challenge
 
     # find challenge by title
     challenge_found = False
-    page_title = driver.title
-    for title in CHALLENGE_TITLE:
-        if title == page_title:
+    for title in CHALLENGE_TITLES:
+        if title.lower() == page_title.lower():
             challenge_found = True
-            logging.info("Challenge detected. Title found: " + title)
+            logging.info("Challenge detected. Title found: " + page_title)
             break
     if not challenge_found:
         # find challenge by selectors
@@ -197,17 +422,19 @@ def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> Challenge
                 logging.info("Challenge detected. Selector found: " + selector)
                 break
 
+    attempt = 0
     if challenge_found:
         while True:
             try:
-                # wait until the title change
-                for title in CHALLENGE_TITLE:
-                    logging.debug("Waiting for title: " + title)
+                attempt = attempt + 1
+                # wait until the title changes
+                for title in CHALLENGE_TITLES:
+                    logging.debug("Waiting for title (attempt " + str(attempt) + "): " + title)
                     WebDriverWait(driver, SHORT_TIMEOUT).until_not(title_is(title))
 
                 # then wait until all the selectors disappear
                 for selector in CHALLENGE_SELECTORS:
-                    logging.debug("Waiting for selector: " + selector)
+                    logging.debug("Waiting for selector (attempt " + str(attempt) + "): " + selector)
                     WebDriverWait(driver, SHORT_TIMEOUT).until_not(
                         presence_of_element_located((By.CSS_SELECTOR, selector)))
 
@@ -216,6 +443,9 @@ def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> Challenge
 
             except TimeoutException:
                 logging.debug("Timeout waiting for selector")
+
+                click_verify(driver)
+
                 # update the html (cloudflare reloads the page every 5 s)
                 html_element = driver.find_element(By.TAG_NAME, "html")
 
@@ -238,21 +468,30 @@ def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> Challenge
         challenge_res.status = 200  # todo: fix, selenium not provides this info
         challenge_res.cookies = driver.get_cookies()
         challenge_res.userAgent = utils.get_user_agent(driver)
+        challenge_res.turnstile_token = turnstile_token
 
         if not req.returnOnlyCookies:
            challenge_res.headers = {}  # todo: fix, selenium not provides this info
+
+            if req.waitInSeconds and req.waitInSeconds > 0:
+                logging.info("Waiting " + str(req.waitInSeconds) + " seconds before returning the response...")
+                time.sleep(req.waitInSeconds)
+
             challenge_res.response = driver.page_source
+
+            if req.returnScreenshot:
+                challenge_res.screenshot = driver.get_screenshot_as_base64()
+
     res.result = challenge_res
     return res
 
 
 def _post_request(req: V1RequestBase, driver: WebDriver):
     post_form = f'<form id="hackForm" action="{req.url}" method="POST">'
-    query_string = req.postData if req.postData[0] != '?' else req.postData[1:]
+    query_string = req.postData if req.postData and req.postData[0] != '?' else req.postData[1:] if req.postData else ''
     pairs = query_string.split('&')
     for pair in pairs:
-        parts = pair.split('=')
+        parts = pair.split('=', 1)
         # noinspection PyBroadException
         try:
             name = unquote(parts[0])
@@ -262,10 +501,12 @@ def _post_request(req: V1RequestBase, driver: WebDriver):
             continue
         # noinspection PyBroadException
         try:
-            value = unquote(parts[1])
+            value = unquote(parts[1]) if len(parts) > 1 else ''
        except Exception:
-            value = parts[1]
+            value = parts[1] if len(parts) > 1 else ''
-        post_form += f'<input type="text" name="{name}" value="{value}"><br>'
+        # Protection of " character, for syntax
+        value = value.replace('"', '&quot;')
+        post_form += f'<input type="text" name="{escape(quote(name))}" value="{escape(quote(value))}"><br>'
     post_form += '</form>'
     html_content = f"""
     <!DOCTYPE html>
@@ -275,4 +516,4 @@ def _post_request(req: V1RequestBase, driver: WebDriver):
     <script>document.getElementById('hackForm').submit();</script>
     </body>
 </html>"""
-    driver.get("data:text/html;charset=utf-8," + html_content)
+    driver.get("data:text/html;charset=utf-8,{html_content}".format(html_content=html_content))
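The sessions commands implemented above map onto a simple create / use / destroy workflow over the `/v1` endpoint. A minimal sketch, assuming a local instance on port 8191 and a placeholder target URL:

```python
import requests

API = "http://localhost:8191/v1"  # assumed local deployment

# 1. Create (or reuse) a named session backed by a single browser instance.
requests.post(API, json={"cmd": "sessions.create", "session": "demo"}, timeout=120)

# 2. Solve challenges through that session; session_ttl_minutes recreates it when stale.
r = requests.post(API, json={
    "cmd": "request.get",
    "url": "https://example.com",
    "session": "demo",
    "session_ttl_minutes": 10,
    "maxTimeout": 60000,
}, timeout=120)
print(r.json()["message"])

# 3. Inspect and clean up so the browser instance is released.
print(requests.post(API, json={"cmd": "sessions.list"}, timeout=30).json()["sessions"])
requests.post(API, json={"cmd": "sessions.destroy", "session": "demo"}, timeout=120)
```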
src/metrics.py (new file, 32 lines)
@@ -0,0 +1,32 @@
+import logging
+
+from prometheus_client import Counter, Histogram, start_http_server
+import time
+
+REQUEST_COUNTER = Counter(
+    name='flaresolverr_request',
+    documentation='Total requests with result',
+    labelnames=['domain', 'result']
+)
+REQUEST_DURATION = Histogram(
+    name='flaresolverr_request_duration',
+    documentation='Request duration in seconds',
+    labelnames=['domain'],
+    buckets=[0, 10, 25, 50]
+)
+
+
+def serve(port):
+    start_http_server(port=port)
+    while True:
+        time.sleep(600)
+
+
+def start_metrics_http_server(prometheus_port: int):
+    logging.info(f"Serving Prometheus exporter on http://0.0.0.0:{prometheus_port}/metrics")
+    from threading import Thread
+    Thread(
+        target=serve,
+        kwargs=dict(port=prometheus_port),
+        daemon=True,
+    ).start()
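Since the exporter uses the standard `prometheus_client` text format, its output can also be consumed programmatically. A sketch that reads the per-domain request counter, assuming the exporter is reachable on the default port:

```python
import requests
from prometheus_client.parser import text_string_to_metric_families

# Placeholder host; the port matches the PROMETHEUS_PORT default of 8192.
text = requests.get("http://localhost:8192/metrics", timeout=10).text

for family in text_string_to_metric_families(text):
    if family.name == "flaresolverr_request":
        for sample in family.samples:
            if sample.name == "flaresolverr_request_total":
                print(sample.labels["domain"], sample.labels["result"], sample.value)
```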
src/sessions.py (new file, 84 lines)
@@ -0,0 +1,84 @@
+import logging
+from dataclasses import dataclass
+from datetime import datetime, timedelta
+from typing import Optional, Tuple
+from uuid import uuid1
+
+from selenium.webdriver.chrome.webdriver import WebDriver
+
+import utils
+
+
+@dataclass
+class Session:
+    session_id: str
+    driver: WebDriver
+    created_at: datetime
+
+    def lifetime(self) -> timedelta:
+        return datetime.now() - self.created_at
+
+
+class SessionsStorage:
+    """SessionsStorage creates, stores and process all the sessions"""
+
+    def __init__(self):
+        self.sessions = {}
+
+    def create(self, session_id: Optional[str] = None, proxy: Optional[dict] = None,
+               force_new: Optional[bool] = False) -> Tuple[Session, bool]:
+        """create creates new instance of WebDriver if necessary,
+        assign defined (or newly generated) session_id to the instance
+        and returns the session object. If a new session has been created
+        second argument is set to True.
+
+        Note: The function is idempotent, so in case if session_id
+        already exists in the storage a new instance of WebDriver won't be created
+        and existing session will be returned. Second argument defines if
+        new session has been created (True) or an existing one was used (False).
+        """
+        session_id = session_id or str(uuid1())
+
+        if force_new:
+            self.destroy(session_id)
+
+        if self.exists(session_id):
+            return self.sessions[session_id], False
+
+        driver = utils.get_webdriver(proxy)
+        created_at = datetime.now()
+        session = Session(session_id, driver, created_at)
+
+        self.sessions[session_id] = session
+
+        return session, True
+
+    def exists(self, session_id: str) -> bool:
+        return session_id in self.sessions
+
+    def destroy(self, session_id: str) -> bool:
+        """destroy closes the driver instance and removes session from the storage.
+        The function is noop if session_id doesn't exist.
+        The function returns True if session was found and destroyed,
+        and False if session_id wasn't found.
+        """
+        if not self.exists(session_id):
+            return False
+
+        session = self.sessions.pop(session_id)
+        if utils.PLATFORM_VERSION == "nt":
+            session.driver.close()
+        session.driver.quit()
+        return True
+
+    def get(self, session_id: str, ttl: Optional[timedelta] = None) -> Tuple[Session, bool]:
+        session, fresh = self.create(session_id)
+
+        if ttl is not None and not fresh and session.lifetime() > ttl:
+            logging.debug(f'session\'s lifetime has expired, so the session is recreated (session_id={session_id})')
+            session, fresh = self.create(session_id, force_new=True)
+
+        return session, fresh
+
+    def session_ids(self) -> list[str]:
+        return list(self.sessions.keys())
336
src/tests.py
336
src/tests.py
@@ -1,4 +1,5 @@
|
|||||||
import unittest
|
import unittest
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
from webtest import TestApp
|
from webtest import TestApp
|
||||||
|
|
||||||
@@ -7,7 +8,7 @@ import flaresolverr
|
|||||||
import utils
|
import utils
|
||||||
|
|
||||||
|
|
||||||
def _find_obj_by_key(key: str, value: str, _list: list) -> dict | None:
|
def _find_obj_by_key(key: str, value: str, _list: list) -> Optional[dict]:
|
||||||
for obj in _list:
|
for obj in _list:
|
||||||
if obj[key] == value:
|
if obj[key] == value:
|
||||||
return obj
|
return obj
|
||||||
@@ -20,13 +21,16 @@ class TestFlareSolverr(unittest.TestCase):
|
|||||||
proxy_socks_url = "socks5://127.0.0.1:1080"
|
proxy_socks_url = "socks5://127.0.0.1:1080"
|
||||||
google_url = "https://www.google.com"
|
google_url = "https://www.google.com"
|
||||||
post_url = "https://httpbin.org/post"
|
post_url = "https://httpbin.org/post"
|
||||||
cloudflare_url = "https://nowsecure.nl"
|
cloudflare_url = "https://nowsecure.nl/"
|
||||||
cloudflare_url_2 = "https://idope.se/torrent-list/harry/"
|
cloudflare_url_2 = "https://idope.se/torrent-list/harry/"
|
||||||
ddos_guard_url = "https://anidex.info/"
|
ddos_guard_url = "https://www.litres.ru/"
|
||||||
custom_cloudflare_url = "https://www.muziekfabriek.org"
|
fairlane_url = "https://www.pararius.com/apartments/amsterdam"
|
||||||
|
custom_cloudflare_url = "https://www.muziekfabriek.org/"
|
||||||
cloudflare_blocked_url = "https://cpasbiens3.fr/index.php?do=search&subaction=search"
|
cloudflare_blocked_url = "https://cpasbiens3.fr/index.php?do=search&subaction=search"
|
||||||
|
|
||||||
app = TestApp(flaresolverr.app)
|
app = TestApp(flaresolverr.app)
|
||||||
|
# wait until the server is ready
|
||||||
|
app.get('/')
|
||||||
|
|
||||||
def test_wrong_endpoint(self):
|
def test_wrong_endpoint(self):
|
||||||
res = self.app.get('/wrong', status=404)
|
res = self.app.get('/wrong', status=404)
|
||||||
@@ -64,7 +68,7 @@ class TestFlareSolverr(unittest.TestCase):
|
|||||||
self.assertEqual("Error: Request parameter 'cmd' = 'request.bad' is invalid.", body.message)
|
self.assertEqual("Error: Request parameter 'cmd' = 'request.bad' is invalid.", body.message)
|
||||||
self.assertGreater(body.startTimestamp, 10000)
|
self.assertGreater(body.startTimestamp, 10000)
|
||||||
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
||||||
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
||||||
|
|
||||||
def test_v1_endpoint_request_get_no_cloudflare(self):
|
def test_v1_endpoint_request_get_no_cloudflare(self):
|
||||||
res = self.app.post_json('/v1', {
|
res = self.app.post_json('/v1', {
|
||||||
@@ -78,7 +82,30 @@ class TestFlareSolverr(unittest.TestCase):
|
|||||||
self.assertEqual("Challenge not detected!", body.message)
|
self.assertEqual("Challenge not detected!", body.message)
|
||||||
self.assertGreater(body.startTimestamp, 10000)
|
self.assertGreater(body.startTimestamp, 10000)
|
||||||
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
||||||
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
||||||
|
|
||||||
|
solution = body.solution
|
||||||
|
self.assertIn(self.google_url, solution.url)
|
||||||
|
self.assertEqual(solution.status, 200)
|
||||||
|
self.assertIs(len(solution.headers), 0)
|
||||||
|
self.assertIn("<title>Google</title>", solution.response)
|
||||||
|
self.assertGreater(len(solution.cookies), 0)
|
||||||
|
self.assertIn("Chrome/", solution.userAgent)
|
||||||
|
|
||||||
|
def test_v1_endpoint_request_get_disable_resources(self):
|
||||||
|
res = self.app.post_json("/v1", {
|
||||||
|
"cmd": "request.get",
|
||||||
|
"url": self.google_url,
|
||||||
|
"disableMedia": True
|
||||||
|
})
|
||||||
|
self.assertEqual(res.status_code, 200)
|
||||||
|
|
||||||
|
body = V1ResponseBase(res.json)
|
||||||
|
self.assertEqual(STATUS_OK, body.status)
|
||||||
|
self.assertEqual("Challenge not detected!", body.message)
|
||||||
|
self.assertGreater(body.startTimestamp, 10000)
|
||||||
|
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
||||||
|
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
||||||
|
|
||||||
solution = body.solution
|
solution = body.solution
|
||||||
self.assertIn(self.google_url, solution.url)
|
self.assertIn(self.google_url, solution.url)
|
||||||
@@ -100,7 +127,7 @@ class TestFlareSolverr(unittest.TestCase):
|
|||||||
self.assertEqual("Challenge solved!", body.message)
|
self.assertEqual("Challenge solved!", body.message)
|
||||||
self.assertGreater(body.startTimestamp, 10000)
|
self.assertGreater(body.startTimestamp, 10000)
|
||||||
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
||||||
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
||||||
|
|
||||||
solution = body.solution
|
solution = body.solution
|
||||||
self.assertIn(self.cloudflare_url, solution.url)
|
self.assertIn(self.cloudflare_url, solution.url)
|
||||||
@@ -126,7 +153,7 @@ class TestFlareSolverr(unittest.TestCase):
|
|||||||
self.assertEqual("Challenge solved!", body.message)
|
self.assertEqual("Challenge solved!", body.message)
|
||||||
self.assertGreater(body.startTimestamp, 10000)
|
self.assertGreater(body.startTimestamp, 10000)
|
||||||
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
||||||
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
||||||
|
|
||||||
solution = body.solution
|
solution = body.solution
|
||||||
self.assertIn(self.cloudflare_url_2, solution.url)
|
self.assertIn(self.cloudflare_url_2, solution.url)
|
||||||
@@ -152,13 +179,13 @@ class TestFlareSolverr(unittest.TestCase):
|
|||||||
self.assertEqual("Challenge solved!", body.message)
|
self.assertEqual("Challenge solved!", body.message)
|
||||||
self.assertGreater(body.startTimestamp, 10000)
|
self.assertGreater(body.startTimestamp, 10000)
|
||||||
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
||||||
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
||||||
|
|
||||||
solution = body.solution
|
solution = body.solution
|
||||||
self.assertIn(self.ddos_guard_url, solution.url)
|
self.assertIn(self.ddos_guard_url, solution.url)
|
||||||
self.assertEqual(solution.status, 200)
|
self.assertEqual(solution.status, 200)
|
||||||
self.assertIs(len(solution.headers), 0)
|
self.assertIs(len(solution.headers), 0)
|
||||||
self.assertIn("<title>AniDex</title>", solution.response)
|
self.assertIn("<title>Литрес", solution.response)
|
||||||
self.assertGreater(len(solution.cookies), 0)
|
self.assertGreater(len(solution.cookies), 0)
|
||||||
self.assertIn("Chrome/", solution.userAgent)
|
self.assertIn("Chrome/", solution.userAgent)
|
||||||
|
|
||||||
@@ -166,6 +193,32 @@ class TestFlareSolverr(unittest.TestCase):
|
|||||||
self.assertIsNotNone(cf_cookie, "DDOS-Guard cookie not found")
|
self.assertIsNotNone(cf_cookie, "DDOS-Guard cookie not found")
|
||||||
self.assertGreater(len(cf_cookie["value"]), 10)
|
self.assertGreater(len(cf_cookie["value"]), 10)
|
||||||
|
|
||||||
|
def test_v1_endpoint_request_get_fairlane_js(self):
|
||||||
|
res = self.app.post_json('/v1', {
|
||||||
|
"cmd": "request.get",
|
||||||
|
"url": self.fairlane_url
|
||||||
|
})
|
||||||
|
self.assertEqual(res.status_code, 200)
|
||||||
|
|
||||||
|
body = V1ResponseBase(res.json)
|
||||||
|
self.assertEqual(STATUS_OK, body.status)
|
||||||
|
self.assertEqual("Challenge solved!", body.message)
|
||||||
|
self.assertGreater(body.startTimestamp, 10000)
|
||||||
|
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
||||||
|
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
||||||
|
|
||||||
|
solution = body.solution
|
||||||
|
self.assertIn(self.fairlane_url, solution.url)
|
||||||
|
self.assertEqual(solution.status, 200)
|
||||||
|
self.assertIs(len(solution.headers), 0)
|
||||||
|
self.assertIn("<title>Rental Apartments Amsterdam</title>", solution.response)
|
||||||
|
self.assertGreater(len(solution.cookies), 0)
|
||||||
|
self.assertIn("Chrome/", solution.userAgent)
|
||||||
|
|
||||||
|
cf_cookie = _find_obj_by_key("name", "fl_pass_v2_b", solution.cookies)
|
||||||
|
self.assertIsNotNone(cf_cookie, "Fairlane cookie not found")
|
||||||
|
self.assertGreater(len(cf_cookie["value"]), 50)
|
||||||
|
|
||||||
def test_v1_endpoint_request_get_custom_cloudflare_js(self):
|
def test_v1_endpoint_request_get_custom_cloudflare_js(self):
|
||||||
res = self.app.post_json('/v1', {
|
res = self.app.post_json('/v1', {
|
||||||
"cmd": "request.get",
|
"cmd": "request.get",
|
||||||
@@ -178,7 +231,7 @@ class TestFlareSolverr(unittest.TestCase):
|
|||||||
self.assertEqual("Challenge solved!", body.message)
|
self.assertEqual("Challenge solved!", body.message)
|
||||||
self.assertGreater(body.startTimestamp, 10000)
|
self.assertGreater(body.startTimestamp, 10000)
|
||||||
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
||||||
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
||||||
|
|
||||||
solution = body.solution
|
solution = body.solution
|
||||||
self.assertIn(self.custom_cloudflare_url, solution.url)
|
self.assertIn(self.custom_cloudflare_url, solution.url)
|
||||||
@@ -209,7 +262,45 @@ class TestFlareSolverr(unittest.TestCase):
        self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
        self.assertEqual(utils.get_flaresolverr_version(), body.version)

    def test_v1_endpoint_request_get_cookies_param(self):
        res = self.app.post_json('/v1', {
            "cmd": "request.get",
            "url": self.google_url,
            "cookies": [
                {
                    "name": "testcookie1",
                    "value": "testvalue1"
                },
                {
                    "name": "testcookie2",
                    "value": "testvalue2"
                }
            ]
        })
        self.assertEqual(res.status_code, 200)

        body = V1ResponseBase(res.json)
        self.assertEqual(STATUS_OK, body.status)
        self.assertEqual("Challenge not detected!", body.message)
        self.assertGreater(body.startTimestamp, 10000)
        self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
        self.assertEqual(utils.get_flaresolverr_version(), body.version)

        solution = body.solution
        self.assertIn(self.google_url, solution.url)
        self.assertEqual(solution.status, 200)
        self.assertIs(len(solution.headers), 0)
        self.assertIn("<title>Google</title>", solution.response)
        self.assertGreater(len(solution.cookies), 1)
        self.assertIn("Chrome/", solution.userAgent)

        user_cookie1 = _find_obj_by_key("name", "testcookie1", solution.cookies)
        self.assertIsNotNone(user_cookie1, "User cookie 1 not found")
        self.assertEqual("testvalue1", user_cookie1["value"])

        user_cookie2 = _find_obj_by_key("name", "testcookie2", solution.cookies)
        self.assertIsNotNone(user_cookie2, "User cookie 2 not found")
        self.assertEqual("testvalue2", user_cookie2["value"])

    def test_v1_endpoint_request_get_returnOnlyCookies_param(self):
        res = self.app.post_json('/v1', {
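For reference, the body exercised by the cookies test above is plain JSON, so the same command can be sent to a running FlareSolverr instance with any HTTP client. A minimal sketch with the requests library, assuming the service listens on the default http://localhost:8191/v1 endpoint (the host, port and target URL here are illustrative, not part of this diff):

import requests

payload = {
    "cmd": "request.get",
    "url": "https://www.google.com",
    "cookies": [
        {"name": "testcookie1", "value": "testvalue1"},
        {"name": "testcookie2", "value": "testvalue2"},
    ],
}
# The response mirrors V1ResponseBase: status, message, timestamps and a solution object.
resp = requests.post("http://localhost:8191/v1", json=payload, timeout=120)
data = resp.json()
print(data["status"], data["message"])
print([c["name"] for c in data["solution"]["cookies"]])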
@@ -234,10 +325,124 @@ class TestFlareSolverr(unittest.TestCase):
        self.assertGreater(len(solution.cookies), 0)
        self.assertIn("Chrome/", solution.userAgent)

    def test_v1_endpoint_request_get_proxy_http_param(self):
        """
        To configure TinyProxy in local:
           * sudo vim /etc/tinyproxy/tinyproxy.conf
              * edit => LogFile "/tmp/tinyproxy.log"
              * edit => Syslog Off
           * sudo tinyproxy -d
           * sudo tail -f /tmp/tinyproxy.log
        """
        res = self.app.post_json('/v1', {
            "cmd": "request.get",
            "url": self.google_url,
            "proxy": {
                "url": self.proxy_url
            }
        })
        self.assertEqual(res.status_code, 200)

        body = V1ResponseBase(res.json)
        self.assertEqual(STATUS_OK, body.status)
        self.assertEqual("Challenge not detected!", body.message)
        self.assertGreater(body.startTimestamp, 10000)
        self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
        self.assertEqual(utils.get_flaresolverr_version(), body.version)

        solution = body.solution
        self.assertIn(self.google_url, solution.url)
        self.assertEqual(solution.status, 200)
        self.assertIs(len(solution.headers), 0)
        self.assertIn("<title>Google</title>", solution.response)
        self.assertGreater(len(solution.cookies), 0)
        self.assertIn("Chrome/", solution.userAgent)

    def test_v1_endpoint_request_get_proxy_http_param_with_credentials(self):
        """
        To configure TinyProxy in local:
           * sudo vim /etc/tinyproxy/tinyproxy.conf
              * edit => LogFile "/tmp/tinyproxy.log"
              * edit => Syslog Off
              * add => BasicAuth testuser testpass
           * sudo tinyproxy -d
           * sudo tail -f /tmp/tinyproxy.log
        """
        res = self.app.post_json('/v1', {
            "cmd": "request.get",
            "url": self.google_url,
            "proxy": {
                "url": self.proxy_url,
                "username": "testuser",
                "password": "testpass"
            }
        })
        self.assertEqual(res.status_code, 200)

        body = V1ResponseBase(res.json)
        self.assertEqual(STATUS_OK, body.status)
        self.assertEqual("Challenge not detected!", body.message)
        self.assertGreater(body.startTimestamp, 10000)
        self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
        self.assertEqual(utils.get_flaresolverr_version(), body.version)

        solution = body.solution
        self.assertIn(self.google_url, solution.url)
        self.assertEqual(solution.status, 200)
        self.assertIs(len(solution.headers), 0)
        self.assertIn("<title>Google</title>", solution.response)
        self.assertGreater(len(solution.cookies), 0)
        self.assertIn("Chrome/", solution.userAgent)

    def test_v1_endpoint_request_get_proxy_socks_param(self):
        """
        To configure Dante in local:
           * https://linuxhint.com/set-up-a-socks5-proxy-on-ubuntu-with-dante/
           * sudo vim /etc/sockd.conf
           * sudo systemctl restart sockd.service
           * curl --socks5 socks5://127.0.0.1:1080 https://www.google.com
        """
        res = self.app.post_json('/v1', {
            "cmd": "request.get",
            "url": self.google_url,
            "proxy": {
                "url": self.proxy_socks_url
            }
        })
        self.assertEqual(res.status_code, 200)

        body = V1ResponseBase(res.json)
        self.assertEqual(STATUS_OK, body.status)
        self.assertEqual("Challenge not detected!", body.message)
        self.assertGreater(body.startTimestamp, 10000)
        self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
        self.assertEqual(utils.get_flaresolverr_version(), body.version)

        solution = body.solution
        self.assertIn(self.google_url, solution.url)
        self.assertEqual(solution.status, 200)
        self.assertIs(len(solution.headers), 0)
        self.assertIn("<title>Google</title>", solution.response)
        self.assertGreater(len(solution.cookies), 0)
        self.assertIn("Chrome/", solution.userAgent)

    def test_v1_endpoint_request_get_proxy_wrong_param(self):
        res = self.app.post_json('/v1', {
            "cmd": "request.get",
            "url": self.google_url,
            "proxy": {
                "url": "http://127.0.0.1:43210"
            }
        }, status=500)
        self.assertEqual(res.status_code, 500)

        body = V1ResponseBase(res.json)
        self.assertEqual(STATUS_ERROR, body.status)
        self.assertIn("Error: Error solving the challenge. Message: unknown error: net::ERR_PROXY_CONNECTION_FAILED",
                      body.message)
        self.assertGreater(body.startTimestamp, 10000)
        self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
        self.assertEqual(utils.get_flaresolverr_version(), body.version)

    def test_v1_endpoint_request_get_fail_timeout(self):
        res = self.app.post_json('/v1', {
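The four proxy tests above all use the same "proxy" object; only the URL scheme and the optional credentials change. A minimal sketch of the two payload shapes, assuming a local TinyProxy on port 8888 and a Dante SOCKS5 proxy on port 1080 as the docstrings describe (both are hypothetical local setups, not part of this diff):

# HTTP proxy with credentials (TinyProxy configured with "BasicAuth testuser testpass")
http_proxy_request = {
    "cmd": "request.get",
    "url": "https://www.google.com",
    "proxy": {
        "url": "http://127.0.0.1:8888",
        "username": "testuser",
        "password": "testpass",
    },
}

# SOCKSv5 proxy (Dante); the credentials keys are simply omitted
socks_proxy_request = {
    "cmd": "request.get",
    "url": "https://www.google.com",
    "proxy": {"url": "socks5://127.0.0.1:1080"},
}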
@@ -351,12 +556,99 @@ class TestFlareSolverr(unittest.TestCase):
        self.assertEqual(STATUS_OK, body.status)
        self.assertEqual("Challenge not detected!", body.message)

    def test_v1_endpoint_sessions_create_without_session(self):
        res = self.app.post_json('/v1', {
            "cmd": "sessions.create"
        })
        self.assertEqual(res.status_code, 200)

        body = V1ResponseBase(res.json)
        self.assertEqual(STATUS_OK, body.status)
        self.assertEqual("Session created successfully.", body.message)
        self.assertIsNotNone(body.session)

    def test_v1_endpoint_sessions_create_with_session(self):
        res = self.app.post_json('/v1', {
            "cmd": "sessions.create",
            "session": "test_create_session"
        })
        self.assertEqual(res.status_code, 200)

        body = V1ResponseBase(res.json)
        self.assertEqual(STATUS_OK, body.status)
        self.assertEqual("Session created successfully.", body.message)
        self.assertEqual(body.session, "test_create_session")

    def test_v1_endpoint_sessions_create_with_proxy(self):
        res = self.app.post_json('/v1', {
            "cmd": "sessions.create",
            "proxy": {
                "url": self.proxy_url
            }
        })
        self.assertEqual(res.status_code, 200)

        body = V1ResponseBase(res.json)
        self.assertEqual(STATUS_OK, body.status)
        self.assertEqual("Session created successfully.", body.message)
        self.assertIsNotNone(body.session)

    def test_v1_endpoint_sessions_list(self):
        self.app.post_json('/v1', {
            "cmd": "sessions.create",
            "session": "test_list_sessions"
        })
        res = self.app.post_json('/v1', {
            "cmd": "sessions.list"
        })
        self.assertEqual(res.status_code, 200)

        body = V1ResponseBase(res.json)
        self.assertEqual(STATUS_OK, body.status)
        self.assertEqual("", body.message)
        self.assertGreaterEqual(len(body.sessions), 1)
        self.assertIn("test_list_sessions", body.sessions)

    def test_v1_endpoint_sessions_destroy_existing_session(self):
        self.app.post_json('/v1', {
            "cmd": "sessions.create",
            "session": "test_destroy_sessions"
        })
        res = self.app.post_json('/v1', {
            "cmd": "sessions.destroy",
            "session": "test_destroy_sessions"
        })
        self.assertEqual(res.status_code, 200)

        body = V1ResponseBase(res.json)
        self.assertEqual(STATUS_OK, body.status)
        self.assertEqual("The session has been removed.", body.message)

    def test_v1_endpoint_sessions_destroy_non_existing_session(self):
        res = self.app.post_json('/v1', {
            "cmd": "sessions.destroy",
            "session": "non_existing_session_name"
        }, status=500)
        self.assertEqual(res.status_code, 500)

        body = V1ResponseBase(res.json)
        self.assertEqual(STATUS_ERROR, body.status)
        self.assertEqual("Error: The session doesn't exist.", body.message)

    def test_v1_endpoint_request_get_with_session(self):
        self.app.post_json('/v1', {
            "cmd": "sessions.create",
            "session": "test_request_sessions"
        })
        res = self.app.post_json('/v1', {
            "cmd": "request.get",
            "session": "test_request_sessions",
            "url": self.google_url
        })
        self.assertEqual(res.status_code, 200)

        body = V1ResponseBase(res.json)
        self.assertEqual(STATUS_OK, body.status)


if __name__ == '__main__':
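The session tests above outline the intended lifecycle: create a named session, reuse it in request.get calls so the same browser instance is kept alive, then destroy it. A short sketch of that flow against a running instance, again assuming the default http://localhost:8191/v1 endpoint and an illustrative session name:

import requests

API = "http://localhost:8191/v1"  # assumed default FlareSolverr endpoint

requests.post(API, json={"cmd": "sessions.create", "session": "demo"})
resp = requests.post(API, json={
    "cmd": "request.get",
    "session": "demo",          # reuses the browser created above
    "url": "https://www.google.com",
})
print(resp.json()["status"])
requests.post(API, json={"cmd": "sessions.destroy", "session": "demo"})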
@@ -39,6 +39,8 @@ def asset_cloudflare_solution(self, res, site_url, site_text):


class TestFlareSolverr(unittest.TestCase):
    app = TestApp(flaresolverr.app)
    # wait until the server is ready
    app.get('/')

    def test_v1_endpoint_request_get_cloudflare(self):
        sites_get = [
File diff suppressed because it is too large
@@ -1,259 +0,0 @@
#!/usr/bin/env python3
# this module is part of undetected_chromedriver


"""

888 888 d8b
888 888 Y8P
888 888
.d8888b 88888b. 888d888 .d88b. 88888b.d88b. .d88b. .d88888 888d888 888 888 888 .d88b. 888d888
d88P" 888 "88b 888P" d88""88b 888 "888 "88b d8P Y8b d88" 888 888P" 888 888 888 d8P Y8b 888P"
888 888 888 888 888 888 888 888 888 88888888 888 888 888 888 Y88 88P 88888888 888
Y88b. 888 888 888 Y88..88P 888 888 888 Y8b. Y88b 888 888 888 Y8bd8P Y8b. 888
"Y8888P 888 888 888 "Y88P" 888 888 888 "Y8888 "Y88888 888 888 Y88P "Y8888 888 88888888

by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)

"""

import io
import logging
import os
import random
import re
import string
import sys
import zipfile
from distutils.version import LooseVersion
from urllib.request import urlopen, urlretrieve

from selenium.webdriver import Chrome as _Chrome, ChromeOptions as _ChromeOptions

TARGET_VERSION = 0
logger = logging.getLogger("uc")


class Chrome:
    def __new__(cls, *args, emulate_touch=False, **kwargs):

        if not ChromeDriverManager.installed:
            ChromeDriverManager(*args, **kwargs).install()
        if not ChromeDriverManager.selenium_patched:
            ChromeDriverManager(*args, **kwargs).patch_selenium_webdriver()
        if not kwargs.get("executable_path"):
            kwargs["executable_path"] = "./{}".format(
                ChromeDriverManager(*args, **kwargs).executable_path
            )
        if not kwargs.get("options"):
            kwargs["options"] = ChromeOptions()
        instance = object.__new__(_Chrome)
        instance.__init__(*args, **kwargs)

        instance._orig_get = instance.get

        def _get_wrapped(*args, **kwargs):
            if instance.execute_script("return navigator.webdriver"):
                instance.execute_cdp_cmd(
                    "Page.addScriptToEvaluateOnNewDocument",
                    {
                        "source": """
                            Object.defineProperty(window, 'navigator', {
                                value: new Proxy(navigator, {
                                    has: (target, key) => (key === 'webdriver' ? false : key in target),
                                    get: (target, key) =>
                                        key === 'webdriver'
                                            ? undefined
                                            : typeof target[key] === 'function'
                                                ? target[key].bind(target)
                                                : target[key]
                                })
                            });
                        """
                    },
                )
            return instance._orig_get(*args, **kwargs)

        instance.get = _get_wrapped

        original_user_agent_string = instance.execute_script(
            "return navigator.userAgent"
        )
        instance.execute_cdp_cmd(
            "Network.setUserAgentOverride",
            {
                "userAgent": original_user_agent_string.replace("Headless", ""),
            },
        )
        if emulate_touch:
            instance.execute_cdp_cmd(
                "Page.addScriptToEvaluateOnNewDocument",
                {
                    "source": """
                        Object.defineProperty(navigator, 'maxTouchPoints', {
                            get: () => 1
                        })"""
                },
            )
        logger.info(f"starting undetected_chromedriver.Chrome({args}, {kwargs})")
        return instance


class ChromeOptions:
    def __new__(cls, *args, **kwargs):
        if not ChromeDriverManager.installed:
            ChromeDriverManager(*args, **kwargs).install()
        if not ChromeDriverManager.selenium_patched:
            ChromeDriverManager(*args, **kwargs).patch_selenium_webdriver()

        instance = object.__new__(_ChromeOptions)
        instance.__init__()
        instance.add_argument("start-maximized")
        instance.add_experimental_option("excludeSwitches", ["enable-automation"])
        instance.add_argument("--disable-blink-features=AutomationControlled")
        return instance


class ChromeDriverManager(object):
    installed = False
    selenium_patched = False
    target_version = None

    DL_BASE = "https://chromedriver.storage.googleapis.com/"

    def __init__(self, executable_path=None, target_version=None, *args, **kwargs):

        _platform = sys.platform

        if TARGET_VERSION:
            # use global if set
            self.target_version = TARGET_VERSION

        if target_version:
            # use explicitly passed target
            self.target_version = target_version  # user override

        if not self.target_version:
            # none of the above (default) and just get current version
            self.target_version = self.get_release_version_number().version[
                0
            ]  # only major version int

        self._base = base_ = "chromedriver{}"

        exe_name = self._base
        if _platform in ("win32",):
            exe_name = base_.format(".exe")
        if _platform in ("linux",):
            _platform += "64"
            exe_name = exe_name.format("")
        if _platform in ("darwin",):
            _platform = "mac64"
            exe_name = exe_name.format("")
        self.platform = _platform
        self.executable_path = executable_path or exe_name
        self._exe_name = exe_name

    def patch_selenium_webdriver(self_):
        """
        Patches selenium package Chrome, ChromeOptions classes for current session

        :return:
        """
        import selenium.webdriver.chrome.service
        import selenium.webdriver

        selenium.webdriver.Chrome = Chrome
        selenium.webdriver.ChromeOptions = ChromeOptions
        logger.info("Selenium patched. Safe to import Chrome / ChromeOptions")
        self_.__class__.selenium_patched = True

    def install(self, patch_selenium=True):
        """
        Initialize the patch

        This will:
         download chromedriver if not present
         patch the downloaded chromedriver
         patch selenium package if <patch_selenium> is True (default)

        :param patch_selenium: patch selenium webdriver classes for Chrome and ChromeDriver (for current python session)
        :return:
        """
        if not os.path.exists(self.executable_path):
            self.fetch_chromedriver()
        if not self.__class__.installed:
            if self.patch_binary():
                self.__class__.installed = True

        if patch_selenium:
            self.patch_selenium_webdriver()

    def get_release_version_number(self):
        """
        Gets the latest major version available, or the latest major version of self.target_version if set explicitly.

        :return: version string
        """
        path = (
            "LATEST_RELEASE"
            if not self.target_version
            else f"LATEST_RELEASE_{self.target_version}"
        )
        return LooseVersion(urlopen(self.__class__.DL_BASE + path).read().decode())

    def fetch_chromedriver(self):
        """
        Downloads ChromeDriver from source and unpacks the executable

        :return: on success, name of the unpacked executable
        """
        base_ = self._base
        zip_name = base_.format(".zip")
        ver = self.get_release_version_number().vstring
        if os.path.exists(self.executable_path):
            return self.executable_path
        urlretrieve(
            f"{self.__class__.DL_BASE}{ver}/{base_.format(f'_{self.platform}')}.zip",
            filename=zip_name,
        )
        with zipfile.ZipFile(zip_name) as zf:
            zf.extract(self._exe_name)
        os.remove(zip_name)
        if sys.platform != "win32":
            os.chmod(self._exe_name, 0o755)
        return self._exe_name

    @staticmethod
    def random_cdc():
        cdc = random.choices(string.ascii_lowercase, k=26)
        cdc[-6:-4] = map(str.upper, cdc[-6:-4])
        cdc[2] = cdc[0]
        cdc[3] = "_"
        return "".join(cdc).encode()

    def patch_binary(self):
        """
        Patches the ChromeDriver binary

        :return: False on failure, binary name on success
        """
        linect = 0
        replacement = self.random_cdc()
        with io.open(self.executable_path, "r+b") as fh:
            for line in iter(lambda: fh.readline(), b""):
                if b"cdc_" in line:
                    fh.seek(-len(line), 1)
                    newline = re.sub(b"cdc_.{22}", replacement, line)
                    fh.write(newline)
                    linect += 1
            return linect


def install(executable_path=None, target_version=None, *args, **kwargs):
    ChromeDriverManager(executable_path, target_version, *args, **kwargs).install()
@@ -1,112 +1,112 @@
#!/usr/bin/env python3
# this module is part of undetected_chromedriver

import json
import logging

import requests
import websockets

log = logging.getLogger(__name__)


class CDPObject(dict):
    def __init__(self, *a, **k):
        super().__init__(*a, **k)
        self.__dict__ = self
        for k in self.__dict__:
            if isinstance(self.__dict__[k], dict):
                self.__dict__[k] = CDPObject(self.__dict__[k])
            elif isinstance(self.__dict__[k], list):
                for i in range(len(self.__dict__[k])):
                    if isinstance(self.__dict__[k][i], dict):
                        self.__dict__[k][i] = CDPObject(self)

    def __repr__(self):
        tpl = f"{self.__class__.__name__}(\n\t{{}}\n\t)"
        return tpl.format("\n  ".join(f"{k} = {v}" for k, v in self.items()))


class PageElement(CDPObject):
    pass


class CDP:
    log = logging.getLogger("CDP")

    endpoints = CDPObject(
        {
            "json": "/json",
            "protocol": "/json/protocol",
            "list": "/json/list",
            "new": "/json/new?{url}",
            "activate": "/json/activate/{id}",
            "close": "/json/close/{id}",
        }
    )

    def __init__(self, options: "ChromeOptions"):  # noqa
        self.server_addr = "http://{0}:{1}".format(*options.debugger_address.split(":"))

        self._reqid = 0
        self._session = requests.Session()
        self._last_resp = None
        self._last_json = None

        resp = self.get(self.endpoints.json)  # noqa
        self.sessionId = resp[0]["id"]
        self.wsurl = resp[0]["webSocketDebuggerUrl"]

    def tab_activate(self, id=None):
        if not id:
            active_tab = self.tab_list()[0]
            id = active_tab.id  # noqa
            self.wsurl = active_tab.webSocketDebuggerUrl  # noqa
        return self.post(self.endpoints["activate"].format(id=id))

    def tab_list(self):
        retval = self.get(self.endpoints["list"])
        return [PageElement(o) for o in retval]

    def tab_new(self, url):
        return self.post(self.endpoints["new"].format(url=url))

    def tab_close_last_opened(self):
        sessions = self.tab_list()
        opentabs = [s for s in sessions if s["type"] == "page"]
        return self.post(self.endpoints["close"].format(id=opentabs[-1]["id"]))

    async def send(self, method: str, params: dict):
        self._reqid += 1
        async with websockets.connect(self.wsurl) as ws:
            await ws.send(
                json.dumps({"method": method, "params": params, "id": self._reqid})
            )
            self._last_resp = await ws.recv()
            self._last_json = json.loads(self._last_resp)
            self.log.info(self._last_json)

    def get(self, uri):
        resp = self._session.get(self.server_addr + uri)
        try:
            self._last_resp = resp
            self._last_json = resp.json()
        except Exception:
            return
        else:
            return self._last_json

    def post(self, uri, data: dict = None):
        if not data:
            data = {}
        resp = self._session.post(self.server_addr + uri, json=data)
        try:
            self._last_resp = resp
            self._last_json = resp.json()
        except Exception:
            return self._last_resp

    @property
    def last_json(self):
        return self._last_json
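The CDP helper above only needs an options-like object exposing debugger_address; everything else is plain HTTP against the DevTools endpoints plus one websocket call. A rough usage sketch, assuming Chrome was started with --remote-debugging-port=9222 (the DummyOptions class is illustrative, not part of the module):

import asyncio

class DummyOptions:
    debugger_address = "127.0.0.1:9222"  # hypothetical local DevTools endpoint

cdp = CDP(DummyOptions())
print([tab.url for tab in cdp.tab_list()])       # list the open pages
asyncio.run(cdp.send("Browser.getVersion", {}))  # one CDP command over the websocket
print(cdp.last_json)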
@@ -1,191 +1,193 @@
import asyncio
from collections.abc import Mapping
from collections.abc import Sequence
from functools import wraps
import os
import logging
import threading
import time
import traceback
from typing import Any
from typing import Awaitable
from typing import Callable
from typing import List
from typing import Optional


class Structure(dict):
    """
    This is a dict-like object structure, which you should subclass
    Only properties defined in the class context are used on initialization.

    See example
    """

    _store = {}

    def __init__(self, *a, **kw):
        """
        Instantiate a new instance.

        :param a:
        :param kw:
        """

        super().__init__()

        # auxiliar dict
        d = dict(*a, **kw)
        for k, v in d.items():
            if isinstance(v, Mapping):
                self[k] = self.__class__(v)
            elif isinstance(v, Sequence) and not isinstance(v, (str, bytes)):
                self[k] = [self.__class__(i) for i in v]
            else:
                self[k] = v
        super().__setattr__("__dict__", self)

    def __getattr__(self, item):
        return getattr(super(), item)

    def __getitem__(self, item):
        return super().__getitem__(item)

    def __setattr__(self, key, value):
        self.__setitem__(key, value)

    def __setitem__(self, key, value):
        super().__setitem__(key, value)

    def update(self, *a, **kw):
        super().update(*a, **kw)

    def __eq__(self, other):
        return frozenset(other.items()) == frozenset(self.items())

    def __hash__(self):
        return hash(frozenset(self.items()))

    @classmethod
    def __init_subclass__(cls, **kwargs):
        cls._store = {}

    def _normalize_strings(self):
        for k, v in self.copy().items():
            if isinstance(v, (str)):
                self[k] = v.strip()


def timeout(seconds=3, on_timeout: Optional[Callable[[callable], Any]] = None):
    def wrapper(func):
        @wraps(func)
        def wrapped(*args, **kwargs):
            def function_reached_timeout():
                if on_timeout:
                    on_timeout(func)
                else:
                    raise TimeoutError("function call timed out")

            t = threading.Timer(interval=seconds, function=function_reached_timeout)
            t.start()
            try:
                return func(*args, **kwargs)
            except:
                t.cancel()
                raise
            finally:
                t.cancel()

        return wrapped

    return wrapper


def test():
    import sys, os

    sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))
    import undetected_chromedriver as uc
    import threading

    def collector(
        driver: uc.Chrome,
        stop_event: threading.Event,
        on_event_coro: Optional[Callable[[List[str]], Awaitable[Any]]] = None,
        listen_events: Sequence = ("browser", "network", "performance"),
    ):
        def threaded(driver, stop_event, on_event_coro):
            async def _ensure_service_started():
                while (
                    getattr(driver, "service", False)
                    and getattr(driver.service, "process", False)
                    and driver.service.process.poll()
                ):
                    print("waiting for driver service to come back on")
                    await asyncio.sleep(0.05)
                # await asyncio.sleep(driver._delay or .25)

            async def get_log_lines(typ):
                await _ensure_service_started()
                return driver.get_log(typ)

            async def looper():
                while not stop_event.is_set():
                    log_lines = []
                    try:
                        for _ in listen_events:
                            try:
                                log_lines += await get_log_lines(_)
                            except:
                                if logging.getLogger().getEffectiveLevel() <= 10:
                                    traceback.print_exc()
                                continue
                        if log_lines and on_event_coro:
                            await on_event_coro(log_lines)
                    except Exception as e:
                        if logging.getLogger().getEffectiveLevel() <= 10:
                            traceback.print_exc()

            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            loop.run_until_complete(looper())

        t = threading.Thread(target=threaded, args=(driver, stop_event, on_event_coro))
        t.start()

    async def on_event(data):
        print("on_event")
        print("data:", data)

    def func_called(fn):
        def wrapped(*args, **kwargs):
            print(
                "func called! %s (args: %s, kwargs: %s)" % (fn.__name__, args, kwargs)
            )
            while driver.service.process and driver.service.process.poll() is not None:
                time.sleep(0.1)
            res = fn(*args, **kwargs)
            print("func completed! (result: %s)" % res)
            return res

        return wrapped

    logging.basicConfig(level=10)

    options = uc.ChromeOptions()
    options.set_capability(
        "goog:loggingPrefs", {"performance": "ALL", "browser": "ALL", "network": "ALL"}
    )

    driver = uc.Chrome(version_main=96, options=options)

    # driver.command_executor._request = timeout(seconds=1)(driver.command_executor._request)
    driver.command_executor._request = func_called(driver.command_executor._request)
    collector_stop = threading.Event()
    collector(driver, collector_stop, on_event)

    driver.get("https://nowsecure.nl")

    time.sleep(10)

    if os.name == "nt":
        driver.close()
    driver.quit()
@@ -1,75 +1,77 @@
import atexit
import logging
import multiprocessing
import os
import platform
import signal
from subprocess import PIPE
from subprocess import Popen
import sys

CREATE_NEW_PROCESS_GROUP = 0x00000200
DETACHED_PROCESS = 0x00000008

REGISTERED = []


def start_detached(executable, *args):
    """
    Starts a fully independent subprocess (with no parent)
    :param executable: executable
    :param args: arguments to the executable, eg: ['--param1_key=param1_val', '-vvv' ...]
    :return: pid of the grandchild process
    """

    # create pipe
    reader, writer = multiprocessing.Pipe(False)

    # do not keep reference
    process = multiprocessing.Process(
        target=_start_detached,
        args=(executable, *args),
        kwargs={"writer": writer},
        daemon=True,
    )
    process.start()
    process.join()
    # receive pid from pipe
    pid = reader.recv()
    REGISTERED.append(pid)
    # close pipes
    writer.close()
    reader.close()
    process.close()

    return pid


def _start_detached(executable, *args, writer: multiprocessing.Pipe = None):
    # configure launch
    kwargs = {}
    if platform.system() == "Windows":
        kwargs.update(creationflags=DETACHED_PROCESS | CREATE_NEW_PROCESS_GROUP)
    elif sys.version_info < (3, 2):
        # assume posix
        kwargs.update(preexec_fn=os.setsid)
    else:  # Python 3.2+ and Unix
        kwargs.update(start_new_session=True)

    # run
    p = Popen([executable, *args], stdin=PIPE, stdout=PIPE, stderr=PIPE, **kwargs)

    # send pid to pipe
    writer.send(p.pid)
    sys.exit()


def _cleanup():
    for pid in REGISTERED:
        try:
            logging.getLogger(__name__).debug("cleaning up pid %d " % pid)
            os.kill(pid, signal.SIGTERM)
        except:  # noqa
            pass


atexit.register(_cleanup)
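start_detached above hands the real Popen off to a throwaway multiprocessing.Process so the launched browser outlives the Python parent; the only thing that crosses back over the pipe is the pid. A minimal usage sketch (the Chrome path and flags are illustrative):

# Launch a browser with no parent process and keep only its pid for later cleanup.
pid = start_detached("/usr/bin/google-chrome", "--headless=new", "--remote-debugging-port=9222")
print("detached browser pid:", pid)
# _cleanup() is registered with atexit, so every pid in REGISTERED receives SIGTERM on interpreter exit.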
@@ -1,70 +1,85 @@
#!/usr/bin/env python3
# this module is part of undetected_chromedriver


import json
import os

from selenium.webdriver.chromium.options import ChromiumOptions as _ChromiumOptions


class ChromeOptions(_ChromiumOptions):
    _session = None
    _user_data_dir = None

    @property
    def user_data_dir(self):
        return self._user_data_dir

    @user_data_dir.setter
    def user_data_dir(self, path: str):
        """
        Sets the browser profile folder to use, or creates a new profile
        at given <path>.

        Parameters
        ----------
        path: str
            the path to a chrome profile folder
            if it does not exist, a new profile will be created at given location
        """
        apath = os.path.abspath(path)
        self._user_data_dir = os.path.normpath(apath)

    @staticmethod
    def _undot_key(key, value):
        """turn a (dotted key, value) into a proper nested dict"""
        if "." in key:
            key, rest = key.split(".", 1)
            value = ChromeOptions._undot_key(rest, value)
        return {key: value}

    @staticmethod
    def _merge_nested(a, b):
        """
        merges b into a
        leaf values in a are overwritten with values from b
        """
        for key in b:
            if key in a:
                if isinstance(a[key], dict) and isinstance(b[key], dict):
                    ChromeOptions._merge_nested(a[key], b[key])
                    continue
            a[key] = b[key]
        return a

    def handle_prefs(self, user_data_dir):
        prefs = self.experimental_options.get("prefs")
        if prefs:
            user_data_dir = user_data_dir or self._user_data_dir
            default_path = os.path.join(user_data_dir, "Default")
            os.makedirs(default_path, exist_ok=True)

            # undot prefs dict keys
            undot_prefs = {}
            for key, value in prefs.items():
                undot_prefs = self._merge_nested(
                    undot_prefs, self._undot_key(key, value)
                )

            prefs_file = os.path.join(default_path, "Preferences")
            if os.path.exists(prefs_file):
                with open(prefs_file, encoding="latin1", mode="r") as f:
                    undot_prefs = self._merge_nested(json.load(f), undot_prefs)

            with open(prefs_file, encoding="latin1", mode="w") as f:
                json.dump(undot_prefs, f)

            # remove the experimental_options to avoid an error
            del self._experimental_options["prefs"]

    @classmethod
    def from_options(cls, options):
        o = cls()
        o.__dict__.update(options.__dict__)
        return o
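The combination of _undot_key and _merge_nested above is what lets callers pass Chrome preferences as dotted keys; the sketch below shows the nested shape they end up with inside the Preferences file (the preference names and values are illustrative):

prefs = {
    "profile.default_content_setting_values.images": 2,
    "profile.password_manager_enabled": False,
}
nested = {}
for key, value in prefs.items():
    nested = ChromeOptions._merge_nested(nested, ChromeOptions._undot_key(key, value))
# nested == {"profile": {"default_content_setting_values": {"images": 2},
#                        "password_manager_enabled": False}}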
@@ -1,276 +1,473 @@
#!/usr/bin/env python3
# this module is part of undetected_chromedriver

from packaging.version import Version as LooseVersion
import io
import json
import logging
import os
import pathlib
import platform
import random
import re
import shutil
import string
import subprocess
import sys
import time
from urllib.request import urlopen
from urllib.request import urlretrieve
import zipfile
from multiprocessing import Lock

logger = logging.getLogger(__name__)

IS_POSIX = sys.platform.startswith(("darwin", "cygwin", "linux", "linux2", "freebsd"))


class Patcher(object):
    lock = Lock()
    exe_name = "chromedriver%s"

    platform = sys.platform
    if platform.endswith("win32"):
        d = "~/appdata/roaming/undetected_chromedriver"
    elif "LAMBDA_TASK_ROOT" in os.environ:
        d = "/tmp/undetected_chromedriver"
    elif platform.startswith(("linux", "linux2")):
        d = "~/.local/share/undetected_chromedriver"
    elif platform.endswith("darwin"):
        d = "~/Library/Application Support/undetected_chromedriver"
    else:
        d = "~/.undetected_chromedriver"
    data_path = os.path.abspath(os.path.expanduser(d))

    def __init__(
        self,
        executable_path=None,
        force=False,
        version_main: int = 0,
        user_multi_procs=False,
    ):
        """
        Args:
            executable_path: None = automatic
                             a full file path to the chromedriver executable
            force: False
                terminate processes which are holding lock
            version_main: 0 = auto
                specify main chrome version (rounded, ex: 82)
        """
        self.force = force
        self._custom_exe_path = False
        prefix = "undetected"
        self.user_multi_procs = user_multi_procs

        try:
            # Try to convert version_main into an integer
            version_main_int = int(version_main)
            # check if version_main_int is less than or equal to e.g 114
            self.is_old_chromedriver = version_main and version_main_int <= 114
        except (ValueError, TypeError):
            # Check not running inside Docker
            if not os.path.exists("/app/chromedriver"):
                # If the conversion fails, log an error message
                logging.info("version_main cannot be converted to an integer")
            # Set self.is_old_chromedriver to False if the conversion fails
            self.is_old_chromedriver = False

        # Needs to be called before self.exe_name is accessed
        self._set_platform_name()

        if not os.path.exists(self.data_path):
            os.makedirs(self.data_path, exist_ok=True)

        if not executable_path:
            if sys.platform.startswith("freebsd"):
                self.executable_path = os.path.join(
                    self.data_path, self.exe_name
                )
            else:
                self.executable_path = os.path.join(
                    self.data_path, "_".join([prefix, self.exe_name])
                )

        if not IS_POSIX:
            if executable_path:
                if not executable_path[-4:] == ".exe":
                    executable_path += ".exe"

        self.zip_path = os.path.join(self.data_path, prefix)

        if not executable_path:
            if not self.user_multi_procs:
                self.executable_path = os.path.abspath(
                    os.path.join(".", self.executable_path)
                )

        if executable_path:
            self._custom_exe_path = True
            self.executable_path = executable_path

        # Set the correct repository to download the Chromedriver from
        if self.is_old_chromedriver:
            self.url_repo = "https://chromedriver.storage.googleapis.com"
        else:
            self.url_repo = "https://googlechromelabs.github.io/chrome-for-testing"

        self.version_main = version_main
        self.version_full = None

    def _set_platform_name(self):
        """
        Set the platform and exe name based on the platform undetected_chromedriver is running on
        in order to download the correct chromedriver.
        """
        if self.platform.endswith("win32"):
            self.platform_name = "win32"
            self.exe_name %= ".exe"
        if self.platform.endswith(("linux", "linux2")):
            self.platform_name = "linux64"
            self.exe_name %= ""
        if self.platform.endswith("darwin"):
            if self.is_old_chromedriver:
                self.platform_name = "mac64"
            else:
                self.platform_name = "mac-x64"
            self.exe_name %= ""
        if self.platform.startswith("freebsd"):
            self.platform_name = "freebsd"
            self.exe_name %= ""

    def auto(self, executable_path=None, force=False, version_main=None, _=None):
        """

        Args:
            executable_path:
            force:
            version_main:

        Returns:

        """
        p = pathlib.Path(self.data_path)
        if self.user_multi_procs:
            with Lock():
                files = list(p.rglob("*chromedriver*"))
                most_recent = max(files, key=lambda f: f.stat().st_mtime)
                files.remove(most_recent)
                list(map(lambda f: f.unlink(), files))
                if self.is_binary_patched(most_recent):
                    self.executable_path = str(most_recent)
                    return True

        if executable_path:
            self.executable_path = executable_path
            self._custom_exe_path = True

        if self._custom_exe_path:
            ispatched = self.is_binary_patched(self.executable_path)
            if not ispatched:
                return self.patch_exe()
            else:
                return

        if version_main:
            self.version_main = version_main
        if force is True:
            self.force = force

        if self.platform_name == "freebsd":
            chromedriver_path = shutil.which("chromedriver")

            if not os.path.isfile(chromedriver_path) or not os.access(chromedriver_path, os.X_OK):
                logging.error("Chromedriver not installed!")
                return

            version_path = os.path.join(os.path.dirname(self.executable_path), "version.txt")

            process = os.popen(f'"{chromedriver_path}" --version')
            chromedriver_version = process.read().split(' ')[1].split(' ')[0]
            process.close()

            current_version = None
            if os.path.isfile(version_path) or os.access(version_path, os.X_OK):
                with open(version_path, 'r') as f:
                    current_version = f.read()

            if current_version != chromedriver_version:
                logging.info("Copying chromedriver executable...")
                shutil.copy(chromedriver_path, self.executable_path)
                os.chmod(self.executable_path, 0o755)

                with open(version_path, 'w') as f:
                    f.write(chromedriver_version)

                logging.info("Chromedriver executable copied!")
        else:
            try:
|
os.unlink(self.executable_path)
|
||||||
|
except PermissionError:
|
||||||
def is_binary_patched(self, executable_path=None):
|
if self.force:
|
||||||
"""simple check if executable is patched.
|
self.force_kill_instances(self.executable_path)
|
||||||
|
return self.auto(force=not self.force)
|
||||||
:return: False if not patched, else True
|
try:
|
||||||
"""
|
if self.is_binary_patched():
|
||||||
executable_path = executable_path or self.executable_path
|
# assumes already running AND patched
|
||||||
with io.open(executable_path, "rb") as fh:
|
return True
|
||||||
for line in iter(lambda: fh.readline(), b""):
|
except PermissionError:
|
||||||
if b"cdc_" in line:
|
pass
|
||||||
return False
|
# return False
|
||||||
else:
|
except FileNotFoundError:
|
||||||
return True
|
pass
|
||||||
|
|
||||||
def patch_exe(self):
|
release = self.fetch_release_number()
|
||||||
"""
|
self.version_main = release.major
|
||||||
Patches the ChromeDriver binary
|
self.version_full = release
|
||||||
|
self.unzip_package(self.fetch_package())
|
||||||
:return: False on failure, binary name on success
|
|
||||||
"""
|
return self.patch()
|
||||||
logger.info("patching driver executable %s" % self.executable_path)
|
|
||||||
|
def driver_binary_in_use(self, path: str = None) -> bool:
|
||||||
linect = 0
|
"""
|
||||||
replacement = self.gen_random_cdc()
|
naive test to check if a found chromedriver binary is
|
||||||
with io.open(self.executable_path, "r+b") as fh:
|
currently in use
|
||||||
for line in iter(lambda: fh.readline(), b""):
|
|
||||||
if b"cdc_" in line:
|
Args:
|
||||||
fh.seek(-len(line), 1)
|
path: a string or PathLike object to the binary to check.
|
||||||
newline = re.sub(b"cdc_.{22}", replacement, line)
|
if not specified, we check use this object's executable_path
|
||||||
fh.write(newline)
|
"""
|
||||||
linect += 1
|
if not path:
|
||||||
return linect
|
path = self.executable_path
|
||||||
|
p = pathlib.Path(path)
|
||||||
def __repr__(self):
|
|
||||||
return "{0:s}({1:s})".format(
|
if not p.exists():
|
||||||
self.__class__.__name__,
|
raise OSError("file does not exist: %s" % p)
|
||||||
self.executable_path,
|
try:
|
||||||
)
|
with open(p, mode="a+b") as fs:
|
||||||
|
exc = []
|
||||||
def __del__(self):
|
try:
|
||||||
|
|
||||||
if self._custom_exe_path:
|
fs.seek(0, 0)
|
||||||
# if the driver binary is specified by user
|
except PermissionError as e:
|
||||||
# we assume it is important enough to not delete it
|
exc.append(e) # since some systems apprently allow seeking
|
||||||
return
|
# we conduct another test
|
||||||
else:
|
try:
|
||||||
timeout = 3 # stop trying after this many seconds
|
fs.readline()
|
||||||
t = time.monotonic()
|
except PermissionError as e:
|
||||||
while True:
|
exc.append(e)
|
||||||
now = time.monotonic()
|
|
||||||
if now - t > timeout:
|
if exc:
|
||||||
# we don't want to wait until the end of time
|
|
||||||
logger.debug(
|
return True
|
||||||
"could not unlink %s in time (%d seconds)"
|
return False
|
||||||
% (self.executable_path, timeout)
|
# ok safe to assume this is in use
|
||||||
)
|
except Exception as e:
|
||||||
break
|
# logger.exception("whoops ", e)
|
||||||
try:
|
pass
|
||||||
os.unlink(self.executable_path)
|
|
||||||
logger.debug("successfully unlinked %s" % self.executable_path)
|
def cleanup_unused_files(self):
|
||||||
break
|
p = pathlib.Path(self.data_path)
|
||||||
except (OSError, RuntimeError, PermissionError):
|
items = list(p.glob("*undetected*"))
|
||||||
time.sleep(0.1)
|
for item in items:
|
||||||
continue
|
try:
|
||||||
except FileNotFoundError:
|
item.unlink()
|
||||||
break
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def patch(self):
|
||||||
|
self.patch_exe()
|
||||||
|
return self.is_binary_patched()
|
||||||
|
|
||||||
|
def fetch_release_number(self):
|
||||||
|
"""
|
||||||
|
Gets the latest major version available, or the latest major version of self.target_version if set explicitly.
|
||||||
|
:return: version string
|
||||||
|
:rtype: LooseVersion
|
||||||
|
"""
|
||||||
|
# Endpoint for old versions of Chromedriver (114 and below)
|
||||||
|
if self.is_old_chromedriver:
|
||||||
|
path = f"/latest_release_{self.version_main}"
|
||||||
|
path = path.upper()
|
||||||
|
logger.debug("getting release number from %s" % path)
|
||||||
|
return LooseVersion(urlopen(self.url_repo + path).read().decode())
|
||||||
|
|
||||||
|
# Endpoint for new versions of Chromedriver (115+)
|
||||||
|
if not self.version_main:
|
||||||
|
# Fetch the latest version
|
||||||
|
path = "/last-known-good-versions-with-downloads.json"
|
||||||
|
logger.debug("getting release number from %s" % path)
|
||||||
|
with urlopen(self.url_repo + path) as conn:
|
||||||
|
response = conn.read().decode()
|
||||||
|
|
||||||
|
last_versions = json.loads(response)
|
||||||
|
return LooseVersion(last_versions["channels"]["Stable"]["version"])
|
||||||
|
|
||||||
|
# Fetch the latest minor version of the major version provided
|
||||||
|
path = "/latest-versions-per-milestone-with-downloads.json"
|
||||||
|
logger.debug("getting release number from %s" % path)
|
||||||
|
with urlopen(self.url_repo + path) as conn:
|
||||||
|
response = conn.read().decode()
|
||||||
|
|
||||||
|
major_versions = json.loads(response)
|
||||||
|
return LooseVersion(major_versions["milestones"][str(self.version_main)]["version"])
|
||||||
|
|
||||||
|
def parse_exe_version(self):
|
||||||
|
with io.open(self.executable_path, "rb") as f:
|
||||||
|
for line in iter(lambda: f.readline(), b""):
|
||||||
|
match = re.search(rb"platform_handle\x00content\x00([0-9.]*)", line)
|
||||||
|
if match:
|
||||||
|
return LooseVersion(match[1].decode())
|
||||||
|
|
||||||
|
def fetch_package(self):
|
||||||
|
"""
|
||||||
|
Downloads ChromeDriver from source
|
||||||
|
|
||||||
|
:return: path to downloaded file
|
||||||
|
"""
|
||||||
|
zip_name = f"chromedriver_{self.platform_name}.zip"
|
||||||
|
if self.is_old_chromedriver:
|
||||||
|
download_url = "%s/%s/%s" % (self.url_repo, str(self.version_full), zip_name)
|
||||||
|
else:
|
||||||
|
zip_name = zip_name.replace("_", "-", 1)
|
||||||
|
download_url = "https://storage.googleapis.com/chrome-for-testing-public/%s/%s/%s"
|
||||||
|
download_url %= (str(self.version_full), self.platform_name, zip_name)
|
||||||
|
|
||||||
|
logger.debug("downloading from %s" % download_url)
|
||||||
|
return urlretrieve(download_url)[0]
|
||||||
|
|
||||||
|
def unzip_package(self, fp):
|
||||||
|
"""
|
||||||
|
Does what it says
|
||||||
|
|
||||||
|
:return: path to unpacked executable
|
||||||
|
"""
|
||||||
|
exe_path = self.exe_name
|
||||||
|
if not self.is_old_chromedriver:
|
||||||
|
# The new chromedriver unzips into its own folder
|
||||||
|
zip_name = f"chromedriver-{self.platform_name}"
|
||||||
|
exe_path = os.path.join(zip_name, self.exe_name)
|
||||||
|
|
||||||
|
logger.debug("unzipping %s" % fp)
|
||||||
|
try:
|
||||||
|
os.unlink(self.zip_path)
|
||||||
|
except (FileNotFoundError, OSError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
os.makedirs(self.zip_path, mode=0o755, exist_ok=True)
|
||||||
|
with zipfile.ZipFile(fp, mode="r") as zf:
|
||||||
|
zf.extractall(self.zip_path)
|
||||||
|
os.rename(os.path.join(self.zip_path, exe_path), self.executable_path)
|
||||||
|
os.remove(fp)
|
||||||
|
shutil.rmtree
|
||||||
|
os.chmod(self.executable_path, 0o755)
|
||||||
|
return self.executable_path
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def force_kill_instances(exe_name):
|
||||||
|
"""
|
||||||
|
kills running instances.
|
||||||
|
:param: executable name to kill, may be a path as well
|
||||||
|
|
||||||
|
:return: True on success else False
|
||||||
|
"""
|
||||||
|
exe_name = os.path.basename(exe_name)
|
||||||
|
if IS_POSIX:
|
||||||
|
# Using shell=True for pidof, consider a more robust pid finding method if issues arise.
|
||||||
|
# pgrep can be an alternative: ["pgrep", "-f", exe_name]
|
||||||
|
# Or psutil if adding a dependency is acceptable.
|
||||||
|
command = f"pidof {exe_name}"
|
||||||
|
try:
|
||||||
|
result = subprocess.run(command, shell=True, capture_output=True, text=True, check=True)
|
||||||
|
pids = result.stdout.strip().split()
|
||||||
|
if pids:
|
||||||
|
subprocess.run(["kill", "-9"] + pids, check=False) # Changed from -f -9 to -9 as -f is not standard for kill
|
||||||
|
return True
|
||||||
|
return False # No PIDs found
|
||||||
|
except subprocess.CalledProcessError: # pidof returns 1 if no process found
|
||||||
|
return False # No process found
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"Error killing process on POSIX: {e}")
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
# TASKKILL /F /IM chromedriver.exe
|
||||||
|
result = subprocess.run(["taskkill", "/f", "/im", exe_name], check=False, capture_output=True)
|
||||||
|
# taskkill returns 0 if process was killed, 128 if not found.
|
||||||
|
return result.returncode == 0
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"Error killing process on Windows: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def gen_random_cdc():
|
||||||
|
cdc = random.choices(string.ascii_letters, k=27)
|
||||||
|
return "".join(cdc).encode()
|
||||||
|
|
||||||
|
def is_binary_patched(self, executable_path=None):
|
||||||
|
executable_path = executable_path or self.executable_path
|
||||||
|
try:
|
||||||
|
with io.open(executable_path, "rb") as fh:
|
||||||
|
return fh.read().find(b"undetected chromedriver") != -1
|
||||||
|
except FileNotFoundError:
|
||||||
|
return False
|
||||||
|
|
||||||
|
def patch_exe(self):
|
||||||
|
start = time.perf_counter()
|
||||||
|
logger.info("patching driver executable %s" % self.executable_path)
|
||||||
|
with io.open(self.executable_path, "r+b") as fh:
|
||||||
|
content = fh.read()
|
||||||
|
# match_injected_codeblock = re.search(rb"{window.*;}", content)
|
||||||
|
match_injected_codeblock = re.search(rb"\{window\.cdc.*?;\}", content)
|
||||||
|
if match_injected_codeblock:
|
||||||
|
target_bytes = match_injected_codeblock[0]
|
||||||
|
new_target_bytes = (
|
||||||
|
b'{console.log("undetected chromedriver 1337!")}'.ljust(
|
||||||
|
len(target_bytes), b" "
|
||||||
|
)
|
||||||
|
)
|
||||||
|
new_content = content.replace(target_bytes, new_target_bytes)
|
||||||
|
if new_content == content:
|
||||||
|
logger.warning(
|
||||||
|
"something went wrong patching the driver binary. could not find injection code block"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
logger.debug(
|
||||||
|
"found block:\n%s\nreplacing with:\n%s"
|
||||||
|
% (target_bytes, new_target_bytes)
|
||||||
|
)
|
||||||
|
fh.seek(0)
|
||||||
|
fh.write(new_content)
|
||||||
|
logger.debug(
|
||||||
|
"patching took us {:.2f} seconds".format(time.perf_counter() - start)
|
||||||
|
)
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return "{0:s}({1:s})".format(
|
||||||
|
self.__class__.__name__,
|
||||||
|
self.executable_path,
|
||||||
|
)
|
||||||
|
|
||||||
|
def __del__(self):
|
||||||
|
if self._custom_exe_path:
|
||||||
|
# if the driver binary is specified by user
|
||||||
|
# we assume it is important enough to not delete it
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
timeout = 3 # stop trying after this many seconds
|
||||||
|
t = time.monotonic()
|
||||||
|
now = lambda: time.monotonic()
|
||||||
|
while now() - t > timeout:
|
||||||
|
# we don't want to wait until the end of time
|
||||||
|
try:
|
||||||
|
if self.user_multi_procs:
|
||||||
|
break
|
||||||
|
os.unlink(self.executable_path)
|
||||||
|
logger.debug("successfully unlinked %s" % self.executable_path)
|
||||||
|
break
|
||||||
|
except (OSError, RuntimeError, PermissionError):
|
||||||
|
time.sleep(0.01)
|
||||||
|
continue
|
||||||
|
except FileNotFoundError:
|
||||||
|
break
|
||||||
|
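The rewritten Patcher above is normally driven through auto(): pick the download repository from the target major version, fetch the release number, download and unzip the driver, then patch the injected code block. A minimal sketch, assuming the class is importable as undetected_chromedriver.patcher.Patcher (the import path is not part of this hunk); uc.Chrome() runs this flow internally:

    # Illustrative sketch only; uc.Chrome() normally calls the patcher for you.
    from undetected_chromedriver.patcher import Patcher

    # version_main 115+ makes auto() use the chrome-for-testing JSON endpoints
    patcher = Patcher(version_main=115)
    patcher.auto()  # fetch release number, download, unzip, then patch_exe()
    print(patcher.executable_path, patcher.is_binary_patched())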
@@ -1,102 +1,99 @@
#!/usr/bin/env python3
# this module is part of undetected_chromedriver

import asyncio
import json
import logging
import threading

logger = logging.getLogger(__name__)


class Reactor(threading.Thread):
    def __init__(self, driver: "Chrome"):
        super().__init__()

        self.driver = driver
        self.loop = asyncio.new_event_loop()

        self.lock = threading.Lock()
        self.event = threading.Event()
        self.daemon = True
        self.handlers = {}

    def add_event_handler(self, method_name, callback: callable):
        """

        Parameters
        ----------
        event_name: str
            example "Network.responseReceived"

        callback: callable
            callable which accepts 1 parameter: the message object dictionary

        Returns
        -------

        """
        with self.lock:
            self.handlers[method_name.lower()] = callback

    @property
    def running(self):
        return not self.event.is_set()

    def run(self):
        try:
            asyncio.set_event_loop(self.loop)
            self.loop.run_until_complete(self.listen())
        except Exception as e:
            logger.warning("Reactor.run() => %s", e)

    async def _wait_service_started(self):
        while True:
            with self.lock:
                if (
                    getattr(self.driver, "service", None)
                    and getattr(self.driver.service, "process", None)
                    and self.driver.service.process.poll()
                ):
                    await asyncio.sleep(self.driver._delay or 0.25)
                else:
                    break

    async def listen(self):
        while self.running:
            await self._wait_service_started()
            await asyncio.sleep(1)

            try:
                with self.lock:
                    log_entries = self.driver.get_log("performance")

                for entry in log_entries:
                    try:
                        obj_serialized: str = entry.get("message")
                        obj = json.loads(obj_serialized)
                        message = obj.get("message")
                        method = message.get("method")

                        if "*" in self.handlers:
                            await self.loop.run_in_executor(
                                None, self.handlers["*"], message
                            )
                        elif method.lower() in self.handlers:
                            await self.loop.run_in_executor(
                                None, self.handlers[method.lower()], message
                            )

                        # print(type(message), message)
                    except Exception as e:
                        raise e from None

            except Exception as e:
                if "invalid session id" in str(e):
                    pass
                else:
                    logging.debug("exception ignored :", e)
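The Reactor polls the driver's "performance" log on its own asyncio loop and dispatches each decoded CDP message to the handler registered for its method (or to a "*" catch-all). A hedged usage sketch, assuming the undetected_chromedriver Chrome wrapper with enable_cdp_events=True, which starts this Reactor and exposes add_cdp_listener():

    # Illustrative sketch; event name and callback are examples.
    import undetected_chromedriver as uc

    def on_response(message: dict):
        # message is the decoded CDP event taken from the performance log
        params = message.get("params", {})
        print(message.get("method"), params.get("response", {}).get("url"))

    driver = uc.Chrome(enable_cdp_events=True)
    driver.add_cdp_listener("Network.responseReceived", on_response)
    driver.get("https://www.example.com")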
@@ -1,4 +0,0 @@
-# for backward compatibility
-import sys
-
-sys.modules[__name__] = sys.modules[__package__]
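The deleted shim relied on sys.modules aliasing: overwriting the submodule's own entry with its parent package makes legacy imports of that submodule resolve to the package itself. A generic sketch of the pattern outside this repo (module names hypothetical):

    # mypackage/legacy.py -- hypothetical module kept only for backward compatibility
    import sys

    sys.modules[__name__] = sys.modules[__package__]
    # afterwards, `import mypackage.legacy` hands callers `mypackage` itself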
@@ -1,37 +1,86 @@
from typing import List

from selenium.webdriver.common.by import By
import selenium.webdriver.remote.webelement


class WebElement(selenium.webdriver.remote.webelement.WebElement):
    def click_safe(self):
        super().click()
        self._parent.reconnect(0.1)

    def children(
        self, tag=None, recursive=False
    ) -> List[selenium.webdriver.remote.webelement.WebElement]:
        """
        returns direct child elements of current element
        :param tag: str, if supplied, returns <tag> nodes only
        """
        script = "return [... arguments[0].children]"
        if tag:
            script += ".filter( node => node.tagName === '%s')" % tag.upper()
        if recursive:
            return list(_recursive_children(self, tag))
        return list(self._parent.execute_script(script, self))


class UCWebElement(WebElement):
    """
    Custom WebElement class which makes it easier to view elements when
    working in an interactive environment.

    standard webelement repr:
    <selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>

    using this WebElement class:
    <WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>

    """

    def __init__(self, parent, id_):
        super().__init__(parent, id_)
        self._attrs = None

    @property
    def attrs(self):
        if not self._attrs:
            self._attrs = self._parent.execute_script(
                """
                var items = {};
                for (index = 0; index < arguments[0].attributes.length; ++index)
                {
                    items[arguments[0].attributes[index].name] = arguments[0].attributes[index].value
                };
                return items;
                """,
                self,
            )
        return self._attrs

    def __repr__(self):
        strattrs = " ".join([f'{k}="{v}"' for k, v in self.attrs.items()])
        if strattrs:
            strattrs = " " + strattrs
        return f"{self.__class__.__name__} <{self.tag_name}{strattrs}>"


def _recursive_children(element, tag: str = None, _results=None):
    """
    returns all children of <element> recursively

    :param element: `WebElement` object.
        find children below this <element>

    :param tag: str = None.
        if provided, return only <tag> elements. example: 'a', or 'img'

    :param _results: do not use!
    """
    results = _results or set()
    for element in element.children():
        if tag:
            if element.tag_name == tag:
                results.add(element)
        else:
            results.add(element)
        results |= _recursive_children(element, tag, results)
    return results
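WebElement.children() builds a small JavaScript snippet and optionally filters by tagName, while _recursive_children() walks the tree in Python. A short sketch using only the API added above, assuming a driver whose find_element() returns this WebElement subclass:

    # grab the direct <a> children of <body>, then every <img> anywhere below it
    body = driver.find_element("tag name", "body")
    links = body.children(tag="a")
    images = body.children(tag="img", recursive=True)
    print(len(links), len(images))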
src/utils.py
@@ -1,13 +1,19 @@
import json
import logging
import os
import platform
import re
import shutil
import sys
import tempfile
import urllib.parse

from selenium.webdriver.chrome.webdriver import WebDriver
import undetected_chromedriver as uc

FLARESOLVERR_VERSION = None
PLATFORM_VERSION = None
CHROME_EXE_PATH = None
CHROME_MAJOR_VERSION = None
USER_AGENT = None
XVFB_DISPLAY = None
@@ -22,30 +28,147 @@ def get_config_headless() -> bool:
    return os.environ.get('HEADLESS', 'true').lower() == 'true'


def get_config_disable_media() -> bool:
    return os.environ.get('DISABLE_MEDIA', 'false').lower() == 'true'


def get_flaresolverr_version() -> str:
    global FLARESOLVERR_VERSION
    if FLARESOLVERR_VERSION is not None:
        return FLARESOLVERR_VERSION

    package_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'package.json')
    if not os.path.isfile(package_path):
        package_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'package.json')
    with open(package_path) as f:
        FLARESOLVERR_VERSION = json.loads(f.read())['version']
    return FLARESOLVERR_VERSION


def get_current_platform() -> str:
    global PLATFORM_VERSION
    if PLATFORM_VERSION is not None:
        return PLATFORM_VERSION
    PLATFORM_VERSION = os.name
    return PLATFORM_VERSION


def create_proxy_extension(proxy: dict) -> str:
    parsed_url = urllib.parse.urlparse(proxy['url'])
    scheme = parsed_url.scheme
    host = parsed_url.hostname
    port = parsed_url.port
    username = proxy['username']
    password = proxy['password']
    manifest_json = """
    {
        "version": "1.0.0",
        "manifest_version": 3,
        "name": "Chrome Proxy",
        "permissions": [
            "proxy",
            "tabs",
            "storage",
            "webRequest",
            "webRequestAuthProvider"
        ],
        "host_permissions": [
            "<all_urls>"
        ],
        "background": {
            "service_worker": "background.js"
        },
        "minimum_chrome_version": "76.0.0"
    }
    """

    background_js = """
    var config = {
        mode: "fixed_servers",
        rules: {
            singleProxy: {
                scheme: "%s",
                host: "%s",
                port: %d
            },
            bypassList: ["localhost"]
        }
    };

    chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

    function callbackFn(details) {
        return {
            authCredentials: {
                username: "%s",
                password: "%s"
            }
        };
    }

    chrome.webRequest.onAuthRequired.addListener(
        callbackFn,
        { urls: ["<all_urls>"] },
        ['blocking']
    );
    """ % (
        scheme,
        host,
        port,
        username,
        password
    )

    proxy_extension_dir = tempfile.mkdtemp()

    with open(os.path.join(proxy_extension_dir, "manifest.json"), "w") as f:
        f.write(manifest_json)

    with open(os.path.join(proxy_extension_dir, "background.js"), "w") as f:
        f.write(background_js)

    return proxy_extension_dir


def get_webdriver(proxy: dict = None) -> WebDriver:
    global PATCHED_DRIVER_PATH, USER_AGENT
    logging.debug('Launching web browser...')

    # undetected_chromedriver
    options = uc.ChromeOptions()
    options.add_argument('--no-sandbox')
    options.add_argument('--window-size=1920,1080')
    options.add_argument('--disable-search-engine-choice-screen')
    # todo: this param shows a warning in chrome head-full
    options.add_argument('--disable-setuid-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    # this option removes the zygote sandbox (it seems that the resolution is a bit faster)
    options.add_argument('--no-zygote')
    # attempt to fix Docker ARM32 build
    IS_ARMARCH = platform.machine().startswith(('arm', 'aarch'))
    if IS_ARMARCH:
        options.add_argument('--disable-gpu-sandbox')
        options.add_argument('--ignore-certificate-errors')
        options.add_argument('--ignore-ssl-errors')

    language = os.environ.get('LANG', None)
    if language is not None:
        options.add_argument('--accept-lang=%s' % language)

    # Fix for Chrome 117 | https://github.com/FlareSolverr/FlareSolverr/issues/910
    if USER_AGENT is not None:
        options.add_argument('--user-agent=%s' % USER_AGENT)

    proxy_extension_dir = None
    if proxy and all(key in proxy for key in ['url', 'username', 'password']):
        proxy_extension_dir = create_proxy_extension(proxy)
        options.add_argument("--disable-features=DisableLoadExtensionCommandLineSwitch")
        options.add_argument("--load-extension=%s" % os.path.abspath(proxy_extension_dir))
    elif proxy and 'url' in proxy:
        proxy_url = proxy['url']
        logging.debug("Using webdriver proxy: %s", proxy_url)
        options.add_argument('--proxy-server=%s' % proxy_url)

    # note: headless mode is detected (headless = True)
    # we launch the browser in head-full mode with the window hidden
    windows_headless = False
    if get_config_headless():
@@ -53,6 +176,8 @@ def get_webdriver() -> WebDriver:
            windows_headless = True
        else:
            start_xvfb_display()
    # For normal headless mode:
    # options.add_argument('--headless')

    # if we are inside the Docker container, we avoid downloading the driver
    driver_exe_path = None
@@ -65,15 +190,29 @@ def get_webdriver() -> WebDriver:
        if PATCHED_DRIVER_PATH is not None:
            driver_exe_path = PATCHED_DRIVER_PATH

    # detect chrome path
    browser_executable_path = get_chrome_exe_path()

    # downloads and patches the chromedriver
    # if we don't set driver_executable_path it downloads, patches, and deletes the driver each time
    try:
        driver = uc.Chrome(options=options, browser_executable_path=browser_executable_path,
                           driver_executable_path=driver_exe_path, version_main=version_main,
                           windows_headless=windows_headless, headless=get_config_headless())
    except Exception as e:
        logging.error("Error starting Chrome: %s" % e)
        # No point in continuing if we cannot retrieve the driver
        raise e

    # save the patched driver to avoid re-downloads
    if driver_exe_path is None:
        PATCHED_DRIVER_PATH = os.path.join(driver.patcher.data_path, driver.patcher.exe_name)
        if PATCHED_DRIVER_PATH != driver.patcher.executable_path:
            shutil.copy(driver.patcher.executable_path, PATCHED_DRIVER_PATH)

    # clean up proxy extension directory
    if proxy_extension_dir is not None:
        shutil.rmtree(proxy_extension_dir)

    # selenium vanilla
    # options = webdriver.ChromeOptions()
@@ -86,23 +225,45 @@ def get_webdriver() -> WebDriver:
    return driver


def get_chrome_exe_path() -> str:
    global CHROME_EXE_PATH
    if CHROME_EXE_PATH is not None:
        return CHROME_EXE_PATH
    # linux pyinstaller bundle
    chrome_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'chrome', "chrome")
    if os.path.exists(chrome_path):
        if not os.access(chrome_path, os.X_OK):
            raise Exception(f'Chrome binary "{chrome_path}" is not executable. '
                            f'Please, extract the archive with "tar xzf <file.tar.gz>".')
        CHROME_EXE_PATH = chrome_path
        return CHROME_EXE_PATH
    # windows pyinstaller bundle
    chrome_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'chrome', "chrome.exe")
    if os.path.exists(chrome_path):
        CHROME_EXE_PATH = chrome_path
        return CHROME_EXE_PATH
    # system
    CHROME_EXE_PATH = uc.find_chrome_executable()
    return CHROME_EXE_PATH


def get_chrome_major_version() -> str:
    global CHROME_MAJOR_VERSION
    if CHROME_MAJOR_VERSION is not None:
        return CHROME_MAJOR_VERSION

    if os.name == 'nt':
        # Example: '104.0.5112.79'
        try:
            complete_version = extract_version_nt_executable(get_chrome_exe_path())
        except Exception:
            try:
                complete_version = extract_version_nt_registry()
            except Exception:
                # Example: '104.0.5112.79'
                complete_version = extract_version_nt_folder()
    else:
        chrome_path = get_chrome_exe_path()
        process = os.popen(f'"{chrome_path}" --version')
        # Example 1: 'Chromium 104.0.5112.79 Arch Linux\n'
        # Example 2: 'Google Chrome 104.0.5112.79 Arch Linux\n'
@@ -110,24 +271,32 @@ def get_chrome_major_version() -> str:
        process.close()

    CHROME_MAJOR_VERSION = complete_version.split('.')[0].split(' ')[-1]
    return CHROME_MAJOR_VERSION


def extract_version_nt_executable(exe_path: str) -> str:
    import pefile
    pe = pefile.PE(exe_path, fast_load=True)
    pe.parse_data_directories(
        directories=[pefile.DIRECTORY_ENTRY["IMAGE_DIRECTORY_ENTRY_RESOURCE"]]
    )
    return pe.FileInfo[0][0].StringTable[0].entries[b"FileVersion"].decode('utf-8')


def extract_version_nt_registry() -> str:
    stream = os.popen(
        'reg query "HKLM\\SOFTWARE\\Wow6432Node\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\Google Chrome"')
    output = stream.read()
    google_version = ''
    for letter in output[output.rindex('DisplayVersion    REG_SZ') + 24:]:
        if letter != '\n':
            google_version += letter
        else:
            break
    return google_version.strip()


def extract_version_nt_folder() -> str:
    # Check if the Chrome folder exists in the x32 or x64 Program Files folders.
    for i in range(2):
        path = 'C:\\Program Files' + (' (x86)' if i else '') + '\\Google\\Chrome\\Application'
@@ -135,7 +304,7 @@ def extract_version_folder() -> str:
            paths = [f.path for f in os.scandir(path) if f.is_dir()]
            for path in paths:
                filename = os.path.basename(path)
                pattern = r'\d+\.\d+\.\d+\.\d+'
                match = re.search(pattern, filename)
                if match and match.group():
                    # Found a Chrome version.
@@ -152,11 +321,15 @@ def get_user_agent(driver=None) -> str:
        if driver is None:
            driver = get_webdriver()
        USER_AGENT = driver.execute_script("return navigator.userAgent")
        # Fix for Chrome 117 | https://github.com/FlareSolverr/FlareSolverr/issues/910
        USER_AGENT = re.sub('HEADLESS', '', USER_AGENT, flags=re.IGNORECASE)
        return USER_AGENT
    except Exception as e:
        raise Exception("Error getting browser User-Agent. " + str(e))
    finally:
        if driver is not None:
            if PLATFORM_VERSION == "nt":
                driver.close()
            driver.quit()
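create_proxy_extension() writes a throw-away MV3 extension whose background service worker pins a fixed proxy server and answers onAuthRequired with the supplied credentials, which is why get_webdriver() loads it via --load-extension only when username and password are present. A minimal sketch of calling the helper path directly (the proxy values below are placeholders):

    # Illustrative sketch; proxy endpoint and credentials are made up.
    proxy = {
        'url': 'http://proxy.example.com:3128',
        'username': 'user',
        'password': 'pass',
    }
    driver = get_webdriver(proxy=proxy)
    try:
        driver.get('https://www.example.com')
    finally:
        driver.quit()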
@@ -1 +1 @@
-WebTest==3.0.0
+WebTest==3.0.7