Mirror of https://github.com/FlareSolverr/FlareSolverr.git (synced 2025-12-05 17:18:19 +01:00)
Compare commits
226 Commits
@@ -1,7 +1,5 @@
.git/
.github/
.idea/
bin/
dist/
node_modules/
resources/
.git/
.github/
.idea/
html_samples/
resources/
15  .eslintrc.js
@@ -1,15 +0,0 @@
module.exports = {
  env: {
    browser: true,
    commonjs: true,
    es2020: true
  },
  extends: [
    'standard'
  ],
  parserOptions: {
    ecmaVersion: 11
  },
  rules: {
  }
}
32  .github/ISSUE_TEMPLATE.md (vendored)
@@ -1,32 +0,0 @@
**Please use the search bar** at the top of the page and make sure you are not creating an already submitted issue.
Check closed issues as well, because your issue may have already been fixed.

### How to enable debug and html traces

[Follow the instructions from this wiki page](https://github.com/FlareSolverr/FlareSolverr/wiki/How-to-enable-debug-and-html-trace)

### Environment

* **FlareSolverr version**:
* **Last working FlareSolverr version**:
* **Operating system**:
* **Are you using Docker**: [yes/no]
* **FlareSolverr User-Agent (see log traces or / endpoint)**:
* **Are you using a proxy or VPN?** [yes/no]
* **Are you using Captcha Solver:** [yes/no]
* **If using captcha solver, which one:**
* **URL to test this issue:**

### Description

[List steps to reproduce the error and details on what happens and what you expected to happen]

### Logged Error Messages

[Place any relevant error messages you noticed from the logs here.]

[Make sure you attach the full logs with your personal information removed in case we need more information]

### Screenshots

[Place any screenshots of the issue here if needed]
78  .github/ISSUE_TEMPLATE/bug_report.yml (vendored, new file)
@@ -0,0 +1,78 @@
name: Bug report
description: Create a report of your issue
body:
  - type: checkboxes
    attributes:
      label: Have you checked our README?
      description: Please check the <a href="https://github.com/FlareSolverr/FlareSolverr/blob/master/README.md">README</a>.
      options:
        - label: I have checked the README
          required: true
  - type: checkboxes
    attributes:
      label: Have you followed our Troubleshooting?
      description: Please follow our <a href="https://github.com/FlareSolverr/FlareSolverr/wiki/Troubleshooting">Troubleshooting</a>.
      options:
        - label: I have followed your Troubleshooting
          required: true
  - type: checkboxes
    attributes:
      label: Is there already an issue for your problem?
      description: Please make sure you are not creating an already submitted <a href="https://github.com/FlareSolverr/FlareSolverr/issues">Issue</a>. Check closed issues as well, because your issue may have already been fixed.
      options:
        - label: I have checked older issues, open and closed
          required: true
  - type: checkboxes
    attributes:
      label: Have you checked the discussions?
      description: Please read our <a href="https://github.com/FlareSolverr/FlareSolverr/discussions">Discussions</a> before submitting your issue, some wider problems may be dealt with there.
      options:
        - label: I have read the Discussions
          required: true
  - type: input
    attributes:
      label: Have you ACTUALLY checked all these?
      description: Please do not waste our time and yours; these checks are there for a reason, it is not just so you can tick boxes for fun. If you type <b>YES</b> and it is clear you did not or have put in no effort, your issue will be closed and locked without comment. If you type <b>NO</b> but still open this issue, you will be permanently blocked for timewasting.
      placeholder: YES or NO
    validations:
      required: true
  - type: textarea
    attributes:
      label: Environment
      description: Please provide the details of the system FlareSolverr is running on.
      value: |
        - FlareSolverr version:
        - Last working FlareSolverr version:
        - Operating system:
        - Are you using Docker: [yes/no]
        - FlareSolverr User-Agent (see log traces or / endpoint):
        - Are you using a VPN: [yes/no]
        - Are you using a Proxy: [yes/no]
        - Are you using Captcha Solver: [yes/no]
        - If using captcha solver, which one:
        - URL to test this issue:
      render: markdown
    validations:
      required: true
  - type: textarea
    attributes:
      label: Description
      description: List steps to reproduce the error and details on what happens and what you expected to happen.
    validations:
      required: true
  - type: textarea
    attributes:
      label: Logged Error Messages
      description: |
        Place any relevant error messages you noticed from the logs here.
        Make sure you attach the full logs with your personal information removed in case we need more information.
        If you wish to provide debug logs, follow the instructions from this <a href="https://github.com/FlareSolverr/FlareSolverr/wiki/How-to-enable-debug-and-html-trace">wiki page</a>.
      render: text
    validations:
      required: true
  - type: textarea
    attributes:
      label: Screenshots
      description: Place any screenshots of the issue here if needed
    validations:
      required: false
8  .github/ISSUE_TEMPLATE/config.yml (vendored, new file)
@@ -0,0 +1,8 @@
blank_issues_enabled: false
contact_links:
  - name: Requesting new features or changes
    url: https://github.com/FlareSolverr/FlareSolverr/discussions
    about: Please create a new discussion topic, grouped under "Ideas".
  - name: Asking questions
    url: https://github.com/FlareSolverr/FlareSolverr/discussions
    about: Please create a new discussion topic, grouped under "Q&A".
13  .github/workflows/autotag.yml (vendored)
@@ -1,4 +1,4 @@
name: autotag
name: Autotag

on:
  push:
@@ -6,14 +6,13 @@ on:
      - "master"

jobs:
  build:
  tag-release:
    runs-on: ubuntu-latest
    steps:
      -
        name: Checkout
        uses: actions/checkout@v2
      -
        name: Auto Tag
      - name: Checkout repository
        uses: actions/checkout@v5

      - name: Auto Tag
        uses: Klemensas/action-autotag@stable
        with:
          GITHUB_TOKEN: "${{ secrets.GH_PAT }}"
74  .github/workflows/release-docker.yml (vendored)
@@ -1,53 +1,67 @@
name: release-docker
name: Docker release

on:
  push:
    tags:
      - 'v*.*.*'
      - "v*.*.*"
  pull_request:
    branches:
      - master

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  build:
  build-docker-images:
    if: ${{ !github.event.pull_request.head.repo.fork }}
    runs-on: ubuntu-latest
    steps:
      -
        name: Checkout
        uses: actions/checkout@v2
      -
        name: Downcase repo
      - name: Checkout repository
        uses: actions/checkout@v5

      - name: Downcase repo
        run: echo REPOSITORY=$(echo ${{ github.repository }} | tr '[:upper:]' '[:lower:]') >> $GITHUB_ENV
      -
        name: Docker meta

      - name: Docker meta
        id: docker_meta
        uses: crazy-max/ghaction-docker-meta@v1
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REPOSITORY }},ghcr.io/${{ env.REPOSITORY }}
          tag-sha: false
      -
        name: Set up QEMU
        uses: docker/setup-qemu-action@v1.0.1
      -
        name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v1
      -
        name: Login to DockerHub
        uses: docker/login-action@v1
          images: |
            ${{ env.REPOSITORY }},enable=${{ github.event_name != 'pull_request' }}
            ghcr.io/${{ env.REPOSITORY }}
          tags: |
            type=semver,pattern={{version}},prefix=v
            type=ref,event=pr
          flavor: |
            latest=auto

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to DockerHub
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      -
        name: Login to GitHub Container Registry
        uses: docker/login-action@v1

      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GH_PAT }}
      -
        name: Build and push
        uses: docker/build-push-action@v2

      - name: Build and push
        uses: docker/build-push-action@v6
        with:
          context: .
          file: ./Dockerfile
          platforms: linux/amd64,linux/arm/v7,linux/arm64
          push: ${{ github.event_name != 'pull_request' }}
          platforms: linux/386,linux/amd64,linux/arm/v7,linux/arm64/v8
          push: true
          tags: ${{ steps.docker_meta.outputs.tags }}
          labels: ${{ steps.docker_meta.outputs.labels }}
70  .github/workflows/release.yml (vendored)
@@ -1,55 +1,63 @@
name: release
name: Release

on:
  push:
    tags:
      - 'v*.*.*'
      - "v*.*.*"

jobs:
  build:
  create-release:
    name: Create release
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v2
      - name: Checkout repository
        uses: actions/checkout@v5
        with:
          fetch-depth: 0 # get all commits, branches and tags (required for the changelog)

      - name: Setup Node
        uses: actions/setup-node@v2
        with:
          node-version: '16'

      - name: Build artifacts
        run: |
          npm install
          npm run build
          npm run package
          fetch-depth: 0

      - name: Build changelog
        id: github_changelog
        run: |
          changelog=$(git log $(git tag | tail -2 | head -1)..HEAD --no-merges --oneline)
          changelog="${changelog//'%'/'%25'}"
          changelog="${changelog//$'\n'/'%0A'}"
          changelog="${changelog//$'\r'/'%0D'}"
          echo "##[set-output name=changelog;]${changelog}"
          echo "changelog<<EOF" >> $GITHUB_OUTPUT
          echo "$changelog" >> $GITHUB_OUTPUT
          echo "EOF" >> $GITHUB_OUTPUT

      - name: Create release
        id: create_release
        uses: actions/create-release@v1
        uses: softprops/action-gh-release@v2
        with:
          body: ${{ steps.github_changelog.outputs.changelog }}
        env:
          GITHUB_TOKEN: ${{ secrets.GH_PAT }}

  build-package:
    name: Build binaries
    needs: create-release
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, windows-latest]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v5
        with:
          tag_name: ${{ github.ref }}
          release_name: ${{ github.ref }}
          body: ${{ steps.github_changelog.outputs.changelog }}
          draft: false
          prerelease: false
          fetch-depth: 0

      - name: Setup Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.13"

      - name: Build artifacts
        run: |
          python -m pip install -r requirements.txt
          python -m pip install pyinstaller==6.16.0
          cd src
          python build_package.py

      - name: Upload release artifacts
        uses: alexellis/upload-assets@0.2.2
        uses: softprops/action-gh-release@v2
        with:
          files: ./dist/flaresolverr_*
        env:
          GITHUB_TOKEN: ${{ secrets.GH_PAT }}
        with:
          asset_paths: '["./bin/*.zip"]'
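The multi-line `GITHUB_OUTPUT` handoff used in the changelog step above can be reproduced outside the runner; a small sketch (it assumes a local checkout with at least two tags, and the temp file stands in for the runner-provided `$GITHUB_OUTPUT`):

```bash
# Simulate the runner's output file to inspect what the workflow step writes
export GITHUB_OUTPUT=$(mktemp)
changelog=$(git log $(git tag | tail -2 | head -1)..HEAD --no-merges --oneline)
echo "changelog<<EOF" >> $GITHUB_OUTPUT
echo "$changelog" >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT
# Shows the EOF-delimited multi-line value the "Create release" step consumes
cat $GITHUB_OUTPUT
```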
249  .gitignore (vendored)
@@ -1,126 +1,129 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*

# Diagnostic reports (https://nodejs.org/api/report.html)
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json

# Runtime data
pids
*.pid
*.seed
*.pid.lock

# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov

# Coverage directory used by tools like istanbul
coverage
*.lcov

# nyc test coverage
.nyc_output

# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
.grunt

# Bower dependency directory (https://bower.io/)
bower_components

# node-waf configuration
.lock-wscript

# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release

# Dependency directories
node_modules/
jspm_packages/

# Snowpack dependency directory (https://snowpack.dev/)
web_modules/

# TypeScript cache
*.tsbuildinfo

# Optional npm cache directory
.npm

# Optional eslint cache
.eslintcache

# Microbundle cache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/

# Optional REPL history
.node_repl_history

# Output of 'npm pack'
*.tgz

# Yarn Integrity file
.yarn-integrity

# dotenv environment variables file
.env
.env.test

# parcel-bundler cache (https://parceljs.org/)
.cache
.parcel-cache

# Next.js build output
.next
out

# Nuxt.js build / generate output
.nuxt
dist

# Gatsby files
.cache/
# Comment in the public line in if your project uses Gatsby and not Next.js
# https://nextjs.org/blog/next-9-1#public-directory-support
# public

# vuepress build output
.vuepress/dist

# Serverless directories
.serverless/

# FuseBox cache
.fusebox/

# DynamoDB Local files
.dynamodb/

# TernJS port file
.tern-port

# Stores VSCode versions used for testing VSCode extensions
.vscode-test

# yarn v2
.yarn/cache
.yarn/unplugged
.yarn/build-state.yml
.yarn/install-state.gz
.pnp.*

# IntelliJ IDEA
# Editors
.vscode/
.idea/
*.iml

# Project Development
testing/
# Vagrant
.vagrant/

# Binaries
bin/
# Mac/OSX
.DS_Store

# Windows
Thumbs.db

# Source for the following rules: https://raw.githubusercontent.com/github/gitignore/master/Python.gitignore
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
dist_chrome/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# node
node_modules/
470  CHANGELOG.md (new file)
@@ -0,0 +1,470 @@
# Changelog

## v3.4.6 (2025/11/29)
* Add disable image, css, fonts option with CDP. Thanks @Ananto30

## v3.4.5 (2025/11/11)
* Revert to Python v3.13

## v3.4.4 (2025/11/04)
* Bump dependencies, Chromium, and some other general fixes. Thanks @flowerey

## v3.4.3 (2025/10/28)
* Update proxy extension

## v3.4.2 (2025/10/09)
* Bump dependencies & CI actions. Thanks @flowerey
* Add optional wait time after resolving the challenge before returning. Thanks @kennedyoliveira
* Add proxy ENVs. Thanks @Robokishan
* Handle empty string and keys without value in postData. Thanks @eZ4RK0
* Add quote protection for password containing it. Thanks @warrenberberd
* Add returnScreenshot parameter to screenshot the final web page. Thanks @estebanthi
* Add log file support. Thanks @acg5159

## v3.4.1 (2025/09/15)
* Fix regex pattern syntax in utils.py
* Change access denied title check to use startswith

## v3.4.0 (2025/08/25)
* Modernize and upgrade application. Thanks @TheCrazyLex
* Remove disable software rasterizer option for ARM builds. Thanks @smrodman83

## v3.3.25 (2025/06/14)
* Remove `use-gl` argument. Thanks @qwerty12
* u_c: remove apparent c&p typo. Thanks @ok3721
* Bump requirements

## v3.3.24 (2025/06/04)
* Remove hidden character

## v3.3.23 (2025/06/04)
* Update base image to bookworm. Thanks @rwjack

## v3.3.22 (2025/06/03)
* Disable search engine choice screen
* Fix headless=false stalling. Thanks @MAKMED1337
* Change from click to keys. Thanks @sh4dowb
* Don't open devtools
* Bump Chromium to v137 for build
* Bump requirements

## v3.3.21 (2024/06/26)
* Add challenge selector to catch reloading page on non-English systems
* Escape values for generated form used in request.post. Thanks @mynameisbogdan

## v3.3.20 (2024/06/21)
* maxTimeout should always be int
* Check not running in Docker before logging version_main error
* Update Cloudflare challenge and checkbox selectors. Thanks @tenettow & @21hsmw

## v3.3.19 (2024/05/23)
* Fix occasional headless issue on Linux when set to "false". Thanks @21hsmw

## v3.3.18 (2024/05/20)
* Fix LANG ENV for Linux
* Fix Chrome v124+ not closing on Windows. Thanks @RileyXX

## v3.3.17 (2024/04/09)
* Fix file descriptor leak in service on quit(). Thanks @zkulis

## v3.3.16 (2024/02/28)
* Fix the subprocess.STARTUPINFO() call. Thanks @ceconelo
* Add FreeBSD support. Thanks @Asthowen
* Use headless configuration properly. Thanks @hashworks

## v3.3.15 (2024/02/20)
* Fix looping challenges

## v3.3.14-hotfix2 (2024/02/17)
* Hotfix 2 - bad Chromium build, instances failed to terminate

## v3.3.14-hotfix (2024/02/17)
* Hotfix for Linux build - some Chrome files no longer exist

## v3.3.14 (2024/02/17)
* Update Chrome downloads. Thanks @opemvbs

## v3.3.13 (2024/01/07)
* Fix too many open files error

## v3.3.12 (2023/12/15)
* Fix looping challenges and invalid cookies

## v3.3.11 (2023/12/11)
* Update UC 3.5.4 & Selenium 4.15.2. Thanks @txtsd

## v3.3.10 (2023/11/14)
* Add LANG ENV - resolves issues with YGGtorrent

## v3.3.9 (2023/11/13)
* Fix for Docker build, capture TypeError

## v3.3.8 (2023/11/13)
* Fix headless=true for Chrome 117+. Thanks @NabiKAZ
* Support running Chrome 119 from source. Thanks @koleg and @Chris7X
* Fix "OSError: [WinError 6] The handle is invalid" on exit. Thanks @enesgorkemgenc

## v3.3.7 (2023/11/05)
* Bump to rebuild. Thanks @JoachimDorchies

## v3.3.6 (2023/09/15)
* Update checkbox selector, again

## v3.3.5 (2023/09/13)
* Change checkbox selector, support languages other than English

## v3.3.4 (2023/09/02)
* Update checkbox selector

## v3.3.3 (2023/08/31)
* Update undetected_chromedriver to v3.5.3

## v3.3.2 (2023/08/03)
* Fix URL domain in Prometheus exporter

## v3.3.1 (2023/08/03)
* Fix for Cloudflare verify checkbox
* Fix HEADLESS=false in Windows binary
* Fix Prometheus exporter for management and health endpoints
* Remove misleading stack trace when the verify checkbox is not found
* Revert "Update base Docker image to Debian Bookworm" #849
* Revert "Install Chromium 115 from Debian testing" #849

## v3.3.0 (2023/08/02)
* Fix for new Cloudflare detection. Thanks @cedric-bour for #845
* Add support for proxy authentication username/password. Thanks @jacobprice808 for #807
* Implement Prometheus metrics
* Fix Chromium Driver for Chrome / Chromium version > 114
* Use Chromium 115 in binary packages (Windows and Linux)
* Install Chromium 115 from Debian testing (Docker)
* Update base Docker image to Debian Bookworm
* Update Selenium 4.11.2
* Update pyinstaller 5.13.0
* Add more traces in build_package.py

## v3.2.2 (2023/07/16)
* Workaround for updated 'verify you are human' check

## v3.2.1 (2023/06/10)
* Kill dead Chrome processes in Windows
* Fix Chrome GL errors in ASUSTOR NAS

## v3.2.0 (2023/05/23)
* Support "proxy" param in requests and sessions
* Support "cookies" param in requests
* Fix Chromium exec permissions in Linux package
* Update Python dependencies

## v3.1.2 (2023/04/02)
* Fix headless mode in macOS
* Remove redundant artifact from Windows binary package
* Bump Selenium dependency

## v3.1.1 (2023/03/25)
* Distribute binary executables in compressed package
* Add icon for binary executable
* Include information about supported architectures in the readme
* Check Python version on start

## v3.1.0 (2023/03/20)
* Build binaries for Linux x64 and Windows x64
* Sessions with auto-creation on fetch request and TTL
* Fix error trace: Crash Reports/pending No such file or directory
* Fix Waitress server error with asyncore_use_poll=true
* Attempt to fix Docker ARM32 build
* Print platform information on start up
* Add Fairlane challenge selector
* Update DDOS-GUARD title
* Update dependencies

## v3.0.4 (2023/03/07)
* Click on Cloudflare's 'Verify you are human' button if necessary

## v3.0.3 (2023/03/06)
* Update undetected_chromedriver version to 3.4.6

## v3.0.2 (2023/01/08)
* Detect Cloudflare blocked access
* Check Chrome / Chromium web browser is installed correctly

## v3.0.1 (2023/01/06)
* Kill Chromium processes properly to avoid defunct/zombie processes
* Update undetected-chromedriver
* Disable Zygote sandbox in Chromium browser
* Add more selectors to detect blocked access
* Include procps (ps), curl and vim packages in the Docker image

## v3.0.0 (2023/01/04)
* This is the first release of FlareSolverr v3. There are some breaking changes
* Docker images for linux/386, linux/amd64, linux/arm/v7 and linux/arm64/v8
* Replaced Firefox with Chrome
* Replaced NodeJS / Typescript with Python
* Replaced Puppeteer with Selenium
* No binaries for Linux / Windows. You have to use the Docker image or install from Source code
* No proxy support
* No session support

## v2.2.10 (2022/10/22)
* Detect DDoS-Guard through title content

## v2.2.9 (2022/09/25)
* Detect Cloudflare Access Denied
* Commit the complete changelog

## v2.2.8 (2022/09/17)
* Remove 30s delay and clean legacy code

## v2.2.7 (2022/09/12)
* Temporary fix: add 30s delay
* Update README.md

## v2.2.6 (2022/07/31)
* Fix Cloudflare detection in POST requests

## v2.2.5 (2022/07/30)
* Update GitHub actions to build executables with NodeJs 16
* Update Cloudflare selectors and add HTML samples
* Install Firefox 94 instead of the latest Nightly
* Update dependencies
* Upgrade Puppeteer (#396)

## v2.2.4 (2022/04/17)
* Detect DDoS-Guard challenge

## v2.2.3 (2022/04/16)
* Fix 2000 ms navigation timeout
* Update README.md (libseccomp2 package in Debian)
* Update README.md (clarify proxy parameter) (#307)
* Update NPM dependencies
* Disable Cloudflare ban detection

## v2.2.2 (2022/03/19)
* Fix ban detection. Resolves #330 (#336)

## v2.2.1 (2022/02/06)
* Fix max timeout error in some pages
* Avoid crashing in NodeJS 17 due to Unhandled promise rejection
* Improve proxy validation and debug traces
* Remove @types/puppeteer dependency

## v2.2.0 (2022/01/31)
* Increase default BROWSER_TIMEOUT=40000 (40 seconds)
* Fix Puppeteer deprecation warnings
* Update base Docker image Alpine 3.15 / NodeJS 16
* Build precompiled binaries with NodeJS 16
* Update Puppeteer and other dependencies
* Add support for Custom CloudFlare challenge
* Add support for DDoS-GUARD challenge

## v2.1.0 (2021/12/12)
* Add aarch64 to user agents to be replaced (#248)
* Fix SOCKSv4 and SOCKSv5 proxy. resolves #214 #220
* Remove redundant JSON key (postData) (#242)
* Make test URL configurable with TEST_URL env var. resolves #240
* Bypass new Cloudflare protection
* Update donation links

## v2.0.2 (2021/10/31)
* Fix SOCKS5 proxy. Resolves #214
* Replace Firefox ESR with a newer version
* Catch startup exceptions and give some advice
* Add env var BROWSER_TIMEOUT for slow systems
* Fix NPM warning in Docker images

## v2.0.1 (2021/10/24)
* Check user home dir before testing web browser installation

## v2.0.0 (2021/10/20)

FlareSolverr 2.0.0 is out with some important changes:

* It is capable of solving the automatic challenges of Cloudflare. CAPTCHAs (hCaptcha) cannot be resolved and the old solvers have been removed.
* The Chrome browser has been replaced by Firefox. This has caused some functionality to be removed. Parameters: `userAgent`, `headers`, `rawHtml` and `download` are no longer available.
* Included `proxy` support without user/password credentials. If you are writing your own integration with FlareSolverr, make sure your client uses the same User-Agent header and Proxy that FlareSolverr uses. Those values together with the Cookie are checked and detected by Cloudflare.
* FlareSolverr has been rewritten from scratch. From now on it should be easier to maintain and test.
* If you are using Jackett make sure you have version v0.18.1041 or higher. FlareSolverSharp v2.0.0 is out too.

Complete changelog:

* Bump version 2.0.0
* Set puppeteer timeout half of maxTimeout param. Resolves #180
* Add test for blocked IP
* Avoid reloading the page in case of error
* Improve Cloudflare detection
* Fix version
* Fix browser preferences and proxy
* Fix request.post method and clean error traces
* Use Firefox ESR for Docker images
* Improve Firefox start time and code clean up
* Improve bad request management and tests
* Build native packages with Firefox
* Update readme
* Improve Docker image and clean TODOs
* Add proxy support
* Implement request.post method for Firefox
* Code clean up, remove returnRawHtml, download, headers params
* Remove outdated captcha solvers
* Refactor the app to use Express server and Jest for tests
* Fix Cloudflare resolver for Linux ARM builds
* Fix Cloudflare resolver
* Replace Chrome web browser with Firefox
* Remove userAgent parameter since any modification is detected by CF
* Update dependencies
* Remove Puppeteer stealth plugin

## v1.2.9 (2021/08/01)
* Improve "Execution context was destroyed" error handling
* Implement returnRawHtml parameter. resolves #172 resolves #165
* Capture Docker stop signal. resolves #158
* Reduce Docker image size 20 MB
* Fix page reload after challenge is solved. resolves #162 resolves #143
* Avoid loading images/css/fonts to speed up page load
* Improve Cloudflare IP ban detection
* Fix vulnerabilities

## v1.2.8 (2021/06/01)
* Improve old JS challenge waiting. Resolves #129

## v1.2.7 (2021/06/01)
* Improvements in Cloudflare redirect detection. Resolves #140
* Fix installation instructions

## v1.2.6 (2021/05/30)
* Handle new Cloudflare challenge. Resolves #135 Resolves #134
* Provide reference Systemd unit file. Resolves #72
* Fix EACCES: permission denied, open '/tmp/flaresolverr.txt'. Resolves #120
* Configure timezone with TZ env var. Resolves #109
* Return the redirected URL in the response (#126)
* Show an error in hcaptcha-solver. Resolves #132
* Regenerate package-lock.json lockfileVersion 2
* Update issue template. Resolves #130
* Bump ws from 7.4.1 to 7.4.6 (#137)
* Bump hosted-git-info from 2.8.8 to 2.8.9 (#124)
* Bump lodash from 4.17.20 to 4.17.21 (#125)

## v1.2.5 (2021/04/05)
* Fix memory regression, close test browser
* Fix release-docker GitHub action

## v1.2.4 (2021/04/04)
* Include license in release zips. resolves #75
* Validate Chrome is working at startup
* Speedup Docker image build
* Add health check endpoint
* Update issue template
* Minor improvements in debug traces
* Validate environment variables at startup. resolves #101
* Add FlareSolverr logo. resolves #23

## v1.2.3 (2021/01/10)
* CI/CD: Generate release changelog from commits. resolves #34
* Update README.md
* Add donation links
* Simplify docker-compose.yml
* Allow to configure "none" captcha resolver
* Override docker-compose.yml variables via .env resolves #64 (#66)

## v1.2.2 (2021/01/09)
* Add documentation for precompiled binaries installation
* Add instructions to set environment variables in Windows
* Build Windows and Linux binaries. resolves #18
* Add release badge in the readme
* CI/CD: Generate release changelog from commits. resolves #34
* Add a notice about captcha solvers
* Add Chrome flag --disable-dev-shm-usage to fix crashes. resolves #45
* Fix Docker CLI documentation
* Add traces with captcha solver service. resolves #39
* Improve logic to detect Cloudflare captcha. resolves #48
* Move Cloudflare provider logic to its own class
* Simplify and document the "return only cookies" parameter
* Show message when debug log is enabled
* Update readme to add more clarifications. resolves #53 (#60)
* issue_template: typo fix (#52)

## v1.2.1 (2020/12/20)
* Change version to match release tag / 1.2.0 => v1.2.0
* CI/CD Publish release in GitHub repository. resolves #34
* Add welcome message in / endpoint
* Rewrite request timeout handling (maxTimeout) resolves #42
* Add http status for better logging
* Return an error when no selectors are found, #25
* Add issue template, fix #32
* Moving log.html right after loading the page and add one on reload, fix #30
* Update User-Agent to match chromium version, ref: #15 (#28)
* Update install from source code documentation
* Update readme to add Docker instructions (#20)
* Clean up readme (#19)
* Add docker-compose
* Change default log level to info

## v1.2.0 (2020/12/20)
* Fix User-Agent detected by Cloudflare (Docker ARM) resolves #15
* Include exception message in error response
* CI/CD: Rename GitHub Action build => publish
* Bump version
* Fix TypeScript compilation and bump minor version
* CI/CD: Bump minor version
* CI/CD: Configure GitHub Actions
* CI/CD: Configure GitHub Actions
* CI/CD: Bump minor version
* CI/CD: Configure Build GitHub Action
* CI/CD: Configure AutoTag GitHub Action (#14)
* CI/CD: Build the Docker images with GitHub Actions (#13)
* Update dependencies
* Backport changes from Cloudproxy (#11)
112  Dockerfile
@@ -1,29 +1,83 @@
FROM node:16-alpine3.15

# Install the web browser (package firefox-esr is available too)
RUN apk update && \
    apk add --no-cache firefox dumb-init && \
    rm -Rf /var/cache

# Copy FlareSolverr code
USER node
RUN mkdir -p /home/node/flaresolverr
WORKDIR /home/node/flaresolverr
COPY --chown=node:node package.json package-lock.json tsconfig.json install.js ./
COPY --chown=node:node src ./src/

# Install package. Skip installing the browser, we will use the installed package.
ENV PUPPETEER_PRODUCT=firefox \
    PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true \
    PUPPETEER_EXECUTABLE_PATH=/usr/bin/firefox
RUN npm install && \
    npm run build && \
    npm prune --production && \
    rm -rf /home/node/.npm

EXPOSE 8191
ENTRYPOINT ["/usr/bin/dumb-init", "--"]
CMD ["node", "./dist/server.js"]

# docker build -t flaresolverr:custom .
# docker run -p 8191:8191 -e LOG_LEVEL=debug flaresolverr:custom
FROM python:3.13-slim-bookworm AS builder

# Build dummy packages to skip installing them and their dependencies
RUN apt-get update \
    && apt-get install -y --no-install-recommends equivs \
    && equivs-control libgl1-mesa-dri \
    && printf 'Section: misc\nPriority: optional\nStandards-Version: 3.9.2\nPackage: libgl1-mesa-dri\nVersion: 99.0.0\nDescription: Dummy package for libgl1-mesa-dri\n' >> libgl1-mesa-dri \
    && equivs-build libgl1-mesa-dri \
    && mv libgl1-mesa-dri_*.deb /libgl1-mesa-dri.deb \
    && equivs-control adwaita-icon-theme \
    && printf 'Section: misc\nPriority: optional\nStandards-Version: 3.9.2\nPackage: adwaita-icon-theme\nVersion: 99.0.0\nDescription: Dummy package for adwaita-icon-theme\n' >> adwaita-icon-theme \
    && equivs-build adwaita-icon-theme \
    && mv adwaita-icon-theme_*.deb /adwaita-icon-theme.deb

FROM python:3.13-slim-bookworm

# Copy dummy packages
COPY --from=builder /*.deb /

# Install dependencies and create flaresolverr user
# You can test Chromium running this command inside the container:
#   xvfb-run -s "-screen 0 1600x1200x24" chromium --no-sandbox
# The error trace looks like this: "*** stack smashing detected ***: terminated"
# To check the package versions available you can use this command:
#   apt-cache madison chromium
WORKDIR /app
# Install dummy packages
RUN dpkg -i /libgl1-mesa-dri.deb \
    && dpkg -i /adwaita-icon-theme.deb \
    # Install dependencies
    && apt-get update \
    && apt-get install -y --no-install-recommends chromium chromium-common chromium-driver xvfb dumb-init \
       procps curl vim xauth \
    # Remove temporary files and hardware decoding libraries
    && rm -rf /var/lib/apt/lists/* \
    && rm -f /usr/lib/x86_64-linux-gnu/libmfxhw* \
    && rm -f /usr/lib/x86_64-linux-gnu/mfx/* \
    # Create flaresolverr user
    && useradd --home-dir /app --shell /bin/sh flaresolverr \
    && mv /usr/bin/chromedriver chromedriver \
    && chown -R flaresolverr:flaresolverr . \
    # Create config dir
    && mkdir /config \
    && chown flaresolverr:flaresolverr /config

VOLUME /config

# Install Python dependencies
COPY requirements.txt .
RUN pip install -r requirements.txt \
    # Remove temporary files
    && rm -rf /root/.cache

USER flaresolverr

RUN mkdir -p "/app/.config/chromium/Crash Reports/pending"

COPY src .
COPY package.json ../

EXPOSE 8191
EXPOSE 8192

# dumb-init avoids zombie chromium processes
ENTRYPOINT ["/usr/bin/dumb-init", "--"]

CMD ["/usr/local/bin/python", "-u", "/app/flaresolverr.py"]

# Local build
# docker build -t ngosang/flaresolverr:3.4.6 .
# docker run -p 8191:8191 ngosang/flaresolverr:3.4.6

# Multi-arch build
# docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
# docker buildx create --use
# docker buildx build -t ngosang/flaresolverr:3.4.6 --platform linux/386,linux/amd64,linux/arm/v7,linux/arm64/v8 .
# add --push to publish in DockerHub

# Test multi-arch build
# docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
# docker buildx create --use
# docker buildx build -t ngosang/flaresolverr:3.4.6 --platform linux/arm/v7 --load .
# docker run -p 8191:8191 --platform linux/arm/v7 ngosang/flaresolverr:3.4.6
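A quick way to sanity-check a local build, reusing the build and run commands from the comments above (the `curl` check is an assumption based on the `/` welcome endpoint mentioned in the changelog and issue template):

```bash
# Build and start the image locally, then hit the index endpoint
docker build -t ngosang/flaresolverr:3.4.6 .
docker run -d -p 8191:8191 ngosang/flaresolverr:3.4.6
# Should return a small status message including the version and User-Agent
curl http://localhost:8191/
```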
3  LICENSE
@@ -1,7 +1,6 @@
MIT License

Copyright (c) 2020 Diego Heras (ngosang)
Copyright (c) 2020 Noah Cardoza (NoahCardoza)
Copyright (c) 2025 Diego Heras (ngosang / ngosang@hotmail.es)

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
322  README.md
@@ -13,9 +13,9 @@ FlareSolverr is a proxy server to bypass Cloudflare and DDoS-GUARD protection.
## How it works

FlareSolverr starts a proxy server, and it waits for user requests in an idle state using few resources.
When some request arrives, it uses [puppeteer](https://github.com/puppeteer/puppeteer) with the
[stealth plugin](https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-stealth)
to create a headless browser (Firefox). It opens the URL with user parameters and waits until the Cloudflare challenge
When some request arrives, it uses [Selenium](https://www.selenium.dev) with the
[undetected-chromedriver](https://github.com/ultrafunkamsterdam/undetected-chromedriver)
to create a web browser (Chrome). It opens the URL with user parameters and waits until the Cloudflare challenge
is solved (or timeout). The HTML code and the cookies are sent back to the user, and those cookies can be used to
bypass Cloudflare using other HTTP clients.

@@ -33,20 +33,25 @@ It is recommended to install using a Docker container because the project depend
already included within the image.

Docker images are available in:
* GitHub Registry => https://github.com/orgs/FlareSolverr/packages/container/package/flaresolverr
* DockerHub => https://hub.docker.com/r/flaresolverr/flaresolverr

- GitHub Registry => https://github.com/orgs/FlareSolverr/packages/container/package/flaresolverr
- DockerHub => https://hub.docker.com/r/flaresolverr/flaresolverr

Supported architectures are:
| Architecture | Tag |
| :----: | --- |
| x86-64 | linux/amd64 |
| ARM64 | linux/arm64 |
| ARM32 | linux/arm/v7 |

We provide a `docker-compose.yml` configuration file. Clone this repository and execute `docker-compose up -d` to start
| Architecture | Tag |
| ------------ | ------------ |
| x86 | linux/386 |
| x86-64 | linux/amd64 |
| ARM32 | linux/arm/v7 |
| ARM64 | linux/arm64 |

We provide a `docker-compose.yml` configuration file. Clone this repository and execute
`docker-compose up -d` _(Compose V1)_ or `docker compose up -d` _(Compose V2)_ to start
the container.

If you prefer the `docker cli` execute the following command.

```bash
docker run -d \
  --name=flaresolverr \
@@ -56,27 +61,39 @@ docker run -d \
  ghcr.io/flaresolverr/flaresolverr:latest
```

If your host OS is Debian, make sure `libseccomp2` version is 2.5.x. You can check the version with `sudo apt-cache policy libseccomp2`
and update the package with `sudo apt install libseccomp2=2.5.1-1~bpo10+1` or `sudo apt install libseccomp2=2.5.1-1+deb11u1`.
Remember to restart the Docker daemon and the container after the update.
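For reference, a minimal Compose sketch equivalent to the `docker run` command above; this is an illustration, not necessarily the repository's actual `docker-compose.yml` (the `LOG_LEVEL` value and restart policy are assumptions):

```yaml
# Illustrative docker-compose sketch for FlareSolverr
version: "3.8"
services:
  flaresolverr:
    image: ghcr.io/flaresolverr/flaresolverr:latest
    container_name: flaresolverr
    environment:
      # LOG_LEVEL is the environment variable documented by the project; "info" is an assumed default
      - LOG_LEVEL=info
    ports:
      - "8191:8191"
    restart: unless-stopped
```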
### Precompiled binaries

> **Warning**
> Precompiled binaries are only available for x64 architecture. For other architectures see Docker images.

This is the recommended way for Windows users.
* Download the [FlareSolverr zip](https://github.com/FlareSolverr/FlareSolverr/releases) from the release's assets. It is available for Windows and Linux.
* Extract the zip file. FlareSolverr executable and firefox folder must be in the same directory.
* Execute FlareSolverr binary. In the environment variables section you can find how to change the configuration.

- Download the [FlareSolverr executable](https://github.com/FlareSolverr/FlareSolverr/releases) from the release's page. It is available for Windows x64 and Linux x64.
- Execute FlareSolverr binary. In the environment variables section you can find how to change the configuration.

### From source code

This is the recommended way for macOS users and for developers.
* Install [NodeJS](https://nodejs.org/) 16.
* Clone this repository and open a shell in that path.
* Run `export PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true` (Linux/macOS) or `set PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true` (Windows).
* Run `npm install` command to install FlareSolverr dependencies.
* Run `npm start` command to compile TypeScript code and start FlareSolverr.
> **Warning**
> Installing from source code only works for x64 architecture. For other architectures see Docker images.

If you get errors related to firefox not installed try running `node install.js` to install Firefox.
- Install [Python 3.13](https://www.python.org/downloads/).
- Install [Chrome](https://www.google.com/intl/en_us/chrome/) (all OS) or [Chromium](https://www.chromium.org/getting-involved/download-chromium/) (just Linux, it doesn't work in Windows) web browser.
- (Only in Linux) Install [Xvfb](https://en.wikipedia.org/wiki/Xvfb) package.
- (Only in macOS) Install [XQuartz](https://www.xquartz.org/) package.
- Clone this repository and open a shell in that path.
- Run `pip install -r requirements.txt` command to install FlareSolverr dependencies.
- Run `python src/flaresolverr.py` command to start FlareSolverr.

### From source code (FreeBSD/TrueNAS CORE)

- Run `pkg install chromium python313 py313-pip xorg-vfbserver` command to install the required dependencies.
- Clone this repository and open a shell in that path.
- Run `python3.13 -m pip install -r requirements.txt` command to install FlareSolverr dependencies.
- Run `python3.13 src/flaresolverr.py` command to start FlareSolverr.

### Systemd service
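A minimal sketch of what such a unit could look like when running from source (the user, install path, and Python location are assumptions for illustration; the repository's own `flaresolverr.service` is the reference):

```ini
# /etc/systemd/system/flaresolverr.service -- illustrative sketch only
[Unit]
Description=FlareSolverr proxy server
After=network.target

[Service]
# User and paths are assumptions; point them at your actual checkout
User=flaresolverr
WorkingDirectory=/opt/flaresolverr
ExecStart=/usr/bin/python3 /opt/flaresolverr/src/flaresolverr.py
Environment=LOG_LEVEL=info
Restart=on-failure

[Install]
WantedBy=multi-user.target
```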
@@ -84,17 +101,46 @@ We provide an example Systemd unit file `flaresolverr.service` as reference. You

## Usage

Example request:
Example Bash request:

```bash
curl -L -X POST 'http://localhost:8191/v1' \
  -H 'Content-Type: application/json' \
  --data-raw '{
  "cmd": "request.get",
  "url":"http://www.google.com/",
  "url": "http://www.google.com/",
  "maxTimeout": 60000
}'
```

Example Python request:

```py
import requests

url = "http://localhost:8191/v1"
headers = {"Content-Type": "application/json"}
data = {
    "cmd": "request.get",
    "url": "http://www.google.com/",
    "maxTimeout": 60000
}
response = requests.post(url, headers=headers, json=data)
print(response.text)
```

Example PowerShell request:

```ps1
$body = @{
    cmd = "request.get"
    url = "http://www.google.com/"
    maxTimeout = 60000
} | ConvertTo-Json

irm -UseBasicParsing 'http://localhost:8191/v1' -Headers @{"Content-Type"="application/json"} -Method Post -Body $body
```

### Commands

#### + `sessions.create`
@@ -105,10 +151,10 @@ cookies for the browser to use.

This also speeds up the requests since it won't have to launch a new browser instance for every request.

Parameter | Notes
|--|--|
session | Optional. The session ID that you want to be assigned to the instance. If it isn't set, a random UUID will be assigned.
proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. You must include the proxy schema in the URL: `http://`, `socks4://` or `socks5://`. Authorization (username/password) is not supported.
| Parameter | Notes |
| --------- | ----- |
| session | Optional. The session ID that you want to be assigned to the instance. If it isn't set, a random UUID will be assigned. |
| proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. You must include the proxy schema in the URL: `http://`, `socks4://` or `socks5://`. Authorization (username/password) is supported. Eg: `"proxy": {"url": "http://127.0.0.1:8888", "username": "testuser", "password": "testpass"}` |
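A complete `sessions.create` body exercising the new proxy authorization support might look like this (the session name, proxy address, and credentials are placeholder values); it is POSTed to the same `http://localhost:8191/v1` endpoint used in the examples above:

```json
{
  "cmd": "sessions.create",
  "session": "my_session",
  "proxy": {
    "url": "http://127.0.0.1:8888",
    "username": "testuser",
    "password": "testpass"
  }
}
```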
#### + `sessions.list`
|
||||
|
||||
@@ -120,11 +166,7 @@ Example response:
|
||||
|
||||
```json
|
||||
{
|
||||
"sessions": [
|
||||
"session_id_1",
|
||||
"session_id_2",
|
||||
"session_id_3..."
|
||||
]
|
||||
"sessions": ["session_id_1", "session_id_2", "session_id_3..."]
|
||||
}
|
||||
```
|
||||
|
||||
@@ -133,120 +175,164 @@ Example response:
|
||||
This will properly shutdown a browser instance and remove all files associated with it to free up resources for a new
|
||||
session. When you no longer need to use a session you should make sure to close it.
|
||||
|
||||
Parameter | Notes
|
||||
|--|--|
|
||||
session | The session ID that you want to be destroyed.
|
||||
| Parameter | Notes |
|
||||
| --------- | --------------------------------------------- |
|
||||
| session | The session ID that you want to be destroyed. |
|
||||
|
||||
#### + `request.get`

| Parameter | Notes |
| ------------------- | ----- |
| url | Mandatory |
| session | Optional. Will send the request from an existing browser instance. If one is not sent, it will create a temporary instance that will be destroyed immediately after the request is completed. |
| session_ttl_minutes | Optional. FlareSolverr will automatically rotate expired sessions based on the TTL provided in minutes. |
| maxTimeout | Optional, default value 60000. Max timeout to solve the challenge in milliseconds. |
| cookies | Optional. Will be used by the headless browser. Eg: `"cookies": [{"name": "cookie1", "value": "value1"}, {"name": "cookie2", "value": "value2"}]`. |
| returnOnlyCookies | Optional, default false. Only returns the cookies. Response data, headers and other parts of the response are removed. |
| returnScreenshot | Optional, default false. Captures a screenshot of the final rendered page after all challenges and waits are completed. The screenshot is returned as a Base64-encoded PNG string in the `screenshot` field of the response. |
| proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. You must include the proxy schema in the URL: `http://`, `socks4://` or `socks5://`. Authorization (username/password) is not supported. (When the `session` parameter is set, the proxy is ignored; a session-specific proxy can be set in `sessions.create`.) |
| waitInSeconds | Optional, default none. Length to wait in seconds after solving the challenge, and before returning the results. Useful to allow dynamic content to load. |
| disableMedia | Optional, default false. When true, FlareSolverr will prevent media resources (images, CSS, and fonts) from being loaded to speed up navigation. |
| tabs_till_verify | Optional, default none. Number of times the `Tab` key needs to be pressed to reach the Turnstile captcha, in order to verify it. After verifying the captcha, the result is stored in the solution under `turnstile_token`. |

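To make the parameters concrete, here is a hedged sketch of a `request.get` call from Python; the target URL and timeout below are placeholders, not values mandated by FlareSolverr:

```python
import requests

payload = {
    "cmd": "request.get",
    "url": "https://www.google.com/",  # placeholder target site
    "maxTimeout": 60000,               # milliseconds, as documented above
    "returnOnlyCookies": False,
}
response = requests.post("http://localhost:8191/v1", json=payload)
solution = response.json()["solution"]
print(solution["status"], len(solution["cookies"]))
```
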
> **Warning**
> If you want to use the Cloudflare clearance cookie in your scripts, make sure you use the FlareSolverr User-Agent too. If they don't match you will see the challenge.

Example response from running the `curl` above:

```json
{
  "solution": {
    "url": "https://www.google.com/?gws_rd=ssl",
    "status": 200,
    "headers": {
      "status": "200",
      "date": "Thu, 16 Jul 2020 04:15:49 GMT",
      "expires": "-1",
      "cache-control": "private, max-age=0",
      "content-type": "text/html; charset=UTF-8",
      "strict-transport-security": "max-age=31536000",
      "p3p": "CP=\"This is not a P3P policy! See g.co/p3phelp for more info.\"",
      "content-encoding": "br",
      "server": "gws",
      "content-length": "61587",
      "x-xss-protection": "0",
      "x-frame-options": "SAMEORIGIN",
      "set-cookie": "1P_JAR=2020-07-16-04; expires=Sat..."
    },
    "response": "<!DOCTYPE html>...",
    "cookies": [
      {
        "name": "NID",
        "value": "204=QE3Ocq15XalczqjuDy52HeseG3zAZuJzID3R57...",
        "domain": ".google.com",
        "path": "/",
        "expires": 1610684149.307722,
        "size": 178,
        "httpOnly": true,
        "secure": true,
        "session": false,
        "sameSite": "None"
      },
      {
        "name": "1P_JAR",
        "value": "2020-07-16-04",
        "domain": ".google.com",
        "path": "/",
        "expires": 1597464949.307626,
        "size": 19,
        "httpOnly": false,
        "secure": true,
        "session": false,
        "sameSite": "None"
      }
    ],
    "userAgent": "Windows NT 10.0; Win64; x64) AppleWebKit/5...",
    "turnstile_token": "03AGdBq24k3lK7JH2v8uN1T5F..."
  },
  "status": "ok",
  "message": "",
  "startTimestamp": 1594872947467,
  "endTimestamp": 1594872949617,
  "version": "1.0.0"
}
```

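As the warning above notes, the clearance cookies only work together with the FlareSolverr User-Agent. A sketch of reusing both fields from the `solution` object (field names taken from the response above; the endpoint and target URL are placeholders):

```python
import requests

FLARESOLVERR = "http://localhost:8191/v1"  # assumed default endpoint
TARGET = "https://www.google.com/"         # placeholder target

resp = requests.post(FLARESOLVERR, json={"cmd": "request.get", "url": TARGET, "maxTimeout": 60000})
solution = resp.json()["solution"]

# Reuse the solved cookies together with the matching User-Agent,
# otherwise the challenge will be served again.
cookies = {c["name"]: c["value"] for c in solution["cookies"]}
headers = {"User-Agent": solution["userAgent"]}

page = requests.get(TARGET, cookies=cookies, headers=headers)
print(page.status_code)
```
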

### + `request.post`

This works like `request.get`, with the addition of the `postData` parameter. Note that `tabs_till_verify` is currently only supported for GET requests and requires one extra argument.

| Parameter | Notes |
| --------- | ----- |
| postData | Must be a string with `application/x-www-form-urlencoded`. Eg: `a=b&c=d` |

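For completeness, a sketch of a form POST through FlareSolverr; the URL and form fields are illustrative placeholders:

```python
import requests

payload = {
    "cmd": "request.post",
    "url": "https://example.com/login",  # placeholder target
    "postData": "a=b&c=d",               # application/x-www-form-urlencoded string
    "maxTimeout": 60000,
}
response = requests.post("http://localhost:8191/v1", json=payload)
print(response.json()["status"])
```
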
## Environment variables

| Name | Default | Notes |
| ------------------ | ---------------------- | ----- |
| LOG_LEVEL | info | Verbosity of the logging. Use `LOG_LEVEL=debug` for more information. |
| LOG_FILE | none | Path to capture log to file. Example: `/config/flaresolverr.log`. |
| LOG_HTML | false | Only for debugging. If `true`, all HTML that passes through the proxy will be logged to the console at `debug` level. |
| PROXY_URL | none | URL for proxy. Will be overwritten by the `request` or `sessions` proxy, if used. Example: `http://127.0.0.1:8080`. |
| PROXY_USERNAME | none | Username for proxy. Will be overwritten by the `request` or `sessions` proxy, if used. Example: `testuser`. |
| PROXY_PASSWORD | none | Password for proxy. Will be overwritten by the `request` or `sessions` proxy, if used. Example: `testpass`. |
| CAPTCHA_SOLVER | none | Captcha solving method. It is used when a captcha is encountered. See the Captcha Solvers section. |
| TZ | UTC | Timezone used in the logs and the web browser. Example: `TZ=Europe/London`. |
| LANG | none | Language used in the web browser. Example: `LANG=en_GB`. |
| HEADLESS | true | Only for debugging. To run the web browser in headless mode or visible. |
| DISABLE_MEDIA | false | To disable loading images, CSS, and other media in the web browser to save network bandwidth. |
| TEST_URL | https://www.google.com | FlareSolverr makes a request on start to make sure the web browser is working. You can change that URL if it is blocked in your country. |
| PORT | 8191 | Listening port. You don't need to change this if you are running on Docker. |
| HOST | 0.0.0.0 | Listening interface. You don't need to change this if you are running on Docker. |
| PROMETHEUS_ENABLED | false | Enable the Prometheus exporter. See the Prometheus section below. |
| PROMETHEUS_PORT | 8192 | Listening port for the Prometheus exporter. See the Prometheus section below. |

Environment variables are set differently depending on the operating system. Some examples:

- Docker: Take a look at the Docker section in this document. Environment variables can be set in the `docker-compose.yml` file or in the Docker CLI command.
- Linux: Run `export LOG_LEVEL=debug` and then run `flaresolverr` in the same shell.
- Windows: Open `cmd.exe`, run `set LOG_LEVEL=debug` and then run `flaresolverr.exe` in the same shell.

## Prometheus exporter

The Prometheus exporter for FlareSolverr is disabled by default. It can be enabled with the environment variable `PROMETHEUS_ENABLED`. If you are using Docker make sure you expose the `PROMETHEUS_PORT`.

Example metrics:

```shell
# HELP flaresolverr_request_total Total requests with result
# TYPE flaresolverr_request_total counter
flaresolverr_request_total{domain="nowsecure.nl",result="solved"} 1.0
# HELP flaresolverr_request_created Total requests with result
# TYPE flaresolverr_request_created gauge
flaresolverr_request_created{domain="nowsecure.nl",result="solved"} 1.690141657157109e+09
# HELP flaresolverr_request_duration Request duration in seconds
# TYPE flaresolverr_request_duration histogram
flaresolverr_request_duration_bucket{domain="nowsecure.nl",le="0.0"} 0.0
flaresolverr_request_duration_bucket{domain="nowsecure.nl",le="10.0"} 1.0
flaresolverr_request_duration_bucket{domain="nowsecure.nl",le="25.0"} 1.0
flaresolverr_request_duration_bucket{domain="nowsecure.nl",le="50.0"} 1.0
flaresolverr_request_duration_bucket{domain="nowsecure.nl",le="+Inf"} 1.0
flaresolverr_request_duration_count{domain="nowsecure.nl"} 1.0
flaresolverr_request_duration_sum{domain="nowsecure.nl"} 5.858
# HELP flaresolverr_request_duration_created Request duration in seconds
# TYPE flaresolverr_request_duration_created gauge
flaresolverr_request_duration_created{domain="nowsecure.nl"} 1.6901416571570296e+09
```

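A small sketch of checking the exporter locally, assuming `PROMETHEUS_ENABLED=true`, the default `PROMETHEUS_PORT` of 8192, and that the exporter serves plain-text metrics like the standard `prometheus_client` HTTP server:

```python
import urllib.request

# The exporter listens on its own port, separate from the API port (8191 by default).
with urllib.request.urlopen("http://localhost:8192/") as resp:
    metrics = resp.read().decode()

for line in metrics.splitlines():
    if line.startswith("flaresolverr_request_"):
        print(line)
```
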
## Captcha Solvers

> **Warning**
> At this time none of the captcha solvers work. You can check the status in the open issues. Any help is welcome.

Sometimes Cloudflare does not only give mathematical computations and browser tests; sometimes it also requires the user to
solve a captcha.
If this is the case, FlareSolverr will return the error `Captcha detected but no automatic solver is configured.`

FlareSolverr can be customized to solve the CAPTCHA automatically by setting the environment variable `CAPTCHA_SOLVER`
to the file name of one of the adapters inside the `/captcha` directory.

## Related projects

- C# implementation => https://github.com/FlareSolverr/FlareSolverrSharp

@@ -1,90 +0,0 @@
|
||||
const fs = require('fs')
|
||||
const path = require('path')
|
||||
const { execSync } = require('child_process')
|
||||
const archiver = require('archiver')
|
||||
const https = require('https')
|
||||
const puppeteer = require('puppeteer')
|
||||
const version = 'v' + require('./package.json').version;
|
||||
|
||||
(async () => {
|
||||
const builds = [
|
||||
{
|
||||
platform: 'linux',
|
||||
firefoxFolder: 'firefox',
|
||||
fsExec: 'flaresolverr-linux',
|
||||
fsZipExec: 'flaresolverr',
|
||||
fsZipName: 'linux-x64',
|
||||
fsLicenseName: 'LICENSE'
|
||||
},
|
||||
{
|
||||
platform: 'win64',
|
||||
firefoxFolder: 'firefox',
|
||||
fsExec: 'flaresolverr-win.exe',
|
||||
fsZipExec: 'flaresolverr.exe',
|
||||
fsZipName: 'windows-x64',
|
||||
fsLicenseName: 'LICENSE.txt'
|
||||
}
|
||||
// todo: this has to be built on macOS (hdiutil is required). Changes required in sessions.ts too.
|
||||
// {
|
||||
// platform: 'mac',
|
||||
// firefoxFolder: 'firefox',
|
||||
// fsExec: 'flaresolverr-macos',
|
||||
// fsZipExec: 'flaresolverr',
|
||||
// fsZipName: 'macos',
|
||||
// fsLicenseName: 'LICENSE'
|
||||
// }
|
||||
]
|
||||
|
||||
// generate executables
|
||||
console.log('Generating executables...')
|
||||
if (fs.existsSync('bin')) {
|
||||
fs.rmSync('bin', { recursive: true })
|
||||
}
|
||||
execSync('./node_modules/.bin/pkg -t node16-win-x64,node16-linux-x64 --out-path bin .')
|
||||
// execSync('./node_modules/.bin/pkg -t node16-win-x64,node16-mac-x64,node16-linux-x64 --out-path bin .')
|
||||
|
||||
// Puppeteer does not allow downloading specific Firefox revisions, just the last Nightly.
// With this script we can download any version.
|
||||
const revision = '94.0a1';
|
||||
const downloadHost = 'https://archive.mozilla.org/pub/firefox/nightly/2021/10/2021-10-01-09-33-23-mozilla-central';
|
||||
|
||||
// download firefox and zip together
|
||||
for (const os of builds) {
|
||||
console.log('Building ' + os.fsZipName + ' artifact')
|
||||
|
||||
// download firefox
|
||||
console.log(`Downloading firefox ${revision} for ${os.platform} ...`)
|
||||
const f = puppeteer.createBrowserFetcher({
|
||||
product: 'firefox',
|
||||
platform: os.platform,
|
||||
host: downloadHost,
|
||||
path: path.join(__dirname, 'bin', 'puppeteer')
|
||||
})
|
||||
await f.download(revision)
|
||||
|
||||
// compress in zip
|
||||
console.log('Compressing zip file...')
|
||||
const zipName = 'bin/flaresolverr-' + version + '-' + os.fsZipName + '.zip'
|
||||
const output = fs.createWriteStream(zipName)
|
||||
const archive = archiver('zip')
|
||||
|
||||
output.on('close', function () {
|
||||
console.log('File ' + zipName + ' created. Size: ' + archive.pointer() + ' bytes')
|
||||
})
|
||||
|
||||
archive.on('error', function (err) {
|
||||
throw err
|
||||
})
|
||||
|
||||
archive.pipe(output)
|
||||
|
||||
archive.file('LICENSE', { name: 'flaresolverr/' + os.fsLicenseName })
|
||||
archive.file('bin/' + os.fsExec, { name: 'flaresolverr/' + os.fsZipExec })
|
||||
archive.directory('bin/puppeteer/' + os.platform + '-' + revision + '/' + os.firefoxFolder, 'flaresolverr/firefox')
|
||||
if (os.platform === 'linux') {
|
||||
archive.file('flaresolverr.service', { name: 'flaresolverr/flaresolverr.service' })
|
||||
}
|
||||
|
||||
await archive.finalize()
|
||||
}
|
||||
})()
|
||||
@@ -7,9 +7,12 @@ services:
|
||||
container_name: flaresolverr
|
||||
environment:
|
||||
- LOG_LEVEL=${LOG_LEVEL:-info}
|
||||
- LOG_FILE=${LOG_FILE:-none}
|
||||
- LOG_HTML=${LOG_HTML:-false}
|
||||
- CAPTCHA_SOLVER=${CAPTCHA_SOLVER:-none}
|
||||
- TZ=Europe/London
|
||||
ports:
|
||||
- "${PORT:-8191}:8191"
|
||||
volumes:
|
||||
- /var/lib/flaresolver:/config
|
||||
restart: unless-stopped
|
||||
|
||||
install.js
@@ -1,40 +0,0 @@
|
||||
const fs = require('fs');
|
||||
const puppeteer = require('puppeteer');
|
||||
|
||||
(async () => {
|
||||
|
||||
// Puppeteer does not allow downloading specific Firefox revisions, just the last Nightly.
// With this script we can download any version.
|
||||
const revision = '94.0a1';
|
||||
const downloadHost = 'https://archive.mozilla.org/pub/firefox/nightly/2021/10/2021-10-01-09-33-23-mozilla-central';
|
||||
|
||||
// skip installation (for Dockerfile)
|
||||
if (process.env.PUPPETEER_EXECUTABLE_PATH) {
|
||||
console.log('Skipping Firefox installation because the environment variable "PUPPETEER_EXECUTABLE_PATH" is set.');
|
||||
return;
|
||||
}
|
||||
|
||||
// check if Firefox is already installed
|
||||
const f = puppeteer.createBrowserFetcher({
|
||||
product: 'firefox',
|
||||
host: downloadHost
|
||||
})
|
||||
if (fs.existsSync(f._getFolderPath(revision))) {
|
||||
console.log(`Firefox ${revision} already installed...`)
|
||||
return;
|
||||
}
|
||||
|
||||
console.log(`Installing firefox ${revision} ...`)
|
||||
const downloadPath = f._downloadsFolder;
|
||||
console.log(`Download path: ${downloadPath}`)
|
||||
if (fs.existsSync(downloadPath)) {
|
||||
console.log(`Removing previous downloads...`)
|
||||
fs.rmSync(downloadPath, { recursive: true })
|
||||
}
|
||||
|
||||
console.log(`Downloading firefox ${revision} ...`)
|
||||
await f.download(revision)
|
||||
|
||||
console.log('Installation complete...')
|
||||
|
||||
})()
|
||||
@@ -1,12 +0,0 @@
|
||||
module.exports = {
|
||||
// A list of paths to directories that Jest should use to search for files in
|
||||
roots: [
|
||||
"./src/"
|
||||
],
|
||||
// Compile Typescript
|
||||
transform: {
|
||||
'^.+\\.(ts|tsx)$': 'ts-jest'
|
||||
},
|
||||
// Default value for FlareSolverr maxTimeout is 60000
|
||||
testTimeout: 70000
|
||||
}
|
||||
package-lock.json (generated)
File diff suppressed because it is too large

package.json
@@ -1,46 +1,7 @@
|
||||
{
|
||||
"name": "flaresolverr",
|
||||
"version": "2.2.8",
|
||||
"description": "Proxy server to bypass Cloudflare protection.",
|
||||
"scripts": {
|
||||
"install": "node install.js",
|
||||
"start": "tsc && node ./dist/server.js",
|
||||
"build": "tsc",
|
||||
"dev": "nodemon -e ts --exec ts-node src/server.ts",
|
||||
"package": "tsc && node build-binaries.js",
|
||||
"test": "jest --runInBand"
|
||||
},
|
||||
"author": "Diego Heras (ngosang)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/ngosang/FlareSolverr"
|
||||
},
|
||||
"bin": {
|
||||
"flaresolverr": "dist/server.js"
|
||||
},
|
||||
"dependencies": {
|
||||
"await-timeout": "^1.1.1",
|
||||
"body-parser": "^1.20.0",
|
||||
"console-log-level": "^1.4.1",
|
||||
"express": "^4.18.1",
|
||||
"puppeteer": "^13.7.0",
|
||||
"uuid": "^8.3.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/await-timeout": "^0.3.1",
|
||||
"@types/body-parser": "^1.19.2",
|
||||
"@types/express": "^4.17.13",
|
||||
"@types/jest": "^28.1.6",
|
||||
"@types/node": "^18.6.2",
|
||||
"@types/supertest": "^2.0.12",
|
||||
"@types/uuid": "^8.3.4",
|
||||
"archiver": "^5.3.1",
|
||||
"nodemon": "^2.0.19",
|
||||
"pkg": "^5.8.0",
|
||||
"supertest": "^6.2.4",
|
||||
"ts-jest": "^28.0.7",
|
||||
"ts-node": "^10.9.1",
|
||||
"typescript": "^4.7.4"
|
||||
}
|
||||
"version": "3.4.6",
|
||||
"description": "Proxy server to bypass Cloudflare protection",
|
||||
"author": "Diego Heras (ngosang / ngosang@hotmail.es)",
|
||||
"license": "MIT"
|
||||
}
|
||||
|
||||
requirements.txt (new file)
@@ -0,0 +1,14 @@
|
||||
bottle==0.13.4
|
||||
waitress==3.0.2
|
||||
selenium==4.38.0
|
||||
func-timeout==4.3.5
|
||||
prometheus-client==0.23.1
|
||||
# Required by undetected_chromedriver
|
||||
requests==2.32.5
|
||||
certifi==2025.10.5
|
||||
websockets==15.0.1
|
||||
packaging==25.0
|
||||
# Only required for Linux and macOS
|
||||
xvfbwrapper==0.2.15; platform_system != "Windows"
|
||||
# Only required for Windows
|
||||
pefile==2024.8.26; platform_system == "Windows"
|
||||
resources/flaresolverr_logo.ico (new binary file, 8.8 KiB; not shown)

src/app.ts
@@ -1,83 +0,0 @@
|
||||
import log from './services/log'
|
||||
import {NextFunction, Request, Response} from 'express';
|
||||
import {getUserAgent} from "./services/sessions";
|
||||
import {controllerV1} from "./controllers/v1";
|
||||
|
||||
const express = require('express');
|
||||
const app = express();
|
||||
const bodyParser = require('body-parser');
|
||||
const version: string = 'v' + require('../package.json').version
|
||||
|
||||
// Convert request objects to JSON
|
||||
app.use(bodyParser.json({
|
||||
limit: '50mb',
|
||||
verify(req: Request, res: Response, buf: any) {
|
||||
req.body = buf;
|
||||
}
|
||||
}));
|
||||
|
||||
// Access log
|
||||
app.use(function(req: Request, res: Response, next: NextFunction) {
|
||||
if (req.url != '/health') {
|
||||
// count the request for the log prefix
|
||||
log.incRequests()
|
||||
// build access message
|
||||
let body = "";
|
||||
if (req.method == 'POST' && req.body) {
|
||||
body += " body: "
|
||||
try {
|
||||
body += JSON.stringify(req.body)
|
||||
} catch(e) {
|
||||
body += req.body
|
||||
}
|
||||
}
|
||||
log.info(`Incoming request => ${req.method} ${req.url}${body}`);
|
||||
}
|
||||
next();
|
||||
});
|
||||
|
||||
// *********************************************************************************************************************
|
||||
// Routes
|
||||
|
||||
// Show welcome message
|
||||
app.get("/", ( req: Request, res: Response ) => {
|
||||
res.send({
|
||||
"msg": "FlareSolverr is ready!",
|
||||
"version": version,
|
||||
"userAgent": getUserAgent()
|
||||
});
|
||||
});
|
||||
|
||||
// Health endpoint. this endpoint is special because it doesn't print traces
|
||||
app.get("/health", ( req: Request, res: Response ) => {
|
||||
res.send({
|
||||
"status": "ok"
|
||||
});
|
||||
});
|
||||
|
||||
// Controller v1
|
||||
app.post("/v1", async( req: Request, res: Response ) => {
|
||||
await controllerV1(req, res);
|
||||
});
|
||||
|
||||
// *********************************************************************************************************************
|
||||
|
||||
// Unknown paths or verbs
|
||||
app.use(function (req : Request, res : Response) {
|
||||
res.status(404)
|
||||
.send({"error": "Unknown resource or HTTP verb"})
|
||||
})
|
||||
|
||||
// Errors
|
||||
app.use(function (err: any, req: Request, res: Response, next: NextFunction) {
|
||||
if (err) {
|
||||
let msg = 'Invalid request: ' + err;
|
||||
msg = msg.replace("\n", "").replace("\r", "")
|
||||
log.error(msg)
|
||||
res.send({"error": msg})
|
||||
} else {
|
||||
next()
|
||||
}
|
||||
})
|
||||
|
||||
module.exports = app;
|
||||
src/bottle_plugins/__init__.py (new empty file)

src/bottle_plugins/error_plugin.py (new file)
@@ -0,0 +1,22 @@
|
||||
from bottle import response
|
||||
import logging
|
||||
|
||||
|
||||
def error_plugin(callback):
|
||||
"""
|
||||
Bottle plugin to handle exceptions
|
||||
https://stackoverflow.com/a/32764250
|
||||
"""
|
||||
|
||||
def wrapper(*args, **kwargs):
|
||||
try:
|
||||
actual_response = callback(*args, **kwargs)
|
||||
except Exception as e:
|
||||
logging.error(str(e))
|
||||
actual_response = {
|
||||
"error": str(e)
|
||||
}
|
||||
response.status = 500
|
||||
return actual_response
|
||||
|
||||
return wrapper
|
||||
src/bottle_plugins/logger_plugin.py (new file)
@@ -0,0 +1,23 @@
|
||||
from bottle import request, response
|
||||
import logging
|
||||
|
||||
|
||||
def logger_plugin(callback):
|
||||
"""
|
||||
Bottle plugin to use logging module
|
||||
https://bottlepy.org/docs/dev/plugindev.html
|
||||
|
||||
Wrap a Bottle request so that a log line is emitted after it's handled.
|
||||
(This decorator can be extended to take the desired logger as a param.)
|
||||
"""
|
||||
|
||||
def wrapper(*args, **kwargs):
|
||||
actual_response = callback(*args, **kwargs)
|
||||
if not request.url.endswith("/health"):
|
||||
logging.info('%s %s %s %s' % (request.remote_addr,
|
||||
request.method,
|
||||
request.url,
|
||||
response.status))
|
||||
return actual_response
|
||||
|
||||
return wrapper
|
||||
src/bottle_plugins/prometheus_plugin.py (new file)
@@ -0,0 +1,66 @@
|
||||
import logging
|
||||
import os
|
||||
import urllib.parse
|
||||
|
||||
from bottle import request
|
||||
from dtos import V1RequestBase, V1ResponseBase
|
||||
from metrics import start_metrics_http_server, REQUEST_COUNTER, REQUEST_DURATION
|
||||
|
||||
PROMETHEUS_ENABLED = os.environ.get('PROMETHEUS_ENABLED', 'false').lower() == 'true'
|
||||
PROMETHEUS_PORT = int(os.environ.get('PROMETHEUS_PORT', 8192))
|
||||
|
||||
|
||||
def setup():
|
||||
if PROMETHEUS_ENABLED:
|
||||
start_metrics_http_server(PROMETHEUS_PORT)
|
||||
|
||||
|
||||
def prometheus_plugin(callback):
|
||||
"""
|
||||
Bottle plugin to expose Prometheus metrics
|
||||
https://bottlepy.org/docs/dev/plugindev.html
|
||||
"""
|
||||
def wrapper(*args, **kwargs):
|
||||
actual_response = callback(*args, **kwargs)
|
||||
|
||||
if PROMETHEUS_ENABLED:
|
||||
try:
|
||||
export_metrics(actual_response)
|
||||
except Exception as e:
|
||||
logging.warning("Error exporting metrics: " + str(e))
|
||||
|
||||
return actual_response
|
||||
|
||||
def export_metrics(actual_response):
|
||||
res = V1ResponseBase(actual_response)
|
||||
|
||||
if res.startTimestamp is None or res.endTimestamp is None:
|
||||
# skip management and healthcheck endpoints
|
||||
return
|
||||
|
||||
domain = "unknown"
|
||||
if res.solution and res.solution.url:
|
||||
domain = parse_domain_url(res.solution.url)
|
||||
else:
|
||||
# timeout error
|
||||
req = V1RequestBase(request.json)
|
||||
if req.url:
|
||||
domain = parse_domain_url(req.url)
|
||||
|
||||
run_time = (res.endTimestamp - res.startTimestamp) / 1000
|
||||
REQUEST_DURATION.labels(domain=domain).observe(run_time)
|
||||
|
||||
result = "unknown"
|
||||
if res.message == "Challenge solved!":
|
||||
result = "solved"
|
||||
elif res.message == "Challenge not detected!":
|
||||
result = "not_detected"
|
||||
elif res.message.startswith("Error"):
|
||||
result = "error"
|
||||
REQUEST_COUNTER.labels(domain=domain, result=result).inc()
|
||||
|
||||
def parse_domain_url(url):
|
||||
parsed_url = urllib.parse.urlparse(url)
|
||||
return parsed_url.hostname
|
||||
|
||||
return wrapper
|
||||
src/build_package.py (new file)
@@ -0,0 +1,110 @@
|
||||
import os
|
||||
import platform
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import zipfile
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
def clean_files():
|
||||
try:
|
||||
shutil.rmtree(os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'build'))
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
shutil.rmtree(os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'dist'))
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
shutil.rmtree(os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'dist_chrome'))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def download_chromium():
|
||||
# https://commondatastorage.googleapis.com/chromium-browser-snapshots/index.html?prefix=Linux_x64/
|
||||
revision = "1522586" if os.name == 'nt' else '1522586'
|
||||
arch = 'Win_x64' if os.name == 'nt' else 'Linux_x64'
|
||||
dl_file = 'chrome-win' if os.name == 'nt' else 'chrome-linux'
|
||||
dl_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'dist_chrome')
|
||||
dl_path_folder = os.path.join(dl_path, dl_file)
|
||||
dl_path_zip = dl_path_folder + '.zip'
|
||||
|
||||
# response = requests.get(
|
||||
# f'https://commondatastorage.googleapis.com/chromium-browser-snapshots/{arch}/LAST_CHANGE',
|
||||
# timeout=30)
|
||||
# revision = response.text.strip()
|
||||
print("Downloading revision: " + revision)
|
||||
|
||||
os.mkdir(dl_path)
|
||||
with requests.get(
|
||||
f'https://commondatastorage.googleapis.com/chromium-browser-snapshots/{arch}/{revision}/{dl_file}.zip',
|
||||
stream=True) as r:
|
||||
r.raise_for_status()
|
||||
with open(dl_path_zip, 'wb') as f:
|
||||
for chunk in r.iter_content(chunk_size=8192):
|
||||
f.write(chunk)
|
||||
print("File downloaded: " + dl_path_zip)
|
||||
with zipfile.ZipFile(dl_path_zip, 'r') as zip_ref:
|
||||
zip_ref.extractall(dl_path)
|
||||
os.remove(dl_path_zip)
|
||||
|
||||
chrome_path = os.path.join(dl_path, "chrome")
|
||||
shutil.move(dl_path_folder, chrome_path)
|
||||
print("Extracted in: " + chrome_path)
|
||||
|
||||
if os.name != 'nt':
|
||||
# Give executable permissions for *nix
|
||||
# file * | grep executable | cut -d: -f1
|
||||
print("Giving executable permissions...")
|
||||
execs = ['chrome', 'chrome_crashpad_handler', 'chrome_sandbox', 'chrome-wrapper', 'xdg-mime', 'xdg-settings']
|
||||
for exec_file in execs:
|
||||
exec_path = os.path.join(chrome_path, exec_file)
|
||||
os.chmod(exec_path, 0o755)
|
||||
|
||||
|
||||
def run_pyinstaller():
|
||||
sep = ';' if os.name == 'nt' else ':'
|
||||
result = subprocess.run([sys.executable, "-m", "PyInstaller",
|
||||
"--icon", "resources/flaresolverr_logo.ico",
|
||||
"--add-data", f"package.json{sep}.",
|
||||
"--add-data", f"{os.path.join('dist_chrome', 'chrome')}{sep}chrome",
|
||||
os.path.join("src", "flaresolverr.py")],
|
||||
cwd=os.pardir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
if result.returncode != 0:
|
||||
print(result.stderr.decode('utf-8'))
|
||||
raise Exception("Error running pyInstaller")
|
||||
|
||||
|
||||
def compress_package():
|
||||
dist_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'dist')
|
||||
package_folder = os.path.join(dist_folder, 'package')
|
||||
shutil.move(os.path.join(dist_folder, 'flaresolverr'), os.path.join(package_folder, 'flaresolverr'))
|
||||
print("Package folder: " + package_folder)
|
||||
|
||||
compr_format = 'zip' if os.name == 'nt' else 'gztar'
|
||||
compr_file_name = 'flaresolverr_windows_x64' if os.name == 'nt' else 'flaresolverr_linux_x64'
|
||||
compr_file_path = os.path.join(dist_folder, compr_file_name)
|
||||
shutil.make_archive(compr_file_path, compr_format, package_folder)
|
||||
print("Compressed file path: " + compr_file_path)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
print("Building package...")
|
||||
print("Platform: " + platform.platform())
|
||||
|
||||
print("Cleaning previous build...")
|
||||
clean_files()
|
||||
|
||||
print("Downloading Chromium...")
|
||||
download_chromium()
|
||||
|
||||
print("Building pyinstaller executable... ")
|
||||
run_pyinstaller()
|
||||
|
||||
print("Compressing package... ")
|
||||
compress_package()
|
||||
|
||||
# NOTE: python -m pip install pyinstaller
|
||||
@@ -1,41 +0,0 @@
|
||||
import log from "../services/log";
|
||||
|
||||
export enum CaptchaType {
|
||||
re = 'reCaptcha',
|
||||
h = 'hCaptcha'
|
||||
}
|
||||
|
||||
export interface SolverOptions {
|
||||
url: string
|
||||
sitekey: string
|
||||
type: CaptchaType
|
||||
}
|
||||
|
||||
export type Solver = (options: SolverOptions) => Promise<string>
|
||||
|
||||
const captchaSolvers: { [key: string]: Solver } = {}
|
||||
|
||||
export default (): Solver => {
|
||||
const method = process.env.CAPTCHA_SOLVER
|
||||
|
||||
if (!method || method.toLowerCase() == 'none') {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (!(method in captchaSolvers)) {
|
||||
try {
|
||||
captchaSolvers[method] = require('./' + method).default as Solver
|
||||
} catch (e) {
|
||||
if (e.code === 'MODULE_NOT_FOUND') {
|
||||
throw Error(`The solver '${method}' is not a valid captcha solving method.`)
|
||||
} else {
|
||||
console.error(e)
|
||||
throw Error(`An error occurred loading the solver '${method}'.`)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
log.info(`Using '${method}' to solve the captcha.`);
|
||||
|
||||
return captchaSolvers[method]
|
||||
}
|
||||
@@ -1,178 +0,0 @@
|
||||
import {Request, Response} from 'express';
|
||||
import {Protocol} from "devtools-protocol";
|
||||
|
||||
import log from '../services/log'
|
||||
import {browserRequest, ChallengeResolutionResultT, ChallengeResolutionT} from "../services/solver";
|
||||
import {SessionCreateOptions} from "../services/sessions";
|
||||
const sessions = require('../services/sessions')
|
||||
const version: string = 'v' + require('../../package.json').version
|
||||
|
||||
interface V1Routes {
|
||||
[key: string]: (params: V1RequestBase, response: V1ResponseBase) => Promise<void>
|
||||
}
|
||||
|
||||
export interface Proxy {
|
||||
url?: string
|
||||
username?: string
|
||||
password?: string
|
||||
}
|
||||
|
||||
export interface V1RequestBase {
|
||||
cmd: string
|
||||
cookies?: Protocol.Network.CookieParam[],
|
||||
maxTimeout?: number
|
||||
proxy?: Proxy
|
||||
session: string
|
||||
headers?: Record<string, string> // deprecated v2, not used
|
||||
userAgent?: string // deprecated v2, not used
|
||||
}
|
||||
|
||||
interface V1RequestSession extends V1RequestBase {
|
||||
}
|
||||
|
||||
export interface V1Request extends V1RequestBase {
|
||||
url: string
|
||||
method?: string
|
||||
postData?: string
|
||||
returnOnlyCookies?: boolean
|
||||
download?: boolean // deprecated v2, not used
|
||||
returnRawHtml?: boolean // deprecated v2, not used
|
||||
}
|
||||
|
||||
export interface V1ResponseBase {
|
||||
status: string
|
||||
message: string
|
||||
startTimestamp: number
|
||||
endTimestamp: number
|
||||
version: string
|
||||
}
|
||||
|
||||
export interface V1ResponseSolution extends V1ResponseBase {
|
||||
solution: ChallengeResolutionResultT
|
||||
}
|
||||
|
||||
export interface V1ResponseSession extends V1ResponseBase {
|
||||
session: string
|
||||
}
|
||||
|
||||
export interface V1ResponseSessions extends V1ResponseBase {
|
||||
sessions: string[]
|
||||
}
|
||||
|
||||
export const routes: V1Routes = {
|
||||
'sessions.create': async (params: V1RequestSession, response: V1ResponseSession): Promise<void> => {
|
||||
const options: SessionCreateOptions = {
|
||||
oneTimeSession: false,
|
||||
cookies: params.cookies,
|
||||
maxTimeout: params.maxTimeout,
|
||||
proxy: params.proxy
|
||||
}
|
||||
const { sessionId, browser } = await sessions.create(params.session, options)
|
||||
if (browser) {
|
||||
response.status = "ok";
|
||||
response.message = "Session created successfully.";
|
||||
response.session = sessionId
|
||||
} else {
|
||||
throw Error('Error creating session.')
|
||||
}
|
||||
},
|
||||
'sessions.list': async (params: V1RequestSession, response: V1ResponseSessions): Promise<void> => {
|
||||
response.status = "ok";
|
||||
response.message = "";
|
||||
response.sessions = sessions.list();
|
||||
},
|
||||
'sessions.destroy': async (params: V1RequestSession, response: V1ResponseBase): Promise<void> => {
|
||||
if (await sessions.destroy(params.session)) {
|
||||
response.status = "ok";
|
||||
response.message = "The session has been removed.";
|
||||
} else {
|
||||
throw Error('This session does not exist.')
|
||||
}
|
||||
},
|
||||
'request.get': async (params: V1Request, response: V1ResponseSolution): Promise<void> => {
|
||||
params.method = 'GET'
|
||||
if (params.postData) {
|
||||
throw Error('Cannot use "postBody" when sending a GET request.')
|
||||
}
|
||||
if (params.returnRawHtml) {
|
||||
log.warn("Request parameter 'returnRawHtml' was removed in FlareSolverr v2.")
|
||||
}
|
||||
if (params.download) {
|
||||
log.warn("Request parameter 'download' was removed in FlareSolverr v2.")
|
||||
}
|
||||
const result: ChallengeResolutionT = await browserRequest(params)
|
||||
|
||||
response.status = result.status;
|
||||
response.message = result.message;
|
||||
response.solution = result.result;
|
||||
if (response.message) {
|
||||
log.info(response.message)
|
||||
}
|
||||
},
|
||||
'request.post': async (params: V1Request, response: V1ResponseSolution): Promise<void> => {
|
||||
params.method = 'POST'
|
||||
if (!params.postData) {
|
||||
throw Error('Must send param "postBody" when sending a POST request.')
|
||||
}
|
||||
if (params.returnRawHtml) {
|
||||
log.warn("Request parameter 'returnRawHtml' was removed in FlareSolverr v2.")
|
||||
}
|
||||
if (params.download) {
|
||||
log.warn("Request parameter 'download' was removed in FlareSolverr v2.")
|
||||
}
|
||||
const result: ChallengeResolutionT = await browserRequest(params)
|
||||
|
||||
response.status = result.status;
|
||||
response.message = result.message;
|
||||
response.solution = result.result;
|
||||
if (response.message) {
|
||||
log.info(response.message)
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
export async function controllerV1(req: Request, res: Response): Promise<void> {
|
||||
const response: V1ResponseBase = {
|
||||
status: null,
|
||||
message: null,
|
||||
startTimestamp: Date.now(),
|
||||
endTimestamp: 0,
|
||||
version: version
|
||||
}
|
||||
|
||||
try {
|
||||
const params: V1RequestBase = req.body
|
||||
// do some validations
|
||||
if (!params.cmd) {
|
||||
throw Error("Request parameter 'cmd' is mandatory.")
|
||||
}
|
||||
if (params.headers) {
|
||||
log.warn("Request parameter 'headers' was removed in FlareSolverr v2.")
|
||||
}
|
||||
if (params.userAgent) {
|
||||
log.warn("Request parameter 'userAgent' was removed in FlareSolverr v2.")
|
||||
}
|
||||
|
||||
// set default values
|
||||
if (!params.maxTimeout || params.maxTimeout < 1) {
|
||||
params.maxTimeout = 60000;
|
||||
}
|
||||
|
||||
// execute the command
|
||||
const route = routes[params.cmd]
|
||||
if (route) {
|
||||
await route(params, response)
|
||||
} else {
|
||||
throw Error(`The command '${params.cmd}' is invalid.`)
|
||||
}
|
||||
} catch (e) {
|
||||
res.status(500)
|
||||
response.status = "error";
|
||||
response.message = e.toString();
|
||||
log.error(response.message)
|
||||
}
|
||||
|
||||
response.endTimestamp = Date.now()
|
||||
log.info(`Response in ${(response.endTimestamp - response.startTimestamp) / 1000} s`)
|
||||
res.send(response)
|
||||
}
|
||||
src/dtos.py (new file)
@@ -0,0 +1,94 @@
|
||||
|
||||
STATUS_OK = "ok"
|
||||
STATUS_ERROR = "error"
|
||||
|
||||
|
||||
class ChallengeResolutionResultT:
|
||||
url: str = None
|
||||
status: int = None
|
||||
headers: list = None
|
||||
response: str = None
|
||||
cookies: list = None
|
||||
userAgent: str = None
|
||||
screenshot: str | None = None
|
||||
turnstile_token: str = None
|
||||
|
||||
def __init__(self, _dict):
|
||||
self.__dict__.update(_dict)
|
||||
|
||||
|
||||
class ChallengeResolutionT:
|
||||
status: str = None
|
||||
message: str = None
|
||||
result: ChallengeResolutionResultT = None
|
||||
|
||||
def __init__(self, _dict):
|
||||
self.__dict__.update(_dict)
|
||||
if self.result is not None:
|
||||
self.result = ChallengeResolutionResultT(self.result)
|
||||
|
||||
|
||||
class V1RequestBase(object):
|
||||
# V1RequestBase
|
||||
cmd: str = None
|
||||
cookies: list = None
|
||||
maxTimeout: int = None
|
||||
proxy: dict = None
|
||||
session: str = None
|
||||
session_ttl_minutes: int = None
|
||||
headers: list = None # deprecated v2.0.0, not used
|
||||
userAgent: str = None # deprecated v2.0.0, not used
|
||||
|
||||
# V1Request
|
||||
url: str = None
|
||||
postData: str = None
|
||||
returnOnlyCookies: bool = None
|
||||
returnScreenshot: bool = None
|
||||
download: bool = None # deprecated v2.0.0, not used
|
||||
returnRawHtml: bool = None # deprecated v2.0.0, not used
|
||||
waitInSeconds: int = None
|
||||
# Optional resource blocking flag (blocks images, CSS, and fonts)
|
||||
disableMedia: bool = None
|
||||
# Optional: used when a Turnstile captcha needs to be clicked after a number of Tab presses
tabs_till_verify: int = None
|
||||
|
||||
def __init__(self, _dict):
|
||||
self.__dict__.update(_dict)
|
||||
|
||||
|
||||
class V1ResponseBase(object):
|
||||
# V1ResponseBase
|
||||
status: str = None
|
||||
message: str = None
|
||||
session: str = None
|
||||
sessions: list[str] = None
|
||||
startTimestamp: int = None
|
||||
endTimestamp: int = None
|
||||
version: str = None
|
||||
|
||||
# V1ResponseSolution
|
||||
solution: ChallengeResolutionResultT = None
|
||||
|
||||
# hidden vars
|
||||
__error_500__: bool = False
|
||||
|
||||
def __init__(self, _dict):
|
||||
self.__dict__.update(_dict)
|
||||
if self.solution is not None:
|
||||
self.solution = ChallengeResolutionResultT(self.solution)
|
||||
|
||||
|
||||
class IndexResponse(object):
|
||||
msg: str = None
|
||||
version: str = None
|
||||
userAgent: str = None
|
||||
|
||||
def __init__(self, _dict):
|
||||
self.__dict__.update(_dict)
|
||||
|
||||
|
||||
class HealthResponse(object):
|
||||
status: str = None
|
||||
|
||||
def __init__(self, _dict):
|
||||
self.__dict__.update(_dict)
|
||||
src/flaresolverr.py (new file)
@@ -0,0 +1,152 @@
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
import certifi
|
||||
from bottle import run, response, Bottle, request, ServerAdapter
|
||||
|
||||
from bottle_plugins.error_plugin import error_plugin
|
||||
from bottle_plugins.logger_plugin import logger_plugin
|
||||
from bottle_plugins import prometheus_plugin
|
||||
from dtos import V1RequestBase
|
||||
import flaresolverr_service
|
||||
import utils
|
||||
|
||||
env_proxy_url = os.environ.get('PROXY_URL', None)
|
||||
env_proxy_username = os.environ.get('PROXY_USERNAME', None)
|
||||
env_proxy_password = os.environ.get('PROXY_PASSWORD', None)
|
||||
|
||||
|
||||
class JSONErrorBottle(Bottle):
|
||||
"""
|
||||
Handle 404 errors
|
||||
"""
|
||||
def default_error_handler(self, res):
|
||||
response.content_type = 'application/json'
|
||||
return json.dumps(dict(error=res.body, status_code=res.status_code))
|
||||
|
||||
|
||||
app = JSONErrorBottle()
|
||||
|
||||
|
||||
@app.route('/')
|
||||
def index():
|
||||
"""
|
||||
Show welcome message
|
||||
"""
|
||||
res = flaresolverr_service.index_endpoint()
|
||||
return utils.object_to_dict(res)
|
||||
|
||||
|
||||
@app.route('/health')
|
||||
def health():
|
||||
"""
|
||||
Healthcheck endpoint.
|
||||
This endpoint is special because it doesn't print traces
|
||||
"""
|
||||
res = flaresolverr_service.health_endpoint()
|
||||
return utils.object_to_dict(res)
|
||||
|
||||
|
||||
@app.post('/v1')
|
||||
def controller_v1():
|
||||
"""
|
||||
Controller v1
|
||||
"""
|
||||
data = request.json or {}
|
||||
if (('proxy' not in data or not data.get('proxy')) and env_proxy_url is not None and (env_proxy_username is None and env_proxy_password is None)):
|
||||
logging.info('Using proxy URL ENV')
|
||||
data['proxy'] = {"url": env_proxy_url}
|
||||
if (('proxy' not in data or not data.get('proxy')) and env_proxy_url is not None and (env_proxy_username is not None or env_proxy_password is not None)):
|
||||
logging.info('Using proxy URL, username & password ENVs')
|
||||
data['proxy'] = {"url": env_proxy_url, "username": env_proxy_username, "password": env_proxy_password}
|
||||
req = V1RequestBase(data)
|
||||
res = flaresolverr_service.controller_v1_endpoint(req)
|
||||
if res.__error_500__:
|
||||
response.status = 500
|
||||
return utils.object_to_dict(res)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# check python version
|
||||
if sys.version_info < (3, 9):
|
||||
raise Exception("The Python version is less than 3.9. Version 3.9 or higher is required.")
|
||||
|
||||
# fix for HEADLESS=false in Windows binary
|
||||
# https://stackoverflow.com/a/27694505
|
||||
if os.name == 'nt':
|
||||
import multiprocessing
|
||||
multiprocessing.freeze_support()
|
||||
|
||||
# fix ssl certificates for compiled binaries
|
||||
# https://github.com/pyinstaller/pyinstaller/issues/7229
|
||||
# https://stackoverflow.com/q/55736855
|
||||
os.environ["REQUESTS_CA_BUNDLE"] = certifi.where()
|
||||
os.environ["SSL_CERT_FILE"] = certifi.where()
|
||||
|
||||
# validate configuration
|
||||
log_level = os.environ.get('LOG_LEVEL', 'info').upper()
|
||||
log_file = os.environ.get('LOG_FILE', None)
|
||||
log_html = utils.get_config_log_html()
|
||||
headless = utils.get_config_headless()
|
||||
server_host = os.environ.get('HOST', '0.0.0.0')
|
||||
server_port = int(os.environ.get('PORT', 8191))
|
||||
|
||||
# configure logger
|
||||
logger_format = '%(asctime)s %(levelname)-8s %(message)s'
|
||||
if log_level == 'DEBUG':
|
||||
logger_format = '%(asctime)s %(levelname)-8s ReqId %(thread)s %(message)s'
|
||||
if log_file:
|
||||
log_file = os.path.realpath(log_file)
|
||||
log_path = os.path.dirname(log_file)
|
||||
os.makedirs(log_path, exist_ok=True)
|
||||
logging.basicConfig(
|
||||
format=logger_format,
|
||||
level=log_level,
|
||||
datefmt='%Y-%m-%d %H:%M:%S',
|
||||
handlers=[
|
||||
logging.StreamHandler(sys.stdout),
|
||||
logging.FileHandler(log_file)
|
||||
]
|
||||
)
|
||||
else:
|
||||
logging.basicConfig(
|
||||
format=logger_format,
|
||||
level=log_level,
|
||||
datefmt='%Y-%m-%d %H:%M:%S',
|
||||
handlers=[
|
||||
logging.StreamHandler(sys.stdout)
|
||||
]
|
||||
)
|
||||
|
||||
# disable warning traces from urllib3
|
||||
logging.getLogger('urllib3').setLevel(logging.ERROR)
|
||||
logging.getLogger('selenium.webdriver.remote.remote_connection').setLevel(logging.WARNING)
|
||||
logging.getLogger('undetected_chromedriver').setLevel(logging.WARNING)
|
||||
|
||||
logging.info(f'FlareSolverr {utils.get_flaresolverr_version()}')
|
||||
logging.debug('Debug log enabled')
|
||||
|
||||
# Get current OS for global variable
|
||||
utils.get_current_platform()
|
||||
|
||||
# test browser installation
|
||||
flaresolverr_service.test_browser_installation()
|
||||
|
||||
# start bottle plugins
|
||||
# plugin order is important
|
||||
app.install(logger_plugin)
|
||||
app.install(error_plugin)
|
||||
prometheus_plugin.setup()
|
||||
app.install(prometheus_plugin.prometheus_plugin)
|
||||
|
||||
# start webserver
|
||||
# default server 'wsgiref' does not support concurrent requests
|
||||
# https://github.com/FlareSolverr/FlareSolverr/issues/680
|
||||
# https://github.com/Pylons/waitress/issues/31
|
||||
class WaitressServerPoll(ServerAdapter):
|
||||
def run(self, handler):
|
||||
from waitress import serve
|
||||
serve(handler, host=self.host, port=self.port, asyncore_use_poll=True)
|
||||
run(app, host=server_host, port=server_port, quiet=True, server=WaitressServerPoll)
|
||||
src/flaresolverr_service.py (new file)
@@ -0,0 +1,519 @@
|
||||
import logging
|
||||
import platform
|
||||
import sys
|
||||
import time
|
||||
from datetime import timedelta
|
||||
from html import escape
|
||||
from urllib.parse import unquote, quote
|
||||
|
||||
from func_timeout import FunctionTimedOut, func_timeout
|
||||
from selenium.common import TimeoutException
|
||||
from selenium.webdriver.chrome.webdriver import WebDriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.common.keys import Keys
|
||||
from selenium.webdriver.support.expected_conditions import (
|
||||
presence_of_element_located, staleness_of, title_is)
|
||||
from selenium.webdriver.common.action_chains import ActionChains
|
||||
from selenium.webdriver.support.wait import WebDriverWait
|
||||
|
||||
import utils
|
||||
from dtos import (STATUS_ERROR, STATUS_OK, ChallengeResolutionResultT,
|
||||
ChallengeResolutionT, HealthResponse, IndexResponse,
|
||||
V1RequestBase, V1ResponseBase)
|
||||
from sessions import SessionsStorage
|
||||
|
||||
ACCESS_DENIED_TITLES = [
|
||||
# Cloudflare
|
||||
'Access denied',
|
||||
# Cloudflare http://bitturk.net/ Firefox
|
||||
'Attention Required! | Cloudflare'
|
||||
]
|
||||
ACCESS_DENIED_SELECTORS = [
|
||||
# Cloudflare
|
||||
'div.cf-error-title span.cf-code-label span',
|
||||
# Cloudflare http://bitturk.net/ Firefox
|
||||
'#cf-error-details div.cf-error-overview h1'
|
||||
]
|
||||
CHALLENGE_TITLES = [
|
||||
# Cloudflare
|
||||
'Just a moment...',
|
||||
# DDoS-GUARD
|
||||
'DDoS-Guard'
|
||||
]
|
||||
CHALLENGE_SELECTORS = [
|
||||
# Cloudflare
|
||||
'#cf-challenge-running', '.ray_id', '.attack-box', '#cf-please-wait', '#challenge-spinner', '#trk_jschal_js', '#turnstile-wrapper', '.lds-ring',
|
||||
# Custom CloudFlare for EbookParadijs, Film-Paleis, MuziekFabriek and Puur-Hollands
|
||||
'td.info #js_info',
|
||||
# Fairlane / pararius.com
|
||||
'div.vc div.text-box h2'
|
||||
]
|
||||
|
||||
TURNSTILE_SELECTORS = [
|
||||
"input[name='cf-turnstile-response']"
|
||||
]
|
||||
|
||||
SHORT_TIMEOUT = 1
|
||||
SESSIONS_STORAGE = SessionsStorage()
|
||||
|
||||
|
||||
def test_browser_installation():
|
||||
logging.info("Testing web browser installation...")
|
||||
logging.info("Platform: " + platform.platform())
|
||||
|
||||
chrome_exe_path = utils.get_chrome_exe_path()
|
||||
if chrome_exe_path is None:
|
||||
logging.error("Chrome / Chromium web browser not installed!")
|
||||
sys.exit(1)
|
||||
else:
|
||||
logging.info("Chrome / Chromium path: " + chrome_exe_path)
|
||||
|
||||
chrome_major_version = utils.get_chrome_major_version()
|
||||
if chrome_major_version == '':
|
||||
logging.error("Chrome / Chromium version not detected!")
|
||||
sys.exit(1)
|
||||
else:
|
||||
logging.info("Chrome / Chromium major version: " + chrome_major_version)
|
||||
|
||||
logging.info("Launching web browser...")
|
||||
user_agent = utils.get_user_agent()
|
||||
logging.info("FlareSolverr User-Agent: " + user_agent)
|
||||
logging.info("Test successful!")
|
||||
|
||||
|
||||
def index_endpoint() -> IndexResponse:
|
||||
res = IndexResponse({})
|
||||
res.msg = "FlareSolverr is ready!"
|
||||
res.version = utils.get_flaresolverr_version()
|
||||
res.userAgent = utils.get_user_agent()
|
||||
return res
|
||||
|
||||
|
||||
def health_endpoint() -> HealthResponse:
|
||||
res = HealthResponse({})
|
||||
res.status = STATUS_OK
|
||||
return res
|
||||
|
||||
|
||||
def controller_v1_endpoint(req: V1RequestBase) -> V1ResponseBase:
|
||||
start_ts = int(time.time() * 1000)
|
||||
logging.info(f"Incoming request => POST /v1 body: {utils.object_to_dict(req)}")
|
||||
res: V1ResponseBase
|
||||
try:
|
||||
res = _controller_v1_handler(req)
|
||||
except Exception as e:
|
||||
res = V1ResponseBase({})
|
||||
res.__error_500__ = True
|
||||
res.status = STATUS_ERROR
|
||||
res.message = "Error: " + str(e)
|
||||
logging.error(res.message)
|
||||
|
||||
res.startTimestamp = start_ts
|
||||
res.endTimestamp = int(time.time() * 1000)
|
||||
res.version = utils.get_flaresolverr_version()
|
||||
logging.debug(f"Response => POST /v1 body: {utils.object_to_dict(res)}")
|
||||
logging.info(f"Response in {(res.endTimestamp - res.startTimestamp) / 1000} s")
|
||||
return res
|
||||
|
||||
|
||||
def _controller_v1_handler(req: V1RequestBase) -> V1ResponseBase:
|
||||
# do some validations
|
||||
if req.cmd is None:
|
||||
raise Exception("Request parameter 'cmd' is mandatory.")
|
||||
if req.headers is not None:
|
||||
logging.warning("Request parameter 'headers' was removed in FlareSolverr v2.")
|
||||
if req.userAgent is not None:
|
||||
logging.warning("Request parameter 'userAgent' was removed in FlareSolverr v2.")
|
||||
|
||||
# set default values
|
||||
if req.maxTimeout is None or int(req.maxTimeout) < 1:
|
||||
req.maxTimeout = 60000
|
||||
|
||||
# execute the command
|
||||
res: V1ResponseBase
|
||||
if req.cmd == 'sessions.create':
|
||||
res = _cmd_sessions_create(req)
|
||||
elif req.cmd == 'sessions.list':
|
||||
res = _cmd_sessions_list(req)
|
||||
elif req.cmd == 'sessions.destroy':
|
||||
res = _cmd_sessions_destroy(req)
|
||||
elif req.cmd == 'request.get':
|
||||
res = _cmd_request_get(req)
|
||||
elif req.cmd == 'request.post':
|
||||
res = _cmd_request_post(req)
|
||||
else:
|
||||
raise Exception(f"Request parameter 'cmd' = '{req.cmd}' is invalid.")
|
||||
|
||||
return res
|
||||
|
||||
|
||||
def _cmd_request_get(req: V1RequestBase) -> V1ResponseBase:
|
||||
# do some validations
|
||||
if req.url is None:
|
||||
raise Exception("Request parameter 'url' is mandatory in 'request.get' command.")
|
||||
if req.postData is not None:
|
||||
raise Exception("Cannot use 'postBody' when sending a GET request.")
|
||||
if req.returnRawHtml is not None:
|
||||
logging.warning("Request parameter 'returnRawHtml' was removed in FlareSolverr v2.")
|
||||
if req.download is not None:
|
||||
logging.warning("Request parameter 'download' was removed in FlareSolverr v2.")
|
||||
|
||||
challenge_res = _resolve_challenge(req, 'GET')
|
||||
res = V1ResponseBase({})
|
||||
res.status = challenge_res.status
|
||||
res.message = challenge_res.message
|
||||
res.solution = challenge_res.result
|
||||
return res
|
||||
|
||||
|
||||
def _cmd_request_post(req: V1RequestBase) -> V1ResponseBase:
|
||||
# do some validations
|
||||
if req.postData is None:
|
||||
raise Exception("Request parameter 'postData' is mandatory in 'request.post' command.")
|
||||
if req.returnRawHtml is not None:
|
||||
logging.warning("Request parameter 'returnRawHtml' was removed in FlareSolverr v2.")
|
||||
if req.download is not None:
|
||||
logging.warning("Request parameter 'download' was removed in FlareSolverr v2.")
|
||||
|
||||
challenge_res = _resolve_challenge(req, 'POST')
|
||||
res = V1ResponseBase({})
|
||||
res.status = challenge_res.status
|
||||
res.message = challenge_res.message
|
||||
res.solution = challenge_res.result
|
||||
return res
|
||||
|
||||
|
||||
def _cmd_sessions_create(req: V1RequestBase) -> V1ResponseBase:
|
||||
logging.debug("Creating new session...")
|
||||
|
||||
session, fresh = SESSIONS_STORAGE.create(session_id=req.session, proxy=req.proxy)
|
||||
session_id = session.session_id
|
||||
|
||||
if not fresh:
|
||||
return V1ResponseBase({
|
||||
"status": STATUS_OK,
|
||||
"message": "Session already exists.",
|
||||
"session": session_id
|
||||
})
|
||||
|
||||
return V1ResponseBase({
|
||||
"status": STATUS_OK,
|
||||
"message": "Session created successfully.",
|
||||
"session": session_id
|
||||
})
|
||||
|
||||
|
||||
def _cmd_sessions_list(req: V1RequestBase) -> V1ResponseBase:
|
||||
session_ids = SESSIONS_STORAGE.session_ids()
|
||||
|
||||
return V1ResponseBase({
|
||||
"status": STATUS_OK,
|
||||
"message": "",
|
||||
"sessions": session_ids
|
||||
})
|
||||
|
||||
|
||||
def _cmd_sessions_destroy(req: V1RequestBase) -> V1ResponseBase:
|
||||
session_id = req.session
|
||||
existed = SESSIONS_STORAGE.destroy(session_id)
|
||||
|
||||
if not existed:
|
||||
raise Exception("The session doesn't exist.")
|
||||
|
||||
return V1ResponseBase({
|
||||
"status": STATUS_OK,
|
||||
"message": "The session has been removed."
|
||||
})
|
||||
|
||||
|
||||
def _resolve_challenge(req: V1RequestBase, method: str) -> ChallengeResolutionT:
    timeout = int(req.maxTimeout) / 1000
    driver = None
    try:
        if req.session:
            session_id = req.session
            ttl = timedelta(minutes=req.session_ttl_minutes) if req.session_ttl_minutes else None
            session, fresh = SESSIONS_STORAGE.get(session_id, ttl)

            if fresh:
                logging.debug(f"new session created to perform the request (session_id={session_id})")
            else:
                logging.debug(f"existing session is used to perform the request (session_id={session_id}, "
                              f"lifetime={str(session.lifetime())}, ttl={str(ttl)})")

            driver = session.driver
        else:
            driver = utils.get_webdriver(req.proxy)
            logging.debug('New instance of webdriver has been created to perform the request')
        return func_timeout(timeout, _evil_logic, (req, driver, method))
    except FunctionTimedOut:
        raise Exception(f'Error solving the challenge. Timeout after {timeout} seconds.')
    except Exception as e:
        raise Exception('Error solving the challenge. ' + str(e).replace('\n', '\\n'))
    finally:
        if not req.session and driver is not None:
            if utils.PLATFORM_VERSION == "nt":
                driver.close()
            driver.quit()
            logging.debug('A used instance of webdriver has been destroyed')

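`_resolve_challenge` enforces `maxTimeout` by running the solver through `func_timeout`, which raises `FunctionTimedOut` if the wrapped call does not return in time. A standalone sketch of that mechanism, with a hypothetical work function:

# Minimal sketch of the timeout mechanism used above (slow_work is hypothetical).
import time
from func_timeout import func_timeout, FunctionTimedOut

def slow_work(seconds: float) -> str:
    time.sleep(seconds)
    return "done"

try:
    # raises FunctionTimedOut if slow_work does not finish within 1 second
    result = func_timeout(1.0, slow_work, args=(5,))
except FunctionTimedOut:
    result = "timed out"
print(result)  # "timed out"
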
def click_verify(driver: WebDriver, num_tabs: int = 1):
    try:
        logging.debug("Try to find the Cloudflare verify checkbox...")
        actions = ActionChains(driver)
        actions.pause(5)
        for _ in range(num_tabs):
            actions.send_keys(Keys.TAB).pause(0.1)
        actions.pause(1)
        actions.send_keys(Keys.SPACE).perform()

        logging.debug(f"Cloudflare verify checkbox clicked after {num_tabs} tabs!")
    except Exception:
        logging.debug("Cloudflare verify checkbox not found on the page.")
    finally:
        driver.switch_to.default_content()

    try:
        logging.debug("Try to find the Cloudflare 'Verify you are human' button...")
        button = driver.find_element(
            by=By.XPATH,
            value="//input[@type='button' and @value='Verify you are human']",
        )
        if button:
            actions = ActionChains(driver)
            actions.move_to_element_with_offset(button, 5, 7)
            actions.click(button)
            actions.perform()
            logging.debug("The Cloudflare 'Verify you are human' button found and clicked!")
    except Exception:
        logging.debug("The Cloudflare 'Verify you are human' button not found on the page.")

    time.sleep(2)

def _get_turnstile_token(driver: WebDriver, tabs: int):
    token_input = driver.find_element(By.CSS_SELECTOR, "input[name='cf-turnstile-response']")
    current_value = token_input.get_attribute("value")
    while True:
        click_verify(driver, num_tabs=tabs)
        turnstile_token = token_input.get_attribute("value")
        if turnstile_token:
            if turnstile_token != current_value:
                logging.info(f"Turnstile token: {turnstile_token}")
                return turnstile_token
        logging.debug("Failed to extract the token, the click possibly failed.")

        # reset focus
        driver.execute_script("""
            let el = document.createElement('button');
            el.style.position = 'fixed';
            el.style.top = '0';
            el.style.left = '0';
            document.body.prepend(el);
            el.focus();
        """)
        time.sleep(1)

def _resolve_turnstile_captcha(req: V1RequestBase, driver: WebDriver):
    turnstile_token = None
    if req.tabs_till_verify is not None:
        logging.debug(f'Navigating to... {req.url} in order to pass the turnstile challenge')
        driver.get(req.url)

        turnstile_challenge_found = False
        for selector in TURNSTILE_SELECTORS:
            found_elements = driver.find_elements(By.CSS_SELECTOR, selector)
            if len(found_elements) > 0:
                turnstile_challenge_found = True
                logging.info("Turnstile challenge detected. Selector found: " + selector)
                break
        if turnstile_challenge_found:
            turnstile_token = _get_turnstile_token(driver=driver, tabs=req.tabs_till_verify)
        else:
            logging.debug('Turnstile challenge not found')
    return turnstile_token

def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> ChallengeResolutionT:
    res = ChallengeResolutionT({})
    res.status = STATUS_OK
    res.message = ""

    # optionally block resources like images/css/fonts using CDP
    disable_media = utils.get_config_disable_media()
    if req.disableMedia is not None:
        disable_media = req.disableMedia
    if disable_media:
        block_urls = [
            # Images
            "*.png", "*.jpg", "*.jpeg", "*.gif", "*.webp", "*.bmp", "*.svg", "*.ico",
            "*.PNG", "*.JPG", "*.JPEG", "*.GIF", "*.WEBP", "*.BMP", "*.SVG", "*.ICO",
            "*.tiff", "*.tif", "*.jpe", "*.apng", "*.avif", "*.heic", "*.heif",
            "*.TIFF", "*.TIF", "*.JPE", "*.APNG", "*.AVIF", "*.HEIC", "*.HEIF",
            # Stylesheets
            "*.css",
            "*.CSS",
            # Fonts
            "*.woff", "*.woff2", "*.ttf", "*.otf", "*.eot",
            "*.WOFF", "*.WOFF2", "*.TTF", "*.OTF", "*.EOT"
        ]
        try:
            logging.debug("Network.setBlockedURLs: %s", block_urls)
            driver.execute_cdp_cmd("Network.enable", {})
            driver.execute_cdp_cmd("Network.setBlockedURLs", {"urls": block_urls})
        except Exception:
            # if CDP commands are not available or fail, ignore and continue
            logging.debug("Network.setBlockedURLs failed or unsupported on this webdriver")

    # navigate to the page
    logging.debug(f"Navigating to... {req.url}")
    turnstile_token = None

    if method == 'POST':
        _post_request(req, driver)
    else:
        if req.tabs_till_verify is None:
            driver.get(req.url)
        else:
            turnstile_token = _resolve_turnstile_captcha(req, driver)

    # set cookies if required
    if req.cookies is not None and len(req.cookies) > 0:
        logging.debug('Setting cookies...')
        for cookie in req.cookies:
            driver.delete_cookie(cookie['name'])
            driver.add_cookie(cookie)
        # reload the page
        if method == 'POST':
            _post_request(req, driver)
        else:
            driver.get(req.url)

    # wait for the page
    if utils.get_config_log_html():
        logging.debug(f"Response HTML:\n{driver.page_source}")
    html_element = driver.find_element(By.TAG_NAME, "html")
    page_title = driver.title

    # find access denied titles
    for title in ACCESS_DENIED_TITLES:
        if page_title.startswith(title):
            raise Exception('Cloudflare has blocked this request. '
                            'Probably your IP is banned for this site, check in your web browser.')
    # find access denied selectors
    for selector in ACCESS_DENIED_SELECTORS:
        found_elements = driver.find_elements(By.CSS_SELECTOR, selector)
        if len(found_elements) > 0:
            raise Exception('Cloudflare has blocked this request. '
                            'Probably your IP is banned for this site, check in your web browser.')

    # find challenge by title
    challenge_found = False
    for title in CHALLENGE_TITLES:
        if title.lower() == page_title.lower():
            challenge_found = True
            logging.info("Challenge detected. Title found: " + page_title)
            break
    if not challenge_found:
        # find challenge by selectors
        for selector in CHALLENGE_SELECTORS:
            found_elements = driver.find_elements(By.CSS_SELECTOR, selector)
            if len(found_elements) > 0:
                challenge_found = True
                logging.info("Challenge detected. Selector found: " + selector)
                break

    attempt = 0
    if challenge_found:
        while True:
            try:
                attempt = attempt + 1
                # wait until the title changes
                for title in CHALLENGE_TITLES:
                    logging.debug("Waiting for title (attempt " + str(attempt) + "): " + title)
                    WebDriverWait(driver, SHORT_TIMEOUT).until_not(title_is(title))

                # then wait until all the selectors disappear
                for selector in CHALLENGE_SELECTORS:
                    logging.debug("Waiting for selector (attempt " + str(attempt) + "): " + selector)
                    WebDriverWait(driver, SHORT_TIMEOUT).until_not(
                        presence_of_element_located((By.CSS_SELECTOR, selector)))

                # all elements not found
                break

            except TimeoutException:
                logging.debug("Timeout waiting for selector")

                click_verify(driver)

                # update the html (cloudflare reloads the page every 5 s)
                html_element = driver.find_element(By.TAG_NAME, "html")

        # waits until cloudflare redirection ends
        logging.debug("Waiting for redirect")
        # noinspection PyBroadException
        try:
            WebDriverWait(driver, SHORT_TIMEOUT).until(staleness_of(html_element))
        except Exception:
            logging.debug("Timeout waiting for redirect")

        logging.info("Challenge solved!")
        res.message = "Challenge solved!"
    else:
        logging.info("Challenge not detected!")
        res.message = "Challenge not detected!"

    challenge_res = ChallengeResolutionResultT({})
    challenge_res.url = driver.current_url
    challenge_res.status = 200  # todo: fix, selenium does not provide this info
    challenge_res.cookies = driver.get_cookies()
    challenge_res.userAgent = utils.get_user_agent(driver)
    challenge_res.turnstile_token = turnstile_token

    if not req.returnOnlyCookies:
        challenge_res.headers = {}  # todo: fix, selenium does not provide this info

        if req.waitInSeconds and req.waitInSeconds > 0:
            logging.info("Waiting " + str(req.waitInSeconds) + " seconds before returning the response...")
            time.sleep(req.waitInSeconds)

        challenge_res.response = driver.page_source

        if req.returnScreenshot:
            challenge_res.screenshot = driver.get_screenshot_as_base64()

    res.result = challenge_res
    return res

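The resource blocking above is plain Chrome DevTools Protocol usage. A standalone sketch of the same two CDP calls with an ordinary Selenium Chrome driver follows; the driver construction is a simplification (FlareSolverr builds its driver via utils.get_webdriver) and assumes a local Chrome/chromedriver setup.

# Standalone sketch of the CDP-based resource blocking used in _evil_logic.
from selenium import webdriver

options = webdriver.ChromeOptions()
options.add_argument("--headless=new")
driver = webdriver.Chrome(options=options)

try:
    driver.execute_cdp_cmd("Network.enable", {})
    driver.execute_cdp_cmd("Network.setBlockedURLs", {"urls": ["*.png", "*.jpg", "*.css"]})
    driver.get("https://www.google.com")  # matching images and stylesheets are not fetched
finally:
    driver.quit()
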
def _post_request(req: V1RequestBase, driver: WebDriver):
    post_form = f'<form id="hackForm" action="{req.url}" method="POST">'
    query_string = req.postData if req.postData and req.postData[0] != '?' else req.postData[1:] if req.postData else ''
    pairs = query_string.split('&')
    for pair in pairs:
        parts = pair.split('=', 1)
        # noinspection PyBroadException
        try:
            name = unquote(parts[0])
        except Exception:
            name = parts[0]
        if name == 'submit':
            continue
        # noinspection PyBroadException
        try:
            value = unquote(parts[1]) if len(parts) > 1 else ''
        except Exception:
            value = parts[1] if len(parts) > 1 else ''
        # Protection of the " character, for attribute syntax
        value = value.replace('"', '&quot;')
        post_form += f'<input type="text" name="{escape(quote(name))}" value="{escape(quote(value))}"><br>'
    post_form += '</form>'
    html_content = f"""
        <!DOCTYPE html>
        <html>
        <body>
            {post_form}
            <script>document.getElementById('hackForm').submit();</script>
        </body>
        </html>"""
    driver.get("data:text/html;charset=utf-8,{html_content}".format(html_content=html_content))
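The form trick above is easier to picture on a concrete input. A minimal sketch, not part of this diff, that reproduces only the string handling for a sample postData value (no webdriver involved); the imports mirror the ones the function above appears to rely on, and the action URL is just an example taken from the tests.

# Sketch: the form _post_request would build for "param1=value1&param2=value2".
from urllib.parse import unquote, quote
from html import escape

post_data = "param1=value1&param2=value2"
form = '<form id="hackForm" action="https://httpbin.org/post" method="POST">'
for pair in post_data.split('&'):
    name, _, value = pair.partition('=')
    form += f'<input type="text" name="{escape(quote(unquote(name)))}" value="{escape(quote(unquote(value)))}"><br>'
form += '</form>'
print(form)
# <form id="hackForm" ...><input type="text" name="param1" value="value1"><br>
# <input type="text" name="param2" value="value2"><br></form>
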
src/metrics.py (new file, 32 lines)
@@ -0,0 +1,32 @@
import logging

from prometheus_client import Counter, Histogram, start_http_server
import time

REQUEST_COUNTER = Counter(
    name='flaresolverr_request',
    documentation='Total requests with result',
    labelnames=['domain', 'result']
)
REQUEST_DURATION = Histogram(
    name='flaresolverr_request_duration',
    documentation='Request duration in seconds',
    labelnames=['domain'],
    buckets=[0, 10, 25, 50]
)


def serve(port):
    start_http_server(port=port)
    while True:
        time.sleep(600)


def start_metrics_http_server(prometheus_port: int):
    logging.info(f"Serving Prometheus exporter on http://0.0.0.0:{prometheus_port}/metrics")
    from threading import Thread
    Thread(
        target=serve,
        kwargs=dict(port=prometheus_port),
        daemon=True,
    ).start()
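The module only defines the two collectors and the exporter thread; the call sites live elsewhere in the codebase. A minimal sketch of how a caller could record one request against these collectors (the domain, result label and port values are example assumptions):

# Sketch: recording one request against the collectors defined above.
import time
from metrics import REQUEST_COUNTER, REQUEST_DURATION, start_metrics_http_server

start_metrics_http_server(8192)  # exposes /metrics on an example port

start = time.time()
# ... solve the challenge for some domain ...
REQUEST_DURATION.labels(domain="example.com").observe(time.time() - start)
REQUEST_COUNTER.labels(domain="example.com", result="solved").inc()
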
@@ -1,133 +0,0 @@
|
||||
import {Page, HTTPResponse} from 'puppeteer'
|
||||
|
||||
import log from "../services/log";
|
||||
|
||||
/**
|
||||
* This class contains the logic to solve protections provided by CloudFlare
|
||||
**/
|
||||
|
||||
const BAN_SELECTORS: string[] = [];
|
||||
const CHALLENGE_SELECTORS: string[] = [
|
||||
// todo: deprecate '#trk_jschal_js', '#cf-please-wait'
|
||||
'#cf-challenge-running', '#trk_jschal_js', '#cf-please-wait', // CloudFlare
|
||||
'#link-ddg', // DDoS-GUARD
|
||||
'td.info #js_info' // Custom CloudFlare for EbookParadijs, Film-Paleis, MuziekFabriek and Puur-Hollands
|
||||
];
|
||||
const CAPTCHA_SELECTORS: string[] = [
|
||||
// todo: deprecate 'input[name="cf_captcha_kind"]'
|
||||
'#cf-challenge-hcaptcha-wrapper', '#cf-norobot-container', 'input[name="cf_captcha_kind"]'
|
||||
];
|
||||
|
||||
export default async function resolveChallenge(url: string, page: Page, response: HTTPResponse): Promise<HTTPResponse> {
|
||||
|
||||
// look for challenge and return fast if not detected
|
||||
let cfDetected = response.headers().server &&
|
||||
(response.headers().server.startsWith('cloudflare') || response.headers().server.startsWith('ddos-guard'));
|
||||
if (cfDetected) {
|
||||
if (response.status() == 403 || response.status() == 503) {
|
||||
cfDetected = true; // Defected CloudFlare and DDoS-GUARD
|
||||
} else if (response.headers().vary && response.headers().vary.trim() == 'Accept-Encoding,User-Agent' &&
|
||||
response.headers()['content-encoding'] && response.headers()['content-encoding'].trim() == 'br') {
|
||||
cfDetected = true; // Detected Custom CloudFlare for EbookParadijs, Film-Paleis, MuziekFabriek and Puur-Hollands
|
||||
} else {
|
||||
cfDetected = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (cfDetected) {
|
||||
log.info('Cloudflare detected');
|
||||
} else {
|
||||
log.info('Cloudflare not detected');
|
||||
return response;
|
||||
}
|
||||
|
||||
if (await findAnySelector(page, BAN_SELECTORS)) {
|
||||
throw new Error('Cloudflare has blocked this request. Probably your IP is banned for this site, check in your web browser.');
|
||||
}
|
||||
|
||||
// find Cloudflare selectors
|
||||
let selectorFound = false;
|
||||
let selector: string = await findAnySelector(page, CHALLENGE_SELECTORS)
|
||||
if (selector) {
|
||||
selectorFound = true;
|
||||
log.debug(`Javascript challenge element '${selector}' detected.`)
|
||||
log.debug('Waiting for Cloudflare challenge...')
|
||||
|
||||
while (true) {
|
||||
try {
|
||||
|
||||
selector = await findAnySelector(page, CHALLENGE_SELECTORS)
|
||||
if (!selector) {
|
||||
// solved!
|
||||
log.debug('Challenge element not found')
|
||||
break
|
||||
|
||||
} else {
|
||||
log.debug(`Javascript challenge element '${selector}' detected.`)
|
||||
|
||||
// check for CAPTCHA challenge
|
||||
if (await findAnySelector(page, CAPTCHA_SELECTORS)) {
|
||||
// captcha detected
|
||||
break
|
||||
}
|
||||
}
|
||||
log.debug('Found challenge element again')
|
||||
|
||||
} catch (error)
|
||||
{
|
||||
log.debug("Unexpected error: " + error);
|
||||
if (!error.toString().includes("Execution context was destroyed")) {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
log.debug('Waiting for Cloudflare challenge...')
|
||||
await page.waitForTimeout(1000)
|
||||
}
|
||||
|
||||
log.debug('Validating HTML code...')
|
||||
} else {
|
||||
log.debug(`No challenge element detected.`)
|
||||
}
|
||||
|
||||
// check for CAPTCHA challenge
|
||||
if (await findAnySelector(page, CAPTCHA_SELECTORS)) {
|
||||
log.info('CAPTCHA challenge detected');
|
||||
throw new Error('FlareSolverr can not resolve CAPTCHA challenges. Since the captcha doesn\'t always appear, you may have better luck with the next request.');
|
||||
|
||||
// const captchaSolver = getCaptchaSolver()
|
||||
// if (captchaSolver) {
|
||||
// // to-do: get the params
|
||||
// log.info('Waiting to receive captcha token to bypass challenge...')
|
||||
// const token = await captchaSolver({
|
||||
// url,
|
||||
// sitekey,
|
||||
// type: captchaType
|
||||
// })
|
||||
// log.debug(`Token received: ${token}`);
|
||||
// // to-do: send the token
|
||||
// }
|
||||
// } else {
|
||||
// throw new Error('Captcha detected but no automatic solver is configured.');
|
||||
// }
|
||||
} else {
|
||||
if (!selectorFound)
|
||||
{
|
||||
throw new Error('No challenge selectors found, unable to proceed.')
|
||||
} else {
|
||||
log.info('Challenge solved');
|
||||
}
|
||||
}
|
||||
|
||||
return response;
|
||||
}
|
||||
|
||||
async function findAnySelector(page: Page, selectors: string[]) {
|
||||
for (const selector of selectors) {
|
||||
const cfChallengeElem = await page.$(selector)
|
||||
if (cfChallengeElem) {
|
||||
return selector;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
@@ -1,63 +0,0 @@
|
||||
import log from './services/log'
|
||||
import {testWebBrowserInstallation} from "./services/sessions";
|
||||
|
||||
const app = require("./app");
|
||||
const version: string = 'v' + require('../package.json').version
|
||||
const serverPort: number = Number(process.env.PORT) || 8191
|
||||
const serverHost: string = process.env.HOST || '0.0.0.0'
|
||||
|
||||
function validateEnvironmentVariables() {
|
||||
// ip and port variables are validated by nodejs
|
||||
if (process.env.LOG_LEVEL && ['error', 'warn', 'info', 'verbose', 'debug'].indexOf(process.env.LOG_LEVEL) == -1) {
|
||||
log.error(`The environment variable 'LOG_LEVEL' is wrong. Check the documentation.`);
|
||||
process.exit(1);
|
||||
}
|
||||
if (process.env.LOG_HTML && ['true', 'false'].indexOf(process.env.LOG_HTML) == -1) {
|
||||
log.error(`The environment variable 'LOG_HTML' is wrong. Check the documentation.`);
|
||||
process.exit(1);
|
||||
}
|
||||
if (process.env.HEADLESS && ['true', 'false'].indexOf(process.env.HEADLESS) == -1) {
|
||||
log.error(`The environment variable 'HEADLESS' is wrong. Check the documentation.`);
|
||||
process.exit(1);
|
||||
}
|
||||
// todo: fix resolvers
|
||||
// try {
|
||||
// getCaptchaSolver();
|
||||
// } catch (e) {
|
||||
// log.error(`The environment variable 'CAPTCHA_SOLVER' is wrong. ${e.message}`);
|
||||
// process.exit(1);
|
||||
// }
|
||||
}
|
||||
|
||||
// Init
|
||||
log.info(`FlareSolverr ${version}`);
|
||||
log.debug('Debug log enabled');
|
||||
|
||||
process.on('SIGTERM', () => {
|
||||
// Capture signal on Docker Stop #158
|
||||
log.info("Process interrupted")
|
||||
process.exit(0)
|
||||
})
|
||||
|
||||
process.on('uncaughtException', function(err) {
|
||||
// Avoid crashing in NodeJS 17 due to UnhandledPromiseRejectionWarning: Unhandled promise rejection.
|
||||
log.error(err)
|
||||
})
|
||||
|
||||
validateEnvironmentVariables();
|
||||
|
||||
testWebBrowserInstallation().then(() => {
|
||||
// Start server
|
||||
app.listen(serverPort, serverHost, () => {
|
||||
log.info(`Listening on http://${serverHost}:${serverPort}`);
|
||||
})
|
||||
}).catch(function(e) {
|
||||
log.error(e);
|
||||
const msg: string = "" + e;
|
||||
if (msg.includes('while trying to connect to the browser!')) {
|
||||
log.error(`It seems that the system is too slow to run FlareSolverr.
|
||||
If you are running with Docker, try to remove CPU limits in the container.
|
||||
If not, try setting the 'BROWSER_TIMEOUT' environment variable and the 'maxTimeout' parameter to higher values.`);
|
||||
}
|
||||
process.exit(1);
|
||||
})
|
||||
@@ -1,41 +0,0 @@
|
||||
let requests = 0
|
||||
|
||||
const LOG_HTML: boolean = process.env.LOG_HTML == 'true';
|
||||
|
||||
function toIsoString(date: Date) {
|
||||
// this function fixes Date.toISOString() adding timezone
|
||||
let tzo = -date.getTimezoneOffset(),
|
||||
dif = tzo >= 0 ? '+' : '-',
|
||||
pad = function(num: number) {
|
||||
let norm = Math.floor(Math.abs(num));
|
||||
return (norm < 10 ? '0' : '') + norm;
|
||||
};
|
||||
|
||||
return date.getFullYear() +
|
||||
'-' + pad(date.getMonth() + 1) +
|
||||
'-' + pad(date.getDate()) +
|
||||
'T' + pad(date.getHours()) +
|
||||
':' + pad(date.getMinutes()) +
|
||||
':' + pad(date.getSeconds()) +
|
||||
dif + pad(tzo / 60) +
|
||||
':' + pad(tzo % 60);
|
||||
}
|
||||
|
||||
export default {
|
||||
incRequests: () => {
|
||||
requests++
|
||||
},
|
||||
html(html: string) {
|
||||
if (LOG_HTML) {
|
||||
this.debug(html)
|
||||
}
|
||||
},
|
||||
...require('console-log-level')(
|
||||
{level: process.env.LOG_LEVEL || 'info',
|
||||
prefix(level: string) {
|
||||
const req = (requests > 0) ? ` REQ-${requests}` : '';
|
||||
return `${toIsoString(new Date())} ${level.toUpperCase()}${req}`
|
||||
}
|
||||
}
|
||||
)
|
||||
}
|
||||
@@ -1,194 +0,0 @@
|
||||
import {v1 as UUIDv1} from 'uuid'
|
||||
import * as path from 'path'
|
||||
import {Browser} from 'puppeteer'
|
||||
import {Protocol} from "devtools-protocol";
|
||||
|
||||
import log from './log'
|
||||
import {Proxy} from "../controllers/v1";
|
||||
|
||||
const os = require('os');
|
||||
const fs = require('fs');
|
||||
const puppeteer = require('puppeteer');
|
||||
|
||||
export interface SessionsCacheItem {
|
||||
sessionId: string
|
||||
browser: Browser
|
||||
}
|
||||
|
||||
interface SessionsCache {
|
||||
[key: string]: SessionsCacheItem
|
||||
}
|
||||
|
||||
export interface SessionCreateOptions {
|
||||
oneTimeSession: boolean
|
||||
cookies?: Protocol.Network.CookieParam[],
|
||||
maxTimeout?: number
|
||||
proxy?: Proxy
|
||||
}
|
||||
|
||||
const sessionCache: SessionsCache = {}
|
||||
let webBrowserUserAgent: string;
|
||||
|
||||
function buildExtraPrefsFirefox(proxy: Proxy): object {
|
||||
// Default configurations are defined here
|
||||
// https://github.com/puppeteer/puppeteer/blob/v3.3.0/src/Launcher.ts#L481
|
||||
const extraPrefsFirefox = {
|
||||
// Disable newtabpage
|
||||
"browser.newtabpage.enabled": false,
|
||||
"browser.startup.homepage": "about:blank",
|
||||
|
||||
// Do not warn when closing all open tabs
|
||||
"browser.tabs.warnOnClose": false,
|
||||
|
||||
// Disable telemetry
|
||||
"toolkit.telemetry.reportingpolicy.firstRun": false,
|
||||
|
||||
// Disable first-run welcome page
|
||||
"startup.homepage_welcome_url": "about:blank",
|
||||
"startup.homepage_welcome_url.additional": "",
|
||||
|
||||
// Detected !
|
||||
// // Disable images to speed up load
|
||||
// "permissions.default.image": 2,
|
||||
|
||||
// Limit content processes to 1
|
||||
"dom.ipc.processCount": 1
|
||||
}
|
||||
|
||||
// proxy.url format => http://<host>:<port>
|
||||
if (proxy && proxy.url) {
|
||||
log.debug(`Using proxy: ${proxy.url}`)
|
||||
const [host, portStr] = proxy.url.replace(/.+:\/\//g, '').split(':');
|
||||
const port = parseInt(portStr);
|
||||
if (!host || !portStr || !port) {
|
||||
throw new Error("Proxy configuration is invalid! Use the format: protocol://ip:port")
|
||||
}
|
||||
|
||||
const proxyPrefs = {
|
||||
"network.proxy.type": 1,
|
||||
"network.proxy.share_proxy_settings": true
|
||||
}
|
||||
if (proxy.url.indexOf("socks") != -1) {
|
||||
// SOCKSv4 & SOCKSv5
|
||||
Object.assign(proxyPrefs, {
|
||||
"network.proxy.socks": host,
|
||||
"network.proxy.socks_port": port,
|
||||
"network.proxy.socks_remote_dns": true
|
||||
});
|
||||
if (proxy.url.indexOf("socks4") != -1) {
|
||||
Object.assign(proxyPrefs, {
|
||||
"network.proxy.socks_version": 4
|
||||
});
|
||||
} else {
|
||||
Object.assign(proxyPrefs, {
|
||||
"network.proxy.socks_version": 5
|
||||
});
|
||||
}
|
||||
} else {
|
||||
// HTTP
|
||||
Object.assign(proxyPrefs, {
|
||||
"network.proxy.ftp": host,
|
||||
"network.proxy.ftp_port": port,
|
||||
"network.proxy.http": host,
|
||||
"network.proxy.http_port": port,
|
||||
"network.proxy.ssl": host,
|
||||
"network.proxy.ssl_port": port
|
||||
});
|
||||
}
|
||||
|
||||
// merge objects
|
||||
Object.assign(extraPrefsFirefox, proxyPrefs);
|
||||
}
|
||||
|
||||
return extraPrefsFirefox;
|
||||
}
|
||||
|
||||
export function getUserAgent() {
|
||||
return webBrowserUserAgent
|
||||
}
|
||||
|
||||
export async function testWebBrowserInstallation(): Promise<void> {
|
||||
log.info("Testing web browser installation...")
|
||||
|
||||
// check user home dir. this dir will be used by Firefox
|
||||
const homeDir = os.homedir();
|
||||
fs.accessSync(homeDir, fs.constants.F_OK | fs.constants.R_OK | fs.constants.W_OK | fs.constants.X_OK);
|
||||
log.debug("FlareSolverr user home directory is OK: " + homeDir)
|
||||
|
||||
// test web browser
|
||||
const testUrl = process.env.TEST_URL || "https://www.google.com";
|
||||
log.debug("Test URL: " + testUrl)
|
||||
const session = await create(null, {
|
||||
oneTimeSession: true
|
||||
})
|
||||
const page = await session.browser.newPage()
|
||||
const pageTimeout = Number(process.env.BROWSER_TIMEOUT) || 40000
|
||||
await page.goto(testUrl, {waitUntil: 'domcontentloaded', timeout: pageTimeout})
|
||||
webBrowserUserAgent = await page.evaluate(() => navigator.userAgent)
|
||||
|
||||
// replace Linux ARM user-agent because it's detected
|
||||
if (["arm", "aarch64"].some(arch => webBrowserUserAgent.toLocaleLowerCase().includes('linux ' + arch))) {
|
||||
webBrowserUserAgent = webBrowserUserAgent.replace(/linux \w+;/i, 'Linux x86_64;')
|
||||
}
|
||||
|
||||
log.info("FlareSolverr User-Agent: " + webBrowserUserAgent)
|
||||
await page.close()
|
||||
await destroy(session.sessionId)
|
||||
|
||||
log.info("Test successful")
|
||||
}
|
||||
|
||||
export async function create(session: string, options: SessionCreateOptions): Promise<SessionsCacheItem> {
|
||||
log.debug('Creating new session...')
|
||||
|
||||
const sessionId = session || UUIDv1()
|
||||
|
||||
// NOTE: cookies can't be set in the session, you need to open the page first
|
||||
|
||||
const puppeteerOptions: any = {
|
||||
product: 'firefox',
|
||||
headless: process.env.HEADLESS !== 'false',
|
||||
timeout: Number(process.env.BROWSER_TIMEOUT) || 40000
|
||||
}
|
||||
|
||||
puppeteerOptions.extraPrefsFirefox = buildExtraPrefsFirefox(options.proxy)
|
||||
|
||||
// if we are running inside executable binary, change browser path
|
||||
if (typeof (process as any).pkg !== 'undefined') {
|
||||
const exe = process.platform === "win32" ? 'firefox.exe' : 'firefox';
|
||||
puppeteerOptions.executablePath = path.join(path.dirname(process.execPath), 'firefox', exe)
|
||||
}
|
||||
|
||||
log.debug('Launching web browser...')
|
||||
let browser: Browser = await puppeteer.launch(puppeteerOptions)
|
||||
if (!browser) {
|
||||
throw Error(`Failed to launch web browser.`)
|
||||
}
|
||||
|
||||
sessionCache[sessionId] = {
|
||||
sessionId: sessionId,
|
||||
browser: browser
|
||||
}
|
||||
|
||||
return sessionCache[sessionId]
|
||||
}
|
||||
|
||||
export function list(): string[] {
|
||||
return Object.keys(sessionCache)
|
||||
}
|
||||
|
||||
export async function destroy(id: string): Promise<boolean>{
|
||||
if (id && sessionCache.hasOwnProperty(id)) {
|
||||
const { browser } = sessionCache[id]
|
||||
if (browser) {
|
||||
await browser.close()
|
||||
delete sessionCache[id]
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
export function get(id: string): SessionsCacheItem {
|
||||
return sessionCache[id]
|
||||
}
|
||||
@@ -1,206 +0,0 @@
|
||||
import {Page, HTTPResponse} from 'puppeteer'
|
||||
const Timeout = require('await-timeout');
|
||||
|
||||
import log from './log'
|
||||
import {SessionCreateOptions, SessionsCacheItem} from "./sessions";
|
||||
import {V1Request} from "../controllers/v1";
|
||||
import cloudflareProvider from '../providers/cloudflare';
|
||||
|
||||
const sessions = require('./sessions')
|
||||
|
||||
export interface ChallengeResolutionResultT {
|
||||
url: string
|
||||
status: number,
|
||||
headers?: Record<string, string>,
|
||||
response: string,
|
||||
cookies: object[]
|
||||
userAgent: string
|
||||
}
|
||||
|
||||
export interface ChallengeResolutionT {
|
||||
status?: string
|
||||
message: string
|
||||
result: ChallengeResolutionResultT
|
||||
}
|
||||
|
||||
async function resolveChallengeWithTimeout(params: V1Request, session: SessionsCacheItem) {
|
||||
const timer = new Timeout();
|
||||
try {
|
||||
const promise = resolveChallenge(params, session);
|
||||
return await Promise.race([
|
||||
promise,
|
||||
timer.set(params.maxTimeout, `Maximum timeout reached. maxTimeout=${params.maxTimeout} (ms)`)
|
||||
]);
|
||||
} finally {
|
||||
timer.clear();
|
||||
}
|
||||
}
|
||||
|
||||
async function resolveChallenge(params: V1Request, session: SessionsCacheItem): Promise<ChallengeResolutionT | void> {
|
||||
try {
|
||||
let status = 'ok'
|
||||
let message = ''
|
||||
|
||||
const page: Page = await session.browser.newPage()
|
||||
|
||||
// the Puppeter timeout should be half the maxTimeout because we reload the page and wait for challenge
|
||||
// the user can set a really high maxTimeout if he wants to
|
||||
await page.setDefaultNavigationTimeout(params.maxTimeout / 2)
|
||||
|
||||
// the user-agent is changed just for linux arm build
|
||||
await page.setUserAgent(sessions.getUserAgent())
|
||||
|
||||
// set the proxy
|
||||
if (params.proxy) {
|
||||
log.debug(`Using proxy: ${params.proxy.url}`);
|
||||
// todo: credentials are not working
|
||||
// if (params.proxy.username) {
|
||||
// await page.authenticate({
|
||||
// username: params.proxy.username,
|
||||
// password: params.proxy.password
|
||||
// });
|
||||
// }
|
||||
}
|
||||
|
||||
// go to the page
|
||||
log.debug(`Navigating to... ${params.url}`)
|
||||
let response: HTTPResponse = await gotoPage(params, page);
|
||||
|
||||
// set cookies
|
||||
if (params.cookies) {
|
||||
for (const cookie of params.cookies) {
|
||||
// the other fields in the cookie can cause issues
|
||||
await page.setCookie({
|
||||
"name": cookie.name,
|
||||
"value": cookie.value
|
||||
})
|
||||
}
|
||||
// reload the page
|
||||
response = await gotoPage(params, page);
|
||||
}
|
||||
|
||||
// log html in debug mode
|
||||
log.html(await page.content())
|
||||
|
||||
// detect protection services and solve challenges
|
||||
try {
|
||||
response = await cloudflareProvider(params.url, page, response);
|
||||
|
||||
// is response is ok
|
||||
// reload the page to be sure we get the real page
|
||||
log.debug("Reloading the page")
|
||||
try {
|
||||
response = await gotoPage(params, page, params.method);
|
||||
} catch (e) {
|
||||
log.warn("Page not reloaded (do not report!): Cause: " + e.toString())
|
||||
}
|
||||
|
||||
} catch (e) {
|
||||
status = "error";
|
||||
message = "Cloudflare " + e.toString();
|
||||
}
|
||||
|
||||
const payload: ChallengeResolutionT = {
|
||||
status,
|
||||
message,
|
||||
result: {
|
||||
url: page.url(),
|
||||
status: response.status(),
|
||||
headers: response.headers(),
|
||||
response: null,
|
||||
cookies: await page.cookies(),
|
||||
userAgent: sessions.getUserAgent()
|
||||
}
|
||||
}
|
||||
|
||||
if (params.returnOnlyCookies) {
|
||||
payload.result.headers = null;
|
||||
payload.result.userAgent = null;
|
||||
} else {
|
||||
payload.result.response = await page.content()
|
||||
}
|
||||
|
||||
// make sure the page is closed because if it isn't and error will be thrown
|
||||
// when a user uses a temporary session, the browser make be quit before
|
||||
// the page is properly closed.
|
||||
await page.close()
|
||||
|
||||
return payload
|
||||
} catch (e) {
|
||||
log.error("Unexpected error: " + e);
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
async function gotoPage(params: V1Request, page: Page, method: string = 'GET'): Promise<HTTPResponse> {
|
||||
let pageTimeout = params.maxTimeout / 3;
|
||||
let response: HTTPResponse
|
||||
|
||||
try {
|
||||
response = await page.goto(params.url, {waitUntil: 'domcontentloaded', timeout: pageTimeout});
|
||||
} catch (e) {
|
||||
// retry
|
||||
response = await page.goto(params.url, {waitUntil: 'domcontentloaded', timeout: pageTimeout});
|
||||
}
|
||||
|
||||
if (method == 'POST') {
|
||||
// post hack, it only works with utf-8 encoding
|
||||
|
||||
let postForm = `<form id="hackForm" action="${params.url}" method="POST">`;
|
||||
let queryString = params.postData;
|
||||
let pairs = (queryString[0] === '?' ? queryString.substr(1) : queryString).split('&');
|
||||
for (let i = 0; i < pairs.length; i++) {
|
||||
let pair = pairs[i].split('=');
|
||||
let name; try { name = decodeURIComponent(pair[0]) } catch { name = pair[0] }
|
||||
if (name == 'submit') continue;
|
||||
let value; try { value = decodeURIComponent(pair[1] || '') } catch { value = pair[1] || '' }
|
||||
postForm += `<input type="text" name="${name}" value="${value}"><br>`;
|
||||
}
|
||||
postForm += `</form>`;
|
||||
|
||||
await page.setContent(`
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<body>
|
||||
${postForm}
|
||||
<script>document.getElementById('hackForm').submit();</script>
|
||||
</body>
|
||||
</html>`
|
||||
);
|
||||
await page.waitForTimeout(2000)
|
||||
try {
|
||||
await page.waitForNavigation({waitUntil: 'domcontentloaded', timeout: 2000})
|
||||
} catch (e) {}
|
||||
|
||||
}
|
||||
return response
|
||||
}
|
||||
|
||||
export async function browserRequest(params: V1Request): Promise<ChallengeResolutionT> {
|
||||
const oneTimeSession = params.session === undefined;
|
||||
|
||||
const options: SessionCreateOptions = {
|
||||
oneTimeSession: oneTimeSession,
|
||||
cookies: params.cookies,
|
||||
maxTimeout: params.maxTimeout,
|
||||
proxy: params.proxy
|
||||
}
|
||||
|
||||
const session: SessionsCacheItem = oneTimeSession
|
||||
? await sessions.create(null, options)
|
||||
: sessions.get(params.session)
|
||||
|
||||
if (!session) {
|
||||
throw Error('This session does not exist. Use \'list_sessions\' to see all the existing sessions.')
|
||||
}
|
||||
|
||||
try {
|
||||
return await resolveChallengeWithTimeout(params, session)
|
||||
} catch (error) {
|
||||
throw Error("Unable to process browser request. " + error)
|
||||
} finally {
|
||||
if (oneTimeSession) {
|
||||
await sessions.destroy(session.sessionId)
|
||||
}
|
||||
}
|
||||
}
|
||||
src/sessions.py (new file, 84 lines)
@@ -0,0 +1,84 @@
import logging
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import Optional, Tuple
from uuid import uuid1

from selenium.webdriver.chrome.webdriver import WebDriver

import utils


@dataclass
class Session:
    session_id: str
    driver: WebDriver
    created_at: datetime

    def lifetime(self) -> timedelta:
        return datetime.now() - self.created_at


class SessionsStorage:
    """SessionsStorage creates, stores and processes all the sessions"""

    def __init__(self):
        self.sessions = {}

    def create(self, session_id: Optional[str] = None, proxy: Optional[dict] = None,
               force_new: Optional[bool] = False) -> Tuple[Session, bool]:
        """create creates a new instance of WebDriver if necessary,
        assigns the given (or newly generated) session_id to the instance
        and returns the session object. If a new session has been created,
        the second argument is set to True.

        Note: the function is idempotent, so if session_id already exists
        in the storage a new instance of WebDriver won't be created and the
        existing session will be returned. The second argument indicates whether
        a new session was created (True) or an existing one was reused (False).
        """
        session_id = session_id or str(uuid1())

        if force_new:
            self.destroy(session_id)

        if self.exists(session_id):
            return self.sessions[session_id], False

        driver = utils.get_webdriver(proxy)
        created_at = datetime.now()
        session = Session(session_id, driver, created_at)

        self.sessions[session_id] = session

        return session, True

    def exists(self, session_id: str) -> bool:
        return session_id in self.sessions

    def destroy(self, session_id: str) -> bool:
        """destroy closes the driver instance and removes the session from the storage.
        The function is a no-op if session_id doesn't exist.
        The function returns True if the session was found and destroyed,
        and False if session_id wasn't found.
        """
        if not self.exists(session_id):
            return False

        session = self.sessions.pop(session_id)
        if utils.PLATFORM_VERSION == "nt":
            session.driver.close()
        session.driver.quit()
        return True

    def get(self, session_id: str, ttl: Optional[timedelta] = None) -> Tuple[Session, bool]:
        session, fresh = self.create(session_id)

        if ttl is not None and not fresh and session.lifetime() > ttl:
            logging.debug(f'session\'s lifetime has expired, so the session is recreated (session_id={session_id})')
            session, fresh = self.create(session_id, force_new=True)

        return session, fresh

    def session_ids(self) -> list[str]:
        return list(self.sessions.keys())
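A minimal usage sketch, not part of this diff, of the storage class above. Driver creation goes through utils.get_webdriver, so a working Chrome setup is assumed; the session id and TTL are example values.

# Sketch: driving SessionsStorage directly.
from datetime import timedelta
from sessions import SessionsStorage

storage = SessionsStorage()
session, fresh = storage.create(session_id="my-session")               # fresh == True on first call
session, fresh = storage.get("my-session", ttl=timedelta(minutes=30))  # reused, fresh == False
print(storage.session_ids())    # ['my-session']
storage.destroy("my-session")   # quits the webdriver and returns True
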
src/tests.py (new file, 655 lines)
@@ -0,0 +1,655 @@
|
||||
import unittest
|
||||
from typing import Optional
|
||||
|
||||
from webtest import TestApp
|
||||
|
||||
from dtos import IndexResponse, HealthResponse, V1ResponseBase, STATUS_OK, STATUS_ERROR
|
||||
import flaresolverr
|
||||
import utils
|
||||
|
||||
|
||||
def _find_obj_by_key(key: str, value: str, _list: list) -> Optional[dict]:
|
||||
for obj in _list:
|
||||
if obj[key] == value:
|
||||
return obj
|
||||
return None
|
||||
|
||||
|
||||
class TestFlareSolverr(unittest.TestCase):
|
||||
|
||||
proxy_url = "http://127.0.0.1:8888"
|
||||
proxy_socks_url = "socks5://127.0.0.1:1080"
|
||||
google_url = "https://www.google.com"
|
||||
post_url = "https://httpbin.org/post"
|
||||
cloudflare_url = "https://nowsecure.nl/"
|
||||
cloudflare_url_2 = "https://idope.se/torrent-list/harry/"
|
||||
ddos_guard_url = "https://www.litres.ru/"
|
||||
fairlane_url = "https://www.pararius.com/apartments/amsterdam"
|
||||
custom_cloudflare_url = "https://www.muziekfabriek.org/"
|
||||
cloudflare_blocked_url = "https://cpasbiens3.fr/index.php?do=search&subaction=search"
|
||||
|
||||
app = TestApp(flaresolverr.app)
|
||||
# wait until the server is ready
|
||||
app.get('/')
|
||||
|
||||
def test_wrong_endpoint(self):
|
||||
res = self.app.get('/wrong', status=404)
|
||||
self.assertEqual(res.status_code, 404)
|
||||
|
||||
body = res.json
|
||||
self.assertEqual("Not found: '/wrong'", body['error'])
|
||||
self.assertEqual(404, body['status_code'])
|
||||
|
||||
def test_index_endpoint(self):
|
||||
res = self.app.get('/')
|
||||
self.assertEqual(res.status_code, 200)
|
||||
|
||||
body = IndexResponse(res.json)
|
||||
self.assertEqual("FlareSolverr is ready!", body.msg)
|
||||
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
||||
self.assertIn("Chrome/", body.userAgent)
|
||||
|
||||
def test_health_endpoint(self):
|
||||
res = self.app.get('/health')
|
||||
self.assertEqual(res.status_code, 200)
|
||||
|
||||
body = HealthResponse(res.json)
|
||||
self.assertEqual(STATUS_OK, body.status)
|
||||
|
||||
def test_v1_endpoint_wrong_cmd(self):
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "request.bad",
|
||||
"url": self.google_url
|
||||
}, status=500)
|
||||
self.assertEqual(res.status_code, 500)
|
||||
|
||||
body = V1ResponseBase(res.json)
|
||||
self.assertEqual(STATUS_ERROR, body.status)
|
||||
self.assertEqual("Error: Request parameter 'cmd' = 'request.bad' is invalid.", body.message)
|
||||
self.assertGreater(body.startTimestamp, 10000)
|
||||
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
||||
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
||||
|
||||
def test_v1_endpoint_request_get_no_cloudflare(self):
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "request.get",
|
||||
"url": self.google_url
|
||||
})
|
||||
self.assertEqual(res.status_code, 200)
|
||||
|
||||
body = V1ResponseBase(res.json)
|
||||
self.assertEqual(STATUS_OK, body.status)
|
||||
self.assertEqual("Challenge not detected!", body.message)
|
||||
self.assertGreater(body.startTimestamp, 10000)
|
||||
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
||||
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
||||
|
||||
solution = body.solution
|
||||
self.assertIn(self.google_url, solution.url)
|
||||
self.assertEqual(solution.status, 200)
|
||||
self.assertIs(len(solution.headers), 0)
|
||||
self.assertIn("<title>Google</title>", solution.response)
|
||||
self.assertGreater(len(solution.cookies), 0)
|
||||
self.assertIn("Chrome/", solution.userAgent)
|
||||
|
||||
def test_v1_endpoint_request_get_disable_resources(self):
|
||||
res = self.app.post_json("/v1", {
|
||||
"cmd": "request.get",
|
||||
"url": self.google_url,
|
||||
"disableMedia": True
|
||||
})
|
||||
self.assertEqual(res.status_code, 200)
|
||||
|
||||
body = V1ResponseBase(res.json)
|
||||
self.assertEqual(STATUS_OK, body.status)
|
||||
self.assertEqual("Challenge not detected!", body.message)
|
||||
self.assertGreater(body.startTimestamp, 10000)
|
||||
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
||||
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
||||
|
||||
solution = body.solution
|
||||
self.assertIn(self.google_url, solution.url)
|
||||
self.assertEqual(solution.status, 200)
|
||||
self.assertIs(len(solution.headers), 0)
|
||||
self.assertIn("<title>Google</title>", solution.response)
|
||||
self.assertGreater(len(solution.cookies), 0)
|
||||
self.assertIn("Chrome/", solution.userAgent)
|
||||
|
||||
def test_v1_endpoint_request_get_cloudflare_js_1(self):
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "request.get",
|
||||
"url": self.cloudflare_url
|
||||
})
|
||||
self.assertEqual(res.status_code, 200)
|
||||
|
||||
body = V1ResponseBase(res.json)
|
||||
self.assertEqual(STATUS_OK, body.status)
|
||||
self.assertEqual("Challenge solved!", body.message)
|
||||
self.assertGreater(body.startTimestamp, 10000)
|
||||
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
||||
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
||||
|
||||
solution = body.solution
|
||||
self.assertIn(self.cloudflare_url, solution.url)
|
||||
self.assertEqual(solution.status, 200)
|
||||
self.assertIs(len(solution.headers), 0)
|
||||
self.assertIn("<title>nowSecure</title>", solution.response)
|
||||
self.assertGreater(len(solution.cookies), 0)
|
||||
self.assertIn("Chrome/", solution.userAgent)
|
||||
|
||||
cf_cookie = _find_obj_by_key("name", "cf_clearance", solution.cookies)
|
||||
self.assertIsNotNone(cf_cookie, "Cloudflare cookie not found")
|
||||
self.assertGreater(len(cf_cookie["value"]), 30)
|
||||
|
||||
def test_v1_endpoint_request_get_cloudflare_js_2(self):
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "request.get",
|
||||
"url": self.cloudflare_url_2
|
||||
})
|
||||
self.assertEqual(res.status_code, 200)
|
||||
|
||||
body = V1ResponseBase(res.json)
|
||||
self.assertEqual(STATUS_OK, body.status)
|
||||
self.assertEqual("Challenge solved!", body.message)
|
||||
self.assertGreater(body.startTimestamp, 10000)
|
||||
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
||||
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
||||
|
||||
solution = body.solution
|
||||
self.assertIn(self.cloudflare_url_2, solution.url)
|
||||
self.assertEqual(solution.status, 200)
|
||||
self.assertIs(len(solution.headers), 0)
|
||||
self.assertIn("<title>harry - idope torrent search</title>", solution.response)
|
||||
self.assertGreater(len(solution.cookies), 0)
|
||||
self.assertIn("Chrome/", solution.userAgent)
|
||||
|
||||
cf_cookie = _find_obj_by_key("name", "cf_clearance", solution.cookies)
|
||||
self.assertIsNotNone(cf_cookie, "Cloudflare cookie not found")
|
||||
self.assertGreater(len(cf_cookie["value"]), 30)
|
||||
|
||||
def test_v1_endpoint_request_get_ddos_guard_js(self):
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "request.get",
|
||||
"url": self.ddos_guard_url
|
||||
})
|
||||
self.assertEqual(res.status_code, 200)
|
||||
|
||||
body = V1ResponseBase(res.json)
|
||||
self.assertEqual(STATUS_OK, body.status)
|
||||
self.assertEqual("Challenge solved!", body.message)
|
||||
self.assertGreater(body.startTimestamp, 10000)
|
||||
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
||||
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
||||
|
||||
solution = body.solution
|
||||
self.assertIn(self.ddos_guard_url, solution.url)
|
||||
self.assertEqual(solution.status, 200)
|
||||
self.assertIs(len(solution.headers), 0)
|
||||
self.assertIn("<title>Литрес", solution.response)
|
||||
self.assertGreater(len(solution.cookies), 0)
|
||||
self.assertIn("Chrome/", solution.userAgent)
|
||||
|
||||
cf_cookie = _find_obj_by_key("name", "__ddg1_", solution.cookies)
|
||||
self.assertIsNotNone(cf_cookie, "DDOS-Guard cookie not found")
|
||||
self.assertGreater(len(cf_cookie["value"]), 10)
|
||||
|
||||
def test_v1_endpoint_request_get_fairlane_js(self):
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "request.get",
|
||||
"url": self.fairlane_url
|
||||
})
|
||||
self.assertEqual(res.status_code, 200)
|
||||
|
||||
body = V1ResponseBase(res.json)
|
||||
self.assertEqual(STATUS_OK, body.status)
|
||||
self.assertEqual("Challenge solved!", body.message)
|
||||
self.assertGreater(body.startTimestamp, 10000)
|
||||
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
||||
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
||||
|
||||
solution = body.solution
|
||||
self.assertIn(self.fairlane_url, solution.url)
|
||||
self.assertEqual(solution.status, 200)
|
||||
self.assertIs(len(solution.headers), 0)
|
||||
self.assertIn("<title>Rental Apartments Amsterdam</title>", solution.response)
|
||||
self.assertGreater(len(solution.cookies), 0)
|
||||
self.assertIn("Chrome/", solution.userAgent)
|
||||
|
||||
cf_cookie = _find_obj_by_key("name", "fl_pass_v2_b", solution.cookies)
|
||||
self.assertIsNotNone(cf_cookie, "Fairlane cookie not found")
|
||||
self.assertGreater(len(cf_cookie["value"]), 50)
|
||||
|
||||
def test_v1_endpoint_request_get_custom_cloudflare_js(self):
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "request.get",
|
||||
"url": self.custom_cloudflare_url
|
||||
})
|
||||
self.assertEqual(res.status_code, 200)
|
||||
|
||||
body = V1ResponseBase(res.json)
|
||||
self.assertEqual(STATUS_OK, body.status)
|
||||
self.assertEqual("Challenge solved!", body.message)
|
||||
self.assertGreater(body.startTimestamp, 10000)
|
||||
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
||||
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
||||
|
||||
solution = body.solution
|
||||
self.assertIn(self.custom_cloudflare_url, solution.url)
|
||||
self.assertEqual(solution.status, 200)
|
||||
self.assertIs(len(solution.headers), 0)
|
||||
self.assertIn("<title>MuziekFabriek : Aanmelden</title>", solution.response)
|
||||
self.assertGreater(len(solution.cookies), 0)
|
||||
self.assertIn("Chrome/", solution.userAgent)
|
||||
|
||||
cf_cookie = _find_obj_by_key("name", "ct_anti_ddos_key", solution.cookies)
|
||||
self.assertIsNotNone(cf_cookie, "Custom Cloudflare cookie not found")
|
||||
self.assertGreater(len(cf_cookie["value"]), 10)
|
||||
|
||||
# todo: test Cmd 'request.get' should return fail with Cloudflare CAPTCHA
|
||||
|
||||
def test_v1_endpoint_request_get_cloudflare_blocked(self):
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "request.get",
|
||||
"url": self.cloudflare_blocked_url
|
||||
}, status=500)
|
||||
self.assertEqual(res.status_code, 500)
|
||||
|
||||
body = V1ResponseBase(res.json)
|
||||
self.assertEqual(STATUS_ERROR, body.status)
|
||||
self.assertEqual("Error: Error solving the challenge. Cloudflare has blocked this request. "
|
||||
"Probably your IP is banned for this site, check in your web browser.", body.message)
|
||||
self.assertGreater(body.startTimestamp, 10000)
|
||||
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
||||
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
||||
|
||||
def test_v1_endpoint_request_get_cookies_param(self):
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "request.get",
|
||||
"url": self.google_url,
|
||||
"cookies": [
|
||||
{
|
||||
"name": "testcookie1",
|
||||
"value": "testvalue1"
|
||||
},
|
||||
{
|
||||
"name": "testcookie2",
|
||||
"value": "testvalue2"
|
||||
}
|
||||
]
|
||||
})
|
||||
self.assertEqual(res.status_code, 200)
|
||||
|
||||
body = V1ResponseBase(res.json)
|
||||
self.assertEqual(STATUS_OK, body.status)
|
||||
self.assertEqual("Challenge not detected!", body.message)
|
||||
self.assertGreater(body.startTimestamp, 10000)
|
||||
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
||||
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
||||
|
||||
solution = body.solution
|
||||
self.assertIn(self.google_url, solution.url)
|
||||
self.assertEqual(solution.status, 200)
|
||||
self.assertIs(len(solution.headers), 0)
|
||||
self.assertIn("<title>Google</title>", solution.response)
|
||||
self.assertGreater(len(solution.cookies), 1)
|
||||
self.assertIn("Chrome/", solution.userAgent)
|
||||
|
||||
user_cookie1 = _find_obj_by_key("name", "testcookie1", solution.cookies)
|
||||
self.assertIsNotNone(user_cookie1, "User cookie 1 not found")
|
||||
self.assertEqual("testvalue1", user_cookie1["value"])
|
||||
|
||||
user_cookie2 = _find_obj_by_key("name", "testcookie2", solution.cookies)
|
||||
self.assertIsNotNone(user_cookie2, "User cookie 2 not found")
|
||||
self.assertEqual("testvalue2", user_cookie2["value"])
|
||||
|
||||
def test_v1_endpoint_request_get_returnOnlyCookies_param(self):
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "request.get",
|
||||
"url": self.google_url,
|
||||
"returnOnlyCookies": True
|
||||
})
|
||||
self.assertEqual(res.status_code, 200)
|
||||
|
||||
body = V1ResponseBase(res.json)
|
||||
self.assertEqual(STATUS_OK, body.status)
|
||||
self.assertEqual("Challenge not detected!", body.message)
|
||||
self.assertGreater(body.startTimestamp, 10000)
|
||||
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
||||
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
||||
|
||||
solution = body.solution
|
||||
self.assertIn(self.google_url, solution.url)
|
||||
self.assertEqual(solution.status, 200)
|
||||
self.assertIsNone(solution.headers)
|
||||
self.assertIsNone(solution.response)
|
||||
self.assertGreater(len(solution.cookies), 0)
|
||||
self.assertIn("Chrome/", solution.userAgent)
|
||||
|
||||
def test_v1_endpoint_request_get_proxy_http_param(self):
|
||||
"""
|
||||
To configure TinyProxy in local:
|
||||
* sudo vim /etc/tinyproxy/tinyproxy.conf
|
||||
* edit => LogFile "/tmp/tinyproxy.log"
|
||||
* edit => Syslog Off
|
||||
* sudo tinyproxy -d
|
||||
* sudo tail -f /tmp/tinyproxy.log
|
||||
"""
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "request.get",
|
||||
"url": self.google_url,
|
||||
"proxy": {
|
||||
"url": self.proxy_url
|
||||
}
|
||||
})
|
||||
self.assertEqual(res.status_code, 200)
|
||||
|
||||
body = V1ResponseBase(res.json)
|
||||
self.assertEqual(STATUS_OK, body.status)
|
||||
self.assertEqual("Challenge not detected!", body.message)
|
||||
self.assertGreater(body.startTimestamp, 10000)
|
||||
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
||||
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
||||
|
||||
solution = body.solution
|
||||
self.assertIn(self.google_url, solution.url)
|
||||
self.assertEqual(solution.status, 200)
|
||||
self.assertIs(len(solution.headers), 0)
|
||||
self.assertIn("<title>Google</title>", solution.response)
|
||||
self.assertGreater(len(solution.cookies), 0)
|
||||
self.assertIn("Chrome/", solution.userAgent)
|
||||
|
||||
def test_v1_endpoint_request_get_proxy_http_param_with_credentials(self):
|
||||
"""
|
||||
To configure TinyProxy in local:
|
||||
* sudo vim /etc/tinyproxy/tinyproxy.conf
|
||||
* edit => LogFile "/tmp/tinyproxy.log"
|
||||
* edit => Syslog Off
|
||||
* add => BasicAuth testuser testpass
|
||||
* sudo tinyproxy -d
|
||||
* sudo tail -f /tmp/tinyproxy.log
|
||||
"""
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "request.get",
|
||||
"url": self.google_url,
|
||||
"proxy": {
|
||||
"url": self.proxy_url,
|
||||
"username": "testuser",
|
||||
"password": "testpass"
|
||||
}
|
||||
})
|
||||
self.assertEqual(res.status_code, 200)
|
||||
|
||||
body = V1ResponseBase(res.json)
|
||||
self.assertEqual(STATUS_OK, body.status)
|
||||
self.assertEqual("Challenge not detected!", body.message)
|
||||
self.assertGreater(body.startTimestamp, 10000)
|
||||
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
||||
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
||||
|
||||
solution = body.solution
|
||||
self.assertIn(self.google_url, solution.url)
|
||||
self.assertEqual(solution.status, 200)
|
||||
self.assertIs(len(solution.headers), 0)
|
||||
self.assertIn("<title>Google</title>", solution.response)
|
||||
self.assertGreater(len(solution.cookies), 0)
|
||||
self.assertIn("Chrome/", solution.userAgent)
|
||||
|
||||
def test_v1_endpoint_request_get_proxy_socks_param(self):
|
||||
"""
|
||||
To configure Dante in local:
|
||||
* https://linuxhint.com/set-up-a-socks5-proxy-on-ubuntu-with-dante/
|
||||
* sudo vim /etc/sockd.conf
|
||||
* sudo systemctl restart sockd.service
|
||||
* curl --socks5 socks5://127.0.0.1:1080 https://www.google.com
|
||||
"""
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "request.get",
|
||||
"url": self.google_url,
|
||||
"proxy": {
|
||||
"url": self.proxy_socks_url
|
||||
}
|
||||
})
|
||||
self.assertEqual(res.status_code, 200)
|
||||
|
||||
body = V1ResponseBase(res.json)
|
||||
self.assertEqual(STATUS_OK, body.status)
|
||||
self.assertEqual("Challenge not detected!", body.message)
|
||||
self.assertGreater(body.startTimestamp, 10000)
|
||||
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
||||
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
||||
|
||||
solution = body.solution
|
||||
self.assertIn(self.google_url, solution.url)
|
||||
self.assertEqual(solution.status, 200)
|
||||
self.assertIs(len(solution.headers), 0)
|
||||
self.assertIn("<title>Google</title>", solution.response)
|
||||
self.assertGreater(len(solution.cookies), 0)
|
||||
self.assertIn("Chrome/", solution.userAgent)
|
||||
|
||||
def test_v1_endpoint_request_get_proxy_wrong_param(self):
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "request.get",
|
||||
"url": self.google_url,
|
||||
"proxy": {
|
||||
"url": "http://127.0.0.1:43210"
|
||||
}
|
||||
}, status=500)
|
||||
self.assertEqual(res.status_code, 500)
|
||||
|
||||
body = V1ResponseBase(res.json)
|
||||
self.assertEqual(STATUS_ERROR, body.status)
|
||||
self.assertIn("Error: Error solving the challenge. Message: unknown error: net::ERR_PROXY_CONNECTION_FAILED",
|
||||
body.message)
|
||||
self.assertGreater(body.startTimestamp, 10000)
|
||||
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
||||
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
||||
|
||||
def test_v1_endpoint_request_get_fail_timeout(self):
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "request.get",
|
||||
"url": self.google_url,
|
||||
"maxTimeout": 10
|
||||
}, status=500)
|
||||
self.assertEqual(res.status_code, 500)
|
||||
|
||||
body = V1ResponseBase(res.json)
|
||||
self.assertEqual(STATUS_ERROR, body.status)
|
||||
self.assertEqual("Error: Error solving the challenge. Timeout after 0.01 seconds.", body.message)
|
||||
self.assertGreater(body.startTimestamp, 10000)
|
||||
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
||||
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
||||
|
||||
def test_v1_endpoint_request_get_fail_bad_domain(self):
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "request.get",
|
||||
"url": "https://www.google.combad"
|
||||
}, status=500)
|
||||
self.assertEqual(res.status_code, 500)
|
||||
|
||||
body = V1ResponseBase(res.json)
|
||||
self.assertEqual(STATUS_ERROR, body.status)
|
||||
self.assertIn("Message: unknown error: net::ERR_NAME_NOT_RESOLVED", body.message)
|
||||
|
||||
def test_v1_endpoint_request_get_deprecated_param(self):
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "request.get",
|
||||
"url": self.google_url,
|
||||
"userAgent": "Test User-Agent" # was removed in v2, not used
|
||||
})
|
||||
self.assertEqual(res.status_code, 200)
|
||||
|
||||
body = V1ResponseBase(res.json)
|
||||
self.assertEqual(STATUS_OK, body.status)
|
||||
self.assertEqual("Challenge not detected!", body.message)
|
||||
|
||||
def test_v1_endpoint_request_post_no_cloudflare(self):
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "request.post",
|
||||
"url": self.post_url,
|
||||
"postData": "param1=value1¶m2=value2"
|
||||
})
|
||||
self.assertEqual(res.status_code, 200)
|
||||
|
||||
body = V1ResponseBase(res.json)
|
||||
self.assertEqual(STATUS_OK, body.status)
|
||||
self.assertEqual("Challenge not detected!", body.message)
|
||||
self.assertGreater(body.startTimestamp, 10000)
|
||||
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
||||
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
||||
|
||||
solution = body.solution
|
||||
self.assertIn(self.post_url, solution.url)
|
||||
self.assertEqual(solution.status, 200)
|
||||
self.assertIs(len(solution.headers), 0)
|
||||
self.assertIn('"form": {\n "param1": "value1", \n "param2": "value2"\n }', solution.response)
|
||||
self.assertEqual(len(solution.cookies), 0)
|
||||
self.assertIn("Chrome/", solution.userAgent)
|
||||
|
||||
def test_v1_endpoint_request_post_cloudflare(self):
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "request.post",
|
||||
"url": self.cloudflare_url,
|
||||
"postData": "param1=value1¶m2=value2"
|
||||
})
|
||||
self.assertEqual(res.status_code, 200)
|
||||
|
||||
body = V1ResponseBase(res.json)
|
||||
self.assertEqual(STATUS_OK, body.status)
|
||||
self.assertEqual("Challenge solved!", body.message)
|
||||
self.assertGreater(body.startTimestamp, 10000)
|
||||
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
||||
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
||||
|
||||
solution = body.solution
|
||||
self.assertIn(self.cloudflare_url, solution.url)
|
||||
self.assertEqual(solution.status, 200)
|
||||
self.assertIs(len(solution.headers), 0)
|
||||
self.assertIn("<title>405 Not Allowed</title>", solution.response)
|
||||
self.assertGreater(len(solution.cookies), 0)
|
||||
self.assertIn("Chrome/", solution.userAgent)
|
||||
|
||||
cf_cookie = _find_obj_by_key("name", "cf_clearance", solution.cookies)
|
||||
self.assertIsNotNone(cf_cookie, "Cloudflare cookie not found")
|
||||
self.assertGreater(len(cf_cookie["value"]), 30)
|
||||
|
||||
def test_v1_endpoint_request_post_fail_no_post_data(self):
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "request.post",
|
||||
"url": self.google_url
|
||||
}, status=500)
|
||||
self.assertEqual(res.status_code, 500)
|
||||
|
||||
body = V1ResponseBase(res.json)
|
||||
self.assertEqual(STATUS_ERROR, body.status)
|
||||
self.assertIn("Request parameter 'postData' is mandatory in 'request.post' command", body.message)
|
||||
|
||||
def test_v1_endpoint_request_post_deprecated_param(self):
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "request.post",
|
||||
"url": self.google_url,
|
||||
"postData": "param1=value1¶m2=value2",
|
||||
"userAgent": "Test User-Agent" # was removed in v2, not used
|
||||
})
|
||||
self.assertEqual(res.status_code, 200)
|
||||
|
||||
body = V1ResponseBase(res.json)
|
||||
self.assertEqual(STATUS_OK, body.status)
|
||||
self.assertEqual("Challenge not detected!", body.message)
|
||||
|
||||
def test_v1_endpoint_sessions_create_without_session(self):
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "sessions.create"
|
||||
})
|
||||
self.assertEqual(res.status_code, 200)
|
||||
|
||||
body = V1ResponseBase(res.json)
|
||||
self.assertEqual(STATUS_OK, body.status)
|
||||
self.assertEqual("Session created successfully.", body.message)
|
||||
self.assertIsNotNone(body.session)
|
||||
|
||||
def test_v1_endpoint_sessions_create_with_session(self):
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "sessions.create",
|
||||
"session": "test_create_session"
|
||||
})
|
||||
self.assertEqual(res.status_code, 200)
|
||||
|
||||
body = V1ResponseBase(res.json)
|
||||
self.assertEqual(STATUS_OK, body.status)
|
||||
self.assertEqual("Session created successfully.", body.message)
|
||||
self.assertEqual(body.session, "test_create_session")
|
||||
|
||||
def test_v1_endpoint_sessions_create_with_proxy(self):
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "sessions.create",
|
||||
"proxy": {
|
||||
"url": self.proxy_url
|
||||
}
|
||||
})
|
||||
self.assertEqual(res.status_code, 200)
|
||||
|
||||
body = V1ResponseBase(res.json)
|
||||
self.assertEqual(STATUS_OK, body.status)
|
||||
self.assertEqual("Session created successfully.", body.message)
|
||||
self.assertIsNotNone(body.session)
|
||||
|
||||
def test_v1_endpoint_sessions_list(self):
|
||||
self.app.post_json('/v1', {
|
||||
"cmd": "sessions.create",
|
||||
"session": "test_list_sessions"
|
||||
})
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "sessions.list"
|
||||
})
|
||||
self.assertEqual(res.status_code, 200)
|
||||
|
||||
body = V1ResponseBase(res.json)
|
||||
self.assertEqual(STATUS_OK, body.status)
|
||||
self.assertEqual("", body.message)
|
||||
self.assertGreaterEqual(len(body.sessions), 1)
|
||||
self.assertIn("test_list_sessions", body.sessions)
|
||||
|
||||
def test_v1_endpoint_sessions_destroy_existing_session(self):
|
||||
self.app.post_json('/v1', {
|
||||
"cmd": "sessions.create",
|
||||
"session": "test_destroy_sessions"
|
||||
})
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "sessions.destroy",
|
||||
"session": "test_destroy_sessions"
|
||||
})
|
||||
self.assertEqual(res.status_code, 200)
|
||||
|
||||
body = V1ResponseBase(res.json)
|
||||
self.assertEqual(STATUS_OK, body.status)
|
||||
self.assertEqual("The session has been removed.", body.message)
|
||||
|
||||
def test_v1_endpoint_sessions_destroy_non_existing_session(self):
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "sessions.destroy",
|
||||
"session": "non_existing_session_name"
|
||||
}, status=500)
|
||||
self.assertEqual(res.status_code, 500)
|
||||
|
||||
body = V1ResponseBase(res.json)
|
||||
self.assertEqual(STATUS_ERROR, body.status)
|
||||
self.assertEqual("Error: The session doesn't exist.", body.message)
|
||||
|
||||
def test_v1_endpoint_request_get_with_session(self):
|
||||
self.app.post_json('/v1', {
|
||||
"cmd": "sessions.create",
|
||||
"session": "test_request_sessions"
|
||||
})
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "request.get",
|
||||
"session": "test_request_sessions",
|
||||
"url": self.google_url
|
||||
})
|
||||
self.assertEqual(res.status_code, 200)
|
||||
|
||||
body = V1ResponseBase(res.json)
|
||||
self.assertEqual(STATUS_OK, body.status)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
@@ -1,625 +0,0 @@
|
||||
// noinspection DuplicatedCode
|
||||
|
||||
import {Response} from "superagent";
|
||||
import {V1ResponseBase, V1ResponseSession, V1ResponseSessions, V1ResponseSolution} from "../controllers/v1"
|
||||
|
||||
const request = require("supertest");
|
||||
const app = require("../app");
|
||||
const sessions = require('../services/sessions');
|
||||
const version: string = 'v' + require('../../package.json').version
|
||||
|
||||
const proxyUrl = "http://127.0.0.1:8888"
|
||||
const proxySocksUrl = "socks5://127.0.0.1:1080"
|
||||
const googleUrl = "https://www.google.com";
|
||||
const postUrl = "https://ptsv2.com/t/qv4j3-1634496523";
|
||||
const cfUrl = "https://nowsecure.nl";
|
||||
const cfCaptchaUrl = "https://idope.se"
|
||||
const cfBlockedUrl = "https://www.torrentmafya.org/table.php"
|
||||
const ddgUrl = "https://anidex.info/";
|
||||
const ccfUrl = "https://www.muziekfabriek.org";
|
||||
|
||||
beforeAll(async () => {
|
||||
// Init session
|
||||
await sessions.testWebBrowserInstallation();
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
// Clean sessions
|
||||
const sessionList = sessions.list();
|
||||
for (const session of sessionList) {
|
||||
await sessions.destroy(session);
|
||||
}
|
||||
});
|
||||
|
||||
describe("Test '/' path", () => {
|
||||
test("GET method should return OK ", async () => {
|
||||
const response: Response = await request(app).get("/");
|
||||
expect(response.statusCode).toBe(200);
|
||||
expect(response.body.msg).toBe("FlareSolverr is ready!");
|
||||
expect(response.body.version).toBe(version);
|
||||
expect(response.body.userAgent).toContain("Firefox/")
|
||||
});
|
||||
|
||||
test("POST method should fail", async () => {
|
||||
const response: Response = await request(app).post("/");
|
||||
expect(response.statusCode).toBe(404);
|
||||
expect(response.body.error).toBe("Unknown resource or HTTP verb");
|
||||
});
|
||||
});
|
||||
|
||||
describe("Test '/health' path", () => {
|
||||
test("GET method should return OK", async () => {
|
||||
const response: Response = await request(app).get("/health");
|
||||
expect(response.statusCode).toBe(200);
|
||||
expect(response.body.status).toBe("ok");
|
||||
});
|
||||
});
|
||||
|
||||
describe("Test '/wrong' path", () => {
|
||||
test("GET method should fail", async () => {
|
||||
const response: Response = await request(app).get("/wrong");
|
||||
expect(response.statusCode).toBe(404);
|
||||
expect(response.body.error).toBe("Unknown resource or HTTP verb");
|
||||
});
|
||||
});
|
||||
|
||||
describe("Test '/v1' path", () => {
|
||||
test("Cmd 'request.bad' should fail", async () => {
|
||||
const payload = {
|
||||
"cmd": "request.bad",
|
||||
"url": googleUrl
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(500);
|
||||
|
||||
const apiResponse: V1ResponseBase = response.body;
|
||||
expect(apiResponse.status).toBe("error");
|
||||
expect(apiResponse.message).toBe("Error: The command 'request.bad' is invalid.");
|
||||
expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
|
||||
expect(apiResponse.endTimestamp).toBeGreaterThanOrEqual(apiResponse.startTimestamp);
|
||||
expect(apiResponse.version).toBe(version);
|
||||
});
|
||||
|
||||
test("Cmd 'request.get' should return OK with no Cloudflare", async () => {
|
||||
const payload = {
|
||||
"cmd": "request.get",
|
||||
"url": googleUrl
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSolution = response.body;
|
||||
expect(apiResponse.status).toBe("ok");
|
||||
expect(apiResponse.message).toBe("");
|
||||
expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
|
||||
expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
|
||||
expect(apiResponse.version).toBe(version);
|
||||
|
||||
const solution = apiResponse.solution;
|
||||
expect(solution.url).toContain(googleUrl)
|
||||
expect(solution.status).toBe(200);
|
||||
expect(Object.keys(solution.headers).length).toBeGreaterThan(0)
|
||||
expect(solution.response).toContain("<!DOCTYPE html>")
|
||||
expect(Object.keys(solution.cookies).length).toBeGreaterThan(0)
|
||||
expect(solution.userAgent).toContain("Firefox/")
|
||||
});
|
||||
|
||||
test("Cmd 'request.get' should return OK with Cloudflare JS", async () => {
|
||||
const payload = {
|
||||
"cmd": "request.get",
|
||||
"url": cfUrl
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSolution = response.body;
|
||||
expect(apiResponse.status).toBe("ok");
|
||||
expect(apiResponse.message).toBe("");
|
||||
expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
|
||||
expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
|
||||
expect(apiResponse.version).toBe(version);
|
||||
|
||||
const solution = apiResponse.solution;
|
||||
expect(solution.url).toContain(cfUrl)
|
||||
expect(solution.status).toBe(200);
|
||||
expect(Object.keys(solution.headers).length).toBeGreaterThan(0)
|
||||
expect(solution.response).toContain("<!DOCTYPE html>")
|
||||
expect(Object.keys(solution.cookies).length).toBeGreaterThan(0)
|
||||
expect(solution.userAgent).toContain("Firefox/")
|
||||
|
||||
const cfCookie: string = (solution.cookies as any[]).filter(function(cookie) {
|
||||
return cookie.name == "cf_clearance";
|
||||
})[0].value
|
||||
expect(cfCookie.length).toBeGreaterThan(30)
|
||||
});
|
||||
|
||||
test("Cmd 'request.get' should return fail with Cloudflare CAPTCHA", async () => {
|
||||
const payload = {
|
||||
"cmd": "request.get",
|
||||
"url": cfCaptchaUrl
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSolution = response.body;
|
||||
expect(apiResponse.status).toBe("error");
|
||||
expect(apiResponse.message).toBe("Cloudflare Error: FlareSolverr can not resolve CAPTCHA challenges. Since the captcha doesn't always appear, you may have better luck with the next request.");
|
||||
expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
|
||||
expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
|
||||
expect(apiResponse.version).toBe(version);
|
||||
// solution is filled but not useful
|
||||
expect(apiResponse.solution.url).toContain(cfCaptchaUrl)
|
||||
});
|
||||
|
||||
test("Cmd 'request.post' should return fail with Cloudflare Blocked", async () => {
|
||||
const payload = {
|
||||
"cmd": "request.post",
|
||||
"url": cfBlockedUrl,
|
||||
"postData": "test1=test2"
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSolution = response.body;
|
||||
expect(apiResponse.status).toBe("error");
|
||||
expect(apiResponse.message).toBe("Cloudflare Error: Cloudflare has blocked this request. Probably your IP is banned for this site, check in your web browser.");
|
||||
expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
|
||||
expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
|
||||
expect(apiResponse.version).toBe(version);
|
||||
// solution is filled but not useful
|
||||
expect(apiResponse.solution.url).toContain(cfBlockedUrl)
|
||||
});
|
||||
|
||||
test("Cmd 'request.get' should return OK with DDoS-GUARD JS", async () => {
|
||||
const payload = {
|
||||
"cmd": "request.get",
|
||||
"url": ddgUrl
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSolution = response.body;
|
||||
expect(apiResponse.status).toBe("ok");
|
||||
expect(apiResponse.message).toBe("");
|
||||
expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
|
||||
expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
|
||||
expect(apiResponse.version).toBe(version);
|
||||
|
||||
const solution = apiResponse.solution;
|
||||
expect(solution.url).toContain(ddgUrl)
|
||||
expect(solution.status).toBe(200);
|
||||
expect(Object.keys(solution.headers).length).toBeGreaterThan(0)
|
||||
expect(solution.response).toContain("<!DOCTYPE html>")
|
||||
expect(Object.keys(solution.cookies).length).toBeGreaterThan(0)
|
||||
expect(solution.userAgent).toContain("Firefox/")
|
||||
|
||||
const cfCookie: string = (solution.cookies as any[]).filter(function(cookie) {
|
||||
return cookie.name == "__ddg1_";
|
||||
})[0].value
|
||||
expect(cfCookie.length).toBeGreaterThan(10)
|
||||
});
|
||||
|
||||
test("Cmd 'request.get' should return OK with Custom CloudFlare JS", async () => {
|
||||
const payload = {
|
||||
"cmd": "request.get",
|
||||
"url": ccfUrl
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSolution = response.body;
|
||||
expect(apiResponse.status).toBe("ok");
|
||||
expect(apiResponse.message).toBe("");
|
||||
expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
|
||||
expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
|
||||
expect(apiResponse.version).toBe(version);
|
||||
|
||||
const solution = apiResponse.solution;
|
||||
expect(solution.url).toContain(ccfUrl)
|
||||
expect(solution.status).toBe(200);
|
||||
expect(Object.keys(solution.headers).length).toBeGreaterThan(0)
|
||||
expect(solution.response).toContain("<html><head>")
|
||||
expect(Object.keys(solution.cookies).length).toBeGreaterThan(0)
|
||||
expect(solution.userAgent).toContain("Firefox/")
|
||||
|
||||
const cfCookie: string = (solution.cookies as any[]).filter(function(cookie) {
|
||||
return cookie.name == "ct_anti_ddos_key";
|
||||
})[0].value
|
||||
expect(cfCookie.length).toBeGreaterThan(10)
|
||||
});
|
||||
|
||||
test("Cmd 'request.get' should return OK with 'cookies' param", async () => {
|
||||
const payload = {
|
||||
"cmd": "request.get",
|
||||
"url": googleUrl,
|
||||
"cookies": [
|
||||
{
|
||||
"name": "testcookie1",
|
||||
"value": "testvalue1"
|
||||
},
|
||||
{
|
||||
"name": "testcookie2",
|
||||
"value": "testvalue2"
|
||||
}
|
||||
]
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSolution = response.body;
|
||||
expect(apiResponse.status).toBe("ok");
|
||||
|
||||
const solution = apiResponse.solution;
|
||||
expect(solution.url).toContain(googleUrl)
|
||||
expect(Object.keys(solution.cookies).length).toBeGreaterThan(1)
|
||||
const cookie1: string = (solution.cookies as any[]).filter(function(cookie) {
|
||||
return cookie.name == "testcookie1";
|
||||
})[0].value
|
||||
expect(cookie1).toBe("testvalue1")
|
||||
const cookie2: string = (solution.cookies as any[]).filter(function(cookie) {
|
||||
return cookie.name == "testcookie2";
|
||||
})[0].value
|
||||
expect(cookie2).toBe("testvalue2")
|
||||
});
|
||||
|
||||
test("Cmd 'request.get' should return OK with 'returnOnlyCookies' param", async () => {
|
||||
const payload = {
|
||||
"cmd": "request.get",
|
||||
"url": googleUrl,
|
||||
"returnOnlyCookies": true
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSolution = response.body;
|
||||
|
||||
const solution = apiResponse.solution;
|
||||
expect(solution.url).toContain(googleUrl)
|
||||
expect(solution.status).toBe(200);
|
||||
expect(solution.headers).toBe(null)
|
||||
expect(solution.response).toBe(null)
|
||||
expect(Object.keys(solution.cookies).length).toBeGreaterThan(0)
|
||||
expect(solution.userAgent).toBe(null)
|
||||
});
|
||||
|
||||
test("Cmd 'request.get' should return OK with HTTP 'proxy' param", async () => {
|
||||
/*
|
||||
To configure TinyProxy locally:
|
||||
* sudo vim /etc/tinyproxy/tinyproxy.conf
|
||||
* edit => LogFile "/tmp/tinyproxy.log"
|
||||
* edit => Syslog Off
|
||||
* sudo tinyproxy -d
|
||||
* sudo tail -f /tmp/tinyproxy.log
|
||||
*/
|
||||
const payload = {
|
||||
"cmd": "request.get",
|
||||
"url": googleUrl,
|
||||
"proxy": {
|
||||
"url": proxyUrl
|
||||
}
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSolution = response.body;
|
||||
expect(apiResponse.status).toBe("ok");
|
||||
|
||||
const solution = apiResponse.solution;
|
||||
expect(solution.url).toContain(googleUrl)
|
||||
expect(solution.status).toBe(200);
|
||||
});
|
||||
|
||||
// todo: credentials are not working
|
||||
test.skip("Cmd 'request.get' should return OK with HTTP 'proxy' param with credentials", async () => {
|
||||
/*
|
||||
To configure TinyProxy locally:
|
||||
* sudo vim /etc/tinyproxy/tinyproxy.conf
|
||||
* edit => LogFile "/tmp/tinyproxy.log"
|
||||
* edit => Syslog Off
|
||||
* add => BasicAuth testuser testpass
|
||||
* sudo tinyproxy -d
|
||||
* sudo tail -f /tmp/tinyproxy.log
|
||||
*/
|
||||
const payload = {
|
||||
"cmd": "request.get",
|
||||
"url": googleUrl,
|
||||
"proxy": {
|
||||
"url": proxyUrl,
|
||||
"username": "testuser",
|
||||
"password": "testpass"
|
||||
}
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSolution = response.body;
|
||||
expect(apiResponse.status).toBe("ok");
|
||||
|
||||
const solution = apiResponse.solution;
|
||||
expect(solution.url).toContain(googleUrl)
|
||||
expect(solution.status).toBe(200);
|
||||
});
|
||||
|
||||
test("Cmd 'request.get' should return OK with SOCKSv5 'proxy' param", async () => {
|
||||
/*
|
||||
To configure Dante locally:
|
||||
* https://linuxhint.com/set-up-a-socks5-proxy-on-ubuntu-with-dante/
|
||||
* sudo vim /etc/sockd.conf
|
||||
* sudo systemctl restart sockd.service
|
||||
* curl --socks5 socks5://127.0.0.1:1080 https://www.google.com
|
||||
*/
|
||||
const payload = {
|
||||
"cmd": "request.get",
|
||||
"url": googleUrl,
|
||||
"proxy": {
|
||||
"url": proxySocksUrl
|
||||
}
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSolution = response.body;
|
||||
expect(apiResponse.status).toBe("ok");
|
||||
|
||||
const solution = apiResponse.solution;
|
||||
expect(solution.url).toContain(googleUrl)
|
||||
expect(solution.status).toBe(200);
|
||||
});
|
||||
|
||||
test("Cmd 'request.get' should fail with wrong 'proxy' param", async () => {
|
||||
const payload = {
|
||||
"cmd": "request.get",
|
||||
"url": googleUrl,
|
||||
"proxy": {
|
||||
"url": "http://127.0.0.1:43210"
|
||||
}
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(500);
|
||||
|
||||
const apiResponse: V1ResponseSolution = response.body;
|
||||
expect(apiResponse.status).toBe("error");
|
||||
expect(apiResponse.message).toBe("Error: Unable to process browser request. Error: NS_ERROR_PROXY_CONNECTION_REFUSED at https://www.google.com");
|
||||
});
|
||||
|
||||
test("Cmd 'request.get' should return fail with timeout", async () => {
|
||||
const payload = {
|
||||
"cmd": "request.get",
|
||||
"url": googleUrl,
|
||||
"maxTimeout": 10
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(500);
|
||||
|
||||
const apiResponse: V1ResponseBase = response.body;
|
||||
expect(apiResponse.status).toBe("error");
|
||||
expect(apiResponse.message).toBe("Error: Unable to process browser request. Error: Maximum timeout reached. maxTimeout=10 (ms)");
|
||||
expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
|
||||
expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
|
||||
expect(apiResponse.version).toBe(version);
|
||||
});
|
||||
|
||||
test("Cmd 'request.get' should return fail with bad domain", async () => {
|
||||
const payload = {
|
||||
"cmd": "request.get",
|
||||
"url": "https://www.google.combad"
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(500);
|
||||
|
||||
const apiResponse: V1ResponseBase = response.body;
|
||||
expect(apiResponse.status).toBe("error");
|
||||
expect(apiResponse.message).toBe("Error: Unable to process browser request. Error: NS_ERROR_UNKNOWN_HOST at https://www.google.combad");
|
||||
});
|
||||
|
||||
test("Cmd 'request.get' should accept deprecated params", async () => {
|
||||
const payload = {
|
||||
"cmd": "request.get",
|
||||
"url": googleUrl,
|
||||
"userAgent": "Test User-Agent" // was removed in v2, not used
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSolution = response.body;
|
||||
expect(apiResponse.status).toBe("ok");
|
||||
|
||||
const solution = apiResponse.solution;
|
||||
expect(solution.url).toContain(googleUrl)
|
||||
expect(solution.status).toBe(200);
|
||||
expect(solution.userAgent).toContain("Firefox/")
|
||||
});
|
||||
|
||||
test("Cmd 'request.post' should return OK with no Cloudflare", async () => {
|
||||
const payload = {
|
||||
"cmd": "request.post",
|
||||
"url": postUrl + '/post',
|
||||
"postData": "param1=value1¶m2=value2"
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSolution = response.body;
|
||||
expect(apiResponse.status).toBe("ok");
|
||||
expect(apiResponse.message).toBe("");
|
||||
expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
|
||||
expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
|
||||
expect(apiResponse.version).toBe(version);
|
||||
|
||||
const solution = apiResponse.solution;
|
||||
expect(solution.url).toContain(postUrl)
|
||||
expect(solution.status).toBe(200);
|
||||
expect(Object.keys(solution.headers).length).toBeGreaterThan(0)
|
||||
expect(solution.response).toContain(" I hope you have a lovely day!")
|
||||
expect(Object.keys(solution.cookies).length).toBe(0)
|
||||
expect(solution.userAgent).toContain("Firefox/")
|
||||
|
||||
// check that we sent the date
|
||||
const payload2 = {
|
||||
"cmd": "request.get",
|
||||
"url": postUrl
|
||||
}
|
||||
const response2: Response = await request(app).post("/v1").send(payload2);
|
||||
expect(response2.statusCode).toBe(200);
|
||||
|
||||
const apiResponse2: V1ResponseSolution = response2.body;
|
||||
expect(apiResponse2.status).toBe("ok");
|
||||
|
||||
const solution2 = apiResponse2.solution;
|
||||
expect(solution2.status).toBe(200);
|
||||
expect(solution2.response).toContain(new Date().toISOString().split(':')[0].replace('T', ' '))
|
||||
});
|
||||
|
||||
test("Cmd 'request.post' should fail without 'postData' param", async () => {
|
||||
const payload = {
|
||||
"cmd": "request.post",
|
||||
"url": googleUrl
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(500);
|
||||
|
||||
const apiResponse: V1ResponseBase = response.body;
|
||||
expect(apiResponse.status).toBe("error");
|
||||
expect(apiResponse.message).toBe("Error: Must send param \"postBody\" when sending a POST request.");
|
||||
expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
|
||||
expect(apiResponse.endTimestamp).toBeGreaterThanOrEqual(apiResponse.startTimestamp);
|
||||
expect(apiResponse.version).toBe(version);
|
||||
});
|
||||
|
||||
test("Cmd 'sessions.create' should return OK", async () => {
|
||||
const payload = {
|
||||
"cmd": "sessions.create"
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSession = response.body;
|
||||
expect(apiResponse.status).toBe("ok");
|
||||
expect(apiResponse.message).toBe("Session created successfully.");
|
||||
expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
|
||||
expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
|
||||
expect(apiResponse.version).toBe(version);
|
||||
expect(apiResponse.session.length).toBe(36);
|
||||
});
|
||||
|
||||
test("Cmd 'sessions.create' should return OK with session", async () => {
|
||||
const payload = {
|
||||
"cmd": "sessions.create",
|
||||
"session": "2bc6bb20-2f56-11ec-9543-test"
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSession = response.body;
|
||||
expect(apiResponse.status).toBe("ok");
|
||||
expect(apiResponse.message).toBe("Session created successfully.");
|
||||
expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
|
||||
expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
|
||||
expect(apiResponse.version).toBe(version);
|
||||
expect(apiResponse.session).toBe("2bc6bb20-2f56-11ec-9543-test");
|
||||
});
|
||||
|
||||
test("Cmd 'sessions.list' should return OK", async () => {
|
||||
// create one session for testing
|
||||
const payload0 = {
|
||||
"cmd": "sessions.create"
|
||||
}
|
||||
const response0: Response = await request(app).post("/v1").send(payload0);
|
||||
expect(response0.statusCode).toBe(200);
|
||||
|
||||
const payload = {
|
||||
"cmd": "sessions.list"
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSessions = response.body;
|
||||
expect(apiResponse.status).toBe("ok");
|
||||
expect(apiResponse.message).toBe("");
|
||||
expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
|
||||
expect(apiResponse.endTimestamp).toBeGreaterThanOrEqual(apiResponse.startTimestamp);
|
||||
expect(apiResponse.version).toBe(version);
|
||||
expect(apiResponse.sessions.length).toBeGreaterThan(0)
|
||||
});
|
||||
|
||||
test("Cmd 'sessions.destroy' should return OK", async () => {
|
||||
// create one session for testing
|
||||
const payload0 = {
|
||||
"cmd": "sessions.create"
|
||||
}
|
||||
const response0: Response = await request(app).post("/v1").send(payload0);
|
||||
expect(response0.statusCode).toBe(200);
|
||||
const apiResponse0: V1ResponseSession = response0.body;
|
||||
const sessionId0 = apiResponse0.session
|
||||
|
||||
const payload = {
|
||||
"cmd": "sessions.destroy",
|
||||
"session": sessionId0
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseBase = response.body;
|
||||
expect(apiResponse.status).toBe("ok");
|
||||
expect(apiResponse.message).toBe("The session has been removed.");
|
||||
expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
|
||||
expect(apiResponse.endTimestamp).toBeGreaterThanOrEqual(apiResponse.startTimestamp);
|
||||
expect(apiResponse.version).toBe(version);
|
||||
});
|
||||
|
||||
test("Cmd 'sessions.destroy' should fail", async () => {
|
||||
const payload = {
|
||||
"cmd": "sessions.destroy",
|
||||
"session": "bad-session"
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(500);
|
||||
|
||||
const apiResponse: V1ResponseBase = response.body;
|
||||
expect(apiResponse.status).toBe("error");
|
||||
expect(apiResponse.message).toBe("Error: This session does not exist.");
|
||||
expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
|
||||
expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
|
||||
expect(apiResponse.version).toBe(version);
|
||||
});
|
||||
|
||||
test("Cmd 'request.get' should use session", async () => {
|
||||
// create one session for testing
|
||||
const payload0 = {
|
||||
"cmd": "sessions.create"
|
||||
}
|
||||
const response0: Response = await request(app).post("/v1").send(payload0);
|
||||
expect(response0.statusCode).toBe(200);
|
||||
const apiResponse0: V1ResponseSession = response0.body;
|
||||
const sessionId0 = apiResponse0.session
|
||||
|
||||
// first request should solve the challenge
|
||||
const payload = {
|
||||
"cmd": "request.get",
|
||||
"url": cfUrl,
|
||||
"session": sessionId0
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSolution = response.body;
|
||||
expect(apiResponse.status).toBe("ok");
|
||||
const cfCookie: string = (apiResponse.solution.cookies as any[]).filter(function(cookie) {
|
||||
return cookie.name == "cf_clearance";
|
||||
})[0].value
|
||||
expect(cfCookie.length).toBeGreaterThan(30)
|
||||
|
||||
// second request should have the same cookie
|
||||
const response2: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response2.statusCode).toBe(200);
|
||||
|
||||
const apiResponse2: V1ResponseSolution = response2.body;
|
||||
expect(apiResponse2.status).toBe("ok");
|
||||
const cfCookie2: string = (apiResponse2.solution.cookies as any[]).filter(function(cookie) {
|
||||
return cookie.name == "cf_clearance";
|
||||
})[0].value
|
||||
expect(cfCookie2.length).toBeGreaterThan(30)
|
||||
expect(cfCookie2).toBe(cfCookie)
|
||||
});
|
||||
|
||||
});
|
||||
102
src/tests_sites.py
Normal file
@@ -0,0 +1,102 @@
|
||||
import unittest
|
||||
|
||||
from webtest import TestApp
|
||||
|
||||
from dtos import V1ResponseBase, STATUS_OK
|
||||
import flaresolverr
|
||||
import utils
|
||||
|
||||
|
||||
def _find_obj_by_key(key: str, value: str, _list: list) -> dict | None:
|
||||
for obj in _list:
|
||||
if obj[key] == value:
|
||||
return obj
|
||||
return None
|
||||
|
||||
|
||||
def assert_cloudflare_solution(self, res, site_url, site_text):
|
||||
self.assertEqual(res.status_code, 200)
|
||||
|
||||
body = V1ResponseBase(res.json)
|
||||
self.assertEqual(STATUS_OK, body.status)
|
||||
self.assertEqual("Challenge solved!", body.message)
|
||||
self.assertGreater(body.startTimestamp, 10000)
|
||||
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
|
||||
self.assertEqual(utils.get_flaresolverr_version(), body.version)
|
||||
|
||||
solution = body.solution
|
||||
self.assertIn(site_url, solution.url)
|
||||
self.assertEqual(solution.status, 200)
|
||||
self.assertIs(len(solution.headers), 0)
|
||||
self.assertIn(site_text, solution.response)
|
||||
self.assertGreater(len(solution.cookies), 0)
|
||||
self.assertIn("Chrome/", solution.userAgent)
|
||||
|
||||
cf_cookie = _find_obj_by_key("name", "cf_clearance", solution.cookies)
|
||||
self.assertIsNotNone(cf_cookie, "Cloudflare cookie not found")
|
||||
self.assertGreater(len(cf_cookie["value"]), 30)
|
||||
|
||||
|
||||
class TestFlareSolverr(unittest.TestCase):
|
||||
app = TestApp(flaresolverr.app)
|
||||
# wait until the server is ready
|
||||
app.get('/')
|
||||
|
||||
def test_v1_endpoint_request_get_cloudflare(self):
|
||||
sites_get = [
|
||||
('nowsecure', 'https://nowsecure.nl', '<title>nowSecure</title>'),
|
||||
('0magnet', 'https://0magnet.com/search?q=2022', 'Torrent Search - ØMagnet'),
|
||||
('1337x', 'https://1337x.unblockit.cat/cat/Movies/time/desc/1/', ''),
|
||||
('avistaz', 'https://avistaz.to/api/v1/jackett/torrents?in=1&type=0&search=',
|
||||
'<title>Access denied</title>'),
|
||||
('badasstorrents', 'https://badasstorrents.com/torrents/search/720p/date/desc',
|
||||
'<title>Latest Torrents - BadassTorrents</title>'),
|
||||
('bt4g', 'https://bt4g.org/search/2022', '<title>Download 2022 Torrents - BT4G</title>'),
|
||||
('cinemaz', 'https://cinemaz.to/api/v1/jackett/torrents?in=1&type=0&search=',
|
||||
'<title>Access denied</title>'),
|
||||
('epublibre', 'https://epublibre.unblockit.cat/catalogo/index/0/nuevo/todos/sin/todos/--/ajax',
|
||||
'<title>epublibre - catálogo</title>'),
|
||||
('ext', 'https://ext.to/latest/?order=age&sort=desc',
|
||||
'<title>Download Latest Torrents - EXT Torrents</title>'),
|
||||
('extratorrent', 'https://extratorrent.st/search/?srt=added&order=desc&search=720p&new=1&x=0&y=0',
|
||||
'Page 1 - ExtraTorrent'),
|
||||
('idope', 'https://idope.se/browse.html', '<title>Recent Torrents</title>'),
|
||||
('limetorrents', 'https://limetorrents.unblockninja.com/latest100',
|
||||
'<title>Latest 100 torrents - LimeTorrents</title>'),
|
||||
('privatehd', 'https://privatehd.to/api/v1/jackett/torrents?in=1&type=0&search=',
|
||||
'<title>Access denied</title>'),
|
||||
('torrentcore', 'https://torrentcore.xyz/index', '<title>Torrent[CORE] - Torrent community.</title>'),
|
||||
('torrentqq223', 'https://torrentqq223.com/torrent/newest.html', 'https://torrentqq223.com/ads/'),
|
||||
('36dm', 'https://www.36dm.club/1.html', 'https://www.36dm.club/yesterday-1.html'),
|
||||
('erai-raws', 'https://www.erai-raws.info/feed/?type=magnet', '403 Forbidden'),
|
||||
('teamos', 'https://www.teamos.xyz/torrents/?filename=&freeleech=',
|
||||
'<title>Log in | Team OS : Your Only Destination To Custom OS !!</title>'),
|
||||
('yts', 'https://yts.unblockninja.com/api/v2/list_movies.json?query_term=&limit=50&sort=date_added',
|
||||
'{"movie_count":')
|
||||
]
|
||||
for site_name, site_url, site_text in sites_get:
|
||||
with self.subTest(msg=site_name):
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "request.get",
|
||||
"url": site_url
|
||||
})
|
||||
assert_cloudflare_solution(self, res, site_url, site_text)
|
||||
|
||||
def test_v1_endpoint_request_post_cloudflare(self):
|
||||
sites_post = [
|
||||
('nnmclub', 'https://nnmclub.to/forum/tracker.php', '<title>Трекер :: NNM-Club</title>',
|
||||
'prev_sd=0&prev_a=0&prev_my=0&prev_n=0&prev_shc=0&prev_shf=1&prev_sha=1&prev_shs=0&prev_shr=0&prev_sht=0&f%5B%5D=-1&o=1&s=2&tm=-1&shf=1&sha=1&ta=-1&sns=-1&sds=-1&nm=&pn=&submit=%CF%EE%E8%F1%EA')
|
||||
]
|
||||
|
||||
for site_name, site_url, site_text, post_data in sites_post:
|
||||
with self.subTest(msg=site_name):
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "request.post",
|
||||
"url": site_url,
|
||||
"postData": post_data
|
||||
})
|
||||
assert_cloudflare_solution(self, res, site_url, site_text)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
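# A standalone sketch of the same '/v1' call these tests exercise, assuming a
# FlareSolverr instance is already running on its default port 8191; the target
# URL below is just an example.
def example_manual_request():
    import requests
    resp = requests.post("http://localhost:8191/v1", json={
        "cmd": "request.get",
        "url": "https://nowsecure.nl",
        "maxTimeout": 60000
    }, timeout=120)
    body = resp.json()
    print(body["status"], body["message"])
    print(body["solution"]["userAgent"])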
|
||||
910
src/undetected_chromedriver/__init__.py
Normal file
@@ -0,0 +1,910 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
|
||||
888 888 d8b
|
||||
888 888 Y8P
|
||||
888 888
|
||||
.d8888b 88888b. 888d888 .d88b. 88888b.d88b. .d88b. .d88888 888d888 888 888 888 .d88b. 888d888
|
||||
d88P" 888 "88b 888P" d88""88b 888 "888 "88b d8P Y8b d88" 888 888P" 888 888 888 d8P Y8b 888P"
|
||||
888 888 888 888 888 888 888 888 888 88888888 888 888 888 888 Y88 88P 88888888 888
|
||||
Y88b. 888 888 888 Y88..88P 888 888 888 Y8b. Y88b 888 888 888 Y8bd8P Y8b. 888
|
||||
"Y8888P 888 888 888 "Y88P" 888 888 888 "Y8888 "Y88888 888 888 Y88P "Y8888 888 88888888
|
||||
|
||||
by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
|
||||
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
__version__ = "3.5.5"
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import pathlib
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
from weakref import finalize
|
||||
|
||||
import selenium.webdriver.chrome.service
|
||||
import selenium.webdriver.chrome.webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
import selenium.webdriver.chromium.service
|
||||
import selenium.webdriver.remote.command
|
||||
import selenium.webdriver.remote.webdriver
|
||||
|
||||
from .cdp import CDP
|
||||
from .dprocess import start_detached
|
||||
from .options import ChromeOptions
|
||||
from .patcher import IS_POSIX
|
||||
from .patcher import Patcher
|
||||
from .reactor import Reactor
|
||||
from .webelement import UCWebElement
|
||||
from .webelement import WebElement
|
||||
|
||||
|
||||
__all__ = (
|
||||
"Chrome",
|
||||
"ChromeOptions",
|
||||
"Patcher",
|
||||
"Reactor",
|
||||
"CDP",
|
||||
"find_chrome_executable",
|
||||
)
|
||||
|
||||
logger = logging.getLogger("uc")
|
||||
logger.setLevel(logging.getLogger().getEffectiveLevel())
|
||||
|
||||
|
||||
class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
||||
"""
|
||||
|
||||
Controls the ChromeDriver and allows you to drive the browser.
|
||||
|
||||
The webdriver file will be downloaded by this module automatically,
|
||||
you do not need to specify this. however, you may if you wish.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
|
||||
Methods
|
||||
-------
|
||||
|
||||
reconnect()
|
||||
|
||||
this can be useful in case of heavy detection methods
|
||||
-stops the chromedriver service which runs in the background
|
||||
-starts the chromedriver service which runs in the background
|
||||
-recreate session
|
||||
|
||||
|
||||
start_session(capabilities=None, browser_profile=None)
|
||||
|
||||
differentiates from the regular method in that it does not
|
||||
require a capabilities argument. The capabilities are automatically
|
||||
recreated from the options at creation time.
|
||||
|
||||
--------------------------------------------------------------------------
|
||||
NOTE:
|
||||
Chrome has everything included to work out of the box.
|
||||
it does not `need` customizations.
|
||||
any customizations MAY trigger bot-mitigation systems.
|
||||
|
||||
--------------------------------------------------------------------------
|
||||
"""
|
||||
|
||||
_instances = set()
|
||||
session_id = None
|
||||
debug = False
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
options=None,
|
||||
user_data_dir=None,
|
||||
driver_executable_path=None,
|
||||
browser_executable_path=None,
|
||||
port=0,
|
||||
enable_cdp_events=False,
|
||||
# service_args=None,
|
||||
# service_creationflags=None,
|
||||
desired_capabilities=None,
|
||||
advanced_elements=False,
|
||||
# service_log_path=None,
|
||||
keep_alive=True,
|
||||
log_level=0,
|
||||
headless=False,
|
||||
version_main=None,
|
||||
patcher_force_close=False,
|
||||
suppress_welcome=True,
|
||||
use_subprocess=False,
|
||||
debug=False,
|
||||
no_sandbox=True,
|
||||
windows_headless=False,
|
||||
user_multi_procs: bool = False,
|
||||
**kw,
|
||||
):
|
||||
"""
|
||||
Creates a new instance of the chrome driver.
|
||||
|
||||
Starts the service and then creates new instance of chrome driver.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
||||
options: ChromeOptions, optional, default: None - automatic useful defaults
|
||||
this takes an instance of ChromeOptions, mainly to customize browser behavior.
|
||||
anything other than the default, for example extensions or startup options,
is not supported in case of failure, and will probably lower your undetectability.
|
||||
|
||||
|
||||
user_data_dir: str , optional, default: None (creates temp profile)
|
||||
if user_data_dir is a path to a valid chrome profile directory, use it,
|
||||
and turn off automatic removal mechanism at exit.
|
||||
|
||||
driver_executable_path: str, optional, default: None(=downloads and patches new binary)
|
||||
|
||||
browser_executable_path: str, optional, default: None - use find_chrome_executable
|
||||
Path to the browser executable.
|
||||
If not specified, make sure the executable's folder is in $PATH
|
||||
|
||||
port: int, optional, default: 0
|
||||
port to be used by the chromedriver executable, this is NOT the debugger port.
|
||||
leave it at 0 unless you know what you are doing.
|
||||
the default value of 0 automatically picks an available port.
|
||||
|
||||
enable_cdp_events: bool, default: False
|
||||
:: currently for chrome only
|
||||
this enables the handling of wire messages
|
||||
when enabled, you can subscribe to CDP events by using:
|
||||
|
||||
driver.add_cdp_listener("Network.dataReceived", yourcallback)
|
||||
# yourcallback is a callable which accepts exactly 1 dict as parameter
|
||||
|
||||
|
||||
service_args: list of str, optional, default: None
|
||||
arguments to pass to the driver service
|
||||
|
||||
desired_capabilities: dict, optional, default: None - auto from config
|
||||
Dictionary object with non-browser specific capabilities only, such as "item" or "loggingPref".
|
||||
|
||||
advanced_elements: bool, optional, default: False
|
||||
makes it easier to recognize elements like you know them from html/browser inspection, especially when working
|
||||
in an interactive environment
|
||||
|
||||
default webelement repr:
|
||||
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
|
||||
|
||||
advanced webelement repr
|
||||
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
|
||||
|
||||
note: when retrieving large amounts of elements ( example: find_elements_by_tag("*") ) and print them, it does take a little more time.
|
||||
|
||||
|
||||
service_log_path: str, optional, default: None
|
||||
path to log information from the driver.
|
||||
|
||||
keep_alive: bool, optional, default: True
|
||||
Whether to configure ChromeRemoteConnection to use HTTP keep-alive.
|
||||
|
||||
log_level: int, optional, default: adapts to python global log level
|
||||
|
||||
headless: bool, optional, default: False
|
||||
can also be specified in the options instance.
|
||||
Specify whether you want to use the browser in headless mode.
|
||||
warning: this lowers undetectability and is not fully supported.
|
||||
|
||||
version_main: int, optional, default: None (=auto)
|
||||
if you, for god knows whatever reason, use
|
||||
an older version of Chrome. You can specify its full rounded version number
|
||||
here. Example: 87 for all versions of 87
|
||||
|
||||
patcher_force_close: bool, optional, default: False
|
||||
instructs the patcher to do whatever it can to access the chromedriver binary
|
||||
if the file is locked, it will force shutdown all instances.
|
||||
setting it is not recommended, unless you know the implications and think
|
||||
you might need it.
|
||||
|
||||
suppress_welcome: bool, optional , default: True
|
||||
a "welcome" alert might show up on *nix-like systems asking whether you want to set
|
||||
chrome as your default browser, and if you want to send even more data to google.
|
||||
now, in case you are a nag-fetishist, or a diagnostics-data feeder to google, you can set this to False.
|
||||
Note: if you don't handle the nag screen in time, the browser loses its connection and throws an Exception.
|
||||
|
||||
use_subprocess: bool, optional, default: False,
|
||||
|
||||
False (the default) makes sure Chrome gets its own process (so it is not a subprocess of chromedriver.exe or python).
This fixes a LOT of issues, like multithreaded runs, but most importantly shutting down correctly after
the program exits or .quit() is used.
|
||||
you should know what you're doing, and know how python works.
|
||||
|
||||
unfortunately, there is always an edge case in which one would like to write a single script with the only contents being:
|
||||
--start script--
|
||||
import undetected_chromedriver as uc
|
||||
d = uc.Chrome()
|
||||
d.get('https://somesite/')
|
||||
---end script --
|
||||
|
||||
and will be greeted with an error, since the program exits before chrome has a chance to launch.
|
||||
in that case you can set this to `True`. The browser will start via subprocess, and will keep running most of the time.
|
||||
! setting it to True comes with NO support when being detected. !
|
||||
|
||||
no_sandbox: bool, optional, default=True
|
||||
uses the --no-sandbox option, and additionally does suppress the "unsecure option" status bar
|
||||
this option has a default of True since many people seem to run this as root (...), and chrome does not start
when running as root without the --no-sandbox flag.
|
||||
|
||||
user_multi_procs:
|
||||
set to true when you are using multithreads/multiprocessing
|
||||
ensures not all processes are trying to modify a binary which is in use by another.
|
||||
for this to work, YOU MUST HAVE AT LEAST 1 UNDETECTED_CHROMEDRIVER BINARY IN YOUR ROAMING DATA FOLDER.
this requirement can be easily satisfied by just running this program "normally" once and closing/killing it.
|
||||
|
||||
|
||||
"""
|
||||
|
||||
finalize(self, self._ensure_close, self)
|
||||
self.debug = debug
|
||||
self.patcher = Patcher(
|
||||
executable_path=driver_executable_path,
|
||||
force=patcher_force_close,
|
||||
version_main=version_main,
|
||||
user_multi_procs=user_multi_procs,
|
||||
)
|
||||
# self.patcher.auto(user_multiprocess = user_multi_num_procs)
|
||||
self.patcher.auto()
|
||||
|
||||
# self.patcher = patcher
|
||||
if not options:
|
||||
options = ChromeOptions()
|
||||
|
||||
try:
|
||||
if hasattr(options, "_session") and options._session is not None:
|
||||
# prevent reuse of options,
|
||||
# as it just appends arguments, not replace them
|
||||
# you'll get conflicts starting chrome
|
||||
raise RuntimeError("you cannot reuse the ChromeOptions object")
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
options._session = self
|
||||
|
||||
if not options.debugger_address:
|
||||
debug_port = (
|
||||
port
|
||||
if port != 0
|
||||
else selenium.webdriver.common.service.utils.free_port()
|
||||
)
|
||||
debug_host = "127.0.0.1"
|
||||
options.debugger_address = "%s:%d" % (debug_host, debug_port)
|
||||
else:
|
||||
debug_host, debug_port = options.debugger_address.split(":")
|
||||
debug_port = int(debug_port)
|
||||
|
||||
if enable_cdp_events:
|
||||
options.set_capability(
|
||||
"goog:loggingPrefs", {"performance": "ALL", "browser": "ALL"}
|
||||
)
|
||||
|
||||
options.add_argument("--remote-debugging-host=%s" % debug_host)
|
||||
options.add_argument("--remote-debugging-port=%s" % debug_port)
|
||||
|
||||
if user_data_dir:
|
||||
options.add_argument("--user-data-dir=%s" % user_data_dir)
|
||||
|
||||
language, keep_user_data_dir = None, bool(user_data_dir)
|
||||
|
||||
# see if a custom user profile is specified in options
|
||||
for arg in options.arguments:
|
||||
|
||||
if any([_ in arg for _ in ("--headless", "headless")]):
|
||||
options.arguments.remove(arg)
|
||||
options.headless = True
|
||||
|
||||
if "lang" in arg:
|
||||
m = re.search("(?:--)?lang(?:[ =])?(.*)", arg)
|
||||
try:
|
||||
language = m[1]
|
||||
except IndexError:
|
||||
logger.debug("will set the language to en-US,en;q=0.9")
|
||||
language = "en-US,en;q=0.9"
|
||||
|
||||
if "user-data-dir" in arg:
|
||||
m = re.search("(?:--)?user-data-dir(?:[ =])?(.*)", arg)
|
||||
try:
|
||||
user_data_dir = m[1]
|
||||
logger.debug(
|
||||
"user-data-dir found in user argument %s => %s" % (arg, m[1])
|
||||
)
|
||||
keep_user_data_dir = True
|
||||
|
||||
except IndexError:
|
||||
logger.debug(
|
||||
"no user data dir could be extracted from supplied argument %s "
|
||||
% arg
|
||||
)
|
||||
|
||||
if not user_data_dir:
|
||||
# backward compatibility
|
||||
# check if an old uc.ChromeOptions is used, and extract the user data dir
|
||||
|
||||
if hasattr(options, "user_data_dir") and getattr(
|
||||
options, "user_data_dir", None
|
||||
):
|
||||
import warnings
|
||||
|
||||
warnings.warn(
|
||||
"using ChromeOptions.user_data_dir might stop working in future versions."
|
||||
"use uc.Chrome(user_data_dir='/xyz/some/data') in case you need existing profile folder"
|
||||
)
|
||||
options.add_argument("--user-data-dir=%s" % options.user_data_dir)
|
||||
keep_user_data_dir = True
|
||||
logger.debug(
|
||||
"user_data_dir property found in options object: %s" % user_data_dir
|
||||
)
|
||||
|
||||
else:
|
||||
user_data_dir = os.path.normpath(tempfile.mkdtemp())
|
||||
keep_user_data_dir = False
|
||||
arg = "--user-data-dir=%s" % user_data_dir
|
||||
options.add_argument(arg)
|
||||
logger.debug(
|
||||
"created a temporary folder in which the user-data (profile) will be stored during this\n"
|
||||
"session, and added it to chrome startup arguments: %s" % arg
|
||||
)
|
||||
|
||||
if not language:
|
||||
try:
|
||||
import locale
|
||||
|
||||
language = locale.getdefaultlocale()[0].replace("_", "-")
|
||||
except Exception:
|
||||
pass
|
||||
if not language:
|
||||
language = "en-US"
|
||||
|
||||
options.add_argument("--lang=%s" % language)
|
||||
|
||||
if not options.binary_location:
|
||||
options.binary_location = (
|
||||
browser_executable_path or find_chrome_executable()
|
||||
)
|
||||
|
||||
if not options.binary_location or not \
|
||||
pathlib.Path(options.binary_location).exists():
|
||||
raise FileNotFoundError(
|
||||
"\n---------------------\n"
|
||||
"Could not determine browser executable."
|
||||
"\n---------------------\n"
|
||||
"Make sure your browser is installed in the default location (path).\n"
|
||||
"If you are sure about the browser executable, you can specify it using\n"
|
||||
"the `browser_executable_path='{}` parameter.\n\n"
|
||||
.format("/path/to/browser/executable" if IS_POSIX else "c:/path/to/your/browser.exe")
|
||||
)
|
||||
|
||||
self._delay = 3
|
||||
|
||||
self.user_data_dir = user_data_dir
|
||||
self.keep_user_data_dir = keep_user_data_dir
|
||||
|
||||
if suppress_welcome:
|
||||
options.arguments.extend(["--no-default-browser-check", "--no-first-run"])
|
||||
if no_sandbox:
|
||||
options.arguments.extend(["--no-sandbox", "--test-type"])
|
||||
|
||||
if headless or getattr(options, 'headless', None):
|
||||
# workaround until a better check is found
|
||||
try:
|
||||
v_main = int(self.patcher.version_main) if self.patcher.version_main else 108
|
||||
if v_main < 108:
|
||||
options.add_argument("--headless=chrome")
|
||||
elif v_main >= 108:
|
||||
options.add_argument("--headless=new")
|
||||
except:
|
||||
logger.warning("could not detect version_main."
|
||||
"therefore, we are assuming it is chrome 108 or higher")
|
||||
options.add_argument("--headless=new")
|
||||
|
||||
options.add_argument("--window-size=1920,1080")
|
||||
options.add_argument("--start-maximized")
|
||||
options.add_argument("--no-sandbox")
|
||||
# fixes "could not connect to chrome" error when running
|
||||
# on linux using privileged user like root (which i don't recommend)
|
||||
|
||||
options.add_argument(
|
||||
"--log-level=%d" % log_level
|
||||
or divmod(logging.getLogger().getEffectiveLevel(), 10)[0]
|
||||
)
|
||||
|
||||
if hasattr(options, "handle_prefs"):
|
||||
options.handle_prefs(user_data_dir)
|
||||
|
||||
# fix exit_type flag to prevent tab-restore nag
|
||||
try:
|
||||
with open(
|
||||
os.path.join(user_data_dir, "Default/Preferences"),
|
||||
encoding="latin1",
|
||||
mode="r+",
|
||||
) as fs:
|
||||
config = json.load(fs)
|
||||
if config["profile"]["exit_type"] is not None:
|
||||
# fixing the restore-tabs-nag
|
||||
config["profile"]["exit_type"] = None
|
||||
fs.seek(0, 0)
|
||||
json.dump(config, fs)
|
||||
fs.truncate() # the file might be shorter
|
||||
logger.debug("fixed exit_type flag")
|
||||
except Exception as e:
|
||||
logger.debug("did not find a bad exit_type flag ")
|
||||
|
||||
self.options = options
|
||||
|
||||
if not desired_capabilities:
|
||||
desired_capabilities = options.to_capabilities()
|
||||
|
||||
if not use_subprocess and not windows_headless:
|
||||
self.browser_pid = start_detached(
|
||||
options.binary_location, *options.arguments
|
||||
)
|
||||
else:
|
||||
startupinfo = None
|
||||
if os.name == 'nt' and windows_headless:
|
||||
# STARTUPINFO() is Windows only
|
||||
startupinfo = subprocess.STARTUPINFO()
|
||||
startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
|
||||
browser = subprocess.Popen(
|
||||
[options.binary_location, *options.arguments],
|
||||
stdin=subprocess.PIPE,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
close_fds=IS_POSIX,
|
||||
startupinfo=startupinfo
|
||||
)
|
||||
self.browser_pid = browser.pid
|
||||
|
||||
|
||||
service = selenium.webdriver.chromium.service.ChromiumService(
|
||||
self.patcher.executable_path
|
||||
)
|
||||
|
||||
super().__init__(
|
||||
service=service,
|
||||
options=options,
|
||||
keep_alive=keep_alive,
|
||||
)
|
||||
|
||||
self.reactor = None
|
||||
|
||||
if enable_cdp_events:
|
||||
if logging.getLogger().getEffectiveLevel() == logging.DEBUG:
|
||||
logging.getLogger(
|
||||
"selenium.webdriver.remote.remote_connection"
|
||||
).setLevel(20)
|
||||
reactor = Reactor(self)
|
||||
reactor.start()
|
||||
self.reactor = reactor
|
||||
|
||||
if advanced_elements:
|
||||
self._web_element_cls = UCWebElement
|
||||
else:
|
||||
self._web_element_cls = WebElement
|
||||
|
||||
if headless or getattr(options, 'headless', None):
|
||||
self._configure_headless()
|
||||
|
||||
def _configure_headless(self):
|
||||
orig_get = self.get
|
||||
logger.info("setting properties for headless")
|
||||
|
||||
def get_wrapped(*args, **kwargs):
|
||||
if self.execute_script("return navigator.webdriver"):
|
||||
logger.info("patch navigator.webdriver")
|
||||
self.execute_cdp_cmd(
|
||||
"Page.addScriptToEvaluateOnNewDocument",
|
||||
{
|
||||
"source": """
|
||||
Object.defineProperty(window, "navigator", {
|
||||
value: new Proxy(navigator, {
|
||||
has: (target, key) => (key === "webdriver" ? false : key in target),
|
||||
get: (target, key) =>
|
||||
key === "webdriver"
|
||||
? false
|
||||
: typeof target[key] === "function"
|
||||
? target[key].bind(target)
|
||||
: target[key],
|
||||
}),
|
||||
});
|
||||
"""
|
||||
},
|
||||
)
|
||||
|
||||
logger.info("patch user-agent string")
|
||||
self.execute_cdp_cmd(
|
||||
"Network.setUserAgentOverride",
|
||||
{
|
||||
"userAgent": self.execute_script(
|
||||
"return navigator.userAgent"
|
||||
).replace("Headless", "")
|
||||
},
|
||||
)
|
||||
self.execute_cdp_cmd(
|
||||
"Page.addScriptToEvaluateOnNewDocument",
|
||||
{
|
||||
"source": """
|
||||
Object.defineProperty(navigator, 'maxTouchPoints', {get: () => 1});
|
||||
Object.defineProperty(navigator.connection, 'rtt', {get: () => 100});
|
||||
|
||||
// https://github.com/microlinkhq/browserless/blob/master/packages/goto/src/evasions/chrome-runtime.js
|
||||
window.chrome = {
|
||||
app: {
|
||||
isInstalled: false,
|
||||
InstallState: {
|
||||
DISABLED: 'disabled',
|
||||
INSTALLED: 'installed',
|
||||
NOT_INSTALLED: 'not_installed'
|
||||
},
|
||||
RunningState: {
|
||||
CANNOT_RUN: 'cannot_run',
|
||||
READY_TO_RUN: 'ready_to_run',
|
||||
RUNNING: 'running'
|
||||
}
|
||||
},
|
||||
runtime: {
|
||||
OnInstalledReason: {
|
||||
CHROME_UPDATE: 'chrome_update',
|
||||
INSTALL: 'install',
|
||||
SHARED_MODULE_UPDATE: 'shared_module_update',
|
||||
UPDATE: 'update'
|
||||
},
|
||||
OnRestartRequiredReason: {
|
||||
APP_UPDATE: 'app_update',
|
||||
OS_UPDATE: 'os_update',
|
||||
PERIODIC: 'periodic'
|
||||
},
|
||||
PlatformArch: {
|
||||
ARM: 'arm',
|
||||
ARM64: 'arm64',
|
||||
MIPS: 'mips',
|
||||
MIPS64: 'mips64',
|
||||
X86_32: 'x86-32',
|
||||
X86_64: 'x86-64'
|
||||
},
|
||||
PlatformNaclArch: {
|
||||
ARM: 'arm',
|
||||
MIPS: 'mips',
|
||||
MIPS64: 'mips64',
|
||||
X86_32: 'x86-32',
|
||||
X86_64: 'x86-64'
|
||||
},
|
||||
PlatformOs: {
|
||||
ANDROID: 'android',
|
||||
CROS: 'cros',
|
||||
LINUX: 'linux',
|
||||
MAC: 'mac',
|
||||
OPENBSD: 'openbsd',
|
||||
WIN: 'win'
|
||||
},
|
||||
RequestUpdateCheckStatus: {
|
||||
NO_UPDATE: 'no_update',
|
||||
THROTTLED: 'throttled',
|
||||
UPDATE_AVAILABLE: 'update_available'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// https://github.com/microlinkhq/browserless/blob/master/packages/goto/src/evasions/navigator-permissions.js
|
||||
if (!window.Notification) {
|
||||
window.Notification = {
|
||||
permission: 'denied'
|
||||
}
|
||||
}
|
||||
|
||||
const originalQuery = window.navigator.permissions.query
|
||||
window.navigator.permissions.__proto__.query = parameters =>
|
||||
parameters.name === 'notifications'
|
||||
? Promise.resolve({ state: window.Notification.permission })
|
||||
: originalQuery(parameters)
|
||||
|
||||
const oldCall = Function.prototype.call
|
||||
function call() {
|
||||
return oldCall.apply(this, arguments)
|
||||
}
|
||||
Function.prototype.call = call
|
||||
|
||||
const nativeToStringFunctionString = Error.toString().replace(/Error/g, 'toString')
|
||||
const oldToString = Function.prototype.toString
|
||||
|
||||
function functionToString() {
|
||||
if (this === window.navigator.permissions.query) {
|
||||
return 'function query() { [native code] }'
|
||||
}
|
||||
if (this === functionToString) {
|
||||
return nativeToStringFunctionString
|
||||
}
|
||||
return oldCall.call(oldToString, this)
|
||||
}
|
||||
// eslint-disable-next-line
|
||||
Function.prototype.toString = functionToString
|
||||
"""
|
||||
},
|
||||
)
|
||||
return orig_get(*args, **kwargs)
|
||||
|
||||
self.get = get_wrapped
|
||||
|
||||
# def _get_cdc_props(self):
|
||||
# return self.execute_script(
|
||||
# """
|
||||
# let objectToInspect = window,
|
||||
# result = [];
|
||||
# while(objectToInspect !== null)
|
||||
# { result = result.concat(Object.getOwnPropertyNames(objectToInspect));
|
||||
# objectToInspect = Object.getPrototypeOf(objectToInspect); }
|
||||
#
|
||||
# return result.filter(i => i.match(/^([a-zA-Z]){27}(Array|Promise|Symbol)$/ig))
|
||||
# """
|
||||
# )
|
||||
#
|
||||
# def _hook_remove_cdc_props(self):
|
||||
# self.execute_cdp_cmd(
|
||||
# "Page.addScriptToEvaluateOnNewDocument",
|
||||
# {
|
||||
# "source": """
|
||||
# let objectToInspect = window,
|
||||
# result = [];
|
||||
# while(objectToInspect !== null)
|
||||
# { result = result.concat(Object.getOwnPropertyNames(objectToInspect));
|
||||
# objectToInspect = Object.getPrototypeOf(objectToInspect); }
|
||||
# result.forEach(p => p.match(/^([a-zA-Z]){27}(Array|Promise|Symbol)$/ig)
|
||||
# &&delete window[p]&&console.log('removed',p))
|
||||
# """
|
||||
# },
|
||||
# )
|
||||
|
||||
def get(self, url):
|
||||
# if self._get_cdc_props():
|
||||
# self._hook_remove_cdc_props()
|
||||
return super().get(url)
|
||||
|
||||
def add_cdp_listener(self, event_name, callback):
|
||||
if isinstance(self.reactor, Reactor):
|
||||
self.reactor.add_event_handler(event_name, callback)
|
||||
return self.reactor.handlers
|
||||
return False
|
||||
|
||||
def clear_cdp_listeners(self):
|
||||
if self.reactor and isinstance(self.reactor, Reactor):
|
||||
self.reactor.handlers.clear()
|
||||
|
||||
def window_new(self):
|
||||
self.execute(
|
||||
selenium.webdriver.remote.command.Command.NEW_WINDOW, {"type": "window"}
|
||||
)
|
||||
|
||||
def tab_new(self, url: str):
|
||||
"""
|
||||
this opens a url in a new tab.
|
||||
apparently, that passes all tests directly!
|
||||
|
||||
Parameters
|
||||
----------
|
||||
url
|
||||
|
||||
Returns
|
||||
-------
|
||||
|
||||
"""
|
||||
if not hasattr(self, "cdp"):
|
||||
from .cdp import CDP
|
||||
|
||||
cdp = CDP(self.options)
|
||||
cdp.tab_new(url)
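A minimal usage sketch of tab_new; the driver construction and the URL below are illustrative assumptions, not taken from the upstream code:

    import undetected_chromedriver as uc

    driver = uc.Chrome()
    driver.tab_new("https://example.com")                # opened through the devtools /json/new endpoint
    driver.switch_to.window(driver.window_handles[-1])   # continue working in the new tab
    driver.quit()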
|
||||
|
||||
def reconnect(self, timeout=0.1):
|
||||
try:
|
||||
self.service.stop()
|
||||
except Exception as e:
|
||||
logger.debug(e)
|
||||
time.sleep(timeout)
|
||||
try:
|
||||
self.service.start()
|
||||
except Exception as e:
|
||||
logger.debug(e)
|
||||
|
||||
try:
|
||||
self.start_session()
|
||||
except Exception as e:
|
||||
logger.debug(e)
|
||||
|
||||
def start_session(self, capabilities=None, browser_profile=None):
|
||||
if not capabilities:
|
||||
capabilities = self.options.to_capabilities()
|
||||
super().start_session(capabilities)
|
||||
# super(Chrome, self).start_session(capabilities, browser_profile) # Original explicit call commented out
|
||||
|
||||
def find_elements_recursive(self, by, value):
|
||||
"""
|
||||
find elements in all frames
|
||||
this is a generator function: returning a plain list would leave the
elements stale on arrival, because the driver switches frames while
collecting them. with a generator, each element is yielded while its
frame is still the active one, so it can be used directly.
|
||||
Args:
|
||||
by: By
|
||||
value: str
|
||||
Returns: Generator[webelement.WebElement]
|
||||
"""
|
||||
def search_frame(f=None):
|
||||
if not f:
|
||||
# ensure we are on main content frame
|
||||
self.switch_to.default_content()
|
||||
else:
|
||||
self.switch_to.frame(f)
|
||||
for elem in self.find_elements(by, value):
|
||||
yield elem
|
||||
# switch back to main content, otherwise we will get StaleElementReferenceException
|
||||
self.switch_to.default_content()
|
||||
|
||||
# search root frame
|
||||
for elem in search_frame():
|
||||
yield elem
|
||||
# get iframes
|
||||
frames = self.find_elements('css selector', 'iframe')
|
||||
|
||||
# search per frame
|
||||
for f in frames:
|
||||
for elem in search_frame(f):
|
||||
yield elem
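A minimal sketch of how the generator is meant to be consumed, assuming an already-constructed uc.Chrome instance named driver; the URL and locator are only illustrative:

    from selenium.webdriver.common.by import By

    driver.get("https://example.com")
    for link in driver.find_elements_recursive(By.TAG_NAME, "a"):
        # the element's frame is the active one at this point, so it is safe to use directly
        print(link.get_attribute("href"))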
|
||||
|
||||
def quit(self):
|
||||
try:
|
||||
self.service.stop()
|
||||
self.service.process.kill()
|
||||
self.command_executor.close()
|
||||
self.service.process.wait(5)
|
||||
logger.debug("webdriver process ended")
|
||||
except (AttributeError, RuntimeError, OSError):
|
||||
pass
|
||||
try:
|
||||
self.reactor.event.set()
|
||||
logger.debug("shutting down reactor")
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
os.kill(self.browser_pid, 15)
|
||||
logger.debug("gracefully closed browser")
|
||||
except Exception as e: # noqa
|
||||
pass
|
||||
if (
|
||||
hasattr(self, "keep_user_data_dir")
|
||||
and hasattr(self, "user_data_dir")
|
||||
and not self.keep_user_data_dir
|
||||
):
|
||||
for _ in range(5):
|
||||
try:
|
||||
shutil.rmtree(self.user_data_dir, ignore_errors=False)
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
except (RuntimeError, OSError, PermissionError) as e:
|
||||
logger.debug(
|
||||
"When removing the temp profile, a %s occured: %s\nretrying..."
|
||||
% (e.__class__.__name__, e)
|
||||
)
|
||||
else:
|
||||
logger.debug("successfully removed %s" % self.user_data_dir)
|
||||
break
|
||||
|
||||
try:
|
||||
time.sleep(0.1)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
# dereference patcher, so patcher can start cleaning up as well.
|
||||
# this must come last, otherwise it will throw 'in use' errors
|
||||
self.patcher = None
|
||||
|
||||
def __getattribute__(self, item):
|
||||
if not super().__getattribute__("debug"):
|
||||
return super().__getattribute__(item)
|
||||
else:
|
||||
import inspect
|
||||
|
||||
original = super().__getattribute__(item)
|
||||
if inspect.ismethod(original) and not inspect.isclass(original):
|
||||
|
||||
def newfunc(*args, **kwargs):
|
||||
logger.debug(
|
||||
"calling %s with args %s and kwargs %s\n"
|
||||
% (original.__qualname__, args, kwargs)
|
||||
)
|
||||
return original(*args, **kwargs)
|
||||
|
||||
return newfunc
|
||||
return original
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
self.service.stop()
|
||||
time.sleep(self._delay)
|
||||
self.service.start()
|
||||
self.start_session()
|
||||
|
||||
def __hash__(self):
|
||||
return hash(self.options.debugger_address)
|
||||
|
||||
def __dir__(self):
|
||||
return object.__dir__(self)
|
||||
|
||||
def __del__(self):
|
||||
try:
|
||||
self.service.process.kill()
|
||||
except: # noqa
|
||||
pass
|
||||
self.quit()
|
||||
|
||||
@classmethod
|
||||
def _ensure_close(cls, self):
|
||||
# needs to be a classmethod so finalize can find the reference
|
||||
logger.info("ensuring close")
|
||||
if (
|
||||
hasattr(self, "service")
|
||||
and hasattr(self.service, "process")
|
||||
and hasattr(self.service.process, "kill")
|
||||
):
|
||||
self.service.process.kill()
|
||||
|
||||
|
||||
def find_chrome_executable():
|
||||
"""
|
||||
Finds the chrome, chrome beta, chrome canary, chromium executable
|
||||
|
||||
Returns
|
||||
-------
|
||||
executable_path : str
|
||||
the full file path to the found executable
|
||||
|
||||
"""
|
||||
candidates = set()
|
||||
if IS_POSIX:
|
||||
for item in os.environ.get("PATH").split(os.pathsep):
|
||||
for subitem in (
|
||||
"google-chrome",
|
||||
"chromium",
|
||||
"chromium-browser",
|
||||
"chrome",
|
||||
"google-chrome-stable",
|
||||
):
|
||||
candidates.add(os.sep.join((item, subitem)))
|
||||
if "darwin" in sys.platform:
|
||||
candidates.update(
|
||||
[
|
||||
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
|
||||
"/Applications/Chromium.app/Contents/MacOS/Chromium",
|
||||
]
|
||||
)
|
||||
else:
|
||||
for item in map(
|
||||
os.environ.get,
|
||||
("PROGRAMFILES", "PROGRAMFILES(X86)", "LOCALAPPDATA", "PROGRAMW6432"),
|
||||
):
|
||||
if item is not None:
|
||||
for subitem in (
|
||||
"Google/Chrome/Application",
|
||||
):
|
||||
candidates.add(os.sep.join((item, subitem, "chrome.exe")))
|
||||
for candidate in candidates:
|
||||
logger.debug('checking if %s exists and is executable' % candidate)
|
||||
if os.path.exists(candidate) and os.access(candidate, os.X_OK):
|
||||
logger.debug('found! using %s' % candidate)
|
||||
return os.path.normpath(candidate)
|
||||
112  src/undetected_chromedriver/cdp.py  Normal file
@@ -0,0 +1,112 @@
|
||||
#!/usr/bin/env python3
|
||||
# this module is part of undetected_chromedriver
|
||||
|
||||
import json
|
||||
import logging
|
||||
|
||||
import requests
|
||||
import websockets
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CDPObject(dict):
|
||||
def __init__(self, *a, **k):
|
||||
super().__init__(*a, **k)
|
||||
self.__dict__ = self
|
||||
for k in self.__dict__:
|
||||
if isinstance(self.__dict__[k], dict):
|
||||
self.__dict__[k] = CDPObject(self.__dict__[k])
|
||||
elif isinstance(self.__dict__[k], list):
|
||||
for i in range(len(self.__dict__[k])):
|
||||
if isinstance(self.__dict__[k][i], dict):
|
||||
self.__dict__[k][i] = CDPObject(self.__dict__[k][i])
|
||||
|
||||
def __repr__(self):
|
||||
tpl = f"{self.__class__.__name__}(\n\t{{}}\n\t)"
|
||||
return tpl.format("\n ".join(f"{k} = {v}" for k, v in self.items()))
|
||||
|
||||
|
||||
class PageElement(CDPObject):
|
||||
pass
|
||||
|
||||
|
||||
class CDP:
|
||||
log = logging.getLogger("CDP")
|
||||
|
||||
endpoints = CDPObject(
|
||||
{
|
||||
"json": "/json",
|
||||
"protocol": "/json/protocol",
|
||||
"list": "/json/list",
|
||||
"new": "/json/new?{url}",
|
||||
"activate": "/json/activate/{id}",
|
||||
"close": "/json/close/{id}",
|
||||
}
|
||||
)
|
||||
|
||||
def __init__(self, options: "ChromeOptions"): # noqa
|
||||
self.server_addr = "http://{0}:{1}".format(*options.debugger_address.split(":"))
|
||||
|
||||
self._reqid = 0
|
||||
self._session = requests.Session()
|
||||
self._last_resp = None
|
||||
self._last_json = None
|
||||
|
||||
resp = self.get(self.endpoints.json) # noqa
|
||||
self.sessionId = resp[0]["id"]
|
||||
self.wsurl = resp[0]["webSocketDebuggerUrl"]
|
||||
|
||||
def tab_activate(self, id=None):
|
||||
if not id:
|
||||
active_tab = self.tab_list()[0]
|
||||
id = active_tab.id # noqa
|
||||
self.wsurl = active_tab.webSocketDebuggerUrl # noqa
|
||||
return self.post(self.endpoints["activate"].format(id=id))
|
||||
|
||||
def tab_list(self):
|
||||
retval = self.get(self.endpoints["list"])
|
||||
return [PageElement(o) for o in retval]
|
||||
|
||||
def tab_new(self, url):
|
||||
return self.post(self.endpoints["new"].format(url=url))
|
||||
|
||||
def tab_close_last_opened(self):
|
||||
sessions = self.tab_list()
|
||||
opentabs = [s for s in sessions if s["type"] == "page"]
|
||||
return self.post(self.endpoints["close"].format(id=opentabs[-1]["id"]))
|
||||
|
||||
async def send(self, method: str, params: dict):
|
||||
self._reqid += 1
|
||||
async with websockets.connect(self.wsurl) as ws:
|
||||
await ws.send(
|
||||
json.dumps({"method": method, "params": params, "id": self._reqid})
|
||||
)
|
||||
self._last_resp = await ws.recv()
|
||||
self._last_json = json.loads(self._last_resp)
|
||||
self.log.info(self._last_json)
|
||||
|
||||
def get(self, uri):
|
||||
resp = self._session.get(self.server_addr + uri)
|
||||
try:
|
||||
self._last_resp = resp
|
||||
self._last_json = resp.json()
|
||||
except Exception:
|
||||
return
|
||||
else:
|
||||
return self._last_json
|
||||
|
||||
def post(self, uri, data: dict = None):
|
||||
if not data:
|
||||
data = {}
|
||||
resp = self._session.post(self.server_addr + uri, json=data)
|
||||
try:
|
||||
self._last_resp = resp
|
||||
self._last_json = resp.json()
|
||||
except Exception:
    return self._last_resp
else:
    return self._last_json
|
||||
|
||||
@property
|
||||
def last_json(self):
|
||||
return self._last_json
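A small sketch of driving the CDP helper on its own, assuming a Chrome instance is already listening on 127.0.0.1:9222 (for example one started with --remote-debugging-port=9222); the address and URL are assumptions:

    import undetected_chromedriver as uc

    options = uc.ChromeOptions()
    options.debugger_address = "127.0.0.1:9222"

    cdp = CDP(options)
    print(cdp.tab_list())                 # PageElement objects, one per open target
    cdp.tab_new("https://example.com")    # /json/new?<url>
    cdp.tab_close_last_opened()           # /json/close/<id> of the newest page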
|
||||
193  src/undetected_chromedriver/devtool.py  Normal file
@@ -0,0 +1,193 @@
|
||||
import asyncio
|
||||
from collections.abc import Mapping
|
||||
from collections.abc import Sequence
|
||||
from functools import wraps
|
||||
import os
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
import traceback
|
||||
from typing import Any
|
||||
from typing import Awaitable
|
||||
from typing import Callable
|
||||
from typing import List
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class Structure(dict):
|
||||
"""
|
||||
This is a dict-like object structure, which you should subclass
|
||||
Only properties defined in the class context are used on initialization.
|
||||
|
||||
See example
|
||||
"""
|
||||
|
||||
_store = {}
|
||||
|
||||
def __init__(self, *a, **kw):
|
||||
"""
|
||||
Instantiate a new instance.
|
||||
|
||||
:param a:
|
||||
:param kw:
|
||||
"""
|
||||
|
||||
super().__init__()
|
||||
|
||||
# auxiliary dict
|
||||
d = dict(*a, **kw)
|
||||
for k, v in d.items():
|
||||
if isinstance(v, Mapping):
|
||||
self[k] = self.__class__(v)
|
||||
elif isinstance(v, Sequence) and not isinstance(v, (str, bytes)):
|
||||
self[k] = [self.__class__(i) for i in v]
|
||||
else:
|
||||
self[k] = v
|
||||
super().__setattr__("__dict__", self)
|
||||
|
||||
def __getattr__(self, item):
|
||||
return getattr(super(), item)
|
||||
|
||||
def __getitem__(self, item):
|
||||
return super().__getitem__(item)
|
||||
|
||||
def __setattr__(self, key, value):
|
||||
self.__setitem__(key, value)
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
super().__setitem__(key, value)
|
||||
|
||||
def update(self, *a, **kw):
|
||||
super().update(*a, **kw)
|
||||
|
||||
def __eq__(self, other):
|
||||
return frozenset(other.items()) == frozenset(self.items())
|
||||
|
||||
def __hash__(self):
|
||||
return hash(frozenset(self.items()))
|
||||
|
||||
@classmethod
|
||||
def __init_subclass__(cls, **kwargs):
|
||||
cls._store = {}
|
||||
|
||||
def _normalize_strings(self):
|
||||
for k, v in self.copy().items():
|
||||
if isinstance(v, (str)):
|
||||
self[k] = v.strip()
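The class docstring refers to an example; a minimal sketch of the intended subclassing, using invented names:

    class Response(Structure):
        pass

    r = Response({"status": 200, "headers": {"content-type": "text/html"}})
    r.status                     # 200 - keys are reachable as attributes
    r.headers["content-type"]    # nested mappings are wrapped as Response instances too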
|
||||
|
||||
|
||||
def timeout(seconds=3, on_timeout: Optional[Callable[[callable], Any]] = None):
|
||||
def wrapper(func):
|
||||
@wraps(func)
|
||||
def wrapped(*args, **kwargs):
|
||||
def function_reached_timeout():
|
||||
if on_timeout:
|
||||
on_timeout(func)
|
||||
else:
|
||||
raise TimeoutError("function call timed out")
|
||||
|
||||
t = threading.Timer(interval=seconds, function=function_reached_timeout)
|
||||
t.start()
|
||||
try:
|
||||
return func(*args, **kwargs)
|
||||
except:
|
||||
t.cancel()
|
||||
raise
|
||||
finally:
|
||||
t.cancel()
|
||||
|
||||
return wrapped
|
||||
|
||||
return wrapper
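A short sketch of the decorator, assuming a deliberately slow function; note that the default behaviour raises TimeoutError inside the timer thread, so the on_timeout callback is the more useful hook:

    import time

    @timeout(seconds=2, on_timeout=lambda fn: print(f"{fn.__name__} exceeded 2s"))
    def fetch_slow_value():
        time.sleep(5)           # longer than the allowed 2 seconds
        return 42

    fetch_slow_value()          # the timer reports the overrun; the call itself still completes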
|
||||
|
||||
|
||||
def test():
|
||||
import sys, os
|
||||
|
||||
sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))
|
||||
import undetected_chromedriver as uc
|
||||
import threading
|
||||
|
||||
def collector(
|
||||
driver: uc.Chrome,
|
||||
stop_event: threading.Event,
|
||||
on_event_coro: Optional[Callable[[List[str]], Awaitable[Any]]] = None,
|
||||
listen_events: Sequence = ("browser", "network", "performance"),
|
||||
):
|
||||
def threaded(driver, stop_event, on_event_coro):
|
||||
async def _ensure_service_started():
|
||||
while (
|
||||
getattr(driver, "service", False)
|
||||
and getattr(driver.service, "process", False)
|
||||
and driver.service.process.poll()
|
||||
):
|
||||
print("waiting for driver service to come back on")
|
||||
await asyncio.sleep(0.05)
|
||||
# await asyncio.sleep(driver._delay or .25)
|
||||
|
||||
async def get_log_lines(typ):
|
||||
await _ensure_service_started()
|
||||
return driver.get_log(typ)
|
||||
|
||||
async def looper():
|
||||
while not stop_event.is_set():
|
||||
log_lines = []
|
||||
try:
|
||||
for _ in listen_events:
|
||||
try:
|
||||
log_lines += await get_log_lines(_)
|
||||
except:
|
||||
if logging.getLogger().getEffectiveLevel() <= 10:
|
||||
traceback.print_exc()
|
||||
continue
|
||||
if log_lines and on_event_coro:
|
||||
await on_event_coro(log_lines)
|
||||
except Exception as e:
|
||||
if logging.getLogger().getEffectiveLevel() <= 10:
|
||||
traceback.print_exc()
|
||||
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
loop.run_until_complete(looper())
|
||||
|
||||
t = threading.Thread(target=threaded, args=(driver, stop_event, on_event_coro))
|
||||
t.start()
|
||||
|
||||
async def on_event(data):
|
||||
print("on_event")
|
||||
print("data:", data)
|
||||
|
||||
def func_called(fn):
|
||||
def wrapped(*args, **kwargs):
|
||||
print(
|
||||
"func called! %s (args: %s, kwargs: %s)" % (fn.__name__, args, kwargs)
|
||||
)
|
||||
while driver.service.process and driver.service.process.poll() is not None:
|
||||
time.sleep(0.1)
|
||||
res = fn(*args, **kwargs)
|
||||
print("func completed! (result: %s)" % res)
|
||||
return res
|
||||
|
||||
return wrapped
|
||||
|
||||
logging.basicConfig(level=10)
|
||||
|
||||
options = uc.ChromeOptions()
|
||||
options.set_capability(
|
||||
"goog:loggingPrefs", {"performance": "ALL", "browser": "ALL", "network": "ALL"}
|
||||
)
|
||||
|
||||
driver = uc.Chrome(version_main=96, options=options)
|
||||
|
||||
# driver.command_executor._request = timeout(seconds=1)(driver.command_executor._request)
|
||||
driver.command_executor._request = func_called(driver.command_executor._request)
|
||||
collector_stop = threading.Event()
|
||||
collector(driver, collector_stop, on_event)
|
||||
|
||||
driver.get("https://nowsecure.nl")
|
||||
|
||||
time.sleep(10)
|
||||
|
||||
if os.name == "nt":
|
||||
driver.close()
|
||||
driver.quit()
|
||||
77  src/undetected_chromedriver/dprocess.py  Normal file
@@ -0,0 +1,77 @@
|
||||
import atexit
|
||||
import logging
|
||||
import multiprocessing
|
||||
import os
|
||||
import platform
|
||||
import signal
|
||||
from subprocess import PIPE
|
||||
from subprocess import Popen
|
||||
import sys
|
||||
|
||||
|
||||
CREATE_NEW_PROCESS_GROUP = 0x00000200
|
||||
DETACHED_PROCESS = 0x00000008
|
||||
|
||||
REGISTERED = []
|
||||
|
||||
|
||||
def start_detached(executable, *args):
|
||||
"""
|
||||
Starts a fully independent subprocess (with no parent)
|
||||
:param executable: executable
|
||||
:param args: arguments to the executable, eg: ['--param1_key=param1_val', '-vvv' ...]
|
||||
:return: pid of the grandchild process
|
||||
"""
|
||||
|
||||
# create pipe
|
||||
reader, writer = multiprocessing.Pipe(False)
|
||||
|
||||
# do not keep reference
|
||||
process = multiprocessing.Process(
|
||||
target=_start_detached,
|
||||
args=(executable, *args),
|
||||
kwargs={"writer": writer},
|
||||
daemon=True,
|
||||
)
|
||||
process.start()
|
||||
process.join()
|
||||
# receive pid from pipe
|
||||
pid = reader.recv()
|
||||
REGISTERED.append(pid)
|
||||
# close pipes
|
||||
writer.close()
|
||||
reader.close()
|
||||
process.close()
|
||||
|
||||
return pid
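A minimal usage sketch; the executable path and flags below are assumptions for illustration:

    pid = start_detached(
        "/usr/bin/google-chrome",
        "--remote-debugging-port=9222",
        "--user-data-dir=/tmp/uc-profile",
    )
    print("grandchild pid:", pid)   # detached from this process; the module's atexit hook SIGTERMs it on exit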
|
||||
|
||||
|
||||
def _start_detached(executable, *args, writer: multiprocessing.Pipe = None):
|
||||
# configure launch
|
||||
kwargs = {}
|
||||
if platform.system() == "Windows":
|
||||
kwargs.update(creationflags=DETACHED_PROCESS | CREATE_NEW_PROCESS_GROUP)
|
||||
elif sys.version_info < (3, 2):
|
||||
# assume posix
|
||||
kwargs.update(preexec_fn=os.setsid)
|
||||
else: # Python 3.2+ and Unix
|
||||
kwargs.update(start_new_session=True)
|
||||
|
||||
# run
|
||||
p = Popen([executable, *args], stdin=PIPE, stdout=PIPE, stderr=PIPE, **kwargs)
|
||||
|
||||
# send pid to pipe
|
||||
writer.send(p.pid)
|
||||
sys.exit()
|
||||
|
||||
|
||||
def _cleanup():
|
||||
for pid in REGISTERED:
|
||||
try:
|
||||
logging.getLogger(__name__).debug("cleaning up pid %d " % pid)
|
||||
os.kill(pid, signal.SIGTERM)
|
||||
except: # noqa
|
||||
pass
|
||||
|
||||
|
||||
atexit.register(_cleanup)
|
||||
85  src/undetected_chromedriver/options.py  Normal file
@@ -0,0 +1,85 @@
|
||||
#!/usr/bin/env python3
|
||||
# this module is part of undetected_chromedriver
|
||||
|
||||
|
||||
import json
|
||||
import os
|
||||
|
||||
from selenium.webdriver.chromium.options import ChromiumOptions as _ChromiumOptions
|
||||
|
||||
|
||||
class ChromeOptions(_ChromiumOptions):
|
||||
_session = None
|
||||
_user_data_dir = None
|
||||
|
||||
@property
|
||||
def user_data_dir(self):
|
||||
return self._user_data_dir
|
||||
|
||||
@user_data_dir.setter
|
||||
def user_data_dir(self, path: str):
|
||||
"""
|
||||
Sets the browser profile folder to use, or creates a new profile
|
||||
at given <path>.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
path: str
|
||||
the path to a chrome profile folder
|
||||
if it does not exist, a new profile will be created at given location
|
||||
"""
|
||||
apath = os.path.abspath(path)
|
||||
self._user_data_dir = os.path.normpath(apath)
|
||||
|
||||
@staticmethod
|
||||
def _undot_key(key, value):
|
||||
"""turn a (dotted key, value) into a proper nested dict"""
|
||||
if "." in key:
|
||||
key, rest = key.split(".", 1)
|
||||
value = ChromeOptions._undot_key(rest, value)
|
||||
return {key: value}
|
||||
|
||||
@staticmethod
|
||||
def _merge_nested(a, b):
|
||||
"""
|
||||
merges b into a
|
||||
leaf values in a are overwritten with values from b
|
||||
"""
|
||||
for key in b:
|
||||
if key in a:
|
||||
if isinstance(a[key], dict) and isinstance(b[key], dict):
|
||||
ChromeOptions._merge_nested(a[key], b[key])
|
||||
continue
|
||||
a[key] = b[key]
|
||||
return a
|
||||
|
||||
def handle_prefs(self, user_data_dir):
|
||||
prefs = self.experimental_options.get("prefs")
|
||||
if prefs:
|
||||
user_data_dir = user_data_dir or self._user_data_dir
|
||||
default_path = os.path.join(user_data_dir, "Default")
|
||||
os.makedirs(default_path, exist_ok=True)
|
||||
|
||||
# undot prefs dict keys
|
||||
undot_prefs = {}
|
||||
for key, value in prefs.items():
|
||||
undot_prefs = self._merge_nested(
|
||||
undot_prefs, self._undot_key(key, value)
|
||||
)
|
||||
|
||||
prefs_file = os.path.join(default_path, "Preferences")
|
||||
if os.path.exists(prefs_file):
|
||||
with open(prefs_file, encoding="latin1", mode="r") as f:
|
||||
undot_prefs = self._merge_nested(json.load(f), undot_prefs)
|
||||
|
||||
with open(prefs_file, encoding="latin1", mode="w") as f:
|
||||
json.dump(undot_prefs, f)
|
||||
|
||||
# remove the experimental_options to avoid an error
|
||||
del self._experimental_options["prefs"]
|
||||
|
||||
@classmethod
|
||||
def from_options(cls, options):
|
||||
o = cls()
|
||||
o.__dict__.update(options.__dict__)
|
||||
return o
|
||||
473  src/undetected_chromedriver/patcher.py  Normal file
@@ -0,0 +1,473 @@
|
||||
#!/usr/bin/env python3
|
||||
# this module is part of undetected_chromedriver
|
||||
|
||||
from packaging.version import Version as LooseVersion
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import pathlib
|
||||
import platform
|
||||
import random
|
||||
import re
|
||||
import shutil
|
||||
import string
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from urllib.request import urlopen
|
||||
from urllib.request import urlretrieve
|
||||
import zipfile
|
||||
from multiprocessing import Lock
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
IS_POSIX = sys.platform.startswith(("darwin", "cygwin", "linux", "linux2", "freebsd"))
|
||||
|
||||
|
||||
class Patcher(object):
|
||||
lock = Lock()
|
||||
exe_name = "chromedriver%s"
|
||||
|
||||
platform = sys.platform
|
||||
if platform.endswith("win32"):
|
||||
d = "~/appdata/roaming/undetected_chromedriver"
|
||||
elif "LAMBDA_TASK_ROOT" in os.environ:
|
||||
d = "/tmp/undetected_chromedriver"
|
||||
elif platform.startswith(("linux", "linux2")):
|
||||
d = "~/.local/share/undetected_chromedriver"
|
||||
elif platform.endswith("darwin"):
|
||||
d = "~/Library/Application Support/undetected_chromedriver"
|
||||
else:
|
||||
d = "~/.undetected_chromedriver"
|
||||
data_path = os.path.abspath(os.path.expanduser(d))
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
executable_path=None,
|
||||
force=False,
|
||||
version_main: int = 0,
|
||||
user_multi_procs=False,
|
||||
):
|
||||
"""
|
||||
Args:
|
||||
executable_path: None = automatic
|
||||
a full file path to the chromedriver executable
|
||||
force: False
|
||||
terminate processes which are holding lock
|
||||
version_main: 0 = auto
|
||||
specify main chrome version (rounded, ex: 82)
|
||||
"""
|
||||
self.force = force
|
||||
self._custom_exe_path = False
|
||||
prefix = "undetected"
|
||||
self.user_multi_procs = user_multi_procs
|
||||
|
||||
try:
|
||||
# Try to convert version_main into an integer
|
||||
version_main_int = int(version_main)
|
||||
# check if version_main_int is less than or equal to e.g 114
|
||||
self.is_old_chromedriver = version_main and version_main_int <= 114
|
||||
except (ValueError, TypeError):
|
||||
# Check not running inside Docker
|
||||
if not os.path.exists("/app/chromedriver"):
|
||||
# If the conversion fails, log an error message
|
||||
logging.info("version_main cannot be converted to an integer")
|
||||
# Set self.is_old_chromedriver to False if the conversion fails
|
||||
self.is_old_chromedriver = False
|
||||
|
||||
# Needs to be called before self.exe_name is accessed
|
||||
self._set_platform_name()
|
||||
|
||||
if not os.path.exists(self.data_path):
|
||||
os.makedirs(self.data_path, exist_ok=True)
|
||||
|
||||
if not executable_path:
|
||||
if sys.platform.startswith("freebsd"):
|
||||
self.executable_path = os.path.join(
|
||||
self.data_path, self.exe_name
|
||||
)
|
||||
else:
|
||||
self.executable_path = os.path.join(
|
||||
self.data_path, "_".join([prefix, self.exe_name])
|
||||
)
|
||||
|
||||
if not IS_POSIX:
|
||||
if executable_path:
|
||||
if not executable_path[-4:] == ".exe":
|
||||
executable_path += ".exe"
|
||||
|
||||
self.zip_path = os.path.join(self.data_path, prefix)
|
||||
|
||||
if not executable_path:
|
||||
if not self.user_multi_procs:
|
||||
self.executable_path = os.path.abspath(
|
||||
os.path.join(".", self.executable_path)
|
||||
)
|
||||
|
||||
if executable_path:
|
||||
self._custom_exe_path = True
|
||||
self.executable_path = executable_path
|
||||
|
||||
# Set the correct repository to download the Chromedriver from
|
||||
if self.is_old_chromedriver:
|
||||
self.url_repo = "https://chromedriver.storage.googleapis.com"
|
||||
else:
|
||||
self.url_repo = "https://googlechromelabs.github.io/chrome-for-testing"
|
||||
|
||||
self.version_main = version_main
|
||||
self.version_full = None
|
||||
|
||||
def _set_platform_name(self):
|
||||
"""
|
||||
Set the platform and exe name based on the platform undetected_chromedriver is running on
|
||||
in order to download the correct chromedriver.
|
||||
"""
|
||||
if self.platform.endswith("win32"):
|
||||
self.platform_name = "win32"
|
||||
self.exe_name %= ".exe"
|
||||
if self.platform.endswith(("linux", "linux2")):
|
||||
self.platform_name = "linux64"
|
||||
self.exe_name %= ""
|
||||
if self.platform.endswith("darwin"):
|
||||
if self.is_old_chromedriver:
|
||||
self.platform_name = "mac64"
|
||||
else:
|
||||
self.platform_name = "mac-x64"
|
||||
self.exe_name %= ""
|
||||
if self.platform.startswith("freebsd"):
|
||||
self.platform_name = "freebsd"
|
||||
self.exe_name %= ""
|
||||
|
||||
def auto(self, executable_path=None, force=False, version_main=None, _=None):
|
||||
"""
|
||||
|
||||
Args:
|
||||
executable_path:
|
||||
force:
|
||||
version_main:
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
p = pathlib.Path(self.data_path)
|
||||
if self.user_multi_procs:
|
||||
with Lock():
|
||||
files = list(p.rglob("*chromedriver*"))
|
||||
most_recent = max(files, key=lambda f: f.stat().st_mtime)
|
||||
files.remove(most_recent)
|
||||
list(map(lambda f: f.unlink(), files))
|
||||
if self.is_binary_patched(most_recent):
|
||||
self.executable_path = str(most_recent)
|
||||
return True
|
||||
|
||||
if executable_path:
|
||||
self.executable_path = executable_path
|
||||
self._custom_exe_path = True
|
||||
|
||||
if self._custom_exe_path:
|
||||
ispatched = self.is_binary_patched(self.executable_path)
|
||||
if not ispatched:
|
||||
return self.patch_exe()
|
||||
else:
|
||||
return
|
||||
|
||||
if version_main:
|
||||
self.version_main = version_main
|
||||
if force is True:
|
||||
self.force = force
|
||||
|
||||
|
||||
if self.platform_name == "freebsd":
|
||||
chromedriver_path = shutil.which("chromedriver")
|
||||
|
||||
if not chromedriver_path or not os.path.isfile(chromedriver_path) or not os.access(chromedriver_path, os.X_OK):
|
||||
logging.error("Chromedriver not installed!")
|
||||
return
|
||||
|
||||
version_path = os.path.join(os.path.dirname(self.executable_path), "version.txt")
|
||||
|
||||
process = os.popen(f'"{chromedriver_path}" --version')
|
||||
chromedriver_version = process.read().split(' ')[1].split(' ')[0]
|
||||
process.close()
|
||||
|
||||
current_version = None
|
||||
if os.path.isfile(version_path) or os.access(version_path, os.X_OK):
|
||||
with open(version_path, 'r') as f:
|
||||
current_version = f.read()
|
||||
|
||||
if current_version != chromedriver_version:
|
||||
logging.info("Copying chromedriver executable...")
|
||||
shutil.copy(chromedriver_path, self.executable_path)
|
||||
os.chmod(self.executable_path, 0o755)
|
||||
|
||||
with open(version_path, 'w') as f:
|
||||
f.write(chromedriver_version)
|
||||
|
||||
logging.info("Chromedriver executable copied!")
|
||||
else:
|
||||
try:
|
||||
os.unlink(self.executable_path)
|
||||
except PermissionError:
|
||||
if self.force:
|
||||
self.force_kill_instances(self.executable_path)
|
||||
return self.auto(force=not self.force)
|
||||
try:
|
||||
if self.is_binary_patched():
|
||||
# assumes already running AND patched
|
||||
return True
|
||||
except PermissionError:
|
||||
pass
|
||||
# return False
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
|
||||
release = self.fetch_release_number()
|
||||
self.version_main = release.major
|
||||
self.version_full = release
|
||||
self.unzip_package(self.fetch_package())
|
||||
|
||||
return self.patch()
|
||||
|
||||
def driver_binary_in_use(self, path: str = None) -> bool:
|
||||
"""
|
||||
naive test to check if a found chromedriver binary is
|
||||
currently in use
|
||||
|
||||
Args:
|
||||
path: a string or PathLike object to the binary to check.
|
||||
if not specified, this object's executable_path is checked
|
||||
"""
|
||||
if not path:
|
||||
path = self.executable_path
|
||||
p = pathlib.Path(path)
|
||||
|
||||
if not p.exists():
|
||||
raise OSError("file does not exist: %s" % p)
|
||||
try:
|
||||
with open(p, mode="a+b") as fs:
|
||||
exc = []
|
||||
try:
|
||||
|
||||
fs.seek(0, 0)
|
||||
except PermissionError as e:
|
||||
exc.append(e)  # since some systems apparently allow seeking
|
||||
# we conduct another test
|
||||
try:
|
||||
fs.readline()
|
||||
except PermissionError as e:
|
||||
exc.append(e)
|
||||
|
||||
if exc:
    # both access checks failed, so it is safe to assume the binary is in use
    return True
return False
|
||||
except Exception as e:
|
||||
# logger.exception("whoops ", e)
|
||||
pass
|
||||
|
||||
def cleanup_unused_files(self):
|
||||
p = pathlib.Path(self.data_path)
|
||||
items = list(p.glob("*undetected*"))
|
||||
for item in items:
|
||||
try:
|
||||
item.unlink()
|
||||
except:
|
||||
pass
|
||||
|
||||
def patch(self):
|
||||
self.patch_exe()
|
||||
return self.is_binary_patched()
|
||||
|
||||
def fetch_release_number(self):
|
||||
"""
|
||||
Gets the latest version available, or the latest version for self.version_main when it was set explicitly.
|
||||
:return: version string
|
||||
:rtype: LooseVersion
|
||||
"""
|
||||
# Endpoint for old versions of Chromedriver (114 and below)
|
||||
if self.is_old_chromedriver:
|
||||
path = f"/latest_release_{self.version_main}"
|
||||
path = path.upper()
|
||||
logger.debug("getting release number from %s" % path)
|
||||
return LooseVersion(urlopen(self.url_repo + path).read().decode())
|
||||
|
||||
# Endpoint for new versions of Chromedriver (115+)
|
||||
if not self.version_main:
|
||||
# Fetch the latest version
|
||||
path = "/last-known-good-versions-with-downloads.json"
|
||||
logger.debug("getting release number from %s" % path)
|
||||
with urlopen(self.url_repo + path) as conn:
|
||||
response = conn.read().decode()
|
||||
|
||||
last_versions = json.loads(response)
|
||||
return LooseVersion(last_versions["channels"]["Stable"]["version"])
|
||||
|
||||
# Fetch the latest minor version of the major version provided
|
||||
path = "/latest-versions-per-milestone-with-downloads.json"
|
||||
logger.debug("getting release number from %s" % path)
|
||||
with urlopen(self.url_repo + path) as conn:
|
||||
response = conn.read().decode()
|
||||
|
||||
major_versions = json.loads(response)
|
||||
return LooseVersion(major_versions["milestones"][str(self.version_main)]["version"])
|
||||
|
||||
def parse_exe_version(self):
|
||||
with io.open(self.executable_path, "rb") as f:
|
||||
for line in iter(lambda: f.readline(), b""):
|
||||
match = re.search(rb"platform_handle\x00content\x00([0-9.]*)", line)
|
||||
if match:
|
||||
return LooseVersion(match[1].decode())
|
||||
|
||||
def fetch_package(self):
|
||||
"""
|
||||
Downloads ChromeDriver from source
|
||||
|
||||
:return: path to downloaded file
|
||||
"""
|
||||
zip_name = f"chromedriver_{self.platform_name}.zip"
|
||||
if self.is_old_chromedriver:
|
||||
download_url = "%s/%s/%s" % (self.url_repo, str(self.version_full), zip_name)
|
||||
else:
|
||||
zip_name = zip_name.replace("_", "-", 1)
|
||||
download_url = "https://storage.googleapis.com/chrome-for-testing-public/%s/%s/%s"
|
||||
download_url %= (str(self.version_full), self.platform_name, zip_name)
|
||||
|
||||
logger.debug("downloading from %s" % download_url)
|
||||
return urlretrieve(download_url)[0]
|
||||
|
||||
def unzip_package(self, fp):
|
||||
"""
|
||||
Does what it says
|
||||
|
||||
:return: path to unpacked executable
|
||||
"""
|
||||
exe_path = self.exe_name
|
||||
if not self.is_old_chromedriver:
|
||||
# The new chromedriver unzips into its own folder
|
||||
zip_name = f"chromedriver-{self.platform_name}"
|
||||
exe_path = os.path.join(zip_name, self.exe_name)
|
||||
|
||||
logger.debug("unzipping %s" % fp)
|
||||
try:
|
||||
os.unlink(self.zip_path)
|
||||
except (FileNotFoundError, OSError):
|
||||
pass
|
||||
|
||||
os.makedirs(self.zip_path, mode=0o755, exist_ok=True)
|
||||
with zipfile.ZipFile(fp, mode="r") as zf:
|
||||
zf.extractall(self.zip_path)
|
||||
os.rename(os.path.join(self.zip_path, exe_path), self.executable_path)
|
||||
os.remove(fp)
|
||||
shutil.rmtree(self.zip_path)  # clean up the extraction folder
|
||||
os.chmod(self.executable_path, 0o755)
|
||||
return self.executable_path
|
||||
|
||||
@staticmethod
|
||||
def force_kill_instances(exe_name):
|
||||
"""
|
||||
kills running instances.
|
||||
:param: executable name to kill, may be a path as well
|
||||
|
||||
:return: True on success else False
|
||||
"""
|
||||
exe_name = os.path.basename(exe_name)
|
||||
if IS_POSIX:
|
||||
# Using shell=True for pidof, consider a more robust pid finding method if issues arise.
|
||||
# pgrep can be an alternative: ["pgrep", "-f", exe_name]
|
||||
# Or psutil if adding a dependency is acceptable.
|
||||
command = f"pidof {exe_name}"
|
||||
try:
|
||||
result = subprocess.run(command, shell=True, capture_output=True, text=True, check=True)
|
||||
pids = result.stdout.strip().split()
|
||||
if pids:
|
||||
subprocess.run(["kill", "-9"] + pids, check=False) # Changed from -f -9 to -9 as -f is not standard for kill
|
||||
return True
|
||||
return False # No PIDs found
|
||||
except subprocess.CalledProcessError: # pidof returns 1 if no process found
|
||||
return False # No process found
|
||||
except Exception as e:
|
||||
logger.debug(f"Error killing process on POSIX: {e}")
|
||||
return False
|
||||
else:
|
||||
try:
|
||||
# TASKKILL /F /IM chromedriver.exe
|
||||
result = subprocess.run(["taskkill", "/f", "/im", exe_name], check=False, capture_output=True)
|
||||
# taskkill returns 0 if process was killed, 128 if not found.
|
||||
return result.returncode == 0
|
||||
except Exception as e:
|
||||
logger.debug(f"Error killing process on Windows: {e}")
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def gen_random_cdc():
|
||||
cdc = random.choices(string.ascii_letters, k=27)
|
||||
return "".join(cdc).encode()
|
||||
|
||||
def is_binary_patched(self, executable_path=None):
|
||||
executable_path = executable_path or self.executable_path
|
||||
try:
|
||||
with io.open(executable_path, "rb") as fh:
|
||||
return fh.read().find(b"undetected chromedriver") != -1
|
||||
except FileNotFoundError:
|
||||
return False
|
||||
|
||||
def patch_exe(self):
|
||||
start = time.perf_counter()
|
||||
logger.info("patching driver executable %s" % self.executable_path)
|
||||
with io.open(self.executable_path, "r+b") as fh:
|
||||
content = fh.read()
|
||||
# match_injected_codeblock = re.search(rb"{window.*;}", content)
|
||||
match_injected_codeblock = re.search(rb"\{window\.cdc.*?;\}", content)
|
||||
if match_injected_codeblock:
|
||||
target_bytes = match_injected_codeblock[0]
|
||||
new_target_bytes = (
|
||||
b'{console.log("undetected chromedriver 1337!")}'.ljust(
|
||||
len(target_bytes), b" "
|
||||
)
|
||||
)
|
||||
new_content = content.replace(target_bytes, new_target_bytes)
|
||||
if new_content == content:
|
||||
logger.warning(
|
||||
"something went wrong patching the driver binary. could not find injection code block"
|
||||
)
|
||||
else:
|
||||
logger.debug(
|
||||
"found block:\n%s\nreplacing with:\n%s"
|
||||
% (target_bytes, new_target_bytes)
|
||||
)
|
||||
fh.seek(0)
|
||||
fh.write(new_content)
|
||||
logger.debug(
|
||||
"patching took us {:.2f} seconds".format(time.perf_counter() - start)
|
||||
)
|
||||
|
||||
def __repr__(self):
|
||||
return "{0:s}({1:s})".format(
|
||||
self.__class__.__name__,
|
||||
self.executable_path,
|
||||
)
|
||||
|
||||
def __del__(self):
|
||||
if self._custom_exe_path:
|
||||
# if the driver binary is specified by user
|
||||
# we assume it is important enough to not delete it
|
||||
return
|
||||
else:
|
||||
timeout = 3 # stop trying after this many seconds
|
||||
t = time.monotonic()
|
||||
now = lambda: time.monotonic()
|
||||
while now() - t < timeout:
|
||||
# we don't want to wait until the end of time
|
||||
try:
|
||||
if self.user_multi_procs:
|
||||
break
|
||||
os.unlink(self.executable_path)
|
||||
logger.debug("successfully unlinked %s" % self.executable_path)
|
||||
break
|
||||
except (OSError, RuntimeError, PermissionError):
|
||||
time.sleep(0.01)
|
||||
continue
|
||||
except FileNotFoundError:
|
||||
break
|
||||
99  src/undetected_chromedriver/reactor.py  Normal file
@@ -0,0 +1,99 @@
|
||||
#!/usr/bin/env python3
|
||||
# this module is part of undetected_chromedriver
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import threading
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Reactor(threading.Thread):
|
||||
def __init__(self, driver: "Chrome"):
|
||||
super().__init__()
|
||||
|
||||
self.driver = driver
|
||||
self.loop = asyncio.new_event_loop()
|
||||
|
||||
self.lock = threading.Lock()
|
||||
self.event = threading.Event()
|
||||
self.daemon = True
|
||||
self.handlers = {}
|
||||
|
||||
def add_event_handler(self, method_name, callback: callable):
|
||||
"""
|
||||
|
||||
Parameters
|
||||
----------
|
||||
method_name: str
|
||||
example "Network.responseReceived"
|
||||
|
||||
callback: callable
|
||||
callable which accepts 1 parameter: the message object dictionary
|
||||
|
||||
Returns
|
||||
-------
|
||||
|
||||
"""
|
||||
with self.lock:
|
||||
self.handlers[method_name.lower()] = callback
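A minimal sketch of registering a handler through the Chrome wrapper (which forwards to this method), assuming the driver was built with enable_cdp_events=True so that the performance log the reactor polls is available; the event and URL are illustrative:

    import undetected_chromedriver as uc

    def on_response(message):
        # message is the decoded devtools event dictionary
        print(message["params"]["response"]["url"])

    driver = uc.Chrome(enable_cdp_events=True)
    driver.add_cdp_listener("Network.responseReceived", on_response)
    driver.get("https://example.com")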
|
||||
|
||||
@property
|
||||
def running(self):
|
||||
return not self.event.is_set()
|
||||
|
||||
def run(self):
|
||||
try:
|
||||
asyncio.set_event_loop(self.loop)
|
||||
self.loop.run_until_complete(self.listen())
|
||||
except Exception as e:
|
||||
logger.warning("Reactor.run() => %s", e)
|
||||
|
||||
async def _wait_service_started(self):
|
||||
while True:
|
||||
with self.lock:
|
||||
if (
|
||||
getattr(self.driver, "service", None)
|
||||
and getattr(self.driver.service, "process", None)
|
||||
and self.driver.service.process.poll()
|
||||
):
|
||||
await asyncio.sleep(self.driver._delay or 0.25)
|
||||
else:
|
||||
break
|
||||
|
||||
async def listen(self):
|
||||
while self.running:
|
||||
await self._wait_service_started()
|
||||
await asyncio.sleep(1)
|
||||
|
||||
try:
|
||||
with self.lock:
|
||||
log_entries = self.driver.get_log("performance")
|
||||
|
||||
for entry in log_entries:
|
||||
try:
|
||||
obj_serialized: str = entry.get("message")
|
||||
obj = json.loads(obj_serialized)
|
||||
message = obj.get("message")
|
||||
method = message.get("method")
|
||||
|
||||
if "*" in self.handlers:
|
||||
await self.loop.run_in_executor(
|
||||
None, self.handlers["*"], message
|
||||
)
|
||||
elif method.lower() in self.handlers:
|
||||
await self.loop.run_in_executor(
|
||||
None, self.handlers[method.lower()], message
|
||||
)
|
||||
|
||||
# print(type(message), message)
|
||||
except Exception as e:
|
||||
raise e from None
|
||||
|
||||
except Exception as e:
|
||||
if "invalid session id" in str(e):
|
||||
pass
|
||||
else:
|
||||
logging.debug("exception ignored :", e)
|
||||
86  src/undetected_chromedriver/webelement.py  Normal file
@@ -0,0 +1,86 @@
|
||||
from typing import List
|
||||
|
||||
from selenium.webdriver.common.by import By
|
||||
import selenium.webdriver.remote.webelement
|
||||
|
||||
|
||||
class WebElement(selenium.webdriver.remote.webelement.WebElement):
|
||||
def click_safe(self):
|
||||
super().click()
|
||||
self._parent.reconnect(0.1)
|
||||
|
||||
def children(
|
||||
self, tag=None, recursive=False
|
||||
) -> List[selenium.webdriver.remote.webelement.WebElement]:
|
||||
"""
|
||||
returns direct child elements of current element
|
||||
:param tag: str, if supplied, returns <tag> nodes only
|
||||
"""
|
||||
script = "return [... arguments[0].children]"
|
||||
if tag:
|
||||
script += ".filter( node => node.tagName === '%s')" % tag.upper()
|
||||
if recursive:
|
||||
return list(_recursive_children(self, tag))
|
||||
return list(self._parent.execute_script(script, self))
|
||||
|
||||
|
||||
class UCWebElement(WebElement):
|
||||
"""
|
||||
Custom WebElement class which makes it easier to view elements when
|
||||
working in an interactive environment.
|
||||
|
||||
standard webelement repr:
|
||||
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
|
||||
|
||||
using this WebElement class:
|
||||
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, parent, id_):
|
||||
super().__init__(parent, id_)
|
||||
self._attrs = None
|
||||
|
||||
@property
|
||||
def attrs(self):
|
||||
if not self._attrs:
|
||||
self._attrs = self._parent.execute_script(
|
||||
"""
|
||||
var items = {};
|
||||
for (index = 0; index < arguments[0].attributes.length; ++index)
|
||||
{
|
||||
items[arguments[0].attributes[index].name] = arguments[0].attributes[index].value
|
||||
};
|
||||
return items;
|
||||
""",
|
||||
self,
|
||||
)
|
||||
return self._attrs
|
||||
|
||||
def __repr__(self):
|
||||
strattrs = " ".join([f'{k}="{v}"' for k, v in self.attrs.items()])
|
||||
if strattrs:
|
||||
strattrs = " " + strattrs
|
||||
return f"{self.__class__.__name__} <{self.tag_name}{strattrs}>"
|
||||
|
||||
|
||||
def _recursive_children(element, tag: str = None, _results=None):
|
||||
"""
|
||||
returns all children of <element> recursively
|
||||
|
||||
:param element: `WebElement` object.
|
||||
find children below this <element>
|
||||
|
||||
:param tag: str = None.
|
||||
if provided, return only <tag> elements. example: 'a', or 'img'
|
||||
:param _results: do not use!
|
||||
"""
|
||||
results = _results or set()
|
||||
for element in element.children():
|
||||
if tag:
|
||||
if element.tag_name == tag:
|
||||
results.add(element)
|
||||
else:
|
||||
results.add(element)
|
||||
results |= _recursive_children(element, tag, results)
|
||||
return results
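A small usage sketch of the children helper, assuming a driver whose element class is the WebElement defined above (the uc.Chrome default); the tag is illustrative:

    from selenium.webdriver.common.by import By

    body = driver.find_element(By.TAG_NAME, "body")
    direct = body.children()                            # immediate child elements only
    all_links = body.children(tag="a", recursive=True)  # every <a> anywhere below <body>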
|
||||
347  src/utils.py  Normal file
@@ -0,0 +1,347 @@
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
import shutil
|
||||
import sys
|
||||
import tempfile
|
||||
import urllib.parse
|
||||
|
||||
from selenium.webdriver.chrome.webdriver import WebDriver
|
||||
import undetected_chromedriver as uc
|
||||
|
||||
FLARESOLVERR_VERSION = None
|
||||
PLATFORM_VERSION = None
|
||||
CHROME_EXE_PATH = None
|
||||
CHROME_MAJOR_VERSION = None
|
||||
USER_AGENT = None
|
||||
XVFB_DISPLAY = None
|
||||
PATCHED_DRIVER_PATH = None
|
||||
|
||||
|
||||
def get_config_log_html() -> bool:
|
||||
return os.environ.get('LOG_HTML', 'false').lower() == 'true'
|
||||
|
||||
|
||||
def get_config_headless() -> bool:
|
||||
return os.environ.get('HEADLESS', 'true').lower() == 'true'
|
||||
|
||||
|
||||
def get_config_disable_media() -> bool:
|
||||
return os.environ.get('DISABLE_MEDIA', 'false').lower() == 'true'
|
||||
|
||||
|
||||
def get_flaresolverr_version() -> str:
|
||||
global FLARESOLVERR_VERSION
|
||||
if FLARESOLVERR_VERSION is not None:
|
||||
return FLARESOLVERR_VERSION
|
||||
|
||||
package_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'package.json')
|
||||
if not os.path.isfile(package_path):
|
||||
package_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'package.json')
|
||||
with open(package_path) as f:
|
||||
FLARESOLVERR_VERSION = json.loads(f.read())['version']
|
||||
return FLARESOLVERR_VERSION
|
||||
|
||||
def get_current_platform() -> str:
|
||||
global PLATFORM_VERSION
|
||||
if PLATFORM_VERSION is not None:
|
||||
return PLATFORM_VERSION
|
||||
PLATFORM_VERSION = os.name
|
||||
return PLATFORM_VERSION
|
||||
|
||||
|
||||
def create_proxy_extension(proxy: dict) -> str:
|
||||
parsed_url = urllib.parse.urlparse(proxy['url'])
|
||||
scheme = parsed_url.scheme
|
||||
host = parsed_url.hostname
|
||||
port = parsed_url.port
|
||||
username = proxy['username']
|
||||
password = proxy['password']
|
||||
manifest_json = """
|
||||
{
|
||||
"version": "1.0.0",
|
||||
"manifest_version": 3,
|
||||
"name": "Chrome Proxy",
|
||||
"permissions": [
|
||||
"proxy",
|
||||
"tabs",
|
||||
"storage",
|
||||
"webRequest",
|
||||
"webRequestAuthProvider"
|
||||
],
|
||||
"host_permissions": [
|
||||
"<all_urls>"
|
||||
],
|
||||
"background": {
|
||||
"service_worker": "background.js"
|
||||
},
|
||||
"minimum_chrome_version": "76.0.0"
|
||||
}
|
||||
"""
|
||||
|
||||
background_js = """
|
||||
var config = {
|
||||
mode: "fixed_servers",
|
||||
rules: {
|
||||
singleProxy: {
|
||||
scheme: "%s",
|
||||
host: "%s",
|
||||
port: %d
|
||||
},
|
||||
bypassList: ["localhost"]
|
||||
}
|
||||
};
|
||||
|
||||
chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});
|
||||
|
||||
function callbackFn(details) {
|
||||
return {
|
||||
authCredentials: {
|
||||
username: "%s",
|
||||
password: "%s"
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
chrome.webRequest.onAuthRequired.addListener(
|
||||
callbackFn,
|
||||
{ urls: ["<all_urls>"] },
|
||||
['blocking']
|
||||
);
|
||||
""" % (
|
||||
scheme,
|
||||
host,
|
||||
port,
|
||||
username,
|
||||
password
|
||||
)
|
||||
|
||||
proxy_extension_dir = tempfile.mkdtemp()
|
||||
|
||||
with open(os.path.join(proxy_extension_dir, "manifest.json"), "w") as f:
|
||||
f.write(manifest_json)
|
||||
|
||||
with open(os.path.join(proxy_extension_dir, "background.js"), "w") as f:
|
||||
f.write(background_js)
|
||||
|
||||
return proxy_extension_dir
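A short sketch of wiring the generated extension into Chrome, mirroring how get_webdriver() uses it below; the proxy endpoint and credentials are assumptions:

    proxy = {
        "url": "http://proxy.example.com:3128",
        "username": "user",
        "password": "pass",
    }
    ext_dir = create_proxy_extension(proxy)
    options = uc.ChromeOptions()
    options.add_argument("--load-extension=%s" % os.path.abspath(ext_dir))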
|
||||
|
||||
|
||||
def get_webdriver(proxy: dict = None) -> WebDriver:
|
||||
global PATCHED_DRIVER_PATH, USER_AGENT
|
||||
logging.debug('Launching web browser...')
|
||||
|
||||
# undetected_chromedriver
|
||||
options = uc.ChromeOptions()
|
||||
options.add_argument('--no-sandbox')
|
||||
options.add_argument('--window-size=1920,1080')
|
||||
options.add_argument('--disable-search-engine-choice-screen')
|
||||
# todo: this param shows a warning in chrome head-full
|
||||
options.add_argument('--disable-setuid-sandbox')
|
||||
options.add_argument('--disable-dev-shm-usage')
|
||||
# this option removes the zygote sandbox (it seems that the resolution is a bit faster)
|
||||
options.add_argument('--no-zygote')
|
||||
# attempt to fix Docker ARM32 build
|
||||
IS_ARMARCH = platform.machine().startswith(('arm', 'aarch'))
|
||||
if IS_ARMARCH:
|
||||
options.add_argument('--disable-gpu-sandbox')
|
||||
options.add_argument('--ignore-certificate-errors')
|
||||
options.add_argument('--ignore-ssl-errors')
|
||||
|
||||
language = os.environ.get('LANG', None)
|
||||
if language is not None:
|
||||
options.add_argument('--accept-lang=%s' % language)
|
||||
|
||||
# Fix for Chrome 117 | https://github.com/FlareSolverr/FlareSolverr/issues/910
|
||||
if USER_AGENT is not None:
|
||||
options.add_argument('--user-agent=%s' % USER_AGENT)
|
||||
|
||||
proxy_extension_dir = None
|
||||
if proxy and all(key in proxy for key in ['url', 'username', 'password']):
|
||||
proxy_extension_dir = create_proxy_extension(proxy)
|
||||
options.add_argument("--disable-features=DisableLoadExtensionCommandLineSwitch")
|
||||
options.add_argument("--load-extension=%s" % os.path.abspath(proxy_extension_dir))
|
||||
elif proxy and 'url' in proxy:
|
||||
proxy_url = proxy['url']
|
||||
logging.debug("Using webdriver proxy: %s", proxy_url)
|
||||
options.add_argument('--proxy-server=%s' % proxy_url)
|
||||
|
||||
# note: headless mode is detected (headless = True)
|
||||
# we launch the browser in head-full mode with the window hidden
|
||||
windows_headless = False
|
||||
if get_config_headless():
|
||||
if os.name == 'nt':
|
||||
windows_headless = True
|
||||
else:
|
||||
start_xvfb_display()
|
||||
# For normal headless mode:
|
||||
# options.add_argument('--headless')
|
||||
|
||||
# if we are inside the Docker container, we avoid downloading the driver
|
||||
driver_exe_path = None
|
||||
version_main = None
|
||||
if os.path.exists("/app/chromedriver"):
|
||||
# running inside Docker
|
||||
driver_exe_path = "/app/chromedriver"
|
||||
else:
|
||||
version_main = get_chrome_major_version()
|
||||
if PATCHED_DRIVER_PATH is not None:
|
||||
driver_exe_path = PATCHED_DRIVER_PATH
|
||||
|
||||
# detect chrome path
|
||||
browser_executable_path = get_chrome_exe_path()
|
||||
|
||||
# downloads and patches the chromedriver
|
||||
# if we don't set driver_executable_path it downloads, patches, and deletes the driver each time
|
||||
try:
|
||||
driver = uc.Chrome(options=options, browser_executable_path=browser_executable_path,
|
||||
driver_executable_path=driver_exe_path, version_main=version_main,
|
||||
windows_headless=windows_headless, headless=get_config_headless())
|
||||
except Exception as e:
|
||||
logging.error("Error starting Chrome: %s" % e)
|
||||
# No point in continuing if we cannot retrieve the driver
|
||||
raise e
|
||||
|
||||
# save the patched driver to avoid re-downloads
|
||||
if driver_exe_path is None:
|
||||
PATCHED_DRIVER_PATH = os.path.join(driver.patcher.data_path, driver.patcher.exe_name)
|
||||
if PATCHED_DRIVER_PATH != driver.patcher.executable_path:
|
||||
shutil.copy(driver.patcher.executable_path, PATCHED_DRIVER_PATH)
|
||||
|
||||
# clean up proxy extension directory
|
||||
if proxy_extension_dir is not None:
|
||||
shutil.rmtree(proxy_extension_dir)
|
||||
|
||||
# selenium vanilla
|
||||
# options = webdriver.ChromeOptions()
|
||||
# options.add_argument('--no-sandbox')
|
||||
# options.add_argument('--window-size=1920,1080')
|
||||
# options.add_argument('--disable-setuid-sandbox')
|
||||
# options.add_argument('--disable-dev-shm-usage')
|
||||
# driver = webdriver.Chrome(options=options)
|
||||
|
||||
return driver
|
||||
|
||||
|
||||
def get_chrome_exe_path() -> str:
|
||||
global CHROME_EXE_PATH
|
||||
if CHROME_EXE_PATH is not None:
|
||||
return CHROME_EXE_PATH
|
||||
# linux pyinstaller bundle
|
||||
chrome_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'chrome', "chrome")
|
||||
if os.path.exists(chrome_path):
|
||||
if not os.access(chrome_path, os.X_OK):
|
||||
raise Exception(f'Chrome binary "{chrome_path}" is not executable. '
|
||||
f'Please extract the archive with "tar xzf <file.tar.gz>".')
|
||||
CHROME_EXE_PATH = chrome_path
|
||||
return CHROME_EXE_PATH
|
||||
# windows pyinstaller bundle
|
||||
chrome_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'chrome', "chrome.exe")
|
||||
if os.path.exists(chrome_path):
|
||||
CHROME_EXE_PATH = chrome_path
|
||||
return CHROME_EXE_PATH
|
||||
# system
|
||||
CHROME_EXE_PATH = uc.find_chrome_executable()
|
||||
return CHROME_EXE_PATH
|
||||
|
||||
|
||||
def get_chrome_major_version() -> str:
    global CHROME_MAJOR_VERSION
    if CHROME_MAJOR_VERSION is not None:
        return CHROME_MAJOR_VERSION

    if os.name == 'nt':
        # Example: '104.0.5112.79'
        try:
            complete_version = extract_version_nt_executable(get_chrome_exe_path())
        except Exception:
            try:
                complete_version = extract_version_nt_registry()
            except Exception:
                # Example: '104.0.5112.79'
                complete_version = extract_version_nt_folder()
    else:
        chrome_path = get_chrome_exe_path()
        process = os.popen(f'"{chrome_path}" --version')
        # Example 1: 'Chromium 104.0.5112.79 Arch Linux\n'
        # Example 2: 'Google Chrome 104.0.5112.79 Arch Linux\n'
        complete_version = process.read()
        process.close()

    CHROME_MAJOR_VERSION = complete_version.split('.')[0].split(' ')[-1]
    return CHROME_MAJOR_VERSION

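The last line above extracts the major version from either output format; a minimal sketch of that parsing with example strings only:

    # 'Google Chrome 104.0.5112.79 Arch Linux\n' -> '104'
    # '104.0.5112.79'                            -> '104'
    for s in ('Google Chrome 104.0.5112.79 Arch Linux\n', '104.0.5112.79'):
        print(s.split('.')[0].split(' ')[-1])    # prints '104' in both cases
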
def extract_version_nt_executable(exe_path: str) -> str:
    import pefile
    pe = pefile.PE(exe_path, fast_load=True)
    pe.parse_data_directories(
        directories=[pefile.DIRECTORY_ENTRY["IMAGE_DIRECTORY_ENTRY_RESOURCE"]]
    )
    return pe.FileInfo[0][0].StringTable[0].entries[b"FileVersion"].decode('utf-8')

def extract_version_nt_registry() -> str:
    stream = os.popen(
        'reg query "HKLM\\SOFTWARE\\Wow6432Node\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\Google Chrome"')
    output = stream.read()
    google_version = ''
    # the '+ 24' offset skips past the padded 'DisplayVersion    REG_SZ' label
    for letter in output[output.rindex('DisplayVersion    REG_SZ') + 24:]:
        if letter != '\n':
            google_version += letter
        else:
            break
    return google_version.strip()

def extract_version_nt_folder() -> str:
    # Check if the Chrome folder exists in the x32 or x64 Program Files folders.
    for i in range(2):
        path = 'C:\\Program Files' + (' (x86)' if i else '') + '\\Google\\Chrome\\Application'
        if os.path.isdir(path):
            paths = [f.path for f in os.scandir(path) if f.is_dir()]
            for path in paths:
                filename = os.path.basename(path)
                pattern = r'\d+\.\d+\.\d+\.\d+'
                match = re.search(pattern, filename)
                if match and match.group():
                    # Found a Chrome version.
                    return match.group(0)
    return ''

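The folder scan above boils down to a simple version-number regex; a minimal sketch with example folder names (not real paths):

    import re
    for name in ('104.0.5112.79', 'SetupMetrics'):
        match = re.search(r'\d+\.\d+\.\d+\.\d+', name)
        print(match.group(0) if match else 'no version')   # '104.0.5112.79', then 'no version'
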
def get_user_agent(driver=None) -> str:
    global USER_AGENT
    if USER_AGENT is not None:
        return USER_AGENT

    try:
        if driver is None:
            driver = get_webdriver()
        USER_AGENT = driver.execute_script("return navigator.userAgent")
        # Fix for Chrome 117 | https://github.com/FlareSolverr/FlareSolverr/issues/910
        USER_AGENT = re.sub('HEADLESS', '', USER_AGENT, flags=re.IGNORECASE)
        return USER_AGENT
    except Exception as e:
        raise Exception("Error getting browser User-Agent. " + str(e))
    finally:
        if driver is not None:
            if PLATFORM_VERSION == "nt":
                driver.close()
            driver.quit()

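The Chrome 117 fix above strips the 'Headless' marker case-insensitively from the reported User-Agent; a minimal sketch with an illustrative UA string (the value is an example, not captured output):

    import re
    ua = ('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) '
          'HeadlessChrome/117.0.0.0 Safari/537.36')
    print(re.sub('HEADLESS', '', ua, flags=re.IGNORECASE))
    # -> '... (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36'
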
def start_xvfb_display():
    global XVFB_DISPLAY
    if XVFB_DISPLAY is None:
        from xvfbwrapper import Xvfb
        XVFB_DISPLAY = Xvfb()
        XVFB_DISPLAY.start()

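For completeness, the same wrapper can be started and stopped explicitly outside this module; a minimal sketch, assuming xvfbwrapper is installed on a Linux host with Xvfb available:

    from xvfbwrapper import Xvfb

    display = Xvfb()    # virtual framebuffer, no physical screen needed
    display.start()     # exports a DISPLAY for the non-headless browser
    try:
        pass            # launch the browser here
    finally:
        display.stop()
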
def object_to_dict(_object):
    json_dict = json.loads(json.dumps(_object, default=lambda o: o.__dict__))
    # remove hidden fields
    return {k: v for k, v in json_dict.items() if not k.startswith('__')}

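A minimal sketch of how the serializer above behaves; the Example class is hypothetical, any object whose attributes are JSON-serializable works the same way:

    class Example:
        def __init__(self):
            self.status = 'ok'
            self.message = ''

    print(object_to_dict(Example()))    # {'status': 'ok', 'message': ''}
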
test-requirements.txt (new file, 1 line)
@@ -0,0 +1 @@
WebTest==3.0.7
@@ -1,21 +0,0 @@
{
  "compilerOptions": {
    "moduleResolution": "node",
    "target": "es2017",
    "noImplicitAny": true,
    "removeComments": true,
    "preserveConstEnums": true,
    "allowSyntheticDefaultImports": true,
    "resolveJsonModule": true,
    "lib": [
      "es2015", "dom"
    ],
    "module": "commonjs",
    "outDir": "dist",
    "sourceMap": true
  },
  "include": [
    "src", "node_modules/@types/puppeteer/index.d.ts"
  ],
  "exclude": ["node_modules"]
}