mirror of
https://github.com/FlareSolverr/FlareSolverr.git
synced 2025-12-06 09:38:49 +01:00
Compare commits
21 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
36226b34c1 | ||
|
|
606d84f7c0 | ||
|
|
62eb363575 | ||
|
|
345d27dd5a | ||
|
|
3b9fd0aa6a | ||
|
|
93041779fb | ||
|
|
3dbb4e65d6 | ||
|
|
23dd8f8725 | ||
|
|
9ab7ab1371 | ||
|
|
cf7e4f8749 | ||
|
|
e8328adb90 | ||
|
|
843f588859 | ||
|
|
f8462c86f2 | ||
|
|
4bc083896b | ||
|
|
c9f2d6e954 | ||
|
|
177578d5d8 | ||
|
|
efcab83f6e | ||
|
|
51b7bc3b92 | ||
|
|
e5be265026 | ||
|
|
aed54e0bb3 | ||
|
|
5046f60914 |
32
.github/ISSUE_TEMPLATE.md
vendored
Normal file
32
.github/ISSUE_TEMPLATE.md
vendored
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
**Please use the search bar** at the top of the page and make sure you are not creating an already submitted issue.
|
||||||
|
Check closed issues as well, because your issue may have already been fixed.
|
||||||
|
|
||||||
|
### How to enable debug and html traces
|
||||||
|
|
||||||
|
[Follow the instructions from this wiki page](https://github.com/FlareSolverr/FlareSolverr/wiki/How-to-enable-debug-and-html-trace)
|
||||||
|
|
||||||
|
### Environment
|
||||||
|
|
||||||
|
* **FlareSolverr version**:
|
||||||
|
* **Last working FlareSolverr version**:
|
||||||
|
* **Operating system**:
|
||||||
|
* **Are you using Docker**: [yes/no]
|
||||||
|
* **FlareSolverr User-Agent (see log traces or / endpoint)**:
|
||||||
|
* **Are you using a proxy or VPN?** [yes/no]
|
||||||
|
* **Are you using Captcha Solver:** [yes/no]
|
||||||
|
* **If using captcha solver, which one:**
|
||||||
|
* **URL to test this issue:**
|
||||||
|
|
||||||
|
### Description
|
||||||
|
|
||||||
|
[List steps to reproduce the error and details on what happens and what you expected to happen]
|
||||||
|
|
||||||
|
### Logged Error Messages
|
||||||
|
|
||||||
|
[Place any relevant error messages you noticed from the logs here.]
|
||||||
|
|
||||||
|
[Make sure you attach the full logs with your personal information removed in case we need more information]
|
||||||
|
|
||||||
|
### Screenshots
|
||||||
|
|
||||||
|
[Place any screenshots of the issue here if needed]
|
||||||
64
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
64
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
@@ -1,64 +0,0 @@
|
|||||||
name: Bug report
|
|
||||||
description: Create a report of your issue
|
|
||||||
body:
|
|
||||||
- type: checkboxes
|
|
||||||
attributes:
|
|
||||||
label: Have you checked our README?
|
|
||||||
description: Please check the <a href="https://github.com/FlareSolverr/FlareSolverr/blob/master/README.md">README</a>.
|
|
||||||
options:
|
|
||||||
- label: I have checked the README
|
|
||||||
required: true
|
|
||||||
- type: checkboxes
|
|
||||||
attributes:
|
|
||||||
label: Is there already an issue for your problem?
|
|
||||||
description: Please make sure you are not creating an already submitted <a href="https://github.com/FlareSolverr/FlareSolverr/issues">Issue</a>. Check closed issues as well, because your issue may have already been fixed.
|
|
||||||
options:
|
|
||||||
- label: I have checked older issues, open and closed
|
|
||||||
required: true
|
|
||||||
- type: checkboxes
|
|
||||||
attributes:
|
|
||||||
label: Have you checked the discussions?
|
|
||||||
description: Please read our <a href="https://github.com/FlareSolverr/FlareSolverr/discussions">Discussions</a> before submitting your issue, some wider problems may be dealt with there.
|
|
||||||
options:
|
|
||||||
- label: I have read the Discussions
|
|
||||||
required: true
|
|
||||||
- type: textarea
|
|
||||||
attributes:
|
|
||||||
label: Environment
|
|
||||||
description: Please provide the details of the system FlareSolverr is running on.
|
|
||||||
value: |
|
|
||||||
- FlareSolverr version:
|
|
||||||
- Last working FlareSolverr version:
|
|
||||||
- Operating system:
|
|
||||||
- Are you using Docker: [yes/no]
|
|
||||||
- FlareSolverr User-Agent (see log traces or / endpoint):
|
|
||||||
- Are you using a VPN: [yes/no]
|
|
||||||
- Are you using a Proxy: [yes/no]
|
|
||||||
- Are you using Captcha Solver: [yes/no]
|
|
||||||
- If using captcha solver, which one:
|
|
||||||
- URL to test this issue:
|
|
||||||
render: markdown
|
|
||||||
validations:
|
|
||||||
required: true
|
|
||||||
- type: textarea
|
|
||||||
attributes:
|
|
||||||
label: Description
|
|
||||||
description: List steps to reproduce the error and details on what happens and what you expected to happen.
|
|
||||||
validations:
|
|
||||||
required: true
|
|
||||||
- type: textarea
|
|
||||||
attributes:
|
|
||||||
label: Logged Error Messages
|
|
||||||
description: |
|
|
||||||
Place any relevant error messages you noticed from the logs here.
|
|
||||||
Make sure you attach the full logs with your personal information removed in case we need more information.
|
|
||||||
If you wish to provide debug logs, follow the instructions from this <a href="https://github.com/FlareSolverr/FlareSolverr/wiki/How-to-enable-debug-and-html-trace">wiki page</a>.
|
|
||||||
render: text
|
|
||||||
validations:
|
|
||||||
required: true
|
|
||||||
- type: textarea
|
|
||||||
attributes:
|
|
||||||
label: Screenshots
|
|
||||||
description: Place any screenshots of the issue here if needed
|
|
||||||
validations:
|
|
||||||
required: false
|
|
||||||
8
.github/ISSUE_TEMPLATE/config.yml
vendored
8
.github/ISSUE_TEMPLATE/config.yml
vendored
@@ -1,8 +0,0 @@
|
|||||||
blank_issues_enabled: false
|
|
||||||
contact_links:
|
|
||||||
- name: Requesting new features or changes
|
|
||||||
url: https://github.com/FlareSolverr/FlareSolverr/discussions
|
|
||||||
about: Please create a new discussion topic, grouped under "Ideas".
|
|
||||||
- name: Asking questions
|
|
||||||
url: https://github.com/FlareSolverr/FlareSolverr/discussions
|
|
||||||
about: Please create a new discussion topic, grouped under "Q&A".
|
|
||||||
41
.github/workflows/autotag.yml
vendored
41
.github/workflows/autotag.yml
vendored
@@ -1,20 +1,21 @@
|
|||||||
name: autotag
|
# todo: enable in the first release
|
||||||
|
#name: autotag
|
||||||
on:
|
#
|
||||||
push:
|
#on:
|
||||||
branches:
|
# push:
|
||||||
- "master"
|
# branches:
|
||||||
|
# - "master"
|
||||||
jobs:
|
#
|
||||||
build:
|
#jobs:
|
||||||
runs-on: ubuntu-latest
|
# build:
|
||||||
steps:
|
# runs-on: ubuntu-latest
|
||||||
-
|
# steps:
|
||||||
name: Checkout
|
# -
|
||||||
uses: actions/checkout@v3
|
# name: Checkout
|
||||||
-
|
# uses: actions/checkout@v2
|
||||||
name: Auto Tag
|
# -
|
||||||
uses: Klemensas/action-autotag@stable
|
# name: Auto Tag
|
||||||
with:
|
# uses: Klemensas/action-autotag@stable
|
||||||
GITHUB_TOKEN: "${{ secrets.GH_PAT }}"
|
# with:
|
||||||
tag_prefix: "v"
|
# GITHUB_TOKEN: "${{ secrets.GH_PAT }}"
|
||||||
|
# tag_prefix: "v"
|
||||||
|
|||||||
16
.github/workflows/release-docker.yml
vendored
16
.github/workflows/release-docker.yml
vendored
@@ -11,43 +11,43 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
-
|
-
|
||||||
name: Checkout
|
name: Checkout
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v2
|
||||||
-
|
-
|
||||||
name: Downcase repo
|
name: Downcase repo
|
||||||
run: echo REPOSITORY=$(echo ${{ github.repository }} | tr '[:upper:]' '[:lower:]') >> $GITHUB_ENV
|
run: echo REPOSITORY=$(echo ${{ github.repository }} | tr '[:upper:]' '[:lower:]') >> $GITHUB_ENV
|
||||||
-
|
-
|
||||||
name: Docker meta
|
name: Docker meta
|
||||||
id: docker_meta
|
id: docker_meta
|
||||||
uses: crazy-max/ghaction-docker-meta@v3
|
uses: crazy-max/ghaction-docker-meta@v1
|
||||||
with:
|
with:
|
||||||
images: ${{ env.REPOSITORY }},ghcr.io/${{ env.REPOSITORY }}
|
images: ${{ env.REPOSITORY }},ghcr.io/${{ env.REPOSITORY }}
|
||||||
tag-sha: false
|
tag-sha: false
|
||||||
-
|
-
|
||||||
name: Set up QEMU
|
name: Set up QEMU
|
||||||
uses: docker/setup-qemu-action@v2
|
uses: docker/setup-qemu-action@v1.0.1
|
||||||
-
|
-
|
||||||
name: Set up Docker Buildx
|
name: Set up Docker Buildx
|
||||||
uses: docker/setup-buildx-action@v2
|
uses: docker/setup-buildx-action@v1
|
||||||
-
|
-
|
||||||
name: Login to DockerHub
|
name: Login to DockerHub
|
||||||
uses: docker/login-action@v2
|
uses: docker/login-action@v1
|
||||||
with:
|
with:
|
||||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||||
-
|
-
|
||||||
name: Login to GitHub Container Registry
|
name: Login to GitHub Container Registry
|
||||||
uses: docker/login-action@v2
|
uses: docker/login-action@v1
|
||||||
with:
|
with:
|
||||||
registry: ghcr.io
|
registry: ghcr.io
|
||||||
username: ${{ github.repository_owner }}
|
username: ${{ github.repository_owner }}
|
||||||
password: ${{ secrets.GH_PAT }}
|
password: ${{ secrets.GH_PAT }}
|
||||||
-
|
-
|
||||||
name: Build and push
|
name: Build and push
|
||||||
uses: docker/build-push-action@v3
|
uses: docker/build-push-action@v2
|
||||||
with:
|
with:
|
||||||
context: .
|
context: .
|
||||||
file: ./Dockerfile
|
file: ./Dockerfile
|
||||||
platforms: linux/386,linux/amd64,linux/arm/v7,linux/arm64/v8
|
platforms: linux/amd64,linux/arm/v7,linux/arm64
|
||||||
push: ${{ github.event_name != 'pull_request' }}
|
push: ${{ github.event_name != 'pull_request' }}
|
||||||
tags: ${{ steps.docker_meta.outputs.tags }}
|
tags: ${{ steps.docker_meta.outputs.tags }}
|
||||||
labels: ${{ steps.docker_meta.outputs.labels }}
|
labels: ${{ steps.docker_meta.outputs.labels }}
|
||||||
|
|||||||
4
.github/workflows/release.yml
vendored
4
.github/workflows/release.yml
vendored
@@ -11,12 +11,12 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v2
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0 # get all commits, branches and tags (required for the changelog)
|
fetch-depth: 0 # get all commits, branches and tags (required for the changelog)
|
||||||
|
|
||||||
- name: Setup Node
|
- name: Setup Node
|
||||||
uses: actions/setup-node@v3
|
uses: actions/setup-node@v2
|
||||||
with:
|
with:
|
||||||
node-version: '16'
|
node-version: '16'
|
||||||
|
|
||||||
|
|||||||
266
CHANGELOG.md
266
CHANGELOG.md
@@ -1,266 +0,0 @@
|
|||||||
# Changelog
|
|
||||||
|
|
||||||
## v3.0.4 (2023/03/07)
|
|
||||||
|
|
||||||
* Click on the Cloudflare's 'Verify you are human' button if necessary
|
|
||||||
|
|
||||||
## v3.0.3 (2023/03/06)
|
|
||||||
|
|
||||||
* Update undetected_chromedriver version to 3.4.6
|
|
||||||
|
|
||||||
## v3.0.2 (2023/01/08)
|
|
||||||
|
|
||||||
* Detect Cloudflare blocked access
|
|
||||||
* Check Chrome / Chromium web browser is installed correctly
|
|
||||||
|
|
||||||
## v3.0.1 (2023/01/06)
|
|
||||||
|
|
||||||
* Kill Chromium processes properly to avoid defunct/zombie processes
|
|
||||||
* Update undetected-chromedriver
|
|
||||||
* Disable Zygote sandbox in Chromium browser
|
|
||||||
* Add more selectors to detect blocked access
|
|
||||||
* Include procps (ps), curl and vim packages in the Docker image
|
|
||||||
|
|
||||||
## v3.0.0 (2023/01/04)
|
|
||||||
|
|
||||||
* This is the first release of FlareSolverr v3. There are some breaking changes
|
|
||||||
* Docker images for linux/386, linux/amd64, linux/arm/v7 and linux/arm64/v8
|
|
||||||
* Replaced Firefox with Chrome
|
|
||||||
* Replaced NodeJS / Typescript with Python
|
|
||||||
* Replaced Puppeteer with Selenium
|
|
||||||
* No binaries for Linux / Windows. You have to use the Docker image or install from Source code
|
|
||||||
* No proxy support
|
|
||||||
* No session support
|
|
||||||
|
|
||||||
## v2.2.10 (2022/10/22)
|
|
||||||
|
|
||||||
* Detect DDoS-Guard through title content
|
|
||||||
|
|
||||||
## v2.2.9 (2022/09/25)
|
|
||||||
|
|
||||||
* Detect Cloudflare Access Denied
|
|
||||||
* Commit the complete changelog
|
|
||||||
|
|
||||||
## v2.2.8 (2022/09/17)
|
|
||||||
|
|
||||||
* Remove 30 s delay and clean legacy code
|
|
||||||
|
|
||||||
## v2.2.7 (2022/09/12)
|
|
||||||
|
|
||||||
* Temporary fix: add 30s delay
|
|
||||||
* Update README.md
|
|
||||||
|
|
||||||
## v2.2.6 (2022/07/31)
|
|
||||||
|
|
||||||
* Fix Cloudflare detection in POST requests
|
|
||||||
|
|
||||||
## v2.2.5 (2022/07/30)
|
|
||||||
|
|
||||||
* Update GitHub actions to build executables with NodeJs 16
|
|
||||||
* Update Cloudflare selectors and add HTML samples
|
|
||||||
* Install Firefox 94 instead of the latest Nightly
|
|
||||||
* Update dependencies
|
|
||||||
* Upgrade Puppeteer (#396)
|
|
||||||
|
|
||||||
## v2.2.4 (2022/04/17)
|
|
||||||
|
|
||||||
* Detect DDoS-Guard challenge
|
|
||||||
|
|
||||||
## v2.2.3 (2022/04/16)
|
|
||||||
|
|
||||||
* Fix 2000 ms navigation timeout
|
|
||||||
* Update README.md (libseccomp2 package in Debian)
|
|
||||||
* Update README.md (clarify proxy parameter) (#307)
|
|
||||||
* Update NPM dependencies
|
|
||||||
* Disable Cloudflare ban detection
|
|
||||||
|
|
||||||
## v2.2.2 (2022/03/19)
|
|
||||||
|
|
||||||
* Fix ban detection. Resolves #330 (#336)
|
|
||||||
|
|
||||||
## v2.2.1 (2022/02/06)
|
|
||||||
|
|
||||||
* Fix max timeout error in some pages
|
|
||||||
* Avoid crashing in NodeJS 17 due to Unhandled promise rejection
|
|
||||||
* Improve proxy validation and debug traces
|
|
||||||
* Remove @types/puppeteer dependency
|
|
||||||
|
|
||||||
## v2.2.0 (2022/01/31)
|
|
||||||
|
|
||||||
* Increase default BROWSER_TIMEOUT=40000 (40 seconds)
|
|
||||||
* Fix Puppeteer deprecation warnings
|
|
||||||
* Update base Docker image Alpine 3.15 / NodeJS 16
|
|
||||||
* Build precompiled binaries with NodeJS 16
|
|
||||||
* Update Puppeteer and other dependencies
|
|
||||||
* Add support for Custom CloudFlare challenge
|
|
||||||
* Add support for DDoS-GUARD challenge
|
|
||||||
|
|
||||||
## v2.1.0 (2021/12/12)
|
|
||||||
|
|
||||||
* Add aarch64 to user agents to be replaced (#248)
|
|
||||||
* Fix SOCKSv4 and SOCKSv5 proxy. resolves #214 #220
|
|
||||||
* Remove redundant JSON key (postData) (#242)
|
|
||||||
* Make test URL configurable with TEST_URL env var. resolves #240
|
|
||||||
* Bypass new Cloudflare protection
|
|
||||||
* Update donation links
|
|
||||||
|
|
||||||
## v2.0.2 (2021/10/31)
|
|
||||||
|
|
||||||
* Fix SOCKS5 proxy. Resolves #214
|
|
||||||
* Replace Firefox ESR with a newer version
|
|
||||||
* Catch startup exceptions and give some advice
|
|
||||||
* Add env var BROWSER_TIMEOUT for slow systems
|
|
||||||
* Fix NPM warning in Docker images
|
|
||||||
|
|
||||||
## v2.0.1 (2021/10/24)
|
|
||||||
|
|
||||||
* Check user home dir before testing web browser installation
|
|
||||||
|
|
||||||
## v2.0.0 (2021/10/20)
|
|
||||||
|
|
||||||
FlareSolverr 2.0.0 is out with some important changes:
|
|
||||||
|
|
||||||
* It is capable of solving the automatic challenges of Cloudflare. CAPTCHAs (hCaptcha) cannot be resolved and the old solvers have been removed.
|
|
||||||
* The Chrome browser has been replaced by Firefox. This has caused some functionality to be removed. Parameters: `userAgent`, `headers`, `rawHtml` and `download` are no longer available.
|
|
||||||
* Included `proxy` support without user/password credentials. If you are writing your own integration with FlareSolverr, make sure your client uses the same User-Agent header and Proxy that FlareSolverr uses. Those values together with the Cookie are checked and detected by Cloudflare.
|
|
||||||
* FlareSolverr has been rewritten from scratch. From now on it should be easier to maintain and test.
|
|
||||||
* If you are using Jackett make sure you have version v0.18.1041 or higher. FlareSolverSharp v2.0.0 is out too.
|
|
||||||
|
|
||||||
Complete changelog:
|
|
||||||
|
|
||||||
* Bump version 2.0.0
|
|
||||||
* Set puppeteer timeout half of maxTimeout param. Resolves #180
|
|
||||||
* Add test for blocked IP
|
|
||||||
* Avoid reloading the page in case of error
|
|
||||||
* Improve Cloudflare detection
|
|
||||||
* Fix version
|
|
||||||
* Fix browser preferences and proxy
|
|
||||||
* Fix request.post method and clean error traces
|
|
||||||
* Use Firefox ESR for Docker images
|
|
||||||
* Improve Firefox start time and code clean up
|
|
||||||
* Improve bad request management and tests
|
|
||||||
* Build native packages with Firefox
|
|
||||||
* Update readme
|
|
||||||
* Improve Docker image and clean TODOs
|
|
||||||
* Add proxy support
|
|
||||||
* Implement request.post method for Firefox
|
|
||||||
* Code clean up, remove returnRawHtml, download, headers params
|
|
||||||
* Remove outdated captcha solvers
|
|
||||||
* Refactor the app to use Express server and Jest for tests
|
|
||||||
* Fix Cloudflare resolver for Linux ARM builds
|
|
||||||
* Fix Cloudflare resolver
|
|
||||||
* Replace Chrome web browser with Firefox
|
|
||||||
* Remove userAgent parameter since any modification is detected by CF
|
|
||||||
* Update dependencies
|
|
||||||
* Remove Puppeteer stealth plugin
|
|
||||||
|
|
||||||
## v1.2.9 (2021/08/01)
|
|
||||||
|
|
||||||
* Improve "Execution context was destroyed" error handling
|
|
||||||
* Implement returnRawHtml parameter. resolves #172 resolves #165
|
|
||||||
* Capture Docker stop signal. resolves #158
|
|
||||||
* Reduce Docker image size 20 MB
|
|
||||||
* Fix page reload after challenge is solved. resolves #162 resolves #143
|
|
||||||
* Avoid loading images/css/fonts to speed up page load
|
|
||||||
* Improve Cloudflare IP ban detection
|
|
||||||
* Fix vulnerabilities
|
|
||||||
|
|
||||||
## v1.2.8 (2021/06/01)
|
|
||||||
|
|
||||||
* Improve old JS challenge waiting. Resolves #129
|
|
||||||
|
|
||||||
## v1.2.7 (2021/06/01)
|
|
||||||
|
|
||||||
* Improvements in Cloudflare redirect detection. Resolves #140
|
|
||||||
* Fix installation instructions
|
|
||||||
|
|
||||||
## v1.2.6 (2021/05/30)
|
|
||||||
|
|
||||||
* Handle new Cloudflare challenge. Resolves #135 Resolves #134
|
|
||||||
* Provide reference Systemd unit file. Resolves #72
|
|
||||||
* Fix EACCES: permission denied, open '/tmp/flaresolverr.txt'. Resolves #120
|
|
||||||
* Configure timezone with TZ env var. Resolves #109
|
|
||||||
* Return the redirected URL in the response (#126)
|
|
||||||
* Show an error in hcaptcha-solver. Resolves #132
|
|
||||||
* Regenerate package-lock.json lockfileVersion 2
|
|
||||||
* Update issue template. Resolves #130
|
|
||||||
* Bump ws from 7.4.1 to 7.4.6 (#137)
|
|
||||||
* Bump hosted-git-info from 2.8.8 to 2.8.9 (#124)
|
|
||||||
* Bump lodash from 4.17.20 to 4.17.21 (#125)
|
|
||||||
|
|
||||||
## v1.2.5 (2021/04/05)
|
|
||||||
|
|
||||||
* Fix memory regression, close test browser
|
|
||||||
* Fix release-docker GitHub action
|
|
||||||
|
|
||||||
## v1.2.4 (2021/04/04)
|
|
||||||
|
|
||||||
* Include license in release zips. resolves #75
|
|
||||||
* Validate Chrome is working at startup
|
|
||||||
* Speedup Docker image build
|
|
||||||
* Add health check endpoint
|
|
||||||
* Update issue template
|
|
||||||
* Minor improvements in debug traces
|
|
||||||
* Validate environment variables at startup. resolves #101
|
|
||||||
* Add FlareSolverr logo. resolves #23
|
|
||||||
|
|
||||||
## v1.2.3 (2021/01/10)
|
|
||||||
|
|
||||||
* CI/CD: Generate release changelog from commits. resolves #34
|
|
||||||
* Update README.md
|
|
||||||
* Add donation links
|
|
||||||
* Simplify docker-compose.yml
|
|
||||||
* Allow to configure "none" captcha resolver
|
|
||||||
* Override docker-compose.yml variables via .env resolves #64 (#66)
|
|
||||||
|
|
||||||
## v1.2.2 (2021/01/09)
|
|
||||||
|
|
||||||
* Add documentation for precompiled binaries installation
|
|
||||||
* Add instructions to set environment variables in Windows
|
|
||||||
* Build Windows and Linux binaries. resolves #18
|
|
||||||
* Add release badge in the readme
|
|
||||||
* CI/CD: Generate release changelog from commits. resolves #34
|
|
||||||
* Add a notice about captcha solvers
|
|
||||||
* Add Chrome flag --disable-dev-shm-usage to fix crashes. resolves #45
|
|
||||||
* Fix Docker CLI documentation
|
|
||||||
* Add traces with captcha solver service. resolves #39
|
|
||||||
* Improve logic to detect Cloudflare captcha. resolves #48
|
|
||||||
* Move Cloudflare provider logic to its own class
|
|
||||||
* Simplify and document the "return only cookies" parameter
|
|
||||||
* Show message when debug log is enabled
|
|
||||||
* Update readme to add more clarifications. resolves #53 (#60)
|
|
||||||
* issue_template: typo fix (#52)
|
|
||||||
|
|
||||||
## v1.2.1 (2020/12/20)
|
|
||||||
|
|
||||||
* Change version to match release tag / 1.2.0 => v1.2.0
|
|
||||||
* CI/CD Publish release in GitHub repository. resolves #34
|
|
||||||
* Add welcome message in / endpoint
|
|
||||||
* Rewrite request timeout handling (maxTimeout) resolves #42
|
|
||||||
* Add http status for better logging
|
|
||||||
* Return an error when no selectors are found, #25
|
|
||||||
* Add issue template, fix #32
|
|
||||||
* Moving log.html right after loading the page and add one on reload, fix #30
|
|
||||||
* Update User-Agent to match chromium version, ref: #15 (#28)
|
|
||||||
* Update install from source code documentation
|
|
||||||
* Update readme to add Docker instructions (#20)
|
|
||||||
* Clean up readme (#19)
|
|
||||||
* Add docker-compose
|
|
||||||
* Change default log level to info
|
|
||||||
|
|
||||||
## v1.2.0 (2020/12/20)
|
|
||||||
|
|
||||||
* Fix User-Agent detected by Cloudflare (Docker ARM) resolves #15
|
|
||||||
* Include exception message in error response
|
|
||||||
* CI/CD: Rename GitHub Action build => publish
|
|
||||||
* Bump version
|
|
||||||
* Fix TypeScript compilation and bump minor version
|
|
||||||
* CI/CD: Bump minor version
|
|
||||||
* CI/CD: Configure GitHub Actions
|
|
||||||
* CI/CD: Configure GitHub Actions
|
|
||||||
* CI/CD: Bump minor version
|
|
||||||
* CI/CD: Configure Build GitHub Action
|
|
||||||
* CI/CD: Configure AutoTag GitHub Action (#14)
|
|
||||||
* CI/CD: Build the Docker images with GitHub Actions (#13)
|
|
||||||
* Update dependencies
|
|
||||||
* Backport changes from Cloudproxy (#11)
|
|
||||||
26
Dockerfile
26
Dockerfile
@@ -1,4 +1,4 @@
|
|||||||
FROM python:3.11-slim-bullseye as builder
|
FROM python:3.10-slim-bullseye as builder
|
||||||
|
|
||||||
# Build dummy packages to skip installing them and their dependencies
|
# Build dummy packages to skip installing them and their dependencies
|
||||||
RUN apt-get update \
|
RUN apt-get update \
|
||||||
@@ -12,25 +12,28 @@ RUN apt-get update \
|
|||||||
&& equivs-build adwaita-icon-theme \
|
&& equivs-build adwaita-icon-theme \
|
||||||
&& mv adwaita-icon-theme_*.deb /adwaita-icon-theme.deb
|
&& mv adwaita-icon-theme_*.deb /adwaita-icon-theme.deb
|
||||||
|
|
||||||
FROM python:3.11-slim-bullseye
|
FROM python:3.10-slim-bullseye
|
||||||
|
|
||||||
# Copy dummy packages
|
# Copy dummy packages
|
||||||
COPY --from=builder /*.deb /
|
COPY --from=builder /*.deb /
|
||||||
|
|
||||||
# Install dependencies and create flaresolverr user
|
# Install dependencies and create flaresolverr user
|
||||||
|
# We have to install an old version of Chromium because the latest one is not working on Raspberry Pi / ARM
|
||||||
# You can test Chromium running this command inside the container:
|
# You can test Chromium running this command inside the container:
|
||||||
# xvfb-run -s "-screen 0 1600x1200x24" chromium --no-sandbox
|
# xvfb-run -s "-screen 0 1600x1200x24" chromium --no-sandbox
|
||||||
# The error traces is like this: "*** stack smashing detected ***: terminated"
|
# The error traces is like this: "*** stack smashing detected ***: terminated"
|
||||||
# To check the package versions available you can use this command:
|
# To check the package versions available you can use this command:
|
||||||
# apt-cache madison chromium
|
# apt-cache madison chromium
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
RUN echo "\ndeb http://snapshot.debian.org/archive/debian/20210519T212015Z/ bullseye main" >> /etc/apt/sources.list \
|
||||||
|
&& echo 'Acquire::Check-Valid-Until "false";' | tee /etc/apt/apt.conf.d/00snapshot \
|
||||||
# Install dummy packages
|
# Install dummy packages
|
||||||
RUN dpkg -i /libgl1-mesa-dri.deb \
|
&& dpkg -i /libgl1-mesa-dri.deb \
|
||||||
&& dpkg -i /adwaita-icon-theme.deb \
|
&& dpkg -i /adwaita-icon-theme.deb \
|
||||||
# Install dependencies
|
# Install dependencies
|
||||||
&& apt-get update \
|
&& apt-get update \
|
||||||
&& apt-get install -y --no-install-recommends chromium chromium-common chromium-driver xvfb dumb-init \
|
&& apt-get install -y --no-install-recommends chromium=89.0.4389.114-1 chromium-common=89.0.4389.114-1 \
|
||||||
procps curl vim \
|
chromium-driver=89.0.4389.114-1 xvfb \
|
||||||
# Remove temporary files and hardware decoding libraries
|
# Remove temporary files and hardware decoding libraries
|
||||||
&& rm -rf /var/lib/apt/lists/* \
|
&& rm -rf /var/lib/apt/lists/* \
|
||||||
&& rm -f /usr/lib/x86_64-linux-gnu/libmfxhw* \
|
&& rm -f /usr/lib/x86_64-linux-gnu/libmfxhw* \
|
||||||
@@ -44,7 +47,8 @@ RUN dpkg -i /libgl1-mesa-dri.deb \
|
|||||||
COPY requirements.txt .
|
COPY requirements.txt .
|
||||||
RUN pip install -r requirements.txt \
|
RUN pip install -r requirements.txt \
|
||||||
# Remove temporary files
|
# Remove temporary files
|
||||||
&& rm -rf /root/.cache
|
&& rm -rf /root/.cache \
|
||||||
|
&& find / -name '*.pyc' -delete
|
||||||
|
|
||||||
USER flaresolverr
|
USER flaresolverr
|
||||||
|
|
||||||
@@ -53,17 +57,13 @@ COPY package.json ../
|
|||||||
|
|
||||||
EXPOSE 8191
|
EXPOSE 8191
|
||||||
|
|
||||||
# dumb-init avoids zombie chromium processes
|
|
||||||
ENTRYPOINT ["/usr/bin/dumb-init", "--"]
|
|
||||||
|
|
||||||
CMD ["/usr/local/bin/python", "-u", "/app/flaresolverr.py"]
|
CMD ["/usr/local/bin/python", "-u", "/app/flaresolverr.py"]
|
||||||
|
|
||||||
# Local build
|
# Local build
|
||||||
# docker build -t ngosang/flaresolverr:3.0.0 .
|
# docker build -t ngosang/flaresolverr:3.0.0.beta2 .
|
||||||
# docker run -p 8191:8191 ngosang/flaresolverr:3.0.0
|
# docker run -p 8191:8191 ngosang/flaresolverr:3.0.0.beta2
|
||||||
|
|
||||||
# Multi-arch build
|
# Multi-arch build
|
||||||
# docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
|
|
||||||
# docker buildx create --use
|
# docker buildx create --use
|
||||||
# docker buildx build -t ngosang/flaresolverr:3.0.0 --platform linux/386,linux/amd64,linux/arm/v7,linux/arm64/v8 .
|
# docker buildx build -t ngosang/flaresolverr:3.0.0.beta2 --platform linux/386,linux/amd64,linux/arm/v7,linux/arm64/v8 .
|
||||||
# add --push to publish in DockerHub
|
# add --push to publish in DockerHub
|
||||||
|
|||||||
2
LICENSE
2
LICENSE
@@ -1,6 +1,6 @@
|
|||||||
MIT License
|
MIT License
|
||||||
|
|
||||||
Copyright (c) 2023 Diego Heras (ngosang / ngosang@hotmail.es)
|
Copyright (c) 2022 Diego Heras (ngosang / ngosang@hotmail.es)
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
|||||||
@@ -64,11 +64,14 @@ Remember to restart the Docker daemon and the container after the update.
|
|||||||
|
|
||||||
### Precompiled binaries
|
### Precompiled binaries
|
||||||
|
|
||||||
Precompiled binaries are not currently available for v3. Please see https://github.com/FlareSolverr/FlareSolverr/issues/660 for updates,
|
This is the recommended way for Windows users.
|
||||||
or below for instructions of how to build FlareSolverr from source code.
|
* Download the [FlareSolverr zip](https://github.com/FlareSolverr/FlareSolverr/releases) from the release's assets. It is available for Windows and Linux.
|
||||||
|
* Extract the zip file. FlareSolverr executable and firefox folder must be in the same directory.
|
||||||
|
* Execute FlareSolverr binary. In the environment variables section you can find how to change the configuration.
|
||||||
|
|
||||||
### From source code
|
### From source code
|
||||||
|
|
||||||
|
This is the recommended way for macOS users and for developers.
|
||||||
* Install [Python 3.10](https://www.python.org/downloads/).
|
* Install [Python 3.10](https://www.python.org/downloads/).
|
||||||
* Install [Chrome](https://www.google.com/intl/en_us/chrome/) or [Chromium](https://www.chromium.org/getting-involved/download-chromium/) web browser.
|
* Install [Chrome](https://www.google.com/intl/en_us/chrome/) or [Chromium](https://www.chromium.org/getting-involved/download-chromium/) web browser.
|
||||||
* (Only in Linux / macOS) Install [Xvfb](https://en.wikipedia.org/wiki/Xvfb) package.
|
* (Only in Linux / macOS) Install [Xvfb](https://en.wikipedia.org/wiki/Xvfb) package.
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "flaresolverr",
|
"name": "flaresolverr",
|
||||||
"version": "3.0.4",
|
"version": "3.0.0.beta2",
|
||||||
"description": "Proxy server to bypass Cloudflare protection",
|
"description": "Proxy server to bypass Cloudflare protection",
|
||||||
"author": "Diego Heras (ngosang / ngosang@hotmail.es)",
|
"author": "Diego Heras (ngosang / ngosang@hotmail.es)",
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
bottle==0.12.23
|
bottle==0.12.23
|
||||||
waitress==2.1.2
|
waitress==2.1.2
|
||||||
selenium==4.7.2
|
selenium==4.4.3
|
||||||
func-timeout==4.3.5
|
func-timeout==4.3.5
|
||||||
# required by undetected_chromedriver
|
# required by undetected_chromedriver
|
||||||
requests==2.28.1
|
requests==2.28.1
|
||||||
websockets==10.4
|
websockets==10.3
|
||||||
# only required for linux
|
# only required for linux
|
||||||
xvfbwrapper==0.2.9
|
xvfbwrapper==0.2.9
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
import logging
|
import logging
|
||||||
import sys
|
|
||||||
import time
|
import time
|
||||||
from urllib.parse import unquote
|
from urllib.parse import unquote
|
||||||
|
|
||||||
@@ -7,58 +6,30 @@ from func_timeout import func_timeout, FunctionTimedOut
|
|||||||
from selenium.common import TimeoutException
|
from selenium.common import TimeoutException
|
||||||
from selenium.webdriver.chrome.webdriver import WebDriver
|
from selenium.webdriver.chrome.webdriver import WebDriver
|
||||||
from selenium.webdriver.common.by import By
|
from selenium.webdriver.common.by import By
|
||||||
from selenium.webdriver.common.action_chains import ActionChains
|
|
||||||
from selenium.webdriver.support.wait import WebDriverWait
|
from selenium.webdriver.support.wait import WebDriverWait
|
||||||
from selenium.webdriver.support.expected_conditions import presence_of_element_located, staleness_of, title_is
|
from selenium.webdriver.support.expected_conditions import presence_of_element_located, staleness_of
|
||||||
|
|
||||||
from dtos import V1RequestBase, V1ResponseBase, ChallengeResolutionT, ChallengeResolutionResultT, IndexResponse, \
|
from dtos import V1RequestBase, V1ResponseBase, ChallengeResolutionT, ChallengeResolutionResultT, IndexResponse, \
|
||||||
HealthResponse, STATUS_OK, STATUS_ERROR
|
HealthResponse, STATUS_OK, STATUS_ERROR
|
||||||
import utils
|
import utils
|
||||||
|
|
||||||
ACCESS_DENIED_TITLES = [
|
|
||||||
# Cloudflare
|
|
||||||
'Access denied',
|
|
||||||
# Cloudflare http://bitturk.net/ Firefox
|
|
||||||
'Attention Required! | Cloudflare'
|
|
||||||
]
|
|
||||||
ACCESS_DENIED_SELECTORS = [
|
ACCESS_DENIED_SELECTORS = [
|
||||||
# Cloudflare
|
# Cloudflare
|
||||||
'div.cf-error-title span.cf-code-label span',
|
'div.main-wrapper div.header.section h1 span.code-label span'
|
||||||
# Cloudflare http://bitturk.net/ Firefox
|
|
||||||
'#cf-error-details div.cf-error-overview h1'
|
|
||||||
]
|
|
||||||
CHALLENGE_TITLES = [
|
|
||||||
# Cloudflare
|
|
||||||
'Just a moment...',
|
|
||||||
# DDoS-GUARD
|
|
||||||
'DDOS-GUARD',
|
|
||||||
]
|
]
|
||||||
CHALLENGE_SELECTORS = [
|
CHALLENGE_SELECTORS = [
|
||||||
# Cloudflare
|
# Cloudflare
|
||||||
'#cf-challenge-running', '.ray_id', '.attack-box', '#cf-please-wait', '#challenge-spinner', '#trk_jschal_js',
|
'#cf-challenge-running', '.ray_id', '.attack-box', '#cf-please-wait', '#trk_jschal_js',
|
||||||
|
# DDoS-GUARD
|
||||||
|
'#link-ddg',
|
||||||
# Custom CloudFlare for EbookParadijs, Film-Paleis, MuziekFabriek and Puur-Hollands
|
# Custom CloudFlare for EbookParadijs, Film-Paleis, MuziekFabriek and Puur-Hollands
|
||||||
'td.info #js_info'
|
'td.info #js_info'
|
||||||
]
|
]
|
||||||
SHORT_TIMEOUT = 10
|
SHORT_TIMEOUT = 5
|
||||||
|
|
||||||
|
|
||||||
def test_browser_installation():
|
def test_browser_installation():
|
||||||
logging.info("Testing web browser installation...")
|
logging.info("Testing web browser installation...")
|
||||||
|
|
||||||
chrome_exe_path = utils.get_chrome_exe_path()
|
|
||||||
if chrome_exe_path is None:
|
|
||||||
logging.error("Chrome / Chromium web browser not installed!")
|
|
||||||
sys.exit(1)
|
|
||||||
else:
|
|
||||||
logging.info("Chrome / Chromium path: " + chrome_exe_path)
|
|
||||||
|
|
||||||
chrome_major_version = utils.get_chrome_major_version()
|
|
||||||
if chrome_major_version == '':
|
|
||||||
logging.error("Chrome / Chromium version not detected!")
|
|
||||||
sys.exit(1)
|
|
||||||
else:
|
|
||||||
logging.info("Chrome / Chromium major version: " + chrome_major_version)
|
|
||||||
|
|
||||||
user_agent = utils.get_user_agent()
|
user_agent = utils.get_user_agent()
|
||||||
logging.info("FlareSolverr User-Agent: " + user_agent)
|
logging.info("FlareSolverr User-Agent: " + user_agent)
|
||||||
logging.info("Test successful")
|
logging.info("Test successful")
|
||||||
@@ -181,45 +152,6 @@ def _resolve_challenge(req: V1RequestBase, method: str) -> ChallengeResolutionT:
|
|||||||
driver.quit()
|
driver.quit()
|
||||||
|
|
||||||
|
|
||||||
def click_verify(driver: WebDriver):
|
|
||||||
try:
|
|
||||||
logging.debug("Try to find the Cloudflare verify checkbox")
|
|
||||||
iframe = driver.find_element(By.XPATH, "//iframe[@title='Widget containing a Cloudflare security challenge']")
|
|
||||||
driver.switch_to.frame(iframe)
|
|
||||||
checkbox = driver.find_element(
|
|
||||||
by=By.XPATH,
|
|
||||||
value='//*[@id="cf-stage"]//label[@class="ctp-checkbox-label"]/input',
|
|
||||||
)
|
|
||||||
if checkbox:
|
|
||||||
actions = ActionChains(driver)
|
|
||||||
actions.move_to_element_with_offset(checkbox, 5, 7)
|
|
||||||
actions.click(checkbox)
|
|
||||||
actions.perform()
|
|
||||||
logging.debug("Cloudflare verify checkbox found and clicked")
|
|
||||||
except Exception as e:
|
|
||||||
logging.debug("Cloudflare verify checkbox not found on the page")
|
|
||||||
# print(e)
|
|
||||||
finally:
|
|
||||||
driver.switch_to.default_content()
|
|
||||||
|
|
||||||
try:
|
|
||||||
logging.debug("Try to find the Cloudflare 'Verify you are human' button")
|
|
||||||
button = driver.find_element(
|
|
||||||
by=By.XPATH,
|
|
||||||
value="//input[@type='button' and @value='Verify you are human']",
|
|
||||||
)
|
|
||||||
if button:
|
|
||||||
actions = ActionChains(driver)
|
|
||||||
actions.move_to_element_with_offset(button, 5, 7)
|
|
||||||
actions.click(button)
|
|
||||||
actions.perform()
|
|
||||||
logging.debug("The Cloudflare 'Verify you are human' button found and clicked")
|
|
||||||
except Exception as e:
|
|
||||||
logging.debug("The Cloudflare 'Verify you are human' button not found on the page")
|
|
||||||
# print(e)
|
|
||||||
|
|
||||||
time.sleep(2)
|
|
||||||
|
|
||||||
def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> ChallengeResolutionT:
|
def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> ChallengeResolutionT:
|
||||||
res = ChallengeResolutionT({})
|
res = ChallengeResolutionT({})
|
||||||
res.status = STATUS_OK
|
res.status = STATUS_OK
|
||||||
@@ -236,13 +168,7 @@ def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> Challenge
|
|||||||
|
|
||||||
# wait for the page
|
# wait for the page
|
||||||
html_element = driver.find_element(By.TAG_NAME, "html")
|
html_element = driver.find_element(By.TAG_NAME, "html")
|
||||||
page_title = driver.title
|
|
||||||
|
|
||||||
# find access denied titles
|
|
||||||
for title in ACCESS_DENIED_TITLES:
|
|
||||||
if title == page_title:
|
|
||||||
raise Exception('Cloudflare has blocked this request. '
|
|
||||||
'Probably your IP is banned for this site, check in your web browser.')
|
|
||||||
# find access denied selectors
|
# find access denied selectors
|
||||||
for selector in ACCESS_DENIED_SELECTORS:
|
for selector in ACCESS_DENIED_SELECTORS:
|
||||||
found_elements = driver.find_elements(By.CSS_SELECTOR, selector)
|
found_elements = driver.find_elements(By.CSS_SELECTOR, selector)
|
||||||
@@ -250,15 +176,8 @@ def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> Challenge
|
|||||||
raise Exception('Cloudflare has blocked this request. '
|
raise Exception('Cloudflare has blocked this request. '
|
||||||
'Probably your IP is banned for this site, check in your web browser.')
|
'Probably your IP is banned for this site, check in your web browser.')
|
||||||
|
|
||||||
# find challenge by title
|
# find challenge selectors
|
||||||
challenge_found = False
|
challenge_found = False
|
||||||
for title in CHALLENGE_TITLES:
|
|
||||||
if title == page_title:
|
|
||||||
challenge_found = True
|
|
||||||
logging.info("Challenge detected. Title found: " + title)
|
|
||||||
break
|
|
||||||
if not challenge_found:
|
|
||||||
# find challenge by selectors
|
|
||||||
for selector in CHALLENGE_SELECTORS:
|
for selector in CHALLENGE_SELECTORS:
|
||||||
found_elements = driver.find_elements(By.CSS_SELECTOR, selector)
|
found_elements = driver.find_elements(By.CSS_SELECTOR, selector)
|
||||||
if len(found_elements) > 0:
|
if len(found_elements) > 0:
|
||||||
@@ -266,19 +185,12 @@ def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> Challenge
|
|||||||
logging.info("Challenge detected. Selector found: " + selector)
|
logging.info("Challenge detected. Selector found: " + selector)
|
||||||
break
|
break
|
||||||
|
|
||||||
attempt = 0
|
|
||||||
if challenge_found:
|
if challenge_found:
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
attempt = attempt + 1
|
|
||||||
# wait until the title changes
|
|
||||||
for title in CHALLENGE_TITLES:
|
|
||||||
logging.debug("Waiting for title (attempt " + str(attempt) + "): " + title)
|
|
||||||
WebDriverWait(driver, SHORT_TIMEOUT).until_not(title_is(title))
|
|
||||||
|
|
||||||
# then wait until all the selectors disappear
|
# then wait until all the selectors disappear
|
||||||
for selector in CHALLENGE_SELECTORS:
|
for selector in CHALLENGE_SELECTORS:
|
||||||
logging.debug("Waiting for selector (attempt " + str(attempt) + "): " + selector)
|
logging.debug("Waiting for selector: " + selector)
|
||||||
WebDriverWait(driver, SHORT_TIMEOUT).until_not(
|
WebDriverWait(driver, SHORT_TIMEOUT).until_not(
|
||||||
presence_of_element_located((By.CSS_SELECTOR, selector)))
|
presence_of_element_located((By.CSS_SELECTOR, selector)))
|
||||||
|
|
||||||
@@ -287,9 +199,6 @@ def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> Challenge
|
|||||||
|
|
||||||
except TimeoutException:
|
except TimeoutException:
|
||||||
logging.debug("Timeout waiting for selector")
|
logging.debug("Timeout waiting for selector")
|
||||||
|
|
||||||
click_verify(driver)
|
|
||||||
|
|
||||||
# update the html (cloudflare reloads the page every 5 s)
|
# update the html (cloudflare reloads the page every 5 s)
|
||||||
html_element = driver.find_element(By.TAG_NAME, "html")
|
html_element = driver.find_element(By.TAG_NAME, "html")
|
||||||
|
|
||||||
@@ -311,11 +220,11 @@ def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> Challenge
|
|||||||
challenge_res.url = driver.current_url
|
challenge_res.url = driver.current_url
|
||||||
challenge_res.status = 200 # todo: fix, selenium not provides this info
|
challenge_res.status = 200 # todo: fix, selenium not provides this info
|
||||||
challenge_res.cookies = driver.get_cookies()
|
challenge_res.cookies = driver.get_cookies()
|
||||||
challenge_res.userAgent = utils.get_user_agent(driver)
|
|
||||||
|
|
||||||
if not req.returnOnlyCookies:
|
if not req.returnOnlyCookies:
|
||||||
challenge_res.headers = {} # todo: fix, selenium not provides this info
|
challenge_res.headers = {} # todo: fix, selenium not provides this info
|
||||||
challenge_res.response = driver.page_source
|
challenge_res.response = driver.page_source
|
||||||
|
challenge_res.userAgent = utils.get_user_agent(driver)
|
||||||
|
|
||||||
res.result = challenge_res
|
res.result = challenge_res
|
||||||
return res
|
return res
|
||||||
|
|||||||
23
src/tests.py
23
src/tests.py
@@ -1,4 +1,5 @@
|
|||||||
import unittest
|
import unittest
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
from webtest import TestApp
|
from webtest import TestApp
|
||||||
|
|
||||||
@@ -19,12 +20,12 @@ class TestFlareSolverr(unittest.TestCase):
|
|||||||
proxy_url = "http://127.0.0.1:8888"
|
proxy_url = "http://127.0.0.1:8888"
|
||||||
proxy_socks_url = "socks5://127.0.0.1:1080"
|
proxy_socks_url = "socks5://127.0.0.1:1080"
|
||||||
google_url = "https://www.google.com"
|
google_url = "https://www.google.com"
|
||||||
post_url = "https://httpbin.org/post"
|
post_url = "https://ptsv2.com/t/qv4j3-1634496523"
|
||||||
cloudflare_url = "https://nowsecure.nl"
|
cloudflare_url = "https://nowsecure.nl"
|
||||||
cloudflare_url_2 = "https://idope.se/torrent-list/harry/"
|
cloudflare_url_2 = "https://idope.se/torrent-list/harry/"
|
||||||
ddos_guard_url = "https://anidex.info/"
|
ddos_guard_url = "https://anidex.info/"
|
||||||
custom_cloudflare_url = "https://www.muziekfabriek.org"
|
custom_cloudflare_url = "https://www.muziekfabriek.org"
|
||||||
cloudflare_blocked_url = "https://cpasbiens3.fr/index.php?do=search&subaction=search"
|
cloudflare_blocked_url = "https://avistaz.to/api/v1/jackett/torrents?in=1&type=0&search="
|
||||||
|
|
||||||
app = TestApp(flaresolverr.app)
|
app = TestApp(flaresolverr.app)
|
||||||
|
|
||||||
@@ -232,7 +233,7 @@ class TestFlareSolverr(unittest.TestCase):
|
|||||||
self.assertIsNone(solution.headers)
|
self.assertIsNone(solution.headers)
|
||||||
self.assertIsNone(solution.response)
|
self.assertIsNone(solution.response)
|
||||||
self.assertGreater(len(solution.cookies), 0)
|
self.assertGreater(len(solution.cookies), 0)
|
||||||
self.assertIn("Chrome/", solution.userAgent)
|
self.assertIsNone(solution.userAgent)
|
||||||
|
|
||||||
# todo: test Cmd 'request.get' should return OK with HTTP 'proxy' param
|
# todo: test Cmd 'request.get' should return OK with HTTP 'proxy' param
|
||||||
# todo: test Cmd 'request.get' should return OK with HTTP 'proxy' param with credentials
|
# todo: test Cmd 'request.get' should return OK with HTTP 'proxy' param with credentials
|
||||||
@@ -280,7 +281,7 @@ class TestFlareSolverr(unittest.TestCase):
|
|||||||
def test_v1_endpoint_request_post_no_cloudflare(self):
|
def test_v1_endpoint_request_post_no_cloudflare(self):
|
||||||
res = self.app.post_json('/v1', {
|
res = self.app.post_json('/v1', {
|
||||||
"cmd": "request.post",
|
"cmd": "request.post",
|
||||||
"url": self.post_url,
|
"url": self.post_url + '/post',
|
||||||
"postData": "param1=value1&param2=value2"
|
"postData": "param1=value1&param2=value2"
|
||||||
})
|
})
|
||||||
self.assertEqual(res.status_code, 200)
|
self.assertEqual(res.status_code, 200)
|
||||||
@@ -296,10 +297,22 @@ class TestFlareSolverr(unittest.TestCase):
|
|||||||
self.assertIn(self.post_url, solution.url)
|
self.assertIn(self.post_url, solution.url)
|
||||||
self.assertEqual(solution.status, 200)
|
self.assertEqual(solution.status, 200)
|
||||||
self.assertIs(len(solution.headers), 0)
|
self.assertIs(len(solution.headers), 0)
|
||||||
self.assertIn('"form": {\n "param1": "value1", \n "param2": "value2"\n }', solution.response)
|
self.assertIn("I hope you have a lovely day!", solution.response)
|
||||||
self.assertEqual(len(solution.cookies), 0)
|
self.assertEqual(len(solution.cookies), 0)
|
||||||
self.assertIn("Chrome/", solution.userAgent)
|
self.assertIn("Chrome/", solution.userAgent)
|
||||||
|
|
||||||
|
# check that we sent the post data
|
||||||
|
res2 = self.app.post_json('/v1', {
|
||||||
|
"cmd": "request.get",
|
||||||
|
"url": self.post_url
|
||||||
|
})
|
||||||
|
self.assertEqual(res2.status_code, 200)
|
||||||
|
|
||||||
|
body2 = V1ResponseBase(res2.json)
|
||||||
|
self.assertEqual(STATUS_OK, body2.status)
|
||||||
|
date_hour = datetime.now(timezone.utc).isoformat().split(':')[0].replace('T', ' ')
|
||||||
|
self.assertIn(date_hour, body2.solution.response)
|
||||||
|
|
||||||
def test_v1_endpoint_request_post_cloudflare(self):
|
def test_v1_endpoint_request_post_cloudflare(self):
|
||||||
res = self.app.post_json('/v1', {
|
res = self.app.post_json('/v1', {
|
||||||
"cmd": "request.post",
|
"cmd": "request.post",
|
||||||
|
|||||||
@@ -1,4 +1,7 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import subprocess
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -14,38 +17,33 @@ Y88b. 888 888 888 Y88..88P 888 888 888 Y8b. Y88b 888 888 888 Y
|
|||||||
by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
|
by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
|
|
||||||
__version__ = "3.4.6"
|
__version__ = "3.1.5r4"
|
||||||
|
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
import subprocess
|
|
||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
import time
|
import time
|
||||||
from weakref import finalize
|
import inspect
|
||||||
|
import threading
|
||||||
|
|
||||||
import selenium.webdriver.chrome.service
|
import selenium.webdriver.chrome.service
|
||||||
import selenium.webdriver.chrome.webdriver
|
import selenium.webdriver.chrome.webdriver
|
||||||
from selenium.webdriver.common.by import By
|
|
||||||
import selenium.webdriver.common.service
|
import selenium.webdriver.common.service
|
||||||
import selenium.webdriver.remote.command
|
|
||||||
import selenium.webdriver.remote.webdriver
|
import selenium.webdriver.remote.webdriver
|
||||||
|
|
||||||
from .cdp import CDP
|
from .cdp import CDP
|
||||||
from .dprocess import start_detached
|
|
||||||
from .options import ChromeOptions
|
from .options import ChromeOptions
|
||||||
from .patcher import IS_POSIX
|
from .patcher import IS_POSIX
|
||||||
from .patcher import Patcher
|
from .patcher import Patcher
|
||||||
from .reactor import Reactor
|
from .reactor import Reactor
|
||||||
from .webelement import UCWebElement
|
from .dprocess import start_detached
|
||||||
from .webelement import WebElement
|
|
||||||
|
|
||||||
|
|
||||||
__all__ = (
|
__all__ = (
|
||||||
"Chrome",
|
"Chrome",
|
||||||
@@ -110,7 +108,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
port=0,
|
port=0,
|
||||||
enable_cdp_events=False,
|
enable_cdp_events=False,
|
||||||
service_args=None,
|
service_args=None,
|
||||||
service_creationflags=None,
|
|
||||||
desired_capabilities=None,
|
desired_capabilities=None,
|
||||||
advanced_elements=False,
|
advanced_elements=False,
|
||||||
service_log_path=None,
|
service_log_path=None,
|
||||||
@@ -122,9 +119,8 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
suppress_welcome=True,
|
suppress_welcome=True,
|
||||||
use_subprocess=False,
|
use_subprocess=False,
|
||||||
debug=False,
|
debug=False,
|
||||||
no_sandbox=True,
|
|
||||||
windows_headless=False,
|
windows_headless=False,
|
||||||
**kw,
|
**kw
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Creates a new instance of the chrome driver.
|
Creates a new instance of the chrome driver.
|
||||||
@@ -151,9 +147,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
If not specified, make sure the executable's folder is in $PATH
|
If not specified, make sure the executable's folder is in $PATH
|
||||||
|
|
||||||
port: int, optional, default: 0
|
port: int, optional, default: 0
|
||||||
port to be used by the chromedriver executable, this is NOT the debugger port.
|
port you would like the service to run, if left as 0, a free port will be found.
|
||||||
leave it at 0 unless you know what you are doing.
|
|
||||||
the default value of 0 automatically picks an available port.
|
|
||||||
|
|
||||||
enable_cdp_events: bool, default: False
|
enable_cdp_events: bool, default: False
|
||||||
:: currently for chrome only
|
:: currently for chrome only
|
||||||
@@ -213,12 +207,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
now, in case you are nag-fetishist, or a diagnostics data feeder to google, you can set this to False.
|
now, in case you are nag-fetishist, or a diagnostics data feeder to google, you can set this to False.
|
||||||
Note: if you don't handle the nag screen in time, the browser loses it's connection and throws an Exception.
|
Note: if you don't handle the nag screen in time, the browser loses it's connection and throws an Exception.
|
||||||
|
|
||||||
use_subprocess: bool, optional , default: True,
|
use_subprocess: bool, optional , default: False,
|
||||||
|
|
||||||
False (the default) makes sure Chrome will get it's own process (so no subprocess of chromedriver.exe or python
|
False (the default) makes sure Chrome will get it's own process (so no subprocess of chromedriver.exe or python
|
||||||
This fixes a LOT of issues, like multithreaded run, but mst importantly. shutting corectly after
|
This fixes a LOT of issues, like multithreaded run, but mst importantly. shutting corectly after
|
||||||
program exits or using .quit()
|
program exits or using .quit()
|
||||||
you should be knowing what you're doing, and know how python works.
|
|
||||||
|
|
||||||
unfortunately, there is always an edge case in which one would like to write an single script with the only contents being:
|
unfortunately, there is always an edge case in which one would like to write an single script with the only contents being:
|
||||||
--start script--
|
--start script--
|
||||||
@@ -231,24 +224,19 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
in that case you can set this to `True`. The browser will start via subprocess, and will keep running most of times.
|
in that case you can set this to `True`. The browser will start via subprocess, and will keep running most of times.
|
||||||
! setting it to True comes with NO support when being detected. !
|
! setting it to True comes with NO support when being detected. !
|
||||||
|
|
||||||
no_sandbox: bool, optional, default=True
|
|
||||||
uses the --no-sandbox option, and additionally does suppress the "unsecure option" status bar
|
|
||||||
this option has a default of True since many people seem to run this as root (....) , and chrome does not start
|
|
||||||
when running as root without using --no-sandbox flag.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
finalize(self, self._ensure_close, self)
|
|
||||||
self.debug = debug
|
self.debug = debug
|
||||||
self.patcher = Patcher(
|
patcher = Patcher(
|
||||||
executable_path=driver_executable_path,
|
executable_path=driver_executable_path,
|
||||||
force=patcher_force_close,
|
force=patcher_force_close,
|
||||||
version_main=version_main,
|
version_main=version_main,
|
||||||
)
|
)
|
||||||
self.patcher.auto()
|
patcher.auto()
|
||||||
# self.patcher = patcher
|
self.patcher = patcher
|
||||||
if not options:
|
if not options:
|
||||||
options = ChromeOptions()
|
options = ChromeOptions()
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if hasattr(options, "_session") and options._session is not None:
|
if hasattr(options, "_session") and options._session is not None:
|
||||||
# prevent reuse of options,
|
# prevent reuse of options,
|
||||||
@@ -260,17 +248,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
|
|
||||||
options._session = self
|
options._session = self
|
||||||
|
|
||||||
if not options.debugger_address:
|
debug_port = selenium.webdriver.common.service.utils.free_port()
|
||||||
debug_port = (
|
|
||||||
port
|
|
||||||
if port != 0
|
|
||||||
else selenium.webdriver.common.service.utils.free_port()
|
|
||||||
)
|
|
||||||
debug_host = "127.0.0.1"
|
debug_host = "127.0.0.1"
|
||||||
|
|
||||||
|
if not options.debugger_address:
|
||||||
options.debugger_address = "%s:%d" % (debug_host, debug_port)
|
options.debugger_address = "%s:%d" % (debug_host, debug_port)
|
||||||
else:
|
|
||||||
debug_host, debug_port = options.debugger_address.split(":")
|
|
||||||
debug_port = int(debug_port)
|
|
||||||
|
|
||||||
if enable_cdp_events:
|
if enable_cdp_events:
|
||||||
options.set_capability(
|
options.set_capability(
|
||||||
@@ -281,17 +263,13 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
options.add_argument("--remote-debugging-port=%s" % debug_port)
|
options.add_argument("--remote-debugging-port=%s" % debug_port)
|
||||||
|
|
||||||
if user_data_dir:
|
if user_data_dir:
|
||||||
options.add_argument("--user-data-dir=%s" % user_data_dir)
|
options.add_argument('--user-data-dir=%s' % user_data_dir)
|
||||||
|
|
||||||
language, keep_user_data_dir = None, bool(user_data_dir)
|
language, keep_user_data_dir = None, bool(user_data_dir)
|
||||||
|
|
||||||
# see if a custom user profile is specified in options
|
# see if a custom user profile is specified in options
|
||||||
for arg in options.arguments:
|
for arg in options.arguments:
|
||||||
|
|
||||||
if any([_ in arg for _ in ("--headless", "headless")]):
|
|
||||||
options.arguments.remove(arg)
|
|
||||||
options.headless = True
|
|
||||||
|
|
||||||
if "lang" in arg:
|
if "lang" in arg:
|
||||||
m = re.search("(?:--)?lang(?:[ =])?(.*)", arg)
|
m = re.search("(?:--)?lang(?:[ =])?(.*)", arg)
|
||||||
try:
|
try:
|
||||||
@@ -316,6 +294,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
)
|
)
|
||||||
|
|
||||||
if not user_data_dir:
|
if not user_data_dir:
|
||||||
|
|
||||||
# backward compatiblity
|
# backward compatiblity
|
||||||
# check if an old uc.ChromeOptions is used, and extract the user data dir
|
# check if an old uc.ChromeOptions is used, and extract the user data dir
|
||||||
|
|
||||||
@@ -368,15 +347,8 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
|
|
||||||
if suppress_welcome:
|
if suppress_welcome:
|
||||||
options.arguments.extend(["--no-default-browser-check", "--no-first-run"])
|
options.arguments.extend(["--no-default-browser-check", "--no-first-run"])
|
||||||
if no_sandbox:
|
|
||||||
options.arguments.extend(["--no-sandbox", "--test-type"])
|
|
||||||
|
|
||||||
if headless or options.headless:
|
if headless or options.headless:
|
||||||
if self.patcher.version_main < 108:
|
options.headless = True
|
||||||
options.add_argument("--headless=chrome")
|
|
||||||
elif self.patcher.version_main >= 108:
|
|
||||||
options.add_argument("--headless=new")
|
|
||||||
|
|
||||||
options.add_argument("--window-size=1920,1080")
|
options.add_argument("--window-size=1920,1080")
|
||||||
options.add_argument("--start-maximized")
|
options.add_argument("--start-maximized")
|
||||||
options.add_argument("--no-sandbox")
|
options.add_argument("--no-sandbox")
|
||||||
@@ -388,7 +360,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
or divmod(logging.getLogger().getEffectiveLevel(), 10)[0]
|
or divmod(logging.getLogger().getEffectiveLevel(), 10)[0]
|
||||||
)
|
)
|
||||||
|
|
||||||
if hasattr(options, "handle_prefs"):
|
if hasattr(options, 'handle_prefs'):
|
||||||
options.handle_prefs(user_data_dir)
|
options.handle_prefs(user_data_dir)
|
||||||
|
|
||||||
# fix exit_type flag to prevent tab-restore nag
|
# fix exit_type flag to prevent tab-restore nag
|
||||||
@@ -404,7 +376,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
config["profile"]["exit_type"] = None
|
config["profile"]["exit_type"] = None
|
||||||
fs.seek(0, 0)
|
fs.seek(0, 0)
|
||||||
json.dump(config, fs)
|
json.dump(config, fs)
|
||||||
fs.truncate() # the file might be shorter
|
|
||||||
logger.debug("fixed exit_type flag")
|
logger.debug("fixed exit_type flag")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug("did not find a bad exit_type flag ")
|
logger.debug("did not find a bad exit_type flag ")
|
||||||
@@ -432,26 +403,14 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
)
|
)
|
||||||
self.browser_pid = browser.pid
|
self.browser_pid = browser.pid
|
||||||
|
|
||||||
if service_creationflags:
|
|
||||||
service = selenium.webdriver.common.service.Service(
|
|
||||||
self.patcher.executable_path, port, service_args, service_log_path
|
|
||||||
)
|
|
||||||
for attr_name in ("creationflags", "creation_flags"):
|
|
||||||
if hasattr(service, attr_name):
|
|
||||||
setattr(service, attr_name, service_creationflags)
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
service = None
|
|
||||||
|
|
||||||
super(Chrome, self).__init__(
|
super(Chrome, self).__init__(
|
||||||
executable_path=self.patcher.executable_path,
|
executable_path=patcher.executable_path,
|
||||||
port=port,
|
port=port,
|
||||||
options=options,
|
options=options,
|
||||||
service_args=service_args,
|
service_args=service_args,
|
||||||
desired_capabilities=desired_capabilities,
|
desired_capabilities=desired_capabilities,
|
||||||
service_log_path=service_log_path,
|
service_log_path=service_log_path,
|
||||||
keep_alive=keep_alive,
|
keep_alive=keep_alive,
|
||||||
service=service, # needed or the service will be re-created
|
|
||||||
)
|
)
|
||||||
|
|
||||||
self.reactor = None
|
self.reactor = None
|
||||||
@@ -466,14 +425,35 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
self.reactor = reactor
|
self.reactor = reactor
|
||||||
|
|
||||||
if advanced_elements:
|
if advanced_elements:
|
||||||
self._web_element_cls = UCWebElement
|
from .webelement import WebElement
|
||||||
else:
|
|
||||||
self._web_element_cls = WebElement
|
self._web_element_cls = WebElement
|
||||||
|
|
||||||
if options.headless:
|
if options.headless:
|
||||||
self._configure_headless()
|
self._configure_headless()
|
||||||
|
|
||||||
|
def __getattribute__(self, item):
|
||||||
|
|
||||||
|
if not super().__getattribute__("debug"):
|
||||||
|
return super().__getattribute__(item)
|
||||||
|
else:
|
||||||
|
import inspect
|
||||||
|
|
||||||
|
original = super().__getattribute__(item)
|
||||||
|
if inspect.ismethod(original) and not inspect.isclass(original):
|
||||||
|
|
||||||
|
def newfunc(*args, **kwargs):
|
||||||
|
logger.debug(
|
||||||
|
"calling %s with args %s and kwargs %s\n"
|
||||||
|
% (original.__qualname__, args, kwargs)
|
||||||
|
)
|
||||||
|
return original(*args, **kwargs)
|
||||||
|
|
||||||
|
return newfunc
|
||||||
|
return original
|
||||||
|
|
||||||
def _configure_headless(self):
|
def _configure_headless(self):
|
||||||
|
|
||||||
orig_get = self.get
|
orig_get = self.get
|
||||||
logger.info("setting properties for headless")
|
logger.info("setting properties for headless")
|
||||||
|
|
||||||
@@ -485,18 +465,18 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
{
|
{
|
||||||
"source": """
|
"source": """
|
||||||
|
|
||||||
Object.defineProperty(window, "navigator", {
|
Object.defineProperty(window, 'navigator', {
|
||||||
Object.defineProperty(window, "navigator", {
|
|
||||||
value: new Proxy(navigator, {
|
value: new Proxy(navigator, {
|
||||||
has: (target, key) => (key === "webdriver" ? false : key in target),
|
has: (target, key) => (key === 'webdriver' ? false : key in target),
|
||||||
get: (target, key) =>
|
get: (target, key) =>
|
||||||
key === "webdriver"
|
key === 'webdriver' ?
|
||||||
? false
|
false :
|
||||||
: typeof target[key] === "function"
|
typeof target[key] === 'function' ?
|
||||||
? target[key].bind(target)
|
target[key].bind(target) :
|
||||||
: target[key],
|
target[key]
|
||||||
}),
|
})
|
||||||
});
|
});
|
||||||
|
|
||||||
"""
|
"""
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
@@ -514,139 +494,49 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
"Page.addScriptToEvaluateOnNewDocument",
|
"Page.addScriptToEvaluateOnNewDocument",
|
||||||
{
|
{
|
||||||
"source": """
|
"source": """
|
||||||
Object.defineProperty(navigator, 'maxTouchPoints', {get: () => 1});
|
Object.defineProperty(navigator, 'maxTouchPoints', {
|
||||||
Object.defineProperty(navigator.connection, 'rtt', {get: () => 100});
|
get: () => 1
|
||||||
|
})"""
|
||||||
// https://github.com/microlinkhq/browserless/blob/master/packages/goto/src/evasions/chrome-runtime.js
|
|
||||||
window.chrome = {
|
|
||||||
app: {
|
|
||||||
isInstalled: false,
|
|
||||||
InstallState: {
|
|
||||||
DISABLED: 'disabled',
|
|
||||||
INSTALLED: 'installed',
|
|
||||||
NOT_INSTALLED: 'not_installed'
|
|
||||||
},
|
|
||||||
RunningState: {
|
|
||||||
CANNOT_RUN: 'cannot_run',
|
|
||||||
READY_TO_RUN: 'ready_to_run',
|
|
||||||
RUNNING: 'running'
|
|
||||||
}
|
|
||||||
},
|
|
||||||
runtime: {
|
|
||||||
OnInstalledReason: {
|
|
||||||
CHROME_UPDATE: 'chrome_update',
|
|
||||||
INSTALL: 'install',
|
|
||||||
SHARED_MODULE_UPDATE: 'shared_module_update',
|
|
||||||
UPDATE: 'update'
|
|
||||||
},
|
|
||||||
OnRestartRequiredReason: {
|
|
||||||
APP_UPDATE: 'app_update',
|
|
||||||
OS_UPDATE: 'os_update',
|
|
||||||
PERIODIC: 'periodic'
|
|
||||||
},
|
|
||||||
PlatformArch: {
|
|
||||||
ARM: 'arm',
|
|
||||||
ARM64: 'arm64',
|
|
||||||
MIPS: 'mips',
|
|
||||||
MIPS64: 'mips64',
|
|
||||||
X86_32: 'x86-32',
|
|
||||||
X86_64: 'x86-64'
|
|
||||||
},
|
|
||||||
PlatformNaclArch: {
|
|
||||||
ARM: 'arm',
|
|
||||||
MIPS: 'mips',
|
|
||||||
MIPS64: 'mips64',
|
|
||||||
X86_32: 'x86-32',
|
|
||||||
X86_64: 'x86-64'
|
|
||||||
},
|
|
||||||
PlatformOs: {
|
|
||||||
ANDROID: 'android',
|
|
||||||
CROS: 'cros',
|
|
||||||
LINUX: 'linux',
|
|
||||||
MAC: 'mac',
|
|
||||||
OPENBSD: 'openbsd',
|
|
||||||
WIN: 'win'
|
|
||||||
},
|
|
||||||
RequestUpdateCheckStatus: {
|
|
||||||
NO_UPDATE: 'no_update',
|
|
||||||
THROTTLED: 'throttled',
|
|
||||||
UPDATE_AVAILABLE: 'update_available'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// https://github.com/microlinkhq/browserless/blob/master/packages/goto/src/evasions/navigator-permissions.js
|
|
||||||
if (!window.Notification) {
|
|
||||||
window.Notification = {
|
|
||||||
permission: 'denied'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const originalQuery = window.navigator.permissions.query
|
|
||||||
window.navigator.permissions.__proto__.query = parameters =>
|
|
||||||
parameters.name === 'notifications'
|
|
||||||
? Promise.resolve({ state: window.Notification.permission })
|
|
||||||
: originalQuery(parameters)
|
|
||||||
|
|
||||||
const oldCall = Function.prototype.call
|
|
||||||
function call() {
|
|
||||||
return oldCall.apply(this, arguments)
|
|
||||||
}
|
|
||||||
Function.prototype.call = call
|
|
||||||
|
|
||||||
const nativeToStringFunctionString = Error.toString().replace(/Error/g, 'toString')
|
|
||||||
const oldToString = Function.prototype.toString
|
|
||||||
|
|
||||||
function functionToString() {
|
|
||||||
if (this === window.navigator.permissions.query) {
|
|
||||||
return 'function query() { [native code] }'
|
|
||||||
}
|
|
||||||
if (this === functionToString) {
|
|
||||||
return nativeToStringFunctionString
|
|
||||||
}
|
|
||||||
return oldCall.call(oldToString, this)
|
|
||||||
}
|
|
||||||
// eslint-disable-next-line
|
|
||||||
Function.prototype.toString = functionToString
|
|
||||||
"""
|
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
return orig_get(*args, **kwargs)
|
return orig_get(*args, **kwargs)
|
||||||
|
|
||||||
self.get = get_wrapped
|
self.get = get_wrapped
|
||||||
|
|
||||||
# def _get_cdc_props(self):
|
def __dir__(self):
|
||||||
# return self.execute_script(
|
return object.__dir__(self)
|
||||||
# """
|
|
||||||
# let objectToInspect = window,
|
def _get_cdc_props(self):
|
||||||
# result = [];
|
return self.execute_script(
|
||||||
# while(objectToInspect !== null)
|
"""
|
||||||
# { result = result.concat(Object.getOwnPropertyNames(objectToInspect));
|
let objectToInspect = window,
|
||||||
# objectToInspect = Object.getPrototypeOf(objectToInspect); }
|
result = [];
|
||||||
#
|
while(objectToInspect !== null)
|
||||||
# return result.filter(i => i.match(/^([a-zA-Z]){27}(Array|Promise|Symbol)$/ig))
|
{ result = result.concat(Object.getOwnPropertyNames(objectToInspect));
|
||||||
# """
|
objectToInspect = Object.getPrototypeOf(objectToInspect); }
|
||||||
# )
|
return result.filter(i => i.match(/.+_.+_(Array|Promise|Symbol)/ig))
|
||||||
#
|
"""
|
||||||
# def _hook_remove_cdc_props(self):
|
)
|
||||||
# self.execute_cdp_cmd(
|
|
||||||
# "Page.addScriptToEvaluateOnNewDocument",
|
def _hook_remove_cdc_props(self):
|
||||||
# {
|
self.execute_cdp_cmd(
|
||||||
# "source": """
|
"Page.addScriptToEvaluateOnNewDocument",
|
||||||
# let objectToInspect = window,
|
{
|
||||||
# result = [];
|
"source": """
|
||||||
# while(objectToInspect !== null)
|
let objectToInspect = window,
|
||||||
# { result = result.concat(Object.getOwnPropertyNames(objectToInspect));
|
result = [];
|
||||||
# objectToInspect = Object.getPrototypeOf(objectToInspect); }
|
while(objectToInspect !== null)
|
||||||
# result.forEach(p => p.match(/^([a-zA-Z]){27}(Array|Promise|Symbol)$/ig)
|
{ result = result.concat(Object.getOwnPropertyNames(objectToInspect));
|
||||||
# &&delete window[p]&&console.log('removed',p))
|
objectToInspect = Object.getPrototypeOf(objectToInspect); }
|
||||||
# """
|
result.forEach(p => p.match(/.+_.+_(Array|Promise|Symbol)/ig)
|
||||||
# },
|
&&delete window[p]&&console.log('removed',p))
|
||||||
# )
|
"""
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
def get(self, url):
|
def get(self, url):
|
||||||
# if self._get_cdc_props():
|
if self._get_cdc_props():
|
||||||
# self._hook_remove_cdc_props()
|
self._hook_remove_cdc_props()
|
||||||
return super().get(url)
|
return super().get(url)
|
||||||
|
|
||||||
def add_cdp_listener(self, event_name, callback):
|
def add_cdp_listener(self, event_name, callback):
|
||||||
@@ -663,11 +553,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
if self.reactor and isinstance(self.reactor, Reactor):
|
if self.reactor and isinstance(self.reactor, Reactor):
|
||||||
self.reactor.handlers.clear()
|
self.reactor.handlers.clear()
|
||||||
|
|
||||||
def window_new(self):
|
|
||||||
self.execute(
|
|
||||||
selenium.webdriver.remote.command.Command.NEW_WINDOW, {"type": "window"}
|
|
||||||
)
|
|
||||||
|
|
||||||
def tab_new(self, url: str):
|
def tab_new(self, url: str):
|
||||||
"""
|
"""
|
||||||
this opens a url in a new tab.
|
this opens a url in a new tab.
|
||||||
@@ -712,22 +597,24 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
# super(Chrome, self).start_session(capabilities, browser_profile)
|
# super(Chrome, self).start_session(capabilities, browser_profile)
|
||||||
|
|
||||||
def quit(self):
|
def quit(self):
|
||||||
try:
|
logger.debug("closing webdriver")
|
||||||
|
if hasattr(self, "service") and getattr(self.service, "process", None):
|
||||||
self.service.process.kill()
|
self.service.process.kill()
|
||||||
self.service.process.wait(5)
|
|
||||||
logger.debug("webdriver process ended")
|
|
||||||
except (AttributeError, RuntimeError, OSError):
|
|
||||||
pass
|
|
||||||
try:
|
try:
|
||||||
self.reactor.event.set()
|
if self.reactor and isinstance(self.reactor, Reactor):
|
||||||
logger.debug("shutting down reactor")
|
logger.debug("shutting down reactor")
|
||||||
except AttributeError:
|
self.reactor.event.set()
|
||||||
|
except Exception: # noqa
|
||||||
pass
|
pass
|
||||||
try:
|
try:
|
||||||
|
logger.debug("killing browser")
|
||||||
os.kill(self.browser_pid, 15)
|
os.kill(self.browser_pid, 15)
|
||||||
logger.debug("gracefully closed browser")
|
|
||||||
except Exception as e: # noqa
|
except TimeoutError as e:
|
||||||
logger.debug(e, exc_info=True)
|
logger.debug(e, exc_info=True)
|
||||||
|
except Exception: # noqa
|
||||||
|
pass
|
||||||
|
|
||||||
if (
|
if (
|
||||||
hasattr(self, "keep_user_data_dir")
|
hasattr(self, "keep_user_data_dir")
|
||||||
and hasattr(self, "user_data_dir")
|
and hasattr(self, "user_data_dir")
|
||||||
@@ -735,6 +622,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
):
|
):
|
||||||
for _ in range(5):
|
for _ in range(5):
|
||||||
try:
|
try:
|
||||||
|
|
||||||
shutil.rmtree(self.user_data_dir, ignore_errors=False)
|
shutil.rmtree(self.user_data_dir, ignore_errors=False)
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
pass
|
pass
|
||||||
@@ -752,24 +640,13 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
# this must come last, otherwise it will throw 'in use' errors
|
# this must come last, otherwise it will throw 'in use' errors
|
||||||
self.patcher = None
|
self.patcher = None
|
||||||
|
|
||||||
def __getattribute__(self, item):
|
def __del__(self):
|
||||||
if not super().__getattribute__("debug"):
|
try:
|
||||||
return super().__getattribute__(item)
|
super().quit()
|
||||||
else:
|
# self.service.process.kill()
|
||||||
import inspect
|
except: # noqa
|
||||||
|
pass
|
||||||
original = super().__getattribute__(item)
|
self.quit()
|
||||||
if inspect.ismethod(original) and not inspect.isclass(original):
|
|
||||||
|
|
||||||
def newfunc(*args, **kwargs):
|
|
||||||
logger.debug(
|
|
||||||
"calling %s with args %s and kwargs %s\n"
|
|
||||||
% (original.__qualname__, args, kwargs)
|
|
||||||
)
|
|
||||||
return original(*args, **kwargs)
|
|
||||||
|
|
||||||
return newfunc
|
|
||||||
return original
|
|
||||||
|
|
||||||
def __enter__(self):
|
def __enter__(self):
|
||||||
return self
|
return self
|
||||||
@@ -783,27 +660,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||||||
def __hash__(self):
|
def __hash__(self):
|
||||||
return hash(self.options.debugger_address)
|
return hash(self.options.debugger_address)
|
||||||
|
|
||||||
def __dir__(self):
|
|
||||||
return object.__dir__(self)
|
|
||||||
|
|
||||||
def __del__(self):
|
|
||||||
try:
|
|
||||||
self.service.process.kill()
|
|
||||||
except: # noqa
|
|
||||||
pass
|
|
||||||
self.quit()
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def _ensure_close(cls, self):
|
|
||||||
# needs to be a classmethod so finalize can find the reference
|
|
||||||
logger.info("ensuring close")
|
|
||||||
if (
|
|
||||||
hasattr(self, "service")
|
|
||||||
and hasattr(self.service, "process")
|
|
||||||
and hasattr(self.service.process, "kill")
|
|
||||||
):
|
|
||||||
self.service.process.kill()
|
|
||||||
|
|
||||||
|
|
||||||
def find_chrome_executable():
|
def find_chrome_executable():
|
||||||
"""
|
"""
|
||||||
@@ -835,10 +691,8 @@ def find_chrome_executable():
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
for item in map(
|
for item in map(
|
||||||
os.environ.get,
|
os.environ.get, ("PROGRAMFILES", "PROGRAMFILES(X86)", "LOCALAPPDATA")
|
||||||
("PROGRAMFILES", "PROGRAMFILES(X86)", "LOCALAPPDATA", "PROGRAMW6432"),
|
|
||||||
):
|
):
|
||||||
if item is not None:
|
|
||||||
for subitem in (
|
for subitem in (
|
||||||
"Google/Chrome/Application",
|
"Google/Chrome/Application",
|
||||||
"Google/Chrome Beta/Application",
|
"Google/Chrome Beta/Application",
|
||||||
|
|||||||
259
src/undetected_chromedriver/_compat.py
Normal file
259
src/undetected_chromedriver/_compat.py
Normal file
@@ -0,0 +1,259 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# this module is part of undetected_chromedriver
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
888 888 d8b
|
||||||
|
888 888 Y8P
|
||||||
|
888 888
|
||||||
|
.d8888b 88888b. 888d888 .d88b. 88888b.d88b. .d88b. .d88888 888d888 888 888 888 .d88b. 888d888
|
||||||
|
d88P" 888 "88b 888P" d88""88b 888 "888 "88b d8P Y8b d88" 888 888P" 888 888 888 d8P Y8b 888P"
|
||||||
|
888 888 888 888 888 888 888 888 888 88888888 888 888 888 888 Y88 88P 88888888 888
|
||||||
|
Y88b. 888 888 888 Y88..88P 888 888 888 Y8b. Y88b 888 888 888 Y8bd8P Y8b. 888
|
||||||
|
"Y8888P 888 888 888 "Y88P" 888 888 888 "Y8888 "Y88888 888 888 Y88P "Y8888 888 88888888
|
||||||
|
|
||||||
|
by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
import io
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import random
|
||||||
|
import re
|
||||||
|
import string
|
||||||
|
import sys
|
||||||
|
import zipfile
|
||||||
|
from distutils.version import LooseVersion
|
||||||
|
from urllib.request import urlopen, urlretrieve
|
||||||
|
|
||||||
|
from selenium.webdriver import Chrome as _Chrome, ChromeOptions as _ChromeOptions
|
||||||
|
|
||||||
|
TARGET_VERSION = 0
|
||||||
|
logger = logging.getLogger("uc")
|
||||||
|
|
||||||
|
|
||||||
|
class Chrome:
|
||||||
|
def __new__(cls, *args, emulate_touch=False, **kwargs):
|
||||||
|
|
||||||
|
if not ChromeDriverManager.installed:
|
||||||
|
ChromeDriverManager(*args, **kwargs).install()
|
||||||
|
if not ChromeDriverManager.selenium_patched:
|
||||||
|
ChromeDriverManager(*args, **kwargs).patch_selenium_webdriver()
|
||||||
|
if not kwargs.get("executable_path"):
|
||||||
|
kwargs["executable_path"] = "./{}".format(
|
||||||
|
ChromeDriverManager(*args, **kwargs).executable_path
|
||||||
|
)
|
||||||
|
if not kwargs.get("options"):
|
||||||
|
kwargs["options"] = ChromeOptions()
|
||||||
|
instance = object.__new__(_Chrome)
|
||||||
|
instance.__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
instance._orig_get = instance.get
|
||||||
|
|
||||||
|
def _get_wrapped(*args, **kwargs):
|
||||||
|
if instance.execute_script("return navigator.webdriver"):
|
||||||
|
instance.execute_cdp_cmd(
|
||||||
|
"Page.addScriptToEvaluateOnNewDocument",
|
||||||
|
{
|
||||||
|
"source": """
|
||||||
|
|
||||||
|
Object.defineProperty(window, 'navigator', {
|
||||||
|
value: new Proxy(navigator, {
|
||||||
|
has: (target, key) => (key === 'webdriver' ? false : key in target),
|
||||||
|
get: (target, key) =>
|
||||||
|
key === 'webdriver'
|
||||||
|
? undefined
|
||||||
|
: typeof target[key] === 'function'
|
||||||
|
? target[key].bind(target)
|
||||||
|
: target[key]
|
||||||
|
})
|
||||||
|
});
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
},
|
||||||
|
)
|
||||||
|
return instance._orig_get(*args, **kwargs)
|
||||||
|
|
||||||
|
instance.get = _get_wrapped
|
||||||
|
instance.get = _get_wrapped
|
||||||
|
instance.get = _get_wrapped
|
||||||
|
|
||||||
|
original_user_agent_string = instance.execute_script(
|
||||||
|
"return navigator.userAgent"
|
||||||
|
)
|
||||||
|
instance.execute_cdp_cmd(
|
||||||
|
"Network.setUserAgentOverride",
|
||||||
|
{
|
||||||
|
"userAgent": original_user_agent_string.replace("Headless", ""),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
if emulate_touch:
|
||||||
|
instance.execute_cdp_cmd(
|
||||||
|
"Page.addScriptToEvaluateOnNewDocument",
|
||||||
|
{
|
||||||
|
"source": """
|
||||||
|
Object.defineProperty(navigator, 'maxTouchPoints', {
|
||||||
|
get: () => 1
|
||||||
|
})"""
|
||||||
|
},
|
||||||
|
)
|
||||||
|
logger.info(f"starting undetected_chromedriver.Chrome({args}, {kwargs})")
|
||||||
|
return instance
|
||||||
|
|
||||||
|
|
||||||
|
class ChromeOptions:
|
||||||
|
def __new__(cls, *args, **kwargs):
|
||||||
|
if not ChromeDriverManager.installed:
|
||||||
|
ChromeDriverManager(*args, **kwargs).install()
|
||||||
|
if not ChromeDriverManager.selenium_patched:
|
||||||
|
ChromeDriverManager(*args, **kwargs).patch_selenium_webdriver()
|
||||||
|
|
||||||
|
instance = object.__new__(_ChromeOptions)
|
||||||
|
instance.__init__()
|
||||||
|
instance.add_argument("start-maximized")
|
||||||
|
instance.add_experimental_option("excludeSwitches", ["enable-automation"])
|
||||||
|
instance.add_argument("--disable-blink-features=AutomationControlled")
|
||||||
|
return instance
|
||||||
|
|
||||||
|
|
||||||
|
class ChromeDriverManager(object):
|
||||||
|
installed = False
|
||||||
|
selenium_patched = False
|
||||||
|
target_version = None
|
||||||
|
|
||||||
|
DL_BASE = "https://chromedriver.storage.googleapis.com/"
|
||||||
|
|
||||||
|
def __init__(self, executable_path=None, target_version=None, *args, **kwargs):
|
||||||
|
|
||||||
|
_platform = sys.platform
|
||||||
|
|
||||||
|
if TARGET_VERSION:
|
||||||
|
# use global if set
|
||||||
|
self.target_version = TARGET_VERSION
|
||||||
|
|
||||||
|
if target_version:
|
||||||
|
# use explicitly passed target
|
||||||
|
self.target_version = target_version # user override
|
||||||
|
|
||||||
|
if not self.target_version:
|
||||||
|
# none of the above (default) and just get current version
|
||||||
|
self.target_version = self.get_release_version_number().version[
|
||||||
|
0
|
||||||
|
] # only major version int
|
||||||
|
|
||||||
|
self._base = base_ = "chromedriver{}"
|
||||||
|
|
||||||
|
exe_name = self._base
|
||||||
|
if _platform in ("win32",):
|
||||||
|
exe_name = base_.format(".exe")
|
||||||
|
if _platform in ("linux",):
|
||||||
|
_platform += "64"
|
||||||
|
exe_name = exe_name.format("")
|
||||||
|
if _platform in ("darwin",):
|
||||||
|
_platform = "mac64"
|
||||||
|
exe_name = exe_name.format("")
|
||||||
|
self.platform = _platform
|
||||||
|
self.executable_path = executable_path or exe_name
|
||||||
|
self._exe_name = exe_name
|
||||||
|
|
||||||
|
def patch_selenium_webdriver(self_):
|
||||||
|
"""
|
||||||
|
Patches selenium package Chrome, ChromeOptions classes for current session
|
||||||
|
|
||||||
|
:return:
|
||||||
|
"""
|
||||||
|
import selenium.webdriver.chrome.service
|
||||||
|
import selenium.webdriver
|
||||||
|
|
||||||
|
selenium.webdriver.Chrome = Chrome
|
||||||
|
selenium.webdriver.ChromeOptions = ChromeOptions
|
||||||
|
logger.info("Selenium patched. Safe to import Chrome / ChromeOptions")
|
||||||
|
self_.__class__.selenium_patched = True
|
||||||
|
|
||||||
|
def install(self, patch_selenium=True):
|
||||||
|
"""
|
||||||
|
Initialize the patch
|
||||||
|
|
||||||
|
This will:
|
||||||
|
download chromedriver if not present
|
||||||
|
patch the downloaded chromedriver
|
||||||
|
patch selenium package if <patch_selenium> is True (default)
|
||||||
|
|
||||||
|
:param patch_selenium: patch selenium webdriver classes for Chrome and ChromeDriver (for current python session)
|
||||||
|
:return:
|
||||||
|
"""
|
||||||
|
if not os.path.exists(self.executable_path):
|
||||||
|
self.fetch_chromedriver()
|
||||||
|
if not self.__class__.installed:
|
||||||
|
if self.patch_binary():
|
||||||
|
self.__class__.installed = True
|
||||||
|
|
||||||
|
if patch_selenium:
|
||||||
|
self.patch_selenium_webdriver()
|
||||||
|
|
||||||
|
def get_release_version_number(self):
|
||||||
|
"""
|
||||||
|
Gets the latest major version available, or the latest major version of self.target_version if set explicitly.
|
||||||
|
|
||||||
|
:return: version string
|
||||||
|
"""
|
||||||
|
path = (
|
||||||
|
"LATEST_RELEASE"
|
||||||
|
if not self.target_version
|
||||||
|
else f"LATEST_RELEASE_{self.target_version}"
|
||||||
|
)
|
||||||
|
return LooseVersion(urlopen(self.__class__.DL_BASE + path).read().decode())
|
||||||
|
|
||||||
|
def fetch_chromedriver(self):
|
||||||
|
"""
|
||||||
|
Downloads ChromeDriver from source and unpacks the executable
|
||||||
|
|
||||||
|
:return: on success, name of the unpacked executable
|
||||||
|
"""
|
||||||
|
base_ = self._base
|
||||||
|
zip_name = base_.format(".zip")
|
||||||
|
ver = self.get_release_version_number().vstring
|
||||||
|
if os.path.exists(self.executable_path):
|
||||||
|
return self.executable_path
|
||||||
|
urlretrieve(
|
||||||
|
f"{self.__class__.DL_BASE}{ver}/{base_.format(f'_{self.platform}')}.zip",
|
||||||
|
filename=zip_name,
|
||||||
|
)
|
||||||
|
with zipfile.ZipFile(zip_name) as zf:
|
||||||
|
zf.extract(self._exe_name)
|
||||||
|
os.remove(zip_name)
|
||||||
|
if sys.platform != "win32":
|
||||||
|
os.chmod(self._exe_name, 0o755)
|
||||||
|
return self._exe_name
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def random_cdc():
|
||||||
|
cdc = random.choices(string.ascii_lowercase, k=26)
|
||||||
|
cdc[-6:-4] = map(str.upper, cdc[-6:-4])
|
||||||
|
cdc[2] = cdc[0]
|
||||||
|
cdc[3] = "_"
|
||||||
|
return "".join(cdc).encode()
|
||||||
|
|
||||||
|
def patch_binary(self):
|
||||||
|
"""
|
||||||
|
Patches the ChromeDriver binary
|
||||||
|
|
||||||
|
:return: False on failure, binary name on success
|
||||||
|
"""
|
||||||
|
linect = 0
|
||||||
|
replacement = self.random_cdc()
|
||||||
|
with io.open(self.executable_path, "r+b") as fh:
|
||||||
|
for line in iter(lambda: fh.readline(), b""):
|
||||||
|
if b"cdc_" in line:
|
||||||
|
fh.seek(-len(line), 1)
|
||||||
|
newline = re.sub(b"cdc_.{22}", replacement, line)
|
||||||
|
fh.write(newline)
|
||||||
|
linect += 1
|
||||||
|
return linect
|
||||||
|
|
||||||
|
|
||||||
|
def install(executable_path=None, target_version=None, *args, **kwargs):
|
||||||
|
ChromeDriverManager(executable_path, target_version, *args, **kwargs).install()
|
||||||
@@ -3,11 +3,11 @@
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
from collections.abc import Mapping, Sequence
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
import websockets
|
import websockets
|
||||||
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,16 +1,17 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
from collections.abc import Mapping
|
|
||||||
from collections.abc import Sequence
|
|
||||||
from functools import wraps
|
|
||||||
import logging
|
import logging
|
||||||
import threading
|
|
||||||
import time
|
import time
|
||||||
import traceback
|
import traceback
|
||||||
|
from collections.abc import Mapping
|
||||||
|
from collections.abc import Sequence
|
||||||
from typing import Any
|
from typing import Any
|
||||||
from typing import Awaitable
|
from typing import Awaitable
|
||||||
from typing import Callable
|
from typing import Callable
|
||||||
from typing import List
|
from typing import List
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
from contextlib import ExitStack
|
||||||
|
import threading
|
||||||
|
from functools import wraps, partial
|
||||||
|
|
||||||
|
|
||||||
class Structure(dict):
|
class Structure(dict):
|
||||||
|
|||||||
@@ -1,13 +1,13 @@
|
|||||||
import atexit
|
|
||||||
import logging
|
|
||||||
import multiprocessing
|
import multiprocessing
|
||||||
import os
|
import os
|
||||||
import platform
|
import platform
|
||||||
import signal
|
import sys
|
||||||
from subprocess import PIPE
|
from subprocess import PIPE
|
||||||
from subprocess import Popen
|
from subprocess import Popen
|
||||||
import sys
|
import atexit
|
||||||
|
import traceback
|
||||||
|
import logging
|
||||||
|
import signal
|
||||||
|
|
||||||
CREATE_NEW_PROCESS_GROUP = 0x00000200
|
CREATE_NEW_PROCESS_GROUP = 0x00000200
|
||||||
DETACHED_PROCESS = 0x00000008
|
DETACHED_PROCESS = 0x00000008
|
||||||
@@ -27,14 +27,12 @@ def start_detached(executable, *args):
|
|||||||
reader, writer = multiprocessing.Pipe(False)
|
reader, writer = multiprocessing.Pipe(False)
|
||||||
|
|
||||||
# do not keep reference
|
# do not keep reference
|
||||||
process = multiprocessing.Process(
|
multiprocessing.Process(
|
||||||
target=_start_detached,
|
target=_start_detached,
|
||||||
args=(executable, *args),
|
args=(executable, *args),
|
||||||
kwargs={"writer": writer},
|
kwargs={"writer": writer},
|
||||||
daemon=True,
|
daemon=True,
|
||||||
)
|
).start()
|
||||||
process.start()
|
|
||||||
process.join()
|
|
||||||
# receive pid from pipe
|
# receive pid from pipe
|
||||||
pid = reader.recv()
|
pid = reader.recv()
|
||||||
REGISTERED.append(pid)
|
REGISTERED.append(pid)
|
||||||
@@ -46,6 +44,7 @@ def start_detached(executable, *args):
|
|||||||
|
|
||||||
|
|
||||||
def _start_detached(executable, *args, writer: multiprocessing.Pipe = None):
|
def _start_detached(executable, *args, writer: multiprocessing.Pipe = None):
|
||||||
|
|
||||||
# configure launch
|
# configure launch
|
||||||
kwargs = {}
|
kwargs = {}
|
||||||
if platform.system() == "Windows":
|
if platform.system() == "Windows":
|
||||||
|
|||||||
@@ -39,23 +39,10 @@ class ChromeOptions(_ChromiumOptions):
|
|||||||
value = ChromeOptions._undot_key(rest, value)
|
value = ChromeOptions._undot_key(rest, value)
|
||||||
return {key: value}
|
return {key: value}
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _merge_nested(a, b):
|
|
||||||
"""
|
|
||||||
merges b into a
|
|
||||||
leaf values in a are overwritten with values from b
|
|
||||||
"""
|
|
||||||
for key in b:
|
|
||||||
if key in a:
|
|
||||||
if isinstance(a[key], dict) and isinstance(b[key], dict):
|
|
||||||
ChromeOptions._merge_nested(a[key], b[key])
|
|
||||||
continue
|
|
||||||
a[key] = b[key]
|
|
||||||
return a
|
|
||||||
|
|
||||||
def handle_prefs(self, user_data_dir):
|
def handle_prefs(self, user_data_dir):
|
||||||
prefs = self.experimental_options.get("prefs")
|
prefs = self.experimental_options.get("prefs")
|
||||||
if prefs:
|
if prefs:
|
||||||
|
|
||||||
user_data_dir = user_data_dir or self._user_data_dir
|
user_data_dir = user_data_dir or self._user_data_dir
|
||||||
default_path = os.path.join(user_data_dir, "Default")
|
default_path = os.path.join(user_data_dir, "Default")
|
||||||
os.makedirs(default_path, exist_ok=True)
|
os.makedirs(default_path, exist_ok=True)
|
||||||
@@ -63,14 +50,12 @@ class ChromeOptions(_ChromiumOptions):
|
|||||||
# undot prefs dict keys
|
# undot prefs dict keys
|
||||||
undot_prefs = {}
|
undot_prefs = {}
|
||||||
for key, value in prefs.items():
|
for key, value in prefs.items():
|
||||||
undot_prefs = self._merge_nested(
|
undot_prefs.update(self._undot_key(key, value))
|
||||||
undot_prefs, self._undot_key(key, value)
|
|
||||||
)
|
|
||||||
|
|
||||||
prefs_file = os.path.join(default_path, "Preferences")
|
prefs_file = os.path.join(default_path, "Preferences")
|
||||||
if os.path.exists(prefs_file):
|
if os.path.exists(prefs_file):
|
||||||
with open(prefs_file, encoding="latin1", mode="r") as f:
|
with open(prefs_file, encoding="latin1", mode="r") as f:
|
||||||
undot_prefs = self._merge_nested(json.load(f), undot_prefs)
|
undot_prefs.update(json.load(f))
|
||||||
|
|
||||||
with open(prefs_file, encoding="latin1", mode="w") as f:
|
with open(prefs_file, encoding="latin1", mode="w") as f:
|
||||||
json.dump(undot_prefs, f)
|
json.dump(undot_prefs, f)
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# this module is part of undetected_chromedriver
|
# this module is part of undetected_chromedriver
|
||||||
|
|
||||||
from distutils.version import LooseVersion
|
|
||||||
import io
|
import io
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
@@ -10,14 +9,15 @@ import re
|
|||||||
import string
|
import string
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
from urllib.request import urlopen
|
|
||||||
from urllib.request import urlretrieve
|
|
||||||
import zipfile
|
import zipfile
|
||||||
|
from distutils.version import LooseVersion
|
||||||
|
from urllib.request import urlopen, urlretrieve
|
||||||
|
import secrets
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
IS_POSIX = sys.platform.startswith(("darwin", "cygwin", "linux", "linux2"))
|
IS_POSIX = sys.platform.startswith(("darwin", "cygwin", "linux"))
|
||||||
|
|
||||||
|
|
||||||
class Patcher(object):
|
class Patcher(object):
|
||||||
@@ -29,7 +29,7 @@ class Patcher(object):
|
|||||||
if platform.endswith("win32"):
|
if platform.endswith("win32"):
|
||||||
zip_name %= "win32"
|
zip_name %= "win32"
|
||||||
exe_name %= ".exe"
|
exe_name %= ".exe"
|
||||||
if platform.endswith(("linux", "linux2")):
|
if platform.endswith("linux"):
|
||||||
zip_name %= "linux64"
|
zip_name %= "linux64"
|
||||||
exe_name %= ""
|
exe_name %= ""
|
||||||
if platform.endswith("darwin"):
|
if platform.endswith("darwin"):
|
||||||
@@ -38,9 +38,7 @@ class Patcher(object):
|
|||||||
|
|
||||||
if platform.endswith("win32"):
|
if platform.endswith("win32"):
|
||||||
d = "~/appdata/roaming/undetected_chromedriver"
|
d = "~/appdata/roaming/undetected_chromedriver"
|
||||||
elif "LAMBDA_TASK_ROOT" in os.environ:
|
elif platform.startswith("linux"):
|
||||||
d = "/tmp/undetected_chromedriver"
|
|
||||||
elif platform.startswith(("linux", "linux2")):
|
|
||||||
d = "~/.local/share/undetected_chromedriver"
|
d = "~/.local/share/undetected_chromedriver"
|
||||||
elif platform.endswith("darwin"):
|
elif platform.endswith("darwin"):
|
||||||
d = "~/Library/Application Support/undetected_chromedriver"
|
d = "~/Library/Application Support/undetected_chromedriver"
|
||||||
@@ -50,6 +48,7 @@ class Patcher(object):
|
|||||||
|
|
||||||
def __init__(self, executable_path=None, force=False, version_main: int = 0):
|
def __init__(self, executable_path=None, force=False, version_main: int = 0):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
executable_path: None = automatic
|
executable_path: None = automatic
|
||||||
a full file path to the chromedriver executable
|
a full file path to the chromedriver executable
|
||||||
@@ -58,9 +57,10 @@ class Patcher(object):
|
|||||||
version_main: 0 = auto
|
version_main: 0 = auto
|
||||||
specify main chrome version (rounded, ex: 82)
|
specify main chrome version (rounded, ex: 82)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
self.force = force
|
self.force = force
|
||||||
self._custom_exe_path = False
|
self.executable_path = None
|
||||||
prefix = "undetected"
|
prefix = secrets.token_hex(8)
|
||||||
|
|
||||||
if not os.path.exists(self.data_path):
|
if not os.path.exists(self.data_path):
|
||||||
os.makedirs(self.data_path, exist_ok=True)
|
os.makedirs(self.data_path, exist_ok=True)
|
||||||
@@ -82,6 +82,8 @@ class Patcher(object):
|
|||||||
os.path.join(".", self.executable_path)
|
os.path.join(".", self.executable_path)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self._custom_exe_path = False
|
||||||
|
|
||||||
if executable_path:
|
if executable_path:
|
||||||
self._custom_exe_path = True
|
self._custom_exe_path = True
|
||||||
self.executable_path = executable_path
|
self.executable_path = executable_path
|
||||||
@@ -89,6 +91,7 @@ class Patcher(object):
|
|||||||
self.version_full = None
|
self.version_full = None
|
||||||
|
|
||||||
def auto(self, executable_path=None, force=False, version_main=None):
|
def auto(self, executable_path=None, force=False, version_main=None):
|
||||||
|
""""""
|
||||||
if executable_path:
|
if executable_path:
|
||||||
self.executable_path = executable_path
|
self.executable_path = executable_path
|
||||||
self._custom_exe_path = True
|
self._custom_exe_path = True
|
||||||
@@ -200,46 +203,43 @@ class Patcher(object):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def gen_random_cdc():
|
def gen_random_cdc():
|
||||||
cdc = random.choices(string.ascii_letters, k=27)
|
cdc = random.choices(string.ascii_lowercase, k=26)
|
||||||
|
cdc[-6:-4] = map(str.upper, cdc[-6:-4])
|
||||||
|
cdc[2] = cdc[0]
|
||||||
|
cdc[3] = "_"
|
||||||
return "".join(cdc).encode()
|
return "".join(cdc).encode()
|
||||||
|
|
||||||
def is_binary_patched(self, executable_path=None):
|
def is_binary_patched(self, executable_path=None):
|
||||||
|
"""simple check if executable is patched.
|
||||||
|
|
||||||
|
:return: False if not patched, else True
|
||||||
|
"""
|
||||||
executable_path = executable_path or self.executable_path
|
executable_path = executable_path or self.executable_path
|
||||||
try:
|
|
||||||
with io.open(executable_path, "rb") as fh:
|
with io.open(executable_path, "rb") as fh:
|
||||||
return fh.read().find(b"undetected chromedriver") != -1
|
for line in iter(lambda: fh.readline(), b""):
|
||||||
except FileNotFoundError:
|
if b"cdc_" in line:
|
||||||
return False
|
return False
|
||||||
|
else:
|
||||||
|
return True
|
||||||
|
|
||||||
def patch_exe(self):
|
def patch_exe(self):
|
||||||
start = time.perf_counter()
|
"""
|
||||||
|
Patches the ChromeDriver binary
|
||||||
|
|
||||||
|
:return: False on failure, binary name on success
|
||||||
|
"""
|
||||||
logger.info("patching driver executable %s" % self.executable_path)
|
logger.info("patching driver executable %s" % self.executable_path)
|
||||||
|
|
||||||
|
linect = 0
|
||||||
|
replacement = self.gen_random_cdc()
|
||||||
with io.open(self.executable_path, "r+b") as fh:
|
with io.open(self.executable_path, "r+b") as fh:
|
||||||
content = fh.read()
|
for line in iter(lambda: fh.readline(), b""):
|
||||||
# match_injected_codeblock = re.search(rb"{window.*;}", content)
|
if b"cdc_" in line:
|
||||||
match_injected_codeblock = re.search(rb"\{window\.cdc.*?;\}", content)
|
fh.seek(-len(line), 1)
|
||||||
if match_injected_codeblock:
|
newline = re.sub(b"cdc_.{22}", replacement, line)
|
||||||
target_bytes = match_injected_codeblock[0]
|
fh.write(newline)
|
||||||
new_target_bytes = (
|
linect += 1
|
||||||
b'{console.log("undetected chromedriver 1337!")}'.ljust(
|
return linect
|
||||||
len(target_bytes), b" "
|
|
||||||
)
|
|
||||||
)
|
|
||||||
new_content = content.replace(target_bytes, new_target_bytes)
|
|
||||||
if new_content == content:
|
|
||||||
logger.warning(
|
|
||||||
"something went wrong patching the driver binary. could not find injection code block"
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
logger.debug(
|
|
||||||
"found block:\n%s\nreplacing with:\n%s"
|
|
||||||
% (target_bytes, new_target_bytes)
|
|
||||||
)
|
|
||||||
fh.seek(0)
|
|
||||||
fh.write(new_content)
|
|
||||||
logger.debug(
|
|
||||||
"patching took us {:.2f} seconds".format(time.perf_counter() - start)
|
|
||||||
)
|
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "{0:s}({1:s})".format(
|
return "{0:s}({1:s})".format(
|
||||||
@@ -248,6 +248,7 @@ class Patcher(object):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def __del__(self):
|
def __del__(self):
|
||||||
|
|
||||||
if self._custom_exe_path:
|
if self._custom_exe_path:
|
||||||
# if the driver binary is specified by user
|
# if the driver binary is specified by user
|
||||||
# we assume it is important enough to not delete it
|
# we assume it is important enough to not delete it
|
||||||
|
|||||||
@@ -6,7 +6,6 @@ import json
|
|||||||
import logging
|
import logging
|
||||||
import threading
|
import threading
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
@@ -64,7 +63,9 @@ class Reactor(threading.Thread):
|
|||||||
break
|
break
|
||||||
|
|
||||||
async def listen(self):
|
async def listen(self):
|
||||||
|
|
||||||
while self.running:
|
while self.running:
|
||||||
|
|
||||||
await self._wait_service_started()
|
await self._wait_service_started()
|
||||||
await asyncio.sleep(1)
|
await asyncio.sleep(1)
|
||||||
|
|
||||||
@@ -73,7 +74,9 @@ class Reactor(threading.Thread):
|
|||||||
log_entries = self.driver.get_log("performance")
|
log_entries = self.driver.get_log("performance")
|
||||||
|
|
||||||
for entry in log_entries:
|
for entry in log_entries:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
||||||
obj_serialized: str = entry.get("message")
|
obj_serialized: str = entry.get("message")
|
||||||
obj = json.loads(obj_serialized)
|
obj = json.loads(obj_serialized)
|
||||||
message = obj.get("message")
|
message = obj.get("message")
|
||||||
|
|||||||
4
src/undetected_chromedriver/v2.py
Normal file
4
src/undetected_chromedriver/v2.py
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
# for backward compatibility
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.modules[__name__] = sys.modules[__package__]
|
||||||
@@ -1,30 +1,7 @@
|
|||||||
from typing import List
|
|
||||||
|
|
||||||
from selenium.webdriver.common.by import By
|
|
||||||
import selenium.webdriver.remote.webelement
|
import selenium.webdriver.remote.webelement
|
||||||
|
|
||||||
|
|
||||||
class WebElement(selenium.webdriver.remote.webelement.WebElement):
|
class WebElement(selenium.webdriver.remote.webelement.WebElement):
|
||||||
def click_safe(self):
|
|
||||||
super().click()
|
|
||||||
self._parent.reconnect(0.1)
|
|
||||||
|
|
||||||
def children(
|
|
||||||
self, tag=None, recursive=False
|
|
||||||
) -> List[selenium.webdriver.remote.webelement.WebElement]:
|
|
||||||
"""
|
|
||||||
returns direct child elements of current element
|
|
||||||
:param tag: str, if supplied, returns <tag> nodes only
|
|
||||||
"""
|
|
||||||
script = "return [... arguments[0].children]"
|
|
||||||
if tag:
|
|
||||||
script += ".filter( node => node.tagName === '%s')" % tag.upper()
|
|
||||||
if recursive:
|
|
||||||
return list(_recursive_children(self, tag))
|
|
||||||
return list(self._parent.execute_script(script, self))
|
|
||||||
|
|
||||||
|
|
||||||
class UCWebElement(WebElement):
|
|
||||||
"""
|
"""
|
||||||
Custom WebElement class which makes it easier to view elements when
|
Custom WebElement class which makes it easier to view elements when
|
||||||
working in an interactive environment.
|
working in an interactive environment.
|
||||||
@@ -37,13 +14,9 @@ class UCWebElement(WebElement):
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, parent, id_):
|
|
||||||
super().__init__(parent, id_)
|
|
||||||
self._attrs = None
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def attrs(self):
|
def attrs(self):
|
||||||
if not self._attrs:
|
if not hasattr(self, "_attrs"):
|
||||||
self._attrs = self._parent.execute_script(
|
self._attrs = self._parent.execute_script(
|
||||||
"""
|
"""
|
||||||
var items = {};
|
var items = {};
|
||||||
@@ -62,25 +35,3 @@ class UCWebElement(WebElement):
|
|||||||
if strattrs:
|
if strattrs:
|
||||||
strattrs = " " + strattrs
|
strattrs = " " + strattrs
|
||||||
return f"{self.__class__.__name__} <{self.tag_name}{strattrs}>"
|
return f"{self.__class__.__name__} <{self.tag_name}{strattrs}>"
|
||||||
|
|
||||||
|
|
||||||
def _recursive_children(element, tag: str = None, _results=None):
|
|
||||||
"""
|
|
||||||
returns all children of <element> recursively
|
|
||||||
|
|
||||||
:param element: `WebElement` object.
|
|
||||||
find children below this <element>
|
|
||||||
|
|
||||||
:param tag: str = None.
|
|
||||||
if provided, return only <tag> elements. example: 'a', or 'img'
|
|
||||||
:param _results: do not use!
|
|
||||||
"""
|
|
||||||
results = _results or set()
|
|
||||||
for element in element.children():
|
|
||||||
if tag:
|
|
||||||
if element.tag_name == tag:
|
|
||||||
results.add(element)
|
|
||||||
else:
|
|
||||||
results.add(element)
|
|
||||||
results |= _recursive_children(element, tag, results)
|
|
||||||
return results
|
|
||||||
|
|||||||
@@ -44,8 +44,6 @@ def get_webdriver() -> WebDriver:
|
|||||||
# todo: this param shows a warning in chrome head-full
|
# todo: this param shows a warning in chrome head-full
|
||||||
options.add_argument('--disable-setuid-sandbox')
|
options.add_argument('--disable-setuid-sandbox')
|
||||||
options.add_argument('--disable-dev-shm-usage')
|
options.add_argument('--disable-dev-shm-usage')
|
||||||
# this option removes the zygote sandbox (it seems that the resolution is a bit faster)
|
|
||||||
options.add_argument('--no-zygote')
|
|
||||||
|
|
||||||
# note: headless mode is detected (options.headless = True)
|
# note: headless mode is detected (options.headless = True)
|
||||||
# we launch the browser in head-full mode with the window hidden
|
# we launch the browser in head-full mode with the window hidden
|
||||||
@@ -88,10 +86,6 @@ def get_webdriver() -> WebDriver:
|
|||||||
return driver
|
return driver
|
||||||
|
|
||||||
|
|
||||||
def get_chrome_exe_path() -> str:
|
|
||||||
return uc.find_chrome_executable()
|
|
||||||
|
|
||||||
|
|
||||||
def get_chrome_major_version() -> str:
|
def get_chrome_major_version() -> str:
|
||||||
global CHROME_MAJOR_VERSION
|
global CHROME_MAJOR_VERSION
|
||||||
if CHROME_MAJOR_VERSION is not None:
|
if CHROME_MAJOR_VERSION is not None:
|
||||||
@@ -116,6 +110,7 @@ def get_chrome_major_version() -> str:
|
|||||||
process.close()
|
process.close()
|
||||||
|
|
||||||
CHROME_MAJOR_VERSION = complete_version.split('.')[0].split(' ')[-1]
|
CHROME_MAJOR_VERSION = complete_version.split('.')[0].split(' ')[-1]
|
||||||
|
logging.info(f"Chrome major version: {CHROME_MAJOR_VERSION}")
|
||||||
return CHROME_MAJOR_VERSION
|
return CHROME_MAJOR_VERSION
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user