Compare commits

...

70 Commits

Author SHA1 Message Date
MCG-pok
f75994197c Update css selector to get shadowed iframe 2024-08-14 14:05:04 +03:00
MCG-pok
42714b248f add js click on iframe_body 2024-08-14 14:05:04 +03:00
MCG-pok
2900c58165 Access iframe in closed shadow root + click on checkbox from iframe body 2024-08-14 14:05:04 +03:00
ilike2burnthing
a798561338 Bump requests version
*.0 was yanked
2024-07-30 02:38:13 +01:00
Bogdan
eb680efc90 Don't build docker images for PRs from forks (#1281) 2024-07-20 22:08:40 +03:00
ilike2burnthing
0f8f0bec25 revert and bump action version 2024-07-20 19:41:49 +01:00
ilike2burnthing
3d9bc5627b Change to GITHUB_TOKEN for GHRC login 2024-07-20 14:21:34 +01:00
ilike2burnthing
dd7eaee2e3 Bump requirements
resolves Dependabot alerts
2024-07-12 17:11:40 +01:00
ilike2burnthing
031177bbdb Bump version 3.3.21 (#1240) 2024-06-26 02:14:25 +01:00
Bogdan
a8644532a1 Escape values for generated form used in request.post (#1236)
and build docker images for PRs
2024-06-26 02:04:59 +01:00
ilike2burnthing
e96161c873 Add challenge selector to catch reloading page on non-English systems. resolves #1237 2024-06-25 22:32:06 +01:00
ilike2burnthing
5a1f25cd52 Bump version 3.3.20 (#1229) 2024-06-21 22:21:37 +01:00
tenettow
a2c0e4348e Update Cloudflare challenge and checkbox selectors (#1224) 2024-06-21 22:07:03 +01:00
ilike2burnthing
2ecf88895b Check not running in Docker before logging version_main error 2024-06-15 08:37:42 +01:00
ilike2burnthing
984368edb5 maxTimeout should always be int. resolves #1212 2024-06-15 05:41:45 +01:00
21hsmw
6c1d78cb84 Fix occasional headless issue on Linux when set to "false" (#1199)
* Fix occasional headless issue on Linux when set to "false"

- Add a variable containing the current platform
- Check if the platform is "nt" (Windows) before closing the driver

* Update CHANGELOG.md

---------

Co-authored-by: ilike2burnthing <59480337+ilike2burnthing@users.noreply.github.com>
2024-05-24 17:33:46 +01:00
ilike2burnthing
5a2c61601e Fix Chrome v124+ not closing on Windows. resolves #1161 (#1193) 2024-05-20 00:52:55 +01:00
ilike2burnthing
c304da2964 Update README.md 2024-04-22 23:30:52 +01:00
ilike2burnthing
b811412699 Fix LANG ENV for Linux. #1036 2024-04-20 03:41:53 +01:00
Ross Patterson
0bb8de144f Add Compose V2 command to readme (#1154)
Co-authored-by: root@library.moodysalon.net <root@library.moodysalon.net>
Co-authored-by: ilike2burnthing <59480337+ilike2burnthing@users.noreply.github.com>
2024-04-18 05:07:06 +01:00
ilike2burnthing
38166dfaa0 Bump version 3.3.17 (#1147) 2024-04-09 20:31:21 +01:00
ilike2burnthing
8dea0ed017 Fix file descriptor leak in service on quit(). resolves #983
credit: @zkulis - https://github.com/ultrafunkamsterdam/undetected-chromedriver/pull/1812
2024-04-09 20:27:42 +01:00
francisco-lafe
20cd2944a7 Update README.md (#1127)
Co-authored-by: ilike2burnthing <59480337+ilike2burnthing@users.noreply.github.com>
2024-03-19 18:05:11 +00:00
ilike2burnthing
fd773e5909 Bump version 3.3.16 (#1105) 2024-02-28 21:38:53 +00:00
Justin Kromlinger
35c7bff3c8 Use headless configuration properly (#1104) 2024-02-28 20:36:38 +00:00
Raphaël
afdc1c7a8e Add FreeBSD support (#1054) 2024-02-28 02:45:04 +00:00
ilike2burnthing
0bc7a4498c Update README.md 2024-02-23 05:01:00 +00:00
Xewdy
c5a5f6d65e Add platform specifiers to dependencies (#877) 2024-02-21 21:14:25 +00:00
ilike2burnthing
aaf29be8e1 Bump version 3.3.15 (#1088) 2024-02-20 23:48:23 +00:00
Tadas Gedgaudas
800866d033 Fix looping challenges. #1036 (#1065)
Co-authored-by: Tadas Gedgaudas <tg@infrahub.io>
Co-authored-by: ilike2burnthing <59480337+ilike2burnthing@users.noreply.github.com>
2024-02-20 23:41:02 +00:00
ilike2burnthing
043f18b231 Bump UC version to 3.5.5 2024-02-17 19:28:06 +00:00
ilike2burnthing
d21a332519 Hotfix 2 - bad Chromium build, instances failed to terminate (#1072) 2024-02-17 05:53:45 +00:00
ilike2burnthing
3ca6d08f41 Hotfix for Linux build - some Chrome files no longer exist (#1071) 2024-02-17 01:15:32 +00:00
ilike2burnthing
227bd7ac72 Update Chrome downloads (#1070) 2024-02-17 00:50:14 +00:00
ilike2burnthing
e6a08584c0 Update README.md
thanks @kimboslice99
2024-02-16 04:35:37 +00:00
ilike2burnthing
df06d13cf8 Update README.md 2024-01-12 23:38:18 +00:00
ilike2burnthing
993b8c41ac Fix too many open files error. resolves #983 (#1033) 2024-01-07 21:22:14 +00:00
ilike2burnthing
a4d42d7834 Remove unnecessary comment 2023-12-17 00:33:06 +00:00
21hsmw
1c855b8af0 Fix looping challenges and invalid cookies. resolves #1006 (#1010)
Co-authored-by: ilike2burnthing <59480337+ilike2burnthing@users.noreply.github.com>
2023-12-15 22:11:58 +00:00
ilike2burnthing
745c69491f Bump version 3.3.11 (#999) 2023-12-11 20:56:14 +00:00
txtsd
f7e316fd5a updates: UC 3.5.4 & Selenium 4.15.2 (#970)
Co-authored-by: GaspardRuan <1039553124@qq.com>
Co-authored-by: ilike2burnthing <59480337+ilike2burnthing@users.noreply.github.com>
2023-12-11 20:51:16 +00:00
ilike2burnthing
16c8ab5f3d Update README.md 2023-11-14 07:54:09 +00:00
ilike2burnthing
7af311b73c Bump version 3.3.10 (#969) 2023-11-14 04:04:42 +00:00
ilike2burnthing
daec97532d Update README.md 2023-11-14 04:00:01 +00:00
ilike2burnthing
8d7ed48f21 Add LANG ENV. resolves #951 2023-11-14 03:56:57 +00:00
ilike2burnthing
220f2599ae Bump version 3.3.9 (#963) 2023-11-13 07:17:28 +00:00
ilike2burnthing
d772cf3f50 Fix for Docker build, capture TypeError. Fixes #962 2023-11-13 07:14:13 +00:00
ilike2burnthing
ab4365894b Bump version 3.3.8 (#961) 2023-11-13 04:55:49 +00:00
ilike2burnthing
3fa9631559 Fix "OSError: [WinError 6] The handle is invalid" on exit 2023-11-13 04:28:19 +00:00
ilike2burnthing
04858c22fd Support running Chrome 119 from source (#960) 2023-11-13 04:23:06 +00:00
Nabi KaramAliZadeh
5085ca6990 Fix headless=true for Chrome 117+. Fixes #910 (#921) 2023-11-13 04:03:56 +00:00
ilike2burnthing
cd4df1e061 Bump version 3.3.7 (#944) 2023-11-05 14:41:12 +00:00
ilike2burnthing
6c79783f7c Bump version 3.3.6 (#905) 2023-09-15 20:40:56 +01:00
ilike2burnthing
4139e8d47c Update checkbox selector, again 2023-09-15 20:37:26 +01:00
zax2002
1942eb5fdc Typo in README (#901) 2023-09-14 07:41:59 +01:00
ilike2burnthing
401bf5be76 Bump version 3.3.5 (#902) 2023-09-13 10:28:02 +01:00
ilike2burnthing
d8ffdd3061 Change checkbox selector, support language other than English. resolves #891 2023-09-13 10:19:19 +01:00
ilike2burnthing
2d66590b08 Bump version 3.3.4 (#884) 2023-09-02 12:30:21 +01:00
Zachary Hampton
a217510dc7 Update checkbox selector (#882) 2023-09-02 12:24:25 +01:00
ilike2burnthing
553bd8ab4f Bump version 3.3.3 (#879) 2023-08-31 20:02:17 +01:00
ilike2burnthing
1b197c3e53 Update undetected_chromedriver to v3.5.3 (#860) 2023-08-31 19:56:06 +01:00
ngosang
fd308f01be Bump version 3.3.2 2023-08-03 10:00:16 +02:00
ngosang
b5eef32615 Fix URL domain in Prometheus exporter 2023-08-03 09:02:46 +02:00
ngosang
644a843d89 Bump version 3.3.1 2023-08-03 08:13:01 +02:00
ngosang
82e1c94c6f Fix HEADLESS=false in Windows binary 2023-08-03 08:10:14 +02:00
ngosang
fbc71516f5 Fix for Cloudflare verify checkbox 2023-08-03 07:28:58 +02:00
ngosang
40bd1cba4c Fix Prometheus exporter for management and health endpoints 2023-08-03 06:36:31 +02:00
ngosang
d1588c1156 Remove misleading stack trace when a button is not found 2023-08-03 05:45:38 +02:00
ngosang
b4ad583baa Revert "Update base Docker image to Debian Bookworm"
This reverts commit 0edc50e271.
2023-08-03 05:19:56 +02:00
ngosang
5d31e551cc Revert "Install Chromium 115 from Debian testing"
This reverts commit 2aa095ed5d.
2023-08-03 05:19:27 +02:00
16 changed files with 525 additions and 169 deletions

View File

@@ -4,50 +4,64 @@ on:
push: push:
tags: tags:
- 'v*.*.*' - 'v*.*.*'
pull_request:
branches:
- master
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs: jobs:
build-docker-images: build-docker-images:
if: ${{ !github.event.pull_request.head.repo.fork }}
runs-on: ubuntu-22.04 runs-on: ubuntu-22.04
steps: steps:
- - name: Checkout
name: Checkout uses: actions/checkout@v4
uses: actions/checkout@v3
- - name: Downcase repo
name: Downcase repo
run: echo REPOSITORY=$(echo ${{ github.repository }} | tr '[:upper:]' '[:lower:]') >> $GITHUB_ENV run: echo REPOSITORY=$(echo ${{ github.repository }} | tr '[:upper:]' '[:lower:]') >> $GITHUB_ENV
-
name: Docker meta - name: Docker meta
id: docker_meta id: docker_meta
uses: crazy-max/ghaction-docker-meta@v3 uses: docker/metadata-action@v5
with: with:
images: ${{ env.REPOSITORY }},ghcr.io/${{ env.REPOSITORY }} images: |
tag-sha: false ${{ env.REPOSITORY }},enable=${{ github.event_name != 'pull_request' }}
- ghcr.io/${{ env.REPOSITORY }}
name: Set up QEMU tags: |
uses: docker/setup-qemu-action@v2 type=semver,pattern={{version}},prefix=v
- type=ref,event=pr
name: Set up Docker Buildx flavor: |
uses: docker/setup-buildx-action@v2 latest=auto
-
name: Login to DockerHub - name: Set up QEMU
uses: docker/login-action@v2 uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to DockerHub
uses: docker/login-action@v3
if: github.event_name != 'pull_request'
with: with:
username: ${{ secrets.DOCKERHUB_USERNAME }} username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }} password: ${{ secrets.DOCKERHUB_TOKEN }}
-
name: Login to GitHub Container Registry - name: Login to GitHub Container Registry
uses: docker/login-action@v2 uses: docker/login-action@v3
with: with:
registry: ghcr.io registry: ghcr.io
username: ${{ github.repository_owner }} username: ${{ github.repository_owner }}
password: ${{ secrets.GH_PAT }} password: ${{ secrets.GH_PAT }}
-
name: Build and push - name: Build and push
uses: docker/build-push-action@v3 uses: docker/build-push-action@v6
with: with:
context: . context: .
file: ./Dockerfile file: ./Dockerfile
platforms: linux/386,linux/amd64,linux/arm/v7,linux/arm64/v8 platforms: linux/386,linux/amd64,linux/arm/v7,linux/arm64/v8
push: ${{ github.event_name != 'pull_request' }} push: true
tags: ${{ steps.docker_meta.outputs.tags }} tags: ${{ steps.docker_meta.outputs.tags }}
labels: ${{ steps.docker_meta.outputs.labels }} labels: ${{ steps.docker_meta.outputs.labels }}

View File

@@ -1,5 +1,107 @@
# Changelog # Changelog
## v3.3.21 (2024/06/26)
* Add challenge selector to catch reloading page on non-English systems
* Escape values for generated form used in request.post. Thanks @mynameisbogdan
## v3.3.20 (2024/06/21)
* maxTimeout should always be int
* Check not running in Docker before logging version_main error
* Update Cloudflare challenge and checkbox selectors. Thanks @tenettow & @21hsmw
## v3.3.19 (2024/05/23)
* Fix occasional headless issue on Linux when set to "false". Thanks @21hsmw
## v3.3.18 (2024/05/20)
* Fix LANG ENV for Linux
* Fix Chrome v124+ not closing on Windows. Thanks @RileyXX
## v3.3.17 (2024/04/09)
* Fix file descriptor leak in service on quit(). Thanks @zkulis
## v3.3.16 (2024/02/28)
* Fix of the subprocess.STARTUPINFO() call. Thanks @ceconelo
* Add FreeBSD support. Thanks @Asthowen
* Use headless configuration properly. Thanks @hashworks
## v3.3.15 (2024/02/20)
* Fix looping challenges
## v3.3.14-hotfix2 (2024/02/17)
* Hotfix 2 - bad Chromium build, instances failed to terminate
## v3.3.14-hotfix (2024/02/17)
* Hotfix for Linux build - some Chrome files no longer exist
## v3.3.14 (2024/02/17)
* Update Chrome downloads. Thanks @opemvbs
## v3.3.13 (2024/01/07)
* Fix too many open files error
## v3.3.12 (2023/12/15)
* Fix looping challenges and invalid cookies
## v3.3.11 (2023/12/11)
* Update UC 3.5.4 & Selenium 4.15.2. Thanks @txtsd
## v3.3.10 (2023/11/14)
* Add LANG ENV - resolves issues with YGGtorrent
## v3.3.9 (2023/11/13)
* Fix for Docker build, capture TypeError
## v3.3.8 (2023/11/13)
* Fix headless=true for Chrome 117+. Thanks @NabiKAZ
* Support running Chrome 119 from source. Thanks @koleg and @Chris7X
* Fix "OSError: [WinError 6] The handle is invalid" on exit. Thanks @enesgorkemgenc
## v3.3.7 (2023/11/05)
* Bump to rebuild. Thanks @JoachimDorchies
## v3.3.6 (2023/09/15)
* Update checkbox selector, again
## v3.3.5 (2023/09/13)
* Change checkbox selector, support languages other than English
## v3.3.4 (2023/09/02)
* Update checkbox selector
## v3.3.3 (2023/08/31)
* Update undetected_chromedriver to v3.5.3
## v3.3.2 (2023/08/03)
* Fix URL domain in Prometheus exporter
## v3.3.1 (2023/08/03)
* Fix for Cloudflare verify checkbox
* Fix HEADLESS=false in Windows binary
* Fix Prometheus exporter for management and health endpoints
* Remove misleading stack trace when the verify checkbox is not found
* Revert "Update base Docker image to Debian Bookworm" #849
* Revert "Install Chromium 115 from Debian testing" #849
## v3.3.0 (2023/08/02) ## v3.3.0 (2023/08/02)
* Fix for new Cloudflare detection. Thanks @cedric-bour for #845 * Fix for new Cloudflare detection. Thanks @cedric-bour for #845

View File

@@ -1,4 +1,4 @@
FROM python:3.11-slim-bookworm as builder FROM python:3.11-slim-bullseye as builder
# Build dummy packages to skip installing them and their dependencies # Build dummy packages to skip installing them and their dependencies
RUN apt-get update \ RUN apt-get update \
@@ -12,7 +12,7 @@ RUN apt-get update \
&& equivs-build adwaita-icon-theme \ && equivs-build adwaita-icon-theme \
&& mv adwaita-icon-theme_*.deb /adwaita-icon-theme.deb && mv adwaita-icon-theme_*.deb /adwaita-icon-theme.deb
FROM python:3.11-slim-bookworm FROM python:3.11-slim-bullseye
# Copy dummy packages # Copy dummy packages
COPY --from=builder /*.deb / COPY --from=builder /*.deb /
@@ -27,16 +27,12 @@ WORKDIR /app
# Install dummy packages # Install dummy packages
RUN dpkg -i /libgl1-mesa-dri.deb \ RUN dpkg -i /libgl1-mesa-dri.deb \
&& dpkg -i /adwaita-icon-theme.deb \ && dpkg -i /adwaita-icon-theme.deb \
# Use Testing packages. The latest version of Chromium is not available for ARM
&& sed -i 's/bookworm-updates/bookworm-updates testing/g' /etc/apt/sources.list.d/debian.sources \
# Install dependencies # Install dependencies
&& apt-get update \ && apt-get update \
&& apt-get install -y --no-install-recommends -t testing chromium chromium-common chromium-driver xvfb dumb-init \ && apt-get install -y --no-install-recommends chromium chromium-common chromium-driver xvfb dumb-init \
procps curl vim-tiny xauth \ procps curl vim xauth \
# Remove temporary files and hardware decoding libraries # Remove temporary files and hardware decoding libraries
&& rm -rf /var/lib/apt/lists/* \ && rm -rf /var/lib/apt/lists/* \
&& rm -f /usr/lib/systemd/systemd* \
&& rm -f /usr/lib/x86_64-linux-gnu/systemd/* \
&& rm -f /usr/lib/x86_64-linux-gnu/libmfxhw* \ && rm -f /usr/lib/x86_64-linux-gnu/libmfxhw* \
&& rm -f /usr/lib/x86_64-linux-gnu/mfx/* \ && rm -f /usr/lib/x86_64-linux-gnu/mfx/* \
# Create flaresolverr user # Create flaresolverr user
@@ -66,17 +62,17 @@ ENTRYPOINT ["/usr/bin/dumb-init", "--"]
CMD ["/usr/local/bin/python", "-u", "/app/flaresolverr.py"] CMD ["/usr/local/bin/python", "-u", "/app/flaresolverr.py"]
# Local build # Local build
# docker build -t ngosang/flaresolverr:3.3.0 . # docker build -t ngosang/flaresolverr:3.3.21 .
# docker run -p 8191:8191 ngosang/flaresolverr:3.3.0 # docker run -p 8191:8191 ngosang/flaresolverr:3.3.21
# Multi-arch build # Multi-arch build
# docker run --rm --privileged multiarch/qemu-user-static --reset -p yes # docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
# docker buildx create --use # docker buildx create --use
# docker buildx build -t ngosang/flaresolverr:3.3.0 --platform linux/386,linux/amd64,linux/arm/v7,linux/arm64/v8 . # docker buildx build -t ngosang/flaresolverr:3.3.21 --platform linux/386,linux/amd64,linux/arm/v7,linux/arm64/v8 .
# add --push to publish in DockerHub # add --push to publish in DockerHub
# Test multi-arch build # Test multi-arch build
# docker run --rm --privileged multiarch/qemu-user-static --reset -p yes # docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
# docker buildx create --use # docker buildx create --use
# docker buildx build -t ngosang/flaresolverr:3.3.0 --platform linux/arm/v7 --load . # docker buildx build -t ngosang/flaresolverr:3.3.21 --platform linux/arm/v7 --load .
# docker run -p 8191:8191 --platform linux/arm/v7 ngosang/flaresolverr:3.3.0 # docker run -p 8191:8191 --platform linux/arm/v7 ngosang/flaresolverr:3.3.21

View File

@@ -45,7 +45,8 @@ Supported architectures are:
| ARM32 | linux/arm/v7 | | ARM32 | linux/arm/v7 |
| ARM64 | linux/arm64 | | ARM64 | linux/arm64 |
We provide a `docker-compose.yml` configuration file. Clone this repository and execute `docker-compose up -d` to start We provide a `docker-compose.yml` configuration file. Clone this repository and execute
`docker-compose up -d` _(Compose V1)_ or `docker compose up -d` _(Compose V2)_ to start
the container. the container.
If you prefer the `docker cli` execute the following command. If you prefer the `docker cli` execute the following command.
@@ -78,28 +79,62 @@ This is the recommended way for Windows users.
* Install [Python 3.11](https://www.python.org/downloads/). * Install [Python 3.11](https://www.python.org/downloads/).
* Install [Chrome](https://www.google.com/intl/en_us/chrome/) (all OS) or [Chromium](https://www.chromium.org/getting-involved/download-chromium/) (just Linux, it doesn't work in Windows) web browser. * Install [Chrome](https://www.google.com/intl/en_us/chrome/) (all OS) or [Chromium](https://www.chromium.org/getting-involved/download-chromium/) (just Linux, it doesn't work in Windows) web browser.
* (Only in Linux / macOS) Install [Xvfb](https://en.wikipedia.org/wiki/Xvfb) package. * (Only in Linux) Install [Xvfb](https://en.wikipedia.org/wiki/Xvfb) package.
* (Only in macOS) Install [XQuartz](https://www.xquartz.org/) package.
* Clone this repository and open a shell in that path. * Clone this repository and open a shell in that path.
* Run `pip install -r requirements.txt` command to install FlareSolverr dependencies. * Run `pip install -r requirements.txt` command to install FlareSolverr dependencies.
* Run `python src/flaresolverr.py` command to start FlareSolverr. * Run `python src/flaresolverr.py` command to start FlareSolverr.
### From source code (FreeBSD/TrueNAS CORE)
* Run `pkg install chromium python39 py39-pip xorg-vfbserver` command to install the required dependencies.
* Clone this repository and open a shell in that path.
* Run `python3.9 -m pip install -r requirements.txt` command to install FlareSolverr dependencies.
* Run `python3.9 src/flaresolverr.py` command to start FlareSolverr.
### Systemd service ### Systemd service
We provide an example Systemd unit file `flaresolverr.service` as reference. You have to modify the file to suit your needs: paths, user and environment variables. We provide an example Systemd unit file `flaresolverr.service` as reference. You have to modify the file to suit your needs: paths, user and environment variables.
## Usage ## Usage
Example request: Example Bash request:
```bash ```bash
curl -L -X POST 'http://localhost:8191/v1' \ curl -L -X POST 'http://localhost:8191/v1' \
-H 'Content-Type: application/json' \ -H 'Content-Type: application/json' \
--data-raw '{ --data-raw '{
"cmd": "request.get", "cmd": "request.get",
"url":"http://www.google.com/", "url": "http://www.google.com/",
"maxTimeout": 60000 "maxTimeout": 60000
}' }'
``` ```
Example Python request:
```py
import requests
url = "http://localhost:8191/v1"
headers = {"Content-Type": "application/json"}
data = {
"cmd": "request.get",
"url": "http://www.google.com/",
"maxTimeout": 60000
}
response = requests.post(url, headers=headers, json=data)
print(response.text)
```
Example PowerShell request:
```ps1
$body = @{
cmd = "request.get"
url = "http://www.google.com/"
maxTimeout = 60000
} | ConvertTo-Json
irm -UseBasicParsing 'http://localhost:8191/v1' -Headers @{"Content-Type"="application/json"} -Method Post -Body $body
```
### Commands ### Commands
#### + `sessions.create` #### + `sessions.create`
@@ -113,7 +148,7 @@ This also speeds up the requests since it won't have to launch a new browser ins
| Parameter | Notes | | Parameter | Notes |
|-----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| |-----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| session | Optional. The session ID that you want to be assigned to the instance. If isn't set a random UUID will be assigned. | | session | Optional. The session ID that you want to be assigned to the instance. If isn't set a random UUID will be assigned. |
| proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. You must include the proxy schema in the URL: `http://`, `socks4://` or `socks5://`. Authorization (username/password) is supported. Eg: `"proxy": {"url": "http://127.0.0.1:8888", username": "testuser", "password": "testpass"}` | | proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. You must include the proxy schema in the URL: `http://`, `socks4://` or `socks5://`. Authorization (username/password) is supported. Eg: `"proxy": {"url": "http://127.0.0.1:8888", "username": "testuser", "password": "testpass"}` |
#### + `sessions.list` #### + `sessions.list`
@@ -232,6 +267,7 @@ This is the same as `request.get` but it takes one more param:
| LOG_HTML | false | Only for debugging. If `true` all HTML that passes through the proxy will be logged to the console in `debug` level. | | LOG_HTML | false | Only for debugging. If `true` all HTML that passes through the proxy will be logged to the console in `debug` level. |
| CAPTCHA_SOLVER | none | Captcha solving method. It is used when a captcha is encountered. See the Captcha Solvers section. | | CAPTCHA_SOLVER | none | Captcha solving method. It is used when a captcha is encountered. See the Captcha Solvers section. |
| TZ | UTC | Timezone used in the logs and the web browser. Example: `TZ=Europe/London`. | | TZ | UTC | Timezone used in the logs and the web browser. Example: `TZ=Europe/London`. |
| LANG | none | Language used in the web browser. Example: `LANG=en_GB`. |
| HEADLESS | true | Only for debugging. To run the web browser in headless mode or visible. | | HEADLESS | true | Only for debugging. To run the web browser in headless mode or visible. |
| BROWSER_TIMEOUT | 40000 | If you are experiencing errors/timeouts because your system is slow, you can try to increase this value. Remember to increase the `maxTimeout` parameter too. | | BROWSER_TIMEOUT | 40000 | If you are experiencing errors/timeouts because your system is slow, you can try to increase this value. Remember to increase the `maxTimeout` parameter too. |
| TEST_URL | https://www.google.com | FlareSolverr makes a request on start to make sure the web browser is working. You can change that URL if it is blocked in your country. | | TEST_URL | https://www.google.com | FlareSolverr makes a request on start to make sure the web browser is working. You can change that URL if it is blocked in your country. |
@@ -242,8 +278,8 @@ This is the same as `request.get` but it takes one more param:
Environment variables are set differently depending on the operating system. Some examples: Environment variables are set differently depending on the operating system. Some examples:
* Docker: Take a look at the Docker section in this document. Environment variables can be set in the `docker-compose.yml` file or in the Docker CLI command. * Docker: Take a look at the Docker section in this document. Environment variables can be set in the `docker-compose.yml` file or in the Docker CLI command.
* Linux: Run `export LOG_LEVEL=debug` and then start FlareSolverr in the same shell. * Linux: Run `export LOG_LEVEL=debug` and then run `flaresolverr` in the same shell.
* Windows: Open `cmd.exe`, run `set LOG_LEVEL=debug` and then start FlareSolverr in the same shell. * Windows: Open `cmd.exe`, run `set LOG_LEVEL=debug` and then run `flaresolverr.exe` in the same shell.
## Prometheus exporter ## Prometheus exporter

View File

@@ -1,6 +1,6 @@
{ {
"name": "flaresolverr", "name": "flaresolverr",
"version": "3.3.0", "version": "3.3.21",
"description": "Proxy server to bypass Cloudflare protection", "description": "Proxy server to bypass Cloudflare protection",
"author": "Diego Heras (ngosang / ngosang@hotmail.es)", "author": "Diego Heras (ngosang / ngosang@hotmail.es)",
"license": "MIT" "license": "MIT"

View File

@@ -1,13 +1,13 @@
bottle==0.12.25 bottle==0.12.25
waitress==2.1.2 waitress==2.1.2
selenium==4.11.2 selenium==4.15.2
func-timeout==4.3.5 func-timeout==4.3.5
prometheus-client==0.17.1 prometheus-client==0.17.1
# required by undetected_chromedriver # required by undetected_chromedriver
requests==2.31.0 requests==2.32.3
certifi==2023.7.22 certifi==2024.07.04
websockets==11.0.3 websockets==11.0.3
# only required for linux # only required for linux and macos
xvfbwrapper==0.2.9 xvfbwrapper==0.2.9; platform_system != "Windows"
# only required for windows # only required for windows
pefile==2023.2.7 pefile==2023.2.7; platform_system == "Windows"

View File

@@ -2,7 +2,8 @@ import logging
import os import os
import urllib.parse import urllib.parse
from dtos import V1ResponseBase from bottle import request
from dtos import V1RequestBase, V1ResponseBase
from metrics import start_metrics_http_server, REQUEST_COUNTER, REQUEST_DURATION from metrics import start_metrics_http_server, REQUEST_COUNTER, REQUEST_DURATION
PROMETHEUS_ENABLED = os.environ.get('PROMETHEUS_ENABLED', 'false').lower() == 'true' PROMETHEUS_ENABLED = os.environ.get('PROMETHEUS_ENABLED', 'false').lower() == 'true'
@@ -33,10 +34,18 @@ def prometheus_plugin(callback):
def export_metrics(actual_response): def export_metrics(actual_response):
res = V1ResponseBase(actual_response) res = V1ResponseBase(actual_response)
if res.startTimestamp is None or res.endTimestamp is None:
# skip management and healthcheck endpoints
return
domain = "unknown" domain = "unknown"
if res.solution and res.solution.url: if res.solution and res.solution.url:
parsed_url = urllib.parse.urlparse(res.solution.url) domain = parse_domain_url(res.solution.url)
domain = parsed_url.hostname else:
# timeout error
req = V1RequestBase(request.json)
if req.url:
domain = parse_domain_url(req.url)
run_time = (res.endTimestamp - res.startTimestamp) / 1000 run_time = (res.endTimestamp - res.startTimestamp) / 1000
REQUEST_DURATION.labels(domain=domain).observe(run_time) REQUEST_DURATION.labels(domain=domain).observe(run_time)
@@ -50,4 +59,8 @@ def prometheus_plugin(callback):
result = "error" result = "error"
REQUEST_COUNTER.labels(domain=domain, result=result).inc() REQUEST_COUNTER.labels(domain=domain, result=result).inc()
def parse_domain_url(url):
parsed_url = urllib.parse.urlparse(url)
return parsed_url.hostname
return wrapper return wrapper

View File

@@ -25,7 +25,7 @@ def clean_files():
def download_chromium(): def download_chromium():
# https://commondatastorage.googleapis.com/chromium-browser-snapshots/index.html?prefix=Linux_x64/ # https://commondatastorage.googleapis.com/chromium-browser-snapshots/index.html?prefix=Linux_x64/
revision = "1140001" if os.name == 'nt' else '1140000' revision = "1260008" if os.name == 'nt' else '1260015'
arch = 'Win_x64' if os.name == 'nt' else 'Linux_x64' arch = 'Win_x64' if os.name == 'nt' else 'Linux_x64'
dl_file = 'chrome-win' if os.name == 'nt' else 'chrome-linux' dl_file = 'chrome-win' if os.name == 'nt' else 'chrome-linux'
dl_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'dist_chrome') dl_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'dist_chrome')
@@ -59,8 +59,7 @@ def download_chromium():
# Give executable permissions for *nix # Give executable permissions for *nix
# file * | grep executable | cut -d: -f1 # file * | grep executable | cut -d: -f1
print("Giving executable permissions...") print("Giving executable permissions...")
execs = ['chrome', 'chrome_crashpad_handler', 'chrome_sandbox', 'chrome-wrapper', 'nacl_helper', execs = ['chrome', 'chrome_crashpad_handler', 'chrome_sandbox', 'chrome-wrapper', 'xdg-mime', 'xdg-settings']
'nacl_helper_bootstrap', 'nacl_irt_x86_64.nexe', 'xdg-mime', 'xdg-settings']
for exec_file in execs: for exec_file in execs:
exec_path = os.path.join(chrome_path, exec_file) exec_path = os.path.join(chrome_path, exec_file)
os.chmod(exec_path, 0o755) os.chmod(exec_path, 0o755)

View File

@@ -62,6 +62,12 @@ if __name__ == "__main__":
if sys.version_info < (3, 9): if sys.version_info < (3, 9):
raise Exception("The Python version is less than 3.9, a version equal to or higher is required.") raise Exception("The Python version is less than 3.9, a version equal to or higher is required.")
# fix for HEADLESS=false in Windows binary
# https://stackoverflow.com/a/27694505
if os.name == 'nt':
import multiprocessing
multiprocessing.freeze_support()
# fix ssl certificates for compiled binaries # fix ssl certificates for compiled binaries
# https://github.com/pyinstaller/pyinstaller/issues/7229 # https://github.com/pyinstaller/pyinstaller/issues/7229
# https://stackoverflow.com/questions/55736855/how-to-change-the-cafile-argument-in-the-ssl-module-in-python3 # https://stackoverflow.com/questions/55736855/how-to-change-the-cafile-argument-in-the-ssl-module-in-python3
@@ -95,6 +101,9 @@ if __name__ == "__main__":
logging.info(f'FlareSolverr {utils.get_flaresolverr_version()}') logging.info(f'FlareSolverr {utils.get_flaresolverr_version()}')
logging.debug('Debug log enabled') logging.debug('Debug log enabled')
# Get current OS for global variable
utils.get_current_platform()
# test browser installation # test browser installation
flaresolverr_service.test_browser_installation() flaresolverr_service.test_browser_installation()

View File

@@ -3,7 +3,8 @@ import platform
import sys import sys
import time import time
from datetime import timedelta from datetime import timedelta
from urllib.parse import unquote from html import escape
from urllib.parse import unquote, quote
from func_timeout import FunctionTimedOut, func_timeout from func_timeout import FunctionTimedOut, func_timeout
from selenium.common import TimeoutException from selenium.common import TimeoutException
@@ -40,13 +41,13 @@ CHALLENGE_TITLES = [
] ]
CHALLENGE_SELECTORS = [ CHALLENGE_SELECTORS = [
# Cloudflare # Cloudflare
'#cf-challenge-running', '.ray_id', '.attack-box', '#cf-please-wait', '#challenge-spinner', '#trk_jschal_js', '#cf-challenge-running', '.ray_id', '.attack-box', '#cf-please-wait', '#challenge-spinner', '#trk_jschal_js', '#turnstile-wrapper', '.lds-ring',
# Custom CloudFlare for EbookParadijs, Film-Paleis, MuziekFabriek and Puur-Hollands # Custom CloudFlare for EbookParadijs, Film-Paleis, MuziekFabriek and Puur-Hollands
'td.info #js_info', 'td.info #js_info',
# Fairlane / pararius.com # Fairlane / pararius.com
'div.vc div.text-box h2' 'div.vc div.text-box h2'
] ]
SHORT_TIMEOUT = 10 SHORT_TIMEOUT = 1
SESSIONS_STORAGE = SessionsStorage() SESSIONS_STORAGE = SessionsStorage()
@@ -119,7 +120,7 @@ def _controller_v1_handler(req: V1RequestBase) -> V1ResponseBase:
logging.warning("Request parameter 'userAgent' was removed in FlareSolverr v2.") logging.warning("Request parameter 'userAgent' was removed in FlareSolverr v2.")
# set default values # set default values
if req.maxTimeout is None or req.maxTimeout < 1: if req.maxTimeout is None or int(req.maxTimeout) < 1:
req.maxTimeout = 60000 req.maxTimeout = 60000
# execute the command # execute the command
@@ -218,9 +219,23 @@ def _cmd_sessions_destroy(req: V1RequestBase) -> V1ResponseBase:
"message": "The session has been removed." "message": "The session has been removed."
}) })
def _init_driver(driver):
try:
driver.execute_cdp_cmd('Page.enable', {})
driver.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
'source': """
Element.prototype._as = Element.prototype.attachShadow;
Element.prototype.attachShadow = function (params) {
return this._as({mode: "open"})
};
"""
})
except Exception as e:
logging.debug("Driver init exception: %s", repr(e))
def _resolve_challenge(req: V1RequestBase, method: str) -> ChallengeResolutionT: def _resolve_challenge(req: V1RequestBase, method: str) -> ChallengeResolutionT:
timeout = req.maxTimeout / 1000 timeout = int(req.maxTimeout) / 1000
driver = None driver = None
try: try:
if req.session: if req.session:
@@ -238,6 +253,7 @@ def _resolve_challenge(req: V1RequestBase, method: str) -> ChallengeResolutionT:
else: else:
driver = utils.get_webdriver(req.proxy) driver = utils.get_webdriver(req.proxy)
logging.debug('New instance of webdriver has been created to perform the request') logging.debug('New instance of webdriver has been created to perform the request')
_init_driver(driver)
return func_timeout(timeout, _evil_logic, (req, driver, method)) return func_timeout(timeout, _evil_logic, (req, driver, method))
except FunctionTimedOut: except FunctionTimedOut:
raise Exception(f'Error solving the challenge. Timeout after {timeout} seconds.') raise Exception(f'Error solving the challenge. Timeout after {timeout} seconds.')
@@ -245,27 +261,39 @@ def _resolve_challenge(req: V1RequestBase, method: str) -> ChallengeResolutionT:
raise Exception('Error solving the challenge. ' + str(e).replace('\n', '\\n')) raise Exception('Error solving the challenge. ' + str(e).replace('\n', '\\n'))
finally: finally:
if not req.session and driver is not None: if not req.session and driver is not None:
if utils.PLATFORM_VERSION == "nt":
driver.close()
driver.quit() driver.quit()
logging.debug('A used instance of webdriver has been destroyed') logging.debug('A used instance of webdriver has been destroyed')
def get_shadowed_iframe(driver: WebDriver, css_selector: str):
logging.debug("Getting ShadowRoot by selector: %s", css_selector)
shadow_element = driver.execute_script("""
return document.querySelector(arguments[0]).shadowRoot.firstChild;
""", css_selector)
if shadow_element:
logging.debug("iframe found")
else:
logging.debug("iframe not found")
return shadow_element
def click_verify(driver: WebDriver): def click_verify(driver: WebDriver):
try: try:
logging.debug("Try to find the Cloudflare verify checkbox...") logging.debug("Try to find the Cloudflare verify checkbox...")
iframe = driver.find_element(By.XPATH, "//iframe[@title='Widget containing a Cloudflare security challenge']") iframe = get_shadowed_iframe(driver, "div:not(:has(div))")
driver.switch_to.frame(iframe) driver.switch_to.frame(iframe)
checkbox = driver.find_element( iframe_body = driver.find_element(By.CSS_SELECTOR, "body")
by=By.XPATH, if iframe_body:
value='//*[@id="cf-stage"]//label[@class="ctp-checkbox-label"]/input', iframe_body.click()
)
if checkbox:
actions = ActionChains(driver) actions = ActionChains(driver)
actions.move_to_element_with_offset(checkbox, 5, 7) actions.move_to_element_with_offset(iframe_body, 10, 10)
actions.click(checkbox) actions.click(iframe_body)
actions.perform() actions.perform()
logging.debug("Cloudflare verify checkbox found and clicked!") logging.debug("Attempted to click on iframe body")
except Exception as e: except Exception as e:
logging.debug("Cloudflare verify checkbox not found on the page. Error: " + str(e)) logging.debug("Cloudflare verify checkbox not found on the page. %s", repr(e))
finally: finally:
driver.switch_to.default_content() driver.switch_to.default_content()
@@ -281,24 +309,41 @@ def click_verify(driver: WebDriver):
actions.click(button) actions.click(button)
actions.perform() actions.perform()
logging.debug("The Cloudflare 'Verify you are human' button found and clicked!") logging.debug("The Cloudflare 'Verify you are human' button found and clicked!")
except Exception as e: except Exception:
logging.debug("The Cloudflare 'Verify you are human' button not found on the page. Error: " + str(e)) logging.debug("The Cloudflare 'Verify you are human' button not found on the page.")
time.sleep(2) time.sleep(2)
def get_correct_window(driver: WebDriver) -> WebDriver:
if len(driver.window_handles) > 1:
for window_handle in driver.window_handles:
driver.switch_to.window(window_handle)
current_url = driver.current_url
if not current_url.startswith("devtools://devtools"):
return driver
return driver
def access_page(driver: WebDriver, url: str) -> None:
driver.get(url)
driver.start_session()
driver.start_session() # required to bypass Cloudflare
def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> ChallengeResolutionT: def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> ChallengeResolutionT:
res = ChallengeResolutionT({}) res = ChallengeResolutionT({})
res.status = STATUS_OK res.status = STATUS_OK
res.message = "" res.message = ""
# navigate to the page # navigate to the page
logging.debug(f'Navigating to... {req.url}') logging.debug(f'Navigating to... {req.url}')
if method == 'POST': if method == 'POST':
_post_request(req, driver) _post_request(req, driver)
else: else:
with driver: access_page(driver, req.url)
driver.get(req.url) driver = get_correct_window(driver)
# set cookies if required # set cookies if required
if req.cookies is not None and len(req.cookies) > 0: if req.cookies is not None and len(req.cookies) > 0:
@@ -310,8 +355,8 @@ def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> Challenge
if method == 'POST': if method == 'POST':
_post_request(req, driver) _post_request(req, driver)
else: else:
with driver: access_page(driver, req.url)
driver.get(req.url) driver = get_correct_window(driver)
# wait for the page # wait for the page
if utils.get_config_log_html(): if utils.get_config_log_html():
@@ -420,7 +465,7 @@ def _post_request(req: V1RequestBase, driver: WebDriver):
value = unquote(parts[1]) value = unquote(parts[1])
except Exception: except Exception:
value = parts[1] value = parts[1]
post_form += f'<input type="text" name="{name}" value="{value}"><br>' post_form += f'<input type="text" name="{escape(quote(name))}" value="{escape(quote(value))}"><br>'
post_form += '</form>' post_form += '</form>'
html_content = f""" html_content = f"""
<!DOCTYPE html> <!DOCTYPE html>
@@ -430,5 +475,6 @@ def _post_request(req: V1RequestBase, driver: WebDriver):
<script>document.getElementById('hackForm').submit();</script> <script>document.getElementById('hackForm').submit();</script>
</body> </body>
</html>""" </html>"""
with driver: driver.get("data:text/html;charset=utf-8,{html_content}".format(html_content=html_content))
driver.get("data:text/html;charset=utf-8," + html_content) driver.start_session()
driver.start_session() # required to bypass Cloudflare

View File

@@ -66,6 +66,8 @@ class SessionsStorage:
return False return False
session = self.sessions.pop(session_id) session = self.sessions.pop(session_id)
if utils.PLATFORM_VERSION == "nt":
session.driver.close()
session.driver.quit() session.driver.quit()
return True return True

View File

@@ -17,11 +17,12 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
from __future__ import annotations from __future__ import annotations
__version__ = "3.5.0" __version__ = "3.5.5"
import json import json
import logging import logging
import os import os
import pathlib
import re import re
import shutil import shutil
import subprocess import subprocess
@@ -373,6 +374,18 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
browser_executable_path or find_chrome_executable() browser_executable_path or find_chrome_executable()
) )
if not options.binary_location or not \
pathlib.Path(options.binary_location).exists():
raise FileNotFoundError(
"\n---------------------\n"
"Could not determine browser executable."
"\n---------------------\n"
"Make sure your browser is installed in the default location (path).\n"
"If you are sure about the browser executable, you can specify it using\n"
"the `browser_executable_path='{}` parameter.\n\n"
.format("/path/to/browser/executable" if IS_POSIX else "c:/path/to/your/browser.exe")
)
self._delay = 3 self._delay = 3
self.user_data_dir = user_data_dir self.user_data_dir = user_data_dir
@@ -383,7 +396,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
if no_sandbox: if no_sandbox:
options.arguments.extend(["--no-sandbox", "--test-type"]) options.arguments.extend(["--no-sandbox", "--test-type"])
if headless or options.headless: if headless or getattr(options, 'headless', None):
#workaround until a better checking is found #workaround until a better checking is found
try: try:
v_main = int(self.patcher.version_main) if self.patcher.version_main else 108 v_main = int(self.patcher.version_main) if self.patcher.version_main else 108
@@ -438,8 +451,10 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
options.binary_location, *options.arguments options.binary_location, *options.arguments
) )
else: else:
startupinfo = subprocess.STARTUPINFO() startupinfo = None
if os.name == 'nt' and windows_headless: if os.name == 'nt' and windows_headless:
# STARTUPINFO() is Windows only
startupinfo = subprocess.STARTUPINFO()
startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
browser = subprocess.Popen( browser = subprocess.Popen(
[options.binary_location, *options.arguments], [options.binary_location, *options.arguments],
@@ -451,11 +466,9 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
) )
self.browser_pid = browser.pid self.browser_pid = browser.pid
# Fix for Chrome 115
# https://github.com/seleniumbase/SeleniumBase/pull/1967
service = selenium.webdriver.chromium.service.ChromiumService( service = selenium.webdriver.chromium.service.ChromiumService(
executable_path=self.patcher.executable_path, self.patcher.executable_path
service_args=["--disable-build-check"]
) )
super(Chrome, self).__init__( super(Chrome, self).__init__(
@@ -480,7 +493,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
else: else:
self._web_element_cls = WebElement self._web_element_cls = WebElement
if options.headless: if headless or getattr(options, 'headless', None):
self._configure_headless() self._configure_headless()
def _configure_headless(self): def _configure_headless(self):
@@ -758,7 +771,9 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
def quit(self): def quit(self):
try: try:
self.service.stop()
self.service.process.kill() self.service.process.kill()
self.command_executor.close()
self.service.process.wait(5) self.service.process.wait(5)
logger.debug("webdriver process ended") logger.debug("webdriver process ended")
except (AttributeError, RuntimeError, OSError): except (AttributeError, RuntimeError, OSError):
@@ -773,15 +788,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
logger.debug("gracefully closed browser") logger.debug("gracefully closed browser")
except Exception as e: # noqa except Exception as e: # noqa
pass pass
# Force kill Chrome process in Windows
# https://github.com/FlareSolverr/FlareSolverr/issues/772
if os.name == 'nt':
try:
subprocess.call(['taskkill', '/f', '/pid', str(self.browser_pid)],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL)
except Exception:
pass
if ( if (
hasattr(self, "keep_user_data_dir") hasattr(self, "keep_user_data_dir")
and hasattr(self, "user_data_dir") and hasattr(self, "user_data_dir")
@@ -800,7 +806,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
else: else:
logger.debug("successfully removed %s" % self.user_data_dir) logger.debug("successfully removed %s" % self.user_data_dir)
break break
time.sleep(0.1)
try:
time.sleep(0.1)
except OSError:
pass
# dereference patcher, so patcher can start cleaning up as well. # dereference patcher, so patcher can start cleaning up as well.
# this must come last, otherwise it will throw 'in use' errors # this must come last, otherwise it will throw 'in use' errors
@@ -895,8 +905,6 @@ def find_chrome_executable():
if item is not None: if item is not None:
for subitem in ( for subitem in (
"Google/Chrome/Application", "Google/Chrome/Application",
"Google/Chrome Beta/Application",
"Google/Chrome Canary/Application",
): ):
candidates.add(os.sep.join((item, subitem, "chrome.exe"))) candidates.add(os.sep.join((item, subitem, "chrome.exe")))
for candidate in candidates: for candidate in candidates:

View File

@@ -2,6 +2,7 @@ import asyncio
from collections.abc import Mapping from collections.abc import Mapping
from collections.abc import Sequence from collections.abc import Sequence
from functools import wraps from functools import wraps
import os
import logging import logging
import threading import threading
import time import time
@@ -187,4 +188,6 @@ def test():
time.sleep(10) time.sleep(10)
if os.name == "nt":
driver.close()
driver.quit() driver.quit()

View File

@@ -41,6 +41,7 @@ def start_detached(executable, *args):
# close pipes # close pipes
writer.close() writer.close()
reader.close() reader.close()
process.close()
return pid return pid

View File

@@ -3,9 +3,11 @@
from distutils.version import LooseVersion from distutils.version import LooseVersion
import io import io
import json
import logging import logging
import os import os
import pathlib import pathlib
import platform
import random import random
import re import re
import shutil import shutil
@@ -19,26 +21,14 @@ from multiprocessing import Lock
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
IS_POSIX = sys.platform.startswith(("darwin", "cygwin", "linux", "linux2")) IS_POSIX = sys.platform.startswith(("darwin", "cygwin", "linux", "linux2", "freebsd"))
class Patcher(object): class Patcher(object):
lock = Lock() lock = Lock()
url_repo = "https://chromedriver.storage.googleapis.com"
zip_name = "chromedriver_%s.zip"
exe_name = "chromedriver%s" exe_name = "chromedriver%s"
platform = sys.platform platform = sys.platform
if platform.endswith("win32"):
zip_name %= "win32"
exe_name %= ".exe"
if platform.endswith(("linux", "linux2")):
zip_name %= "linux64"
exe_name %= ""
if platform.endswith("darwin"):
zip_name %= "mac64"
exe_name %= ""
if platform.endswith("win32"): if platform.endswith("win32"):
d = "~/appdata/roaming/undetected_chromedriver" d = "~/appdata/roaming/undetected_chromedriver"
elif "LAMBDA_TASK_ROOT" in os.environ: elif "LAMBDA_TASK_ROOT" in os.environ:
@@ -72,13 +62,34 @@ class Patcher(object):
prefix = "undetected" prefix = "undetected"
self.user_multi_procs = user_multi_procs self.user_multi_procs = user_multi_procs
try:
# Try to convert version_main into an integer
version_main_int = int(version_main)
# check if version_main_int is less than or equal to e.g 114
self.is_old_chromedriver = version_main and version_main_int <= 114
except (ValueError,TypeError):
# Check not running inside Docker
if not os.path.exists("/app/chromedriver"):
# If the conversion fails, log an error message
logging.info("version_main cannot be converted to an integer")
# Set self.is_old_chromedriver to False if the conversion fails
self.is_old_chromedriver = False
# Needs to be called before self.exe_name is accessed
self._set_platform_name()
if not os.path.exists(self.data_path): if not os.path.exists(self.data_path):
os.makedirs(self.data_path, exist_ok=True) os.makedirs(self.data_path, exist_ok=True)
if not executable_path: if not executable_path:
self.executable_path = os.path.join( if sys.platform.startswith("freebsd"):
self.data_path, "_".join([prefix, self.exe_name]) self.executable_path = os.path.join(
) self.data_path, self.exe_name
)
else:
self.executable_path = os.path.join(
self.data_path, "_".join([prefix, self.exe_name])
)
if not IS_POSIX: if not IS_POSIX:
if executable_path: if executable_path:
@@ -97,9 +108,36 @@ class Patcher(object):
self._custom_exe_path = True self._custom_exe_path = True
self.executable_path = executable_path self.executable_path = executable_path
# Set the correct repository to download the Chromedriver from
if self.is_old_chromedriver:
self.url_repo = "https://chromedriver.storage.googleapis.com"
else:
self.url_repo = "https://googlechromelabs.github.io/chrome-for-testing"
self.version_main = version_main self.version_main = version_main
self.version_full = None self.version_full = None
def _set_platform_name(self):
"""
Set the platform and exe name based on the platform undetected_chromedriver is running on
in order to download the correct chromedriver.
"""
if self.platform.endswith("win32"):
self.platform_name = "win32"
self.exe_name %= ".exe"
if self.platform.endswith(("linux", "linux2")):
self.platform_name = "linux64"
self.exe_name %= ""
if self.platform.endswith("darwin"):
if self.is_old_chromedriver:
self.platform_name = "mac64"
else:
self.platform_name = "mac-x64"
self.exe_name %= ""
if self.platform.startswith("freebsd"):
self.platform_name = "freebsd"
self.exe_name %= ""
def auto(self, executable_path=None, force=False, version_main=None, _=None): def auto(self, executable_path=None, force=False, version_main=None, _=None):
""" """
@@ -111,16 +149,15 @@ class Patcher(object):
Returns: Returns:
""" """
# if self.user_multi_procs and \
# self.user_multi_procs != -1:
# # -1 being a skip value used later in this block
#
p = pathlib.Path(self.data_path) p = pathlib.Path(self.data_path)
with Lock(): if self.user_multi_procs:
files = list(p.rglob("*chromedriver*?")) with Lock():
for file in files: files = list(p.rglob("*chromedriver*"))
if self.is_binary_patched(file): most_recent = max(files, key=lambda f: f.stat().st_mtime)
self.executable_path = str(file) files.remove(most_recent)
list(map(lambda f: f.unlink(), files))
if self.is_binary_patched(most_recent):
self.executable_path = str(most_recent)
return True return True
if executable_path: if executable_path:
@@ -139,26 +176,56 @@ class Patcher(object):
if force is True: if force is True:
self.force = force self.force = force
try:
os.unlink(self.executable_path)
except PermissionError:
if self.force:
self.force_kill_instances(self.executable_path)
return self.auto(force=not self.force)
try:
if self.is_binary_patched():
# assumes already running AND patched
return True
except PermissionError:
pass
# return False
except FileNotFoundError:
pass
release = self.fetch_release_number() if self.platform_name == "freebsd":
self.version_main = release.version[0] chromedriver_path = shutil.which("chromedriver")
self.version_full = release
self.unzip_package(self.fetch_package()) if not os.path.isfile(chromedriver_path) or not os.access(chromedriver_path, os.X_OK):
logging.error("Chromedriver not installed!")
return
version_path = os.path.join(os.path.dirname(self.executable_path), "version.txt")
process = os.popen(f'"{chromedriver_path}" --version')
chromedriver_version = process.read().split(' ')[1].split(' ')[0]
process.close()
current_version = None
if os.path.isfile(version_path) or os.access(version_path, os.X_OK):
with open(version_path, 'r') as f:
current_version = f.read()
if current_version != chromedriver_version:
logging.info("Copying chromedriver executable...")
shutil.copy(chromedriver_path, self.executable_path)
os.chmod(self.executable_path, 0o755)
with open(version_path, 'w') as f:
f.write(chromedriver_version)
logging.info("Chromedriver executable copied!")
else:
try:
os.unlink(self.executable_path)
except PermissionError:
if self.force:
self.force_kill_instances(self.executable_path)
return self.auto(force=not self.force)
try:
if self.is_binary_patched():
# assumes already running AND patched
return True
except PermissionError:
pass
# return False
except FileNotFoundError:
pass
release = self.fetch_release_number()
self.version_main = release.version[0]
self.version_full = release
self.unzip_package(self.fetch_package())
return self.patch() return self.patch()
def driver_binary_in_use(self, path: str = None) -> bool: def driver_binary_in_use(self, path: str = None) -> bool:
@@ -202,7 +269,11 @@ class Patcher(object):
def cleanup_unused_files(self): def cleanup_unused_files(self):
p = pathlib.Path(self.data_path) p = pathlib.Path(self.data_path)
items = list(p.glob("*undetected*")) items = list(p.glob("*undetected*"))
print(items) for item in items:
try:
item.unlink()
except:
pass
def patch(self): def patch(self):
self.patch_exe() self.patch_exe()
@@ -214,12 +285,32 @@ class Patcher(object):
:return: version string :return: version string
:rtype: LooseVersion :rtype: LooseVersion
""" """
path = "/latest_release" # Endpoint for old versions of Chromedriver (114 and below)
if self.version_main: if self.is_old_chromedriver:
path += f"_{self.version_main}" path = f"/latest_release_{self.version_main}"
path = path.upper() path = path.upper()
logger.debug("getting release number from %s" % path)
return LooseVersion(urlopen(self.url_repo + path).read().decode())
# Endpoint for new versions of Chromedriver (115+)
if not self.version_main:
# Fetch the latest version
path = "/last-known-good-versions-with-downloads.json"
logger.debug("getting release number from %s" % path)
with urlopen(self.url_repo + path) as conn:
response = conn.read().decode()
last_versions = json.loads(response)
return LooseVersion(last_versions["channels"]["Stable"]["version"])
# Fetch the latest minor version of the major version provided
path = "/latest-versions-per-milestone-with-downloads.json"
logger.debug("getting release number from %s" % path) logger.debug("getting release number from %s" % path)
return LooseVersion(urlopen(self.url_repo + path).read().decode()) with urlopen(self.url_repo + path) as conn:
response = conn.read().decode()
major_versions = json.loads(response)
return LooseVersion(major_versions["milestones"][str(self.version_main)]["version"])
def parse_exe_version(self): def parse_exe_version(self):
with io.open(self.executable_path, "rb") as f: with io.open(self.executable_path, "rb") as f:
@@ -234,10 +325,16 @@ class Patcher(object):
:return: path to downloaded file :return: path to downloaded file
""" """
u = "%s/%s/%s" % (self.url_repo, self.version_full.vstring, self.zip_name) zip_name = f"chromedriver_{self.platform_name}.zip"
logger.debug("downloading from %s" % u) if self.is_old_chromedriver:
# return urlretrieve(u, filename=self.data_path)[0] download_url = "%s/%s/%s" % (self.url_repo, self.version_full.vstring, zip_name)
return urlretrieve(u)[0] else:
zip_name = zip_name.replace("_", "-", 1)
download_url = "https://storage.googleapis.com/chrome-for-testing-public/%s/%s/%s"
download_url %= (self.version_full.vstring, self.platform_name, zip_name)
logger.debug("downloading from %s" % download_url)
return urlretrieve(download_url)[0]
def unzip_package(self, fp): def unzip_package(self, fp):
""" """
@@ -245,6 +342,12 @@ class Patcher(object):
:return: path to unpacked executable :return: path to unpacked executable
""" """
exe_path = self.exe_name
if not self.is_old_chromedriver:
# The new chromedriver unzips into its own folder
zip_name = f"chromedriver-{self.platform_name}"
exe_path = os.path.join(zip_name, self.exe_name)
logger.debug("unzipping %s" % fp) logger.debug("unzipping %s" % fp)
try: try:
os.unlink(self.zip_path) os.unlink(self.zip_path)
@@ -253,10 +356,10 @@ class Patcher(object):
os.makedirs(self.zip_path, mode=0o755, exist_ok=True) os.makedirs(self.zip_path, mode=0o755, exist_ok=True)
with zipfile.ZipFile(fp, mode="r") as zf: with zipfile.ZipFile(fp, mode="r") as zf:
zf.extract(self.exe_name, self.zip_path) zf.extractall(self.zip_path)
os.rename(os.path.join(self.zip_path, self.exe_name), self.executable_path) os.rename(os.path.join(self.zip_path, exe_path), self.executable_path)
os.remove(fp) os.remove(fp)
os.rmdir(self.zip_path) shutil.rmtree
os.chmod(self.executable_path, 0o755) os.chmod(self.executable_path, 0o755)
return self.executable_path return self.executable_path

View File

@@ -5,11 +5,13 @@ import re
import shutil import shutil
import urllib.parse import urllib.parse
import tempfile import tempfile
import sys
from selenium.webdriver.chrome.webdriver import WebDriver from selenium.webdriver.chrome.webdriver import WebDriver
import undetected_chromedriver as uc import undetected_chromedriver as uc
FLARESOLVERR_VERSION = None FLARESOLVERR_VERSION = None
PLATFORM_VERSION = None
CHROME_EXE_PATH = None CHROME_EXE_PATH = None
CHROME_MAJOR_VERSION = None CHROME_MAJOR_VERSION = None
USER_AGENT = None USER_AGENT = None
@@ -37,6 +39,13 @@ def get_flaresolverr_version() -> str:
FLARESOLVERR_VERSION = json.loads(f.read())['version'] FLARESOLVERR_VERSION = json.loads(f.read())['version']
return FLARESOLVERR_VERSION return FLARESOLVERR_VERSION
def get_current_platform() -> str:
global PLATFORM_VERSION
if PLATFORM_VERSION is not None:
return PLATFORM_VERSION
PLATFORM_VERSION = os.name
return PLATFORM_VERSION
def create_proxy_extension(proxy: dict) -> str: def create_proxy_extension(proxy: dict) -> str:
parsed_url = urllib.parse.urlparse(proxy['url']) parsed_url = urllib.parse.urlparse(proxy['url'])
@@ -113,7 +122,7 @@ def create_proxy_extension(proxy: dict) -> str:
def get_webdriver(proxy: dict = None) -> WebDriver: def get_webdriver(proxy: dict = None) -> WebDriver:
global PATCHED_DRIVER_PATH global PATCHED_DRIVER_PATH, USER_AGENT
logging.debug('Launching web browser...') logging.debug('Launching web browser...')
# undetected_chromedriver # undetected_chromedriver
@@ -136,6 +145,14 @@ def get_webdriver(proxy: dict = None) -> WebDriver:
# https://peter.sh/experiments/chromium-command-line-switches/#use-gl # https://peter.sh/experiments/chromium-command-line-switches/#use-gl
options.add_argument('--use-gl=swiftshader') options.add_argument('--use-gl=swiftshader')
language = os.environ.get('LANG', None)
if language is not None:
options.add_argument('--accept-lang=%s' % language)
# Fix for Chrome 117 | https://github.com/FlareSolverr/FlareSolverr/issues/910
if USER_AGENT is not None:
options.add_argument('--user-agent=%s' % USER_AGENT)
proxy_extension_dir = None proxy_extension_dir = None
if proxy and all(key in proxy for key in ['url', 'username', 'password']): if proxy and all(key in proxy for key in ['url', 'username', 'password']):
proxy_extension_dir = create_proxy_extension(proxy) proxy_extension_dir = create_proxy_extension(proxy)
@@ -145,7 +162,7 @@ def get_webdriver(proxy: dict = None) -> WebDriver:
logging.debug("Using webdriver proxy: %s", proxy_url) logging.debug("Using webdriver proxy: %s", proxy_url)
options.add_argument('--proxy-server=%s' % proxy_url) options.add_argument('--proxy-server=%s' % proxy_url)
# note: headless mode is detected (options.headless = True) # note: headless mode is detected (headless = True)
# we launch the browser in head-full mode with the window hidden # we launch the browser in head-full mode with the window hidden
windows_headless = False windows_headless = False
if get_config_headless(): if get_config_headless():
@@ -153,6 +170,10 @@ def get_webdriver(proxy: dict = None) -> WebDriver:
windows_headless = True windows_headless = True
else: else:
start_xvfb_display() start_xvfb_display()
# For normal headless mode:
# options.add_argument('--headless')
options.add_argument("--auto-open-devtools-for-tabs")
# if we are inside the Docker container, we avoid downloading the driver # if we are inside the Docker container, we avoid downloading the driver
driver_exe_path = None driver_exe_path = None
@@ -162,10 +183,6 @@ def get_webdriver(proxy: dict = None) -> WebDriver:
driver_exe_path = "/app/chromedriver" driver_exe_path = "/app/chromedriver"
else: else:
version_main = get_chrome_major_version() version_main = get_chrome_major_version()
# Fix for Chrome 115
# https://github.com/seleniumbase/SeleniumBase/pull/1967
if int(version_main) > 114:
version_main = 114
if PATCHED_DRIVER_PATH is not None: if PATCHED_DRIVER_PATH is not None:
driver_exe_path = PATCHED_DRIVER_PATH driver_exe_path = PATCHED_DRIVER_PATH
@@ -174,9 +191,12 @@ def get_webdriver(proxy: dict = None) -> WebDriver:
# downloads and patches the chromedriver # downloads and patches the chromedriver
# if we don't set driver_executable_path it downloads, patches, and deletes the driver each time # if we don't set driver_executable_path it downloads, patches, and deletes the driver each time
driver = uc.Chrome(options=options, browser_executable_path=browser_executable_path, try:
driver_executable_path=driver_exe_path, version_main=version_main, driver = uc.Chrome(options=options, browser_executable_path=browser_executable_path,
windows_headless=windows_headless, headless=windows_headless) driver_executable_path=driver_exe_path, version_main=version_main,
windows_headless=windows_headless, headless=get_config_headless())
except Exception as e:
logging.error("Error starting Chrome: %s" % e)
# save the patched driver to avoid re-downloads # save the patched driver to avoid re-downloads
if driver_exe_path is None: if driver_exe_path is None:
@@ -295,11 +315,15 @@ def get_user_agent(driver=None) -> str:
if driver is None: if driver is None:
driver = get_webdriver() driver = get_webdriver()
USER_AGENT = driver.execute_script("return navigator.userAgent") USER_AGENT = driver.execute_script("return navigator.userAgent")
# Fix for Chrome 117 | https://github.com/FlareSolverr/FlareSolverr/issues/910
USER_AGENT = re.sub('HEADLESS', '', USER_AGENT, flags=re.IGNORECASE)
return USER_AGENT return USER_AGENT
except Exception as e: except Exception as e:
raise Exception("Error getting browser User-Agent. " + str(e)) raise Exception("Error getting browser User-Agent. " + str(e))
finally: finally:
if driver is not None: if driver is not None:
if PLATFORM_VERSION == "nt":
driver.close()
driver.quit() driver.quit()