mirror of
https://github.com/FlareSolverr/FlareSolverr.git
synced 2025-12-05 17:18:19 +01:00
Compare commits
18 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ab5f14d6c3 | ||
|
|
e0bf02fb8b | ||
|
|
82a1cd835a | ||
|
|
7017715e21 | ||
|
|
ae18559db1 | ||
|
|
2680521008 | ||
|
|
2297bab185 | ||
|
|
8d9bac9dd4 | ||
|
|
30ccf18e85 | ||
|
|
a15d041a0c | ||
|
|
c6c74e7c9d | ||
|
|
49fd1aacfc | ||
|
|
f6879c70de | ||
|
|
24f59a39cb | ||
|
|
4d16105176 | ||
|
|
5957b7b3bc | ||
|
|
8de16058d0 | ||
|
|
5fc4f966a5 |
2
.github/workflows/autotag.yml
vendored
2
.github/workflows/autotag.yml
vendored
@@ -6,7 +6,7 @@ on:
|
||||
- "master"
|
||||
|
||||
jobs:
|
||||
build:
|
||||
tag-release:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
-
|
||||
|
||||
4
.github/workflows/release-docker.yml
vendored
4
.github/workflows/release-docker.yml
vendored
@@ -6,8 +6,8 @@ on:
|
||||
- 'v*.*.*'
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
build-docker-images:
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
-
|
||||
name: Checkout
|
||||
|
||||
70
.github/workflows/release.yml
vendored
70
.github/workflows/release.yml
vendored
@@ -6,26 +6,15 @@ on:
|
||||
- 'v*.*.*'
|
||||
|
||||
jobs:
|
||||
build:
|
||||
create-release:
|
||||
name: Create release
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
fetch-depth: 0 # get all commits, branches and tags (required for the changelog)
|
||||
|
||||
- name: Setup Node
|
||||
uses: actions/setup-node@v3
|
||||
with:
|
||||
node-version: '16'
|
||||
|
||||
- name: Build artifacts
|
||||
run: |
|
||||
npm install
|
||||
npm run build
|
||||
npm run package
|
||||
|
||||
- name: Build changelog
|
||||
id: github_changelog
|
||||
run: |
|
||||
@@ -47,9 +36,60 @@ jobs:
|
||||
draft: false
|
||||
prerelease: false
|
||||
|
||||
build-linux-package:
|
||||
name: Build Linux binary
|
||||
needs: create-release
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
fetch-depth: 0 # get all commits, branches and tags (required for the changelog)
|
||||
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Build artifacts
|
||||
run: |
|
||||
python -m pip install -r requirements.txt
|
||||
python -m pip install pyinstaller==5.9.0
|
||||
cd src
|
||||
python build_package.py
|
||||
|
||||
- name: Upload release artifacts
|
||||
uses: alexellis/upload-assets@0.2.2
|
||||
uses: alexellis/upload-assets@0.4.0
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GH_PAT }}
|
||||
with:
|
||||
asset_paths: '["./bin/*.zip"]'
|
||||
asset_paths: '["./dist/flaresolverr_*"]'
|
||||
|
||||
build-windows-package:
|
||||
name: Build Windows binary
|
||||
needs: create-release
|
||||
runs-on: windows-2022
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
fetch-depth: 0 # get all commits, branches and tags (required for the changelog)
|
||||
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Build artifacts
|
||||
run: |
|
||||
python -m pip install -r requirements.txt
|
||||
python -m pip install pyinstaller==5.9.0
|
||||
cd src
|
||||
python build_package.py
|
||||
|
||||
- name: Upload release artifacts
|
||||
uses: alexellis/upload-assets@0.4.0
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GH_PAT }}
|
||||
with:
|
||||
asset_paths: '["./dist/flaresolverr_*"]'
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -25,6 +25,7 @@ __pycache__/
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
dist_chrome/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
|
||||
21
CHANGELOG.md
21
CHANGELOG.md
@@ -1,6 +1,25 @@
|
||||
# Changelog
|
||||
|
||||
## v3.0.4 (2023/03/07
|
||||
## v3.1.1 (2023/03/25)
|
||||
|
||||
* Distribute binary executables in compressed package
|
||||
* Add icon for binary executable
|
||||
* Include information about supported architectures in the readme
|
||||
* Check Python version on start
|
||||
|
||||
## v3.1.0 (2023/03/20)
|
||||
|
||||
* Build binaries for Linux x64 and Windows x64
|
||||
* Sessions with auto-creation on fetch request and TTL
|
||||
* Fix error trace: Crash Reports/pending No such file or directory
|
||||
* Fix Waitress server error with asyncore_use_poll=true
|
||||
* Attempt to fix Docker ARM32 build
|
||||
* Print platform information on start up
|
||||
* Add Fairlane challenge selector
|
||||
* Update DDOS-GUARD title
|
||||
* Update dependencies
|
||||
|
||||
## v3.0.4 (2023/03/07)
|
||||
|
||||
* Click on the Cloudflare's 'Verify you are human' button if necessary
|
||||
|
||||
|
||||
10
Dockerfile
10
Dockerfile
@@ -30,7 +30,7 @@ RUN dpkg -i /libgl1-mesa-dri.deb \
|
||||
# Install dependencies
|
||||
&& apt-get update \
|
||||
&& apt-get install -y --no-install-recommends chromium chromium-common chromium-driver xvfb dumb-init \
|
||||
procps curl vim \
|
||||
procps curl vim xauth \
|
||||
# Remove temporary files and hardware decoding libraries
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& rm -f /usr/lib/x86_64-linux-gnu/libmfxhw* \
|
||||
@@ -48,6 +48,8 @@ RUN pip install -r requirements.txt \
|
||||
|
||||
USER flaresolverr
|
||||
|
||||
RUN mkdir -p "/app/.config/chromium/Crash Reports/pending"
|
||||
|
||||
COPY src .
|
||||
COPY package.json ../
|
||||
|
||||
@@ -67,3 +69,9 @@ CMD ["/usr/local/bin/python", "-u", "/app/flaresolverr.py"]
|
||||
# docker buildx create --use
|
||||
# docker buildx build -t ngosang/flaresolverr:3.0.0 --platform linux/386,linux/amd64,linux/arm/v7,linux/arm64/v8 .
|
||||
# add --push to publish in DockerHub
|
||||
|
||||
# Test multi-arch build
|
||||
# docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
|
||||
# docker buildx create --use
|
||||
# docker buildx build -t ngosang/flaresolverr:3.0.0 --platform linux/arm/v7 --load .
|
||||
# docker run -p 8191:8191 --platform linux/arm/v7 ngosang/flaresolverr:3.0.0
|
||||
|
||||
40
README.md
40
README.md
@@ -64,13 +64,20 @@ Remember to restart the Docker daemon and the container after the update.
|
||||
|
||||
### Precompiled binaries
|
||||
|
||||
Precompiled binaries are not currently available for v3. Please see https://github.com/FlareSolverr/FlareSolverr/issues/660 for updates,
|
||||
or below for instructions of how to build FlareSolverr from source code.
|
||||
> **Warning**
|
||||
> Precompiled binaries are only available for x64 architecture. For other architectures see Docker images.
|
||||
|
||||
This is the recommended way for Windows users.
|
||||
* Download the [FlareSolverr executable](https://github.com/FlareSolverr/FlareSolverr/releases) from the releases page. It is available for Windows x64 and Linux x64.
|
||||
* Execute FlareSolverr binary. In the environment variables section you can find how to change the configuration.
|
||||
|
||||
### From source code
|
||||
|
||||
* Install [Python 3.10](https://www.python.org/downloads/).
|
||||
* Install [Chrome](https://www.google.com/intl/en_us/chrome/) or [Chromium](https://www.chromium.org/getting-involved/download-chromium/) web browser.
|
||||
> **Warning**
|
||||
> Installing from source code only works for x64 architecture. For other architectures see Docker images.
|
||||
|
||||
* Install [Python 3.11](https://www.python.org/downloads/).
|
||||
* Install [Chrome](https://www.google.com/intl/en_us/chrome/) (all OS) or [Chromium](https://www.chromium.org/getting-involved/download-chromium/) (just Linux, it doesn't work in Windows) web browser.
|
||||
* (Only in Linux / macOS) Install [Xvfb](https://en.wikipedia.org/wiki/Xvfb) package.
|
||||
* Clone this repository and open a shell in that path.
|
||||
* Run `pip install -r requirements.txt` command to install FlareSolverr dependencies.
|
||||
@@ -137,16 +144,18 @@ session. When you no longer need to use a session you should make sure to close
|
||||
|
||||
#### + `request.get`
|
||||
|
||||
| Parameter | Notes |
|
||||
|-------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| url | Mandatory |
|
||||
| session | Optional. Will send the request from and existing browser instance. If one is not sent it will create a temporary instance that will be destroyed immediately after the request is completed. |
|
||||
| maxTimeout | Optional, default value 60000. Max timeout to solve the challenge in milliseconds. |
|
||||
| cookies | Optional. Will be used by the headless browser. Follow [this](https://github.com/puppeteer/puppeteer/blob/v3.3.0/docs/api.md#pagesetcookiecookies) format. |
|
||||
| returnOnlyCookies | Optional, default false. Only returns the cookies. Response data, headers and other parts of the response are removed. |
|
||||
| proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. You must include the proxy schema in the URL: `http://`, `socks4://` or `socks5://`. Authorization (username/password) is not supported. (When the `session` parameter is set, the proxy is ignored; a session specific proxy can be set in `sessions.create`.) |
|
||||
| Parameter | Notes |
|
||||
|---------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| url | Mandatory |
|
||||
| session | Optional. Will send the request from an existing browser instance. If one is not sent it will create a temporary instance that will be destroyed immediately after the request is completed. |
|
||||
| session_ttl_minutes | Optional. FlareSolverr will automatically rotate expired sessions based on the TTL provided in minutes. |
|
||||
| maxTimeout | Optional, default value 60000. Max timeout to solve the challenge in milliseconds. |
|
||||
| cookies | Optional. Will be used by the headless browser. Follow [this](https://github.com/puppeteer/puppeteer/blob/v3.3.0/docs/api.md#pagesetcookiecookies) format. |
|
||||
| returnOnlyCookies | Optional, default false. Only returns the cookies. Response data, headers and other parts of the response are removed. |
|
||||
| proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. You must include the proxy schema in the URL: `http://`, `socks4://` or `socks5://`. Authorization (username/password) is not supported. (When the `session` parameter is set, the proxy is ignored; a session specific proxy can be set in `sessions.create`.) |
|
||||
|
||||
:warning: If you want to use Cloudflare clearance cookie in your scripts, make sure you use the FlareSolverr User-Agent too. If they don't match you will see the challenge.
|
||||
> **Warning**
|
||||
> If you want to use the Cloudflare clearance cookie in your scripts, make sure you also use the FlareSolverr User-Agent. If they don't match, you will see the challenge.
|
||||
|
||||
Example response from running the `curl` above:
|
||||
|
||||
@@ -236,13 +245,14 @@ Environment variables are set differently depending on the operating system. Som
|
||||
|
||||
## Captcha Solvers
|
||||
|
||||
:warning: At this time none of the captcha solvers work. You can check the status in the open issues. Any help is welcome.
|
||||
> **Warning**
|
||||
> At this time none of the captcha solvers work. You can check the status in the open issues. Any help is welcome.
|
||||
|
||||
Sometimes CloudFlare not only gives mathematical computations and browser tests, sometimes they also require the user to
|
||||
solve a captcha.
|
||||
If this is the case, FlareSolverr will return the error `Captcha detected but no automatic solver is configured.`
|
||||
|
||||
FlareSolverr can be customized to solve the captchas automatically by setting the environment variable `CAPTCHA_SOLVER`
|
||||
FlareSolverr can be customized to solve the CAPTCHA automatically by setting the environment variable `CAPTCHA_SOLVER`
|
||||
to the file name of one of the adapters inside the [/captcha](src/captcha) directory.
|
||||
|
||||
## Related projects
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "flaresolverr",
|
||||
"version": "3.0.4",
|
||||
"version": "3.1.1",
|
||||
"description": "Proxy server to bypass Cloudflare protection",
|
||||
"author": "Diego Heras (ngosang / ngosang@hotmail.es)",
|
||||
"license": "MIT"
|
||||
|
||||
@@ -1,9 +1,12 @@
|
||||
bottle==0.12.23
|
||||
bottle==0.12.25
|
||||
waitress==2.1.2
|
||||
selenium==4.7.2
|
||||
selenium==4.8.2
|
||||
func-timeout==4.3.5
|
||||
# required by undetected_chromedriver
|
||||
requests==2.28.1
|
||||
requests==2.28.2
|
||||
certifi==2022.12.7
|
||||
websockets==10.4
|
||||
# only required for linux
|
||||
xvfbwrapper==0.2.9
|
||||
# only required for windows
|
||||
pefile==2023.2.7
|
||||
|
||||
BIN
resources/flaresolverr_logo.ico
Normal file
BIN
resources/flaresolverr_logo.ico
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 8.8 KiB |
94
src/build_package.py
Normal file
94
src/build_package.py
Normal file
@@ -0,0 +1,94 @@
|
||||
import os
|
||||
import platform
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import zipfile
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
def clean_files():
    """Delete the output folders of a previous build.

    Removes the ``build``, ``dist`` and ``dist_chrome`` folders that live in
    the parent folder of this script. Any error raised while removing a folder
    (for instance because it does not exist) is ignored, so the function never
    aborts the build.
    """
    parent_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir)
    for folder_name in ('build', 'dist', 'dist_chrome'):
        try:
            shutil.rmtree(os.path.join(parent_folder, folder_name))
        except Exception:
            # best effort: a folder that cannot be removed is simply skipped
            pass
|
||||
|
||||
|
||||
def download_chromium():
    """Download a pinned Chromium snapshot and unpack it into ``dist_chrome/chrome``.

    The snapshot revision, architecture and archive name depend on
    ``os.name`` (Windows vs. everything else). The zip file is streamed to
    ``dist_chrome``, extracted, deleted, and the extracted folder is renamed
    to ``chrome``.

    Raises:
        requests.HTTPError: if the download URL answers with an error status.
        requests.Timeout: if the server stops responding during the download.
        FileExistsError: if ``dist_chrome`` already exists.
    """
    # https://commondatastorage.googleapis.com/chromium-browser-snapshots/index.html?prefix=Linux_x64/
    revision = "1090006" if os.name == 'nt' else '1090007'
    arch = 'Win' if os.name == 'nt' else 'Linux_x64'
    dl_file = 'chrome-win' if os.name == 'nt' else 'chrome-linux'
    dl_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'dist_chrome')
    dl_path_folder = os.path.join(dl_path, dl_file)
    dl_path_zip = dl_path_folder + '.zip'

    # response = requests.get(
    #     f'https://commondatastorage.googleapis.com/chromium-browser-snapshots/{arch}/LAST_CHANGE',
    #     timeout=30)
    # revision = response.text.strip()
    print("Downloading revision: " + revision)

    os.mkdir(dl_path)
    # (connect, read) timeouts: without them a stalled connection blocks the build forever
    with requests.get(
            f'https://commondatastorage.googleapis.com/chromium-browser-snapshots/{arch}/{revision}/{dl_file}.zip',
            stream=True, timeout=(30, 120)) as r:
        r.raise_for_status()
        with open(dl_path_zip, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    print("File downloaded: " + dl_path_zip)

    with zipfile.ZipFile(dl_path_zip, 'r') as zip_ref:
        for member in zip_ref.infolist():
            extracted_path = zip_ref.extract(member, dl_path)
            # zipfile does not restore Unix permission bits on extraction, so the
            # browser binaries would lose their executable flag; re-apply the mode
            # stored in the archive (upper 16 bits of external_attr) when present.
            mode = (member.external_attr >> 16) & 0o777
            if os.name != 'nt' and mode:
                os.chmod(extracted_path, mode)
    os.remove(dl_path_zip)
    shutil.move(dl_path_folder, os.path.join(dl_path, "chrome"))
|
||||
|
||||
|
||||
def run_pyinstaller():
    """Run PyInstaller on ``src/flaresolverr.py`` from the repository root.

    ``package.json`` and the downloaded ``dist_chrome/chrome`` folder are
    bundled as data files, and the logo in ``resources`` is used as the icon.

    Raises:
        subprocess.CalledProcessError: if PyInstaller exits with a non-zero status.
    """
    # Resolve the repository root from this file (like the other build steps do)
    # instead of relying on the current working directory being ``src``.
    repo_root = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir)
    # --add-data expects "SRC<sep>DEST" where <sep> is the platform path separator
    sep = os.pathsep
    subprocess.check_call([sys.executable, "-m", "PyInstaller",
                           "--icon", "resources/flaresolverr_logo.ico",
                           "--add-data", f"package.json{sep}.",
                           "--add-data", f"{os.path.join('dist_chrome', 'chrome')}{sep}chrome",
                           os.path.join("src", "flaresolverr.py")],
                          cwd=repo_root)
|
||||
|
||||
|
||||
def compress_package():
    """Compress the PyInstaller output folder into a distributable archive.

    The ``dist/flaresolverr`` folder is packed into
    ``dist/flaresolverr_windows_x64.zip`` on Windows or
    ``dist/flaresolverr_linux_x64.tar.gz`` elsewhere. Inside the archive the
    files live under a top-level ``flaresolverr`` folder.
    """
    dist_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'dist')
    exe_folder_name = 'flaresolverr'
    exe_folder = os.path.join(dist_folder, exe_folder_name)
    print("Executable folder: " + exe_folder)

    compr_format = 'zip' if os.name == 'nt' else 'gztar'
    compr_file_name = 'flaresolverr_windows_x64' if os.name == 'nt' else 'flaresolverr_linux_x64'
    compr_file_path = os.path.join(dist_folder, compr_file_name)
    # The archive is created inside dist_folder, which is also the root of the
    # archive. Restricting base_dir to the executable folder keeps the archive
    # from picking up itself or any other file lying in dist_folder.
    archive_path = shutil.make_archive(compr_file_path, compr_format,
                                       root_dir=dist_folder, base_dir=exe_folder_name)
    # make_archive returns the real file name, including the extension
    print("Compressed file path: " + archive_path)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Entry point of the build script: announce each step, then run it, in order.
    print("Building package...")
    print("Platform: " + platform.platform())

    build_steps = (
        ("Cleaning previous build...", clean_files),
        ("Downloading Chromium...", download_chromium),
        ("Building pyinstaller executable... ", run_pyinstaller),
        ("Compressing package... ", compress_package),
    )
    for announcement, build_step in build_steps:
        print(announcement)
        build_step()
|
||||
|
||||
# NOTE: python -m pip install pyinstaller
|
||||
@@ -33,6 +33,7 @@ class V1RequestBase(object):
|
||||
maxTimeout: int = None
|
||||
proxy: dict = None
|
||||
session: str = None
|
||||
session_ttl_minutes: int = None
|
||||
headers: list = None # deprecated v2.0.0, not used
|
||||
userAgent: str = None # deprecated v2.0.0, not used
|
||||
|
||||
@@ -51,6 +52,8 @@ class V1ResponseBase(object):
|
||||
# V1ResponseBase
|
||||
status: str = None
|
||||
message: str = None
|
||||
session: str = None
|
||||
sessions: list[str] = None
|
||||
startTimestamp: int = None
|
||||
endTimestamp: int = None
|
||||
version: str = None
|
||||
|
||||
@@ -3,11 +3,12 @@ import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
from bottle import run, response, Bottle, request
|
||||
import certifi
|
||||
from bottle import run, response, Bottle, request, ServerAdapter
|
||||
|
||||
from bottle_plugins.error_plugin import error_plugin
|
||||
from bottle_plugins.logger_plugin import logger_plugin
|
||||
from dtos import IndexResponse, V1RequestBase
|
||||
from dtos import V1RequestBase
|
||||
import flaresolverr_service
|
||||
import utils
|
||||
|
||||
@@ -60,6 +61,16 @@ def controller_v1():
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# check python version
|
||||
if sys.version_info < (3, 9):
|
||||
raise Exception("The Python version is less than 3.9, a version equal to or higher is required.")
|
||||
|
||||
# fix ssl certificates for compiled binaries
|
||||
# https://github.com/pyinstaller/pyinstaller/issues/7229
|
||||
# https://stackoverflow.com/questions/55736855/how-to-change-the-cafile-argument-in-the-ssl-module-in-python3
|
||||
os.environ["REQUESTS_CA_BUNDLE"] = certifi.where()
|
||||
os.environ["SSL_CERT_FILE"] = certifi.where()
|
||||
|
||||
# validate configuration
|
||||
log_level = os.environ.get('LOG_LEVEL', 'info').upper()
|
||||
log_html = utils.get_config_log_html()
|
||||
@@ -92,4 +103,10 @@ if __name__ == "__main__":
|
||||
|
||||
# start webserver
|
||||
# default server 'wsgiref' does not support concurrent requests
|
||||
run(app, host=server_host, port=server_port, quiet=True, server='waitress')
|
||||
# https://github.com/FlareSolverr/FlareSolverr/issues/680
|
||||
# https://github.com/Pylons/waitress/issues/31
|
||||
class WaitressServerPoll(ServerAdapter):
|
||||
def run(self, handler):
|
||||
from waitress import serve
|
||||
serve(handler, host=self.host, port=self.port, asyncore_use_poll=True)
|
||||
run(app, host=server_host, port=server_port, quiet=True, server=WaitressServerPoll)
|
||||
|
||||
@@ -1,19 +1,24 @@
|
||||
import logging
|
||||
import platform
|
||||
import sys
|
||||
import time
|
||||
from datetime import timedelta
|
||||
from urllib.parse import unquote
|
||||
|
||||
from func_timeout import func_timeout, FunctionTimedOut
|
||||
from func_timeout import FunctionTimedOut, func_timeout
|
||||
from selenium.common import TimeoutException
|
||||
from selenium.webdriver.chrome.webdriver import WebDriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.support.expected_conditions import (
|
||||
presence_of_element_located, staleness_of, title_is)
|
||||
from selenium.webdriver.common.action_chains import ActionChains
|
||||
from selenium.webdriver.support.wait import WebDriverWait
|
||||
from selenium.webdriver.support.expected_conditions import presence_of_element_located, staleness_of, title_is
|
||||
|
||||
from dtos import V1RequestBase, V1ResponseBase, ChallengeResolutionT, ChallengeResolutionResultT, IndexResponse, \
|
||||
HealthResponse, STATUS_OK, STATUS_ERROR
|
||||
import utils
|
||||
from dtos import (STATUS_ERROR, STATUS_OK, ChallengeResolutionResultT,
|
||||
ChallengeResolutionT, HealthResponse, IndexResponse,
|
||||
V1RequestBase, V1ResponseBase)
|
||||
from sessions import SessionsStorage
|
||||
|
||||
ACCESS_DENIED_TITLES = [
|
||||
# Cloudflare
|
||||
@@ -31,19 +36,23 @@ CHALLENGE_TITLES = [
|
||||
# Cloudflare
|
||||
'Just a moment...',
|
||||
# DDoS-GUARD
|
||||
'DDOS-GUARD',
|
||||
'DDoS-Guard'
|
||||
]
|
||||
CHALLENGE_SELECTORS = [
|
||||
# Cloudflare
|
||||
'#cf-challenge-running', '.ray_id', '.attack-box', '#cf-please-wait', '#challenge-spinner', '#trk_jschal_js',
|
||||
# Custom CloudFlare for EbookParadijs, Film-Paleis, MuziekFabriek and Puur-Hollands
|
||||
'td.info #js_info'
|
||||
'td.info #js_info',
|
||||
# Fairlane / pararius.com
|
||||
'div.vc div.text-box h2'
|
||||
]
|
||||
SHORT_TIMEOUT = 10
|
||||
SESSIONS_STORAGE = SessionsStorage()
|
||||
|
||||
|
||||
def test_browser_installation():
|
||||
logging.info("Testing web browser installation...")
|
||||
logging.info("Platform: " + platform.platform())
|
||||
|
||||
chrome_exe_path = utils.get_chrome_exe_path()
|
||||
if chrome_exe_path is None:
|
||||
@@ -59,9 +68,10 @@ def test_browser_installation():
|
||||
else:
|
||||
logging.info("Chrome / Chromium major version: " + chrome_major_version)
|
||||
|
||||
logging.info("Launching web browser...")
|
||||
user_agent = utils.get_user_agent()
|
||||
logging.info("FlareSolverr User-Agent: " + user_agent)
|
||||
logging.info("Test successful")
|
||||
logging.info("Test successful!")
|
||||
|
||||
|
||||
def index_endpoint() -> IndexResponse:
|
||||
@@ -115,11 +125,11 @@ def _controller_v1_handler(req: V1RequestBase) -> V1ResponseBase:
|
||||
# execute the command
|
||||
res: V1ResponseBase
|
||||
if req.cmd == 'sessions.create':
|
||||
raise Exception("Not implemented yet.")
|
||||
res = _cmd_sessions_create(req)
|
||||
elif req.cmd == 'sessions.list':
|
||||
raise Exception("Not implemented yet.")
|
||||
res = _cmd_sessions_list(req)
|
||||
elif req.cmd == 'sessions.destroy':
|
||||
raise Exception("Not implemented yet.")
|
||||
res = _cmd_sessions_destroy(req)
|
||||
elif req.cmd == 'request.get':
|
||||
res = _cmd_request_get(req)
|
||||
elif req.cmd == 'request.post':
|
||||
@@ -166,19 +176,77 @@ def _cmd_request_post(req: V1RequestBase) -> V1ResponseBase:
|
||||
return res
|
||||
|
||||
|
||||
def _cmd_sessions_create(req: V1RequestBase) -> V1ResponseBase:
    """Handle the ``sessions.create`` command.

    Asks the sessions storage for a session with the id given in
    ``req.session`` and reports whether it was newly created or already
    existed. The id of the session is returned in the response.
    """
    logging.debug("Creating new session...")

    session, fresh = SESSIONS_STORAGE.create(session_id=req.session)

    # the only difference between both outcomes is the message
    message = "Session created successfully." if fresh else "Session already exists."
    return V1ResponseBase({
        "status": STATUS_OK,
        "message": message,
        "session": session.session_id
    })
|
||||
|
||||
|
||||
def _cmd_sessions_list(req: V1RequestBase) -> V1ResponseBase:
    """Handle the ``sessions.list`` command.

    Returns an OK response whose ``sessions`` field holds the ids currently
    known to the sessions storage. ``req`` is not inspected.
    """
    payload = {
        "status": STATUS_OK,
        "message": "",
        "sessions": SESSIONS_STORAGE.session_ids()
    }
    return V1ResponseBase(payload)
|
||||
|
||||
|
||||
def _cmd_sessions_destroy(req: V1RequestBase) -> V1ResponseBase:
    """Handle the ``sessions.destroy`` command.

    Destroys the session whose id is ``req.session``.

    Raises:
        Exception: if the sessions storage reports that the session did not exist.
    """
    if SESSIONS_STORAGE.destroy(req.session):
        return V1ResponseBase({
            "status": STATUS_OK,
            "message": "The session has been removed."
        })

    raise Exception("The session doesn't exist.")
|
||||
|
||||
|
||||
def _resolve_challenge(req: V1RequestBase, method: str) -> ChallengeResolutionT:
|
||||
timeout = req.maxTimeout / 1000
|
||||
driver = None
|
||||
try:
|
||||
driver = utils.get_webdriver()
|
||||
if req.session:
|
||||
session_id = req.session
|
||||
ttl = timedelta(minutes=req.session_ttl_minutes) if req.session_ttl_minutes else None
|
||||
session, fresh = SESSIONS_STORAGE.get(session_id, ttl)
|
||||
|
||||
if fresh:
|
||||
logging.debug(f"new session created to perform the request (session_id={session_id})")
|
||||
else:
|
||||
logging.debug(f"existing session is used to perform the request (session_id={session_id}, "
|
||||
f"lifetime={str(session.lifetime())}, ttl={str(ttl)})")
|
||||
|
||||
driver = session.driver
|
||||
else:
|
||||
driver = utils.get_webdriver()
|
||||
logging.debug('New instance of webdriver has been created to perform the request')
|
||||
return func_timeout(timeout, _evil_logic, (req, driver, method))
|
||||
except FunctionTimedOut:
|
||||
raise Exception(f'Error solving the challenge. Timeout after {timeout} seconds.')
|
||||
except Exception as e:
|
||||
raise Exception('Error solving the challenge. ' + str(e))
|
||||
finally:
|
||||
if driver is not None:
|
||||
if not req.session and driver is not None:
|
||||
driver.quit()
|
||||
logging.debug('A used instance of webdriver has been destroyed')
|
||||
|
||||
|
||||
def click_verify(driver: WebDriver):
|
||||
@@ -196,9 +264,8 @@ def click_verify(driver: WebDriver):
|
||||
actions.click(checkbox)
|
||||
actions.perform()
|
||||
logging.debug("Cloudflare verify checkbox found and clicked")
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
logging.debug("Cloudflare verify checkbox not found on the page")
|
||||
# print(e)
|
||||
finally:
|
||||
driver.switch_to.default_content()
|
||||
|
||||
@@ -220,6 +287,7 @@ def click_verify(driver: WebDriver):
|
||||
|
||||
time.sleep(2)
|
||||
|
||||
|
||||
def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> ChallengeResolutionT:
|
||||
res = ChallengeResolutionT({})
|
||||
res.status = STATUS_OK
|
||||
@@ -253,9 +321,9 @@ def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> Challenge
|
||||
# find challenge by title
|
||||
challenge_found = False
|
||||
for title in CHALLENGE_TITLES:
|
||||
if title == page_title:
|
||||
if title.lower() == page_title.lower():
|
||||
challenge_found = True
|
||||
logging.info("Challenge detected. Title found: " + title)
|
||||
logging.info("Challenge detected. Title found: " + page_title)
|
||||
break
|
||||
if not challenge_found:
|
||||
# find challenge by selectors
|
||||
|
||||
81
src/sessions.py
Normal file
81
src/sessions.py
Normal file
@@ -0,0 +1,81 @@
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Optional, Tuple
|
||||
from uuid import uuid1
|
||||
|
||||
from selenium.webdriver.chrome.webdriver import WebDriver
|
||||
|
||||
import utils
|
||||
|
||||
|
||||
@dataclass
class Session:
    """A browser session: a WebDriver instance bound to a session id."""

    # identifier under which the session is stored
    session_id: str
    # browser instance that belongs to the session
    driver: WebDriver
    # moment at which the session was created
    created_at: datetime

    def lifetime(self) -> timedelta:
        """Return the time elapsed between ``created_at`` and now."""
        now = datetime.now()
        return now - self.created_at
|
||||
|
||||
|
||||
class SessionsStorage:
    """Creates, stores and manages all the browser sessions, keyed by session id."""

    def __init__(self):
        # session_id -> Session
        self.sessions = {}

    def create(self, session_id: Optional[str] = None, force_new: Optional[bool] = False) -> Tuple[Session, bool]:
        """Return the session for ``session_id``, creating it when necessary.

        When ``session_id`` is empty a new id is generated. When ``force_new``
        is true any existing session with that id is destroyed first.

        The call is idempotent: if the id is already stored, no new WebDriver
        is started and the stored session is returned.

        Returns:
            A ``(session, fresh)`` tuple; ``fresh`` is True when a new session
            has been created and False when an existing one was reused.
        """
        if not session_id:
            session_id = str(uuid1())

        if force_new:
            self.destroy(session_id)

        if not self.exists(session_id):
            # the driver is started first, then the creation time is taken
            new_session = Session(session_id, utils.get_webdriver(), datetime.now())
            self.sessions[session_id] = new_session
            return new_session, True

        return self.sessions[session_id], False

    def exists(self, session_id: str) -> bool:
        """Tell whether a session with the given id is stored."""
        return session_id in self.sessions

    def destroy(self, session_id: str) -> bool:
        """Quit the session's driver and remove the session from the storage.

        Returns:
            True if the session was found and destroyed, False (and nothing
            else happens) if ``session_id`` is unknown.
        """
        if self.exists(session_id):
            # the session is removed from the storage before its driver is quit
            self.sessions.pop(session_id).driver.quit()
            return True

        return False

    def get(self, session_id: str, ttl: Optional[timedelta] = None) -> Tuple[Session, bool]:
        """Return the session for ``session_id``, recreating it when it outlived ``ttl``.

        The session is created if it does not exist yet. An already existing
        session whose lifetime is greater than ``ttl`` is destroyed and
        replaced by a new one. With ``ttl=None`` sessions never expire.

        Returns:
            A ``(session, fresh)`` tuple with the same meaning as in ``create``.
        """
        session, fresh = self.create(session_id)

        expired = ttl is not None and not fresh and session.lifetime() > ttl
        if not expired:
            return session, fresh

        logging.debug(f'session\'s lifetime has expired, so the session is recreated (session_id={session_id})')
        return self.create(session_id, force_new=True)

    def session_ids(self) -> list[str]:
        """Return the ids of all stored sessions."""
        return list(self.sessions)
|
||||
112
src/tests.py
112
src/tests.py
@@ -23,6 +23,7 @@ class TestFlareSolverr(unittest.TestCase):
|
||||
cloudflare_url = "https://nowsecure.nl"
|
||||
cloudflare_url_2 = "https://idope.se/torrent-list/harry/"
|
||||
ddos_guard_url = "https://anidex.info/"
|
||||
fairlane_url = "https://www.pararius.com/apartments/amsterdam"
|
||||
custom_cloudflare_url = "https://www.muziekfabriek.org"
|
||||
cloudflare_blocked_url = "https://cpasbiens3.fr/index.php?do=search&subaction=search"
|
||||
|
||||
@@ -166,6 +167,32 @@ class TestFlareSolverr(unittest.TestCase):
|
||||
self.assertIsNotNone(cf_cookie, "DDOS-Guard cookie not found")
|
||||
self.assertGreater(len(cf_cookie["value"]), 10)
|
||||
|
||||
def test_v1_endpoint_request_get_fairlane_js(self):
    # 'request.get' against the Fairlane protected URL must solve the challenge
    res = self.app.post_json('/v1', {
        "cmd": "request.get",
        "url": self.fairlane_url
    })
    self.assertEqual(res.status_code, 200)

    body = V1ResponseBase(res.json)
    self.assertEqual(STATUS_OK, body.status)
    self.assertEqual("Challenge solved!", body.message)
    self.assertGreater(body.startTimestamp, 10000)
    self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
    self.assertEqual(utils.get_flaresolverr_version(), body.version)

    solution = body.solution
    self.assertIn(self.fairlane_url, solution.url)
    self.assertEqual(solution.status, 200)
    # compare by value: identity of int objects is an interpreter detail
    self.assertEqual(len(solution.headers), 0)
    self.assertIn("<title>Rental Apartments Amsterdam</title>", solution.response)
    self.assertGreater(len(solution.cookies), 0)
    self.assertIn("Chrome/", solution.userAgent)

    # the challenge cookie must be present and look like a real token
    cf_cookie = _find_obj_by_key("name", "fl_pass_v2_b", solution.cookies)
    self.assertIsNotNone(cf_cookie, "Fairlane cookie not found")
    self.assertGreater(len(cf_cookie["value"]), 50)
|
||||
|
||||
def test_v1_endpoint_request_get_custom_cloudflare_js(self):
|
||||
res = self.app.post_json('/v1', {
|
||||
"cmd": "request.get",
|
||||
@@ -351,12 +378,85 @@ class TestFlareSolverr(unittest.TestCase):
|
||||
self.assertEqual(STATUS_OK, body.status)
|
||||
self.assertEqual("Challenge not detected!", body.message)
|
||||
|
||||
# todo: test Cmd 'sessions.create' should return OK
|
||||
# todo: test Cmd 'sessions.create' should return OK with session
|
||||
# todo: test Cmd 'sessions.list' should return OK
|
||||
# todo: test Cmd 'sessions.destroy' should return OK
|
||||
# todo: test Cmd 'sessions.destroy' should fail
|
||||
# todo: test Cmd 'request.get' should use session
|
||||
def test_v1_endpoint_sessions_create_without_session(self):
    """'sessions.create' without a session name should return OK and a session id."""
    res = self.app.post_json('/v1', {
        "cmd": "sessions.create"
    })
    self.assertEqual(res.status_code, 200)

    body = V1ResponseBase(res.json)
    if body.session is not None:
        # destroy the session afterwards so the test leaves no state behind;
        # expect_errors keeps a failed cleanup from masking the test result
        self.addCleanup(self.app.post_json, '/v1', {
            "cmd": "sessions.destroy",
            "session": body.session
        }, expect_errors=True)

    self.assertEqual(STATUS_OK, body.status)
    self.assertEqual("Session created successfully.", body.message)
    self.assertIsNotNone(body.session)
|
||||
|
||||
def test_v1_endpoint_sessions_create_with_session(self):
    """'sessions.create' with an explicit name should return OK and echo that name."""
    session_name = "test_create_session"
    res = self.app.post_json('/v1', {
        "cmd": "sessions.create",
        "session": session_name
    })
    # destroy the session afterwards so the test leaves no state behind;
    # expect_errors keeps a failed cleanup from masking the test result
    self.addCleanup(self.app.post_json, '/v1', {
        "cmd": "sessions.destroy",
        "session": session_name
    }, expect_errors=True)
    self.assertEqual(res.status_code, 200)

    body = V1ResponseBase(res.json)
    self.assertEqual(STATUS_OK, body.status)
    self.assertEqual("Session created successfully.", body.message)
    self.assertEqual(session_name, body.session)
|
||||
|
||||
def test_v1_endpoint_sessions_list(self):
    """'sessions.list' should return OK and include a session created beforehand."""
    session_name = "test_list_sessions"
    self.app.post_json('/v1', {
        "cmd": "sessions.create",
        "session": session_name
    })
    # destroy the session afterwards so the test leaves no state behind;
    # expect_errors keeps a failed cleanup from masking the test result
    self.addCleanup(self.app.post_json, '/v1', {
        "cmd": "sessions.destroy",
        "session": session_name
    }, expect_errors=True)

    res = self.app.post_json('/v1', {
        "cmd": "sessions.list"
    })
    self.assertEqual(res.status_code, 200)

    body = V1ResponseBase(res.json)
    self.assertEqual(STATUS_OK, body.status)
    self.assertEqual("", body.message)
    self.assertGreaterEqual(len(body.sessions), 1)
    self.assertIn(session_name, body.sessions)
|
||||
|
||||
def test_v1_endpoint_sessions_destroy_existing_session(self):
    """'sessions.destroy' on a session that was just created should return OK."""
    create_payload = {"cmd": "sessions.create", "session": "test_destroy_sessions"}
    destroy_payload = {"cmd": "sessions.destroy", "session": "test_destroy_sessions"}

    # the session has to exist before it can be removed
    self.app.post_json('/v1', create_payload)
    res = self.app.post_json('/v1', destroy_payload)
    self.assertEqual(res.status_code, 200)

    body = V1ResponseBase(res.json)
    self.assertEqual(STATUS_OK, body.status)
    self.assertEqual("The session has been removed.", body.message)
|
||||
|
||||
def test_v1_endpoint_sessions_destroy_non_existing_session(self):
    """'sessions.destroy' on an unknown session should fail with HTTP 500 and an error body."""
    destroy_payload = {"cmd": "sessions.destroy", "session": "non_existing_session_name"}

    # status=500 tells the test client that the error response is expected
    res = self.app.post_json('/v1', destroy_payload, status=500)
    self.assertEqual(res.status_code, 500)

    body = V1ResponseBase(res.json)
    self.assertEqual(STATUS_ERROR, body.status)
    self.assertEqual("Error: The session doesn't exist.", body.message)
|
||||
|
||||
def test_v1_endpoint_request_get_with_session(self):
    """'request.get' should succeed when it is pointed at an existing session."""
    session_name = "test_request_sessions"
    self.app.post_json('/v1', {
        "cmd": "sessions.create",
        "session": session_name
    })
    # destroy the session afterwards so the test leaves no state behind;
    # expect_errors keeps a failed cleanup from masking the test result
    self.addCleanup(self.app.post_json, '/v1', {
        "cmd": "sessions.destroy",
        "session": session_name
    }, expect_errors=True)

    res = self.app.post_json('/v1', {
        "cmd": "request.get",
        "session": session_name,
        "url": self.google_url
    })
    self.assertEqual(res.status_code, 200)

    body = V1ResponseBase(res.json)
    self.assertEqual(STATUS_OK, body.status)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
80
src/utils.py
80
src/utils.py
@@ -8,6 +8,7 @@ from selenium.webdriver.chrome.webdriver import WebDriver
|
||||
import undetected_chromedriver as uc
|
||||
|
||||
FLARESOLVERR_VERSION = None
|
||||
CHROME_EXE_PATH = None
|
||||
CHROME_MAJOR_VERSION = None
|
||||
USER_AGENT = None
|
||||
XVFB_DISPLAY = None
|
||||
@@ -28,6 +29,8 @@ def get_flaresolverr_version() -> str:
|
||||
return FLARESOLVERR_VERSION
|
||||
|
||||
package_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'package.json')
|
||||
if not os.path.isfile(package_path):
|
||||
package_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'package.json')
|
||||
with open(package_path) as f:
|
||||
FLARESOLVERR_VERSION = json.loads(f.read())['version']
|
||||
return FLARESOLVERR_VERSION
|
||||
@@ -46,6 +49,11 @@ def get_webdriver() -> WebDriver:
|
||||
options.add_argument('--disable-dev-shm-usage')
|
||||
# this option removes the zygote sandbox (it seems that the resolution is a bit faster)
|
||||
options.add_argument('--no-zygote')
|
||||
# attempt to fix Docker ARM32 build
|
||||
options.add_argument('--disable-gpu-sandbox')
|
||||
options.add_argument('--disable-software-rasterizer')
|
||||
options.add_argument('--ignore-certificate-errors')
|
||||
options.add_argument('--ignore-ssl-errors')
|
||||
|
||||
# note: headless mode is detected (options.headless = True)
|
||||
# we launch the browser in head-full mode with the window hidden
|
||||
@@ -67,9 +75,13 @@ def get_webdriver() -> WebDriver:
|
||||
if PATCHED_DRIVER_PATH is not None:
|
||||
driver_exe_path = PATCHED_DRIVER_PATH
|
||||
|
||||
# detect chrome path
|
||||
browser_executable_path = get_chrome_exe_path()
|
||||
|
||||
# downloads and patches the chromedriver
|
||||
# if we don't set driver_executable_path it downloads, patches, and deletes the driver each time
|
||||
driver = uc.Chrome(options=options, driver_executable_path=driver_exe_path, version_main=version_main,
|
||||
driver = uc.Chrome(options=options, browser_executable_path=browser_executable_path,
|
||||
driver_executable_path=driver_exe_path, version_main=version_main,
|
||||
windows_headless=windows_headless)
|
||||
|
||||
# save the patched driver to avoid re-downloads
|
||||
@@ -89,7 +101,22 @@ def get_webdriver() -> WebDriver:
|
||||
|
||||
|
||||
def get_chrome_exe_path() -> str:
    """Return the Chrome executable path, caching it in CHROME_EXE_PATH.

    An executable ``chrome/chrome`` or ``chrome/chrome.exe`` located next to
    this module is preferred; otherwise the path reported by
    ``uc.find_chrome_executable()`` is used.
    """
    global CHROME_EXE_PATH
    if CHROME_EXE_PATH is not None:
        return CHROME_EXE_PATH

    bundle_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'chrome')
    # linux pyinstaller bundle first, then windows pyinstaller bundle
    for binary_name in ("chrome", "chrome.exe"):
        candidate = os.path.join(bundle_dir, binary_name)
        if os.path.exists(candidate) and os.access(candidate, os.X_OK):
            CHROME_EXE_PATH = candidate
            return CHROME_EXE_PATH

    # system
    CHROME_EXE_PATH = uc.find_chrome_executable()
    return CHROME_EXE_PATH
|
||||
|
||||
|
||||
def get_chrome_major_version() -> str:
|
||||
@@ -98,17 +125,17 @@ def get_chrome_major_version() -> str:
|
||||
return CHROME_MAJOR_VERSION
|
||||
|
||||
if os.name == 'nt':
|
||||
# Example: '104.0.5112.79'
|
||||
try:
|
||||
stream = os.popen(
|
||||
'reg query "HKLM\\SOFTWARE\\Wow6432Node\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\Google Chrome"')
|
||||
output = stream.read()
|
||||
# Example: '104.0.5112.79'
|
||||
complete_version = extract_version_registry(output)
|
||||
complete_version = extract_version_nt_executable(get_chrome_exe_path())
|
||||
except Exception:
|
||||
# Example: '104.0.5112.79'
|
||||
complete_version = extract_version_folder()
|
||||
try:
|
||||
complete_version = extract_version_nt_registry()
|
||||
except Exception:
|
||||
# Example: '104.0.5112.79'
|
||||
complete_version = extract_version_nt_folder()
|
||||
else:
|
||||
chrome_path = uc.find_chrome_executable()
|
||||
chrome_path = get_chrome_exe_path()
|
||||
process = os.popen(f'"{chrome_path}" --version')
|
||||
# Example 1: 'Chromium 104.0.5112.79 Arch Linux\n'
|
||||
# Example 2: 'Google Chrome 104.0.5112.79 Arch Linux\n'
|
||||
@@ -119,20 +146,29 @@ def get_chrome_major_version() -> str:
|
||||
return CHROME_MAJOR_VERSION
|
||||
|
||||
|
||||
def extract_version_registry(output) -> str:
    """Extract the DisplayVersion value from ``reg query`` output.

    Takes the text that follows the last 'DisplayVersion REG_SZ' marker, up to
    the end of that line, with surrounding whitespace removed. Returns '' when
    the lookup raises TypeError.
    """
    try:
        value_start = output.rindex('DisplayVersion REG_SZ') + 24
        # keep only the remainder of the line that holds the value
        version_line = output[value_start:].partition('\n')[0]
        return version_line.strip()
    except TypeError:
        return ''
|
||||
def extract_version_nt_executable(exe_path: str) -> str:
    """Read the ``FileVersion`` string from a Windows executable's version resource.

    :param exe_path: path of the PE file to inspect
    :return: the FileVersion entry decoded as UTF-8
    :raises Exception: whatever pefile raises for an unreadable file or a
        missing version resource; nothing is caught here
    """
    import pefile
    pe = pefile.PE(exe_path, fast_load=True)
    try:
        # the file was opened with fast_load, so the resource directory is parsed explicitly
        pe.parse_data_directories(
            directories=[pefile.DIRECTORY_ENTRY["IMAGE_DIRECTORY_ENTRY_RESOURCE"]]
        )
        return pe.FileInfo[0][0].StringTable[0].entries[b"FileVersion"].decode('utf-8')
    finally:
        # release the file data held by the PE object
        pe.close()
|
||||
|
||||
|
||||
def extract_version_folder() -> str:
|
||||
def extract_version_nt_registry() -> str:
    """Read Chrome's DisplayVersion from the Windows uninstall registry key.

    Runs ``reg query`` and returns the value of the last ``DisplayVersion``
    entry in its output, stripped of surrounding whitespace.

    :raises ValueError: if the output contains no DisplayVersion entry
    """
    import re

    # the context manager closes the pipe once the output has been read
    with os.popen(
            'reg query "HKLM\\SOFTWARE\\Wow6432Node\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\Google Chrome"') as stream:
        output = stream.read()

    # match on the column layout instead of a fixed character offset, so the
    # amount of padding between name, type and value does not matter
    versions = re.findall(r'DisplayVersion[ \t]+REG_SZ[ \t]*([^\r\n]*)', output)
    if not versions:
        raise ValueError('DisplayVersion not found in registry output')
    return versions[-1].strip()
|
||||
|
||||
|
||||
def extract_version_nt_folder() -> str:
|
||||
# Check if the Chrome folder exists in the x32 or x64 Program Files folders.
|
||||
for i in range(2):
|
||||
path = 'C:\\Program Files' + (' (x86)' if i else '') + '\\Google\\Chrome\\Application'
|
||||
|
||||
Reference in New Issue
Block a user