mirror of
https://github.com/FlareSolverr/FlareSolverr.git
synced 2025-12-05 17:18:19 +01:00
Compare commits
15 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
09c9404d5d | ||
|
|
9dd0478e69 | ||
|
|
cd4f48721c | ||
|
|
89aed86390 | ||
|
|
a23fa0983f | ||
|
|
d2b680520d | ||
|
|
c3b2173f39 | ||
|
|
37cd979bf7 | ||
|
|
54d589464a | ||
|
|
7ca880da7c | ||
|
|
d4d7b93d7e | ||
|
|
743058a37f | ||
|
|
87b5a6a1c8 | ||
|
|
08bec21dfc | ||
|
|
f37ce039a1 |
37
.github/ISSUE_TEMPLATE.md
vendored
Normal file
37
.github/ISSUE_TEMPLATE.md
vendored
Normal file
@@ -0,0 +1,37 @@
|
||||
**Please use the search bar** at the top of the page and make sure you are not creating an already submitted issue.
|
||||
Check closed issues as well, because your issue may have already been fixed.
|
||||
|
||||
### Instruction on how to enable debug and html trace
|
||||
|
||||
[Follow the instructions from this wiki page](https://github.com/FlareSolverr/FlareSolverr/wiki/How-to-enable-debug-and-html-trace)
|
||||
|
||||
### Environment
|
||||
|
||||
**FlareSolverr Version**:
|
||||
|
||||
**Docker**: [yes/no]
|
||||
|
||||
**OS**:
|
||||
|
||||
**Last Working FlareSolverr Version**:
|
||||
|
||||
**Are you using a proxy or VPN?** [yes/no]
|
||||
|
||||
**Using Captcha Solver:** [yes/no]
|
||||
|
||||
**If using captcha solver, which one:**
|
||||
|
||||
### Description
|
||||
|
||||
[List steps to reproduce the error and details on what happens and what you expected to happen]
|
||||
|
||||
|
||||
### Logged Error Messages
|
||||
|
||||
[Place any relevant error messages you noticed from the logs here.]
|
||||
|
||||
[Make sure you attach the full logs with your personal information removed in case we need more information]
|
||||
|
||||
### Screenshots
|
||||
|
||||
[Place any screenshots of the issue here if needed]
|
||||
32
.github/workflows/release.yml
vendored
Normal file
32
.github/workflows/release.yml
vendored
Normal file
@@ -0,0 +1,32 @@
|
||||
name: release
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- 'v*.*.*'
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: Create Release
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Build Changelog
|
||||
id: github_release
|
||||
uses: mikepenz/release-changelog-builder-action@main
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GH_PAT }}
|
||||
|
||||
- name: Create Release
|
||||
id: create_release
|
||||
uses: actions/create-release@v1
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GH_PAT }}
|
||||
with:
|
||||
tag_name: ${{ github.ref }}
|
||||
release_name: ${{ github.ref }}
|
||||
body: ${{ steps.github_release.outputs.changelog }}
|
||||
draft: false
|
||||
prerelease: false
|
||||
171
README.md
171
README.md
@@ -1,33 +1,69 @@
|
||||
# FlareSolverr
|
||||
|
||||
Proxy server to bypass Cloudflare protection
|
||||
[](https://github.com/FlareSolverr/FlareSolverr/issues)
|
||||
[](https://github.com/FlareSolverr/FlareSolverr/pulls)
|
||||
[](https://hub.docker.com/r/flaresolverr/flaresolverr/)
|
||||
|
||||
FlareSolverr is a proxy server to bypass Cloudflare protection
|
||||
|
||||
:warning: This project is in beta state. Some things may not work and the API can change at any time.
|
||||
See the known issues section.
|
||||
|
||||
## How it works
|
||||
|
||||
FlareSolverr starts a proxy server and it waits for user requests in an idle state using few resources.
|
||||
When some request arrives, it uses [puppeteer](https://github.com/puppeteer/puppeteer) with the
|
||||
[stealth plugin](https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-stealth)
|
||||
to create a headless browser (Chrome). It opens the URL with user parameters and waits until the
|
||||
Cloudflare challenge is solved (or timeout). The HTML code and the cookies are sent back to the
|
||||
user and those cookies can be used to bypass Cloudflare using other HTTP clients.
|
||||
to create a headless browser (Chrome). It opens the URL with user parameters and waits until the Cloudflare challenge
|
||||
is solved (or timeout). The HTML code and the cookies are sent back to the user, and those cookies can be used to
|
||||
bypass Cloudflare using other HTTP clients.
|
||||
|
||||
**NOTE**: Web browsers consume a lot of memory. If you are running FlareSolverr on a machine with few RAM,
|
||||
do not make many requests at once. With each request a new browser is launched.
|
||||
(It is possible to use a permanent session. However, if you use sessions, you should make sure to close them as soon as you are done using them.)
|
||||
**NOTE**: Web browsers consume a lot of memory. If you are running FlareSolverr on a machine with little RAM, do not make
|
||||
many requests at once. With each request a new browser is launched.
|
||||
|
||||
It is also possible to use a permanent session. However, if you use sessions, you should make sure to close them as
|
||||
soon as you are done using them.
|
||||
|
||||
## Installation
|
||||
|
||||
It requires NodeJS.
|
||||
### Docker
|
||||
|
||||
Run `PUPPETEER_PRODUCT=chrome npm install` to install FlareSolverr dependencies.
|
||||
It is recommended to install using a Docker container because the project depends on an external browser that is
|
||||
already included within the image.
|
||||
|
||||
Docker images are available in:
|
||||
* GitHub Registry => https://github.com/orgs/FlareSolverr/packages/container/package/flaresolverr
|
||||
* DockerHub => https://hub.docker.com/r/flaresolverr/flaresolverr
|
||||
|
||||
Supported architectures are:
|
||||
| Architecture | Tag |
|
||||
| :----: | --- |
|
||||
| x86-64 | linux/amd64 |
|
||||
| ARM64 | linux/arm64 |
|
||||
| ARM32 | linux/arm/v7 |
|
||||
|
||||
We provide a `docker-compose.yml` configuration file. Clone this repository and execute `docker-compose up -d` to start
|
||||
the container.
|
||||
|
||||
If you prefer the `docker cli` execute the following command.
|
||||
```bash
|
||||
docker run -d \
|
||||
--name=flaresolverr \
|
||||
-e LOG_LEVEL=info \
|
||||
--restart unless-stopped \
|
||||
ghcr.io/flaresolverr/flaresolverr:latest
|
||||
```
|
||||
|
||||
### From source code
|
||||
|
||||
This is the recommended way for Windows / MacOS users and for developers.
|
||||
* Install [NodeJS](https://nodejs.org/)
|
||||
* Clone this repository and open a shell in that path
|
||||
* Run `npm install` command to install FlareSolverr dependencies
|
||||
* Run `npm run build` command to compile TypeScript code
|
||||
* Run `npm start` command to start FlareSolverr
|
||||
|
||||
## Usage
|
||||
|
||||
First run `npm run build`. Once the TypeScript is compiled, you can use `npm start` to start FlareSolverr.
|
||||
|
||||
Example request:
|
||||
```bash
|
||||
curl -L -X POST 'http://localhost:8191/v1' \
|
||||
@@ -35,7 +71,7 @@ curl -L -X POST 'http://localhost:8191/v1' \
|
||||
--data-raw '{
|
||||
"cmd": "request.get",
|
||||
"url":"http://www.google.com/",
|
||||
"userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36",
|
||||
"userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleW...",
|
||||
"maxTimeout": 60000,
|
||||
"headers": {
|
||||
"X-Test": "Testing 123..."
|
||||
@@ -47,23 +83,22 @@ curl -L -X POST 'http://localhost:8191/v1' \
|
||||
|
||||
#### + `sessions.create`
|
||||
|
||||
This will launch a new browser instance which will retain cookies until you destroy it
|
||||
with `sessions.destroy`. This comes in handy so you don't have to keep solving challenges
|
||||
over and over and you won't need to keep sending cookies for the browser to use.
|
||||
This will launch a new browser instance which will retain cookies until you destroy it with `sessions.destroy`.
|
||||
This comes in handy, so you don't have to keep solving challenges over and over and you won't need to keep sending
|
||||
cookies for the browser to use.
|
||||
|
||||
This also speeds up the requests since it won't have to launch a new browser instance for
|
||||
every request.
|
||||
This also speeds up the requests since it won't have to launch a new browser instance for every request.
|
||||
|
||||
Parameter | Notes
|
||||
|--|--|
|
||||
session | Optional. The session ID that you want to be assinged to the instance. If one isn't set a random UUID will be assigned.
|
||||
session | Optional. The session ID that you want to be assigned to the instance. If it isn't set, a random UUID will be assigned.
|
||||
userAgent | Optional. Will be used by the headless browser.
|
||||
|
||||
#### + `sessions.list`
|
||||
|
||||
Returns a list of all the active sessions. More for debuging if you are curious to see
|
||||
how many sessions are running. You should always make sure to properly close each
|
||||
session when you are done using them as too many may slow your computer down.
|
||||
Returns a list of all the active sessions. More for debugging if you are curious to see how many sessions are running.
|
||||
You should always make sure to properly close each session when you are done using them as too many may slow your
|
||||
computer down.
|
||||
|
||||
Example response:
|
||||
|
||||
@@ -79,9 +114,8 @@ Example response:
|
||||
|
||||
#### + `sessions.destroy`
|
||||
|
||||
This will properly shutdown a browser instance and remove all files associaded with it
|
||||
to free up resources for a new session. Whenever you no longer need to use a session you
|
||||
should make sure to close it.
|
||||
This will properly shutdown a browser instance and remove all files associated with it to free up resources for a new
|
||||
session. When you no longer need to use a session you should make sure to close it.
|
||||
|
||||
Parameter | Notes
|
||||
|--|--|
|
||||
@@ -117,14 +151,13 @@ Example response from running the `curl` above:
|
||||
"content-length": "61587",
|
||||
"x-xss-protection": "0",
|
||||
"x-frame-options": "SAMEORIGIN",
|
||||
"set-cookie": "1P_JAR=2020-07-16-04; expires=Sat, 15-Aug-2020 04:15:49 GMT; path=/; domain=.google.com; Secure; SameSite=none\nNID=204=QE3Ocq15XalczqjuDy52HeseG3zAZuJzID3R57g_oeQHyoV5DuvDhpWc4r9IcPoeIYmkr_ZTX_MNOU8IAbtXmVO7Bmq0adb-hpIHaTBIdBk3Ofifp4gO6vZleVuFYfj7ePkHeHdzGoX-en0FvKtd9iofX4O6RiAdEIAnpL7Wge4; expires=Fri, 15-Jan-2021 04:15:49 GMT; path=/; domain=.google.com; Secure; HttpOnly; SameSite=none",
|
||||
"alt-svc": "h3-29=\":443\"; ma=2592000,h3-27=\":443\"; ma=2592000,h3-25=\":443\"; ma=2592000,h3-T050=\":443\"; ma=2592000,h3-Q050=\":443\"; ma=2592000,h3-Q046=\":443\"; ma=2592000,h3-Q043=\":443\"; ma=2592000,quic=\":443\"; ma=2592000; v=\"46,43\""
|
||||
"set-cookie": "1P_JAR=2020-07-16-04; expires=Sat..."
|
||||
},
|
||||
"response":"<!DOCTYPE html>...",
|
||||
"cookies": [
|
||||
{
|
||||
"name": "NID",
|
||||
"value": "204=QE3Ocq15XalczqjuDy52HeseG3zAZuJzID3R57g_oeQHyoV5DuvDhpWc4r9IcPoeIYmkr_ZTX_MNOU8IAbtXmVO7Bmq0adb-hpIHaTBIdBk3Ofifp4gO6vZleVuFYfj7ePkHeHdzGoX-en0FvKtd9iofX4O6RiAdEIAnpL7Wge4",
|
||||
"value": "204=QE3Ocq15XalczqjuDy52HeseG3zAZuJzID3R57...",
|
||||
"domain": ".google.com",
|
||||
"path": "/",
|
||||
"expires": 1610684149.307722,
|
||||
@@ -147,7 +180,7 @@ Example response from running the `curl` above:
|
||||
"sameSite": "None"
|
||||
}
|
||||
],
|
||||
"userAgent": "Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
|
||||
"userAgent": "Windows NT 10.0; Win64; x64) AppleWebKit/5..."
|
||||
},
|
||||
"status": "ok",
|
||||
"message": "",
|
||||
@@ -165,15 +198,14 @@ Parameter | Notes
|
||||
|--|--|
|
||||
postData | Must be a string. If you want to POST a form, don't forget to set the `Content-Type` header to `application/x-www-form-urlencoded` or the server might not understand your request.
|
||||
|
||||
## Downloading Images and PDFs (small files)
|
||||
### Download small files
|
||||
|
||||
If you need to access an image/pdf or small file, you should pass the `download` parameter to
|
||||
`request.get` setting it to `true`. Rather than access the html and return text it will
|
||||
return a the buffer **base64** encoded which you will be able to decode and save the image/pdf.
|
||||
If you need to access an image/pdf or small file, you should pass the `download` parameter to `request.get` setting it
|
||||
to `true`. Rather than access the html and return text it will return the buffer **base64** encoded which you will be
|
||||
able to decode and save the image/pdf.
|
||||
|
||||
This method isn't recommended for videos or anything larger. As that should be streamed back to
|
||||
the client and at the moment there is nothing setup to do so. If this is something you need feel
|
||||
free to create an issue and/or submit a PR.
|
||||
This method isn't recommended for videos or anything larger. As that should be streamed back to the client and at the
|
||||
moment there is nothing set up to do so. If this is something you need, feel free to create an issue and/or submit a PR.
|
||||
|
||||
## Environment variables
|
||||
|
||||
@@ -182,40 +214,24 @@ To set the environment vars in Linux run `export LOG_LEVEL=debug` and then start
|
||||
Name | Default | Notes
|
||||
|--|--|--|
|
||||
LOG_LEVEL | info | Used to change the verbosity of the logging.
|
||||
LOG_HTML | false | Used for debugging. If `true` all html that passes through the proxy will be logged to the console.
|
||||
LOG_HTML | false | Used for debugging. If `true` all HTML that passes through the proxy will be logged to the console in `debug` level.
|
||||
PORT | 8191 | Change this if you already have a process running on port `8191`.
|
||||
HOST | 0.0.0.0 | This shouldn't need to be messed with but if you insist, it's here!
|
||||
CAPTCHA_SOLVER | None | This is used to select which captcha solving method it used when a captcha is encounted.
|
||||
CAPTCHA_SOLVER | None | This is used to select which captcha solving method is used when a captcha is encountered.
|
||||
HEADLESS | true | This is used to debug the browser by not running it in headless mode.
|
||||
|
||||
## Captcha Solvers
|
||||
|
||||
Sometimes CF not only gives mathmatical computations and browser tests, sometimes they also require
|
||||
the user to solve a captcha. If this is the case, FlareSolverr will return the captcha page. But that's
|
||||
not very helpful to you is it?
|
||||
Sometimes CF not only gives mathematical computations and browser tests, sometimes they also require the user to solve
|
||||
a captcha. If this is the case, FlareSolverr will return the captcha page. But that's not very helpful to you is it?
|
||||
|
||||
FlareSolverr can be customized to solve the captcha's automatically by setting the environment variable
|
||||
`CAPTCHA_SOLVER` to the file name of one of the adapters inside the [/captcha](src/captcha) directory.
|
||||
FlareSolverr can be customized to solve the captchas automatically by setting the environment variable `CAPTCHA_SOLVER`
|
||||
to the file name of one of the adapters inside the [/captcha](src/captcha) directory.
|
||||
|
||||
### [CaptchaHarvester](https://github.com/NoahCardoza/CaptchaHarvester)
|
||||
### hcaptcha-solver
|
||||
|
||||
This method makes use of the [CaptchaHarvester](https://github.com/NoahCardoza/CaptchaHarvester) project which allows users to collect thier own tokens from ReCaptcha V2/V3 and hCaptcha for free.
|
||||
|
||||
To use this method you must set these ENV variables:
|
||||
|
||||
```bash
|
||||
CAPTCHA_SOLVER=harvester
|
||||
HARVESTER_ENDPOINT=https://127.0.0.1:5000/token
|
||||
```
|
||||
|
||||
**Note**: above I set `HARVESTER_ENDPOINT` to the default configureation
|
||||
of the captcha harvester's server, but that could change if
|
||||
you customize the command line flags. Simply put, `HARVESTER_ENDPOINT`
|
||||
should be set to the URI of the route that returns a token in plain text when called.
|
||||
|
||||
### [hcaptcha-solver](https://github.com/JimmyLaurent/hcaptcha-solver)
|
||||
|
||||
This method makes use of the [hcaptcha-solver](https://github.com/JimmyLaurent/hcaptcha-solver) project which attempts to solve hcaptcha by randomly selecting images.
|
||||
This method makes use of the [hcaptcha-solver](https://github.com/JimmyLaurent/hcaptcha-solver) project which attempts
|
||||
to solve hCaptcha by randomly selecting images.
|
||||
|
||||
To use this solver you must first install it and then set it as the `CAPTCHA_SOLVER`.
|
||||
|
||||
@@ -224,31 +240,22 @@ npm i hcaptcha-solver
|
||||
CAPTCHA_SOLVER=hcaptcha-solver
|
||||
```
|
||||
|
||||
## Docker
|
||||
### CaptchaHarvester
|
||||
|
||||
You can edit environment variables in `./Dockerfile` and build your own image.
|
||||
This method makes use of the [CaptchaHarvester](https://github.com/NoahCardoza/CaptchaHarvester) project which allows
|
||||
users to collect their own tokens from ReCaptcha V2/V3 and hCaptcha for free.
|
||||
|
||||
To use this method you must set these ENV variables:
|
||||
|
||||
```bash
|
||||
docker build -t flaresolverr:latest .
|
||||
docker run --restart=always --name flaresolverr -p 8191:8191 -d flaresolverr:latest
|
||||
CAPTCHA_SOLVER=harvester
|
||||
HARVESTER_ENDPOINT=https://127.0.0.1:5000/token
|
||||
```
|
||||
|
||||
## TypeScript
|
||||
**Note**: above I set `HARVESTER_ENDPOINT` to the default configuration of the captcha harvester's server, but that
|
||||
could change if you customize the command line flags. Simply put, `HARVESTER_ENDPOINT` should be set to the URI of the
|
||||
route that returns a token in plain text when called.
|
||||
|
||||
I'm quite new to TypeScript. If you spot any funny business or anything that is or isn't being
|
||||
used properly feel free to submit a PR or open an issue.
|
||||
## Related projects
|
||||
|
||||
## Known issues / Roadmap
|
||||
|
||||
The current implementation seems to be working on the sites I have been testing them on. However, if you find it unable to access a site, open an issue and I'd be happy to investigate.
|
||||
|
||||
That being said, the project uses the [puppeteer stealth plugin](https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-stealth). If Cloudflare is able to detect the headless browser, it's more that projects domain to fix.
|
||||
|
||||
TODO:
|
||||
|
||||
* Fix remaining issues in the code (see TODOs in code)
|
||||
* Make the maxTimeout more accurate (count the time to open the first page / maybe count the captcha solve time?)
|
||||
* Hide sensitive information in logs
|
||||
* Reduce Docker image size
|
||||
* Docker image for ARM architecture
|
||||
* Install instructions for Windows
|
||||
* C# implementation => https://github.com/FlareSolverr/FlareSolverrSharp
|
||||
|
||||
18
docker-compose.yml
Normal file
18
docker-compose.yml
Normal file
@@ -0,0 +1,18 @@
|
||||
---
|
||||
version: "2.1"
|
||||
services:
|
||||
flaresolverr:
|
||||
# DockerHub mirror flaresolverr/flaresolverr:latest
|
||||
image: ghcr.io/flaresolverr/flaresolverr:latest
|
||||
container_name: flaresolverr
|
||||
environment:
|
||||
# Used to change the verbosity of the logging
|
||||
- LOG_LEVEL=info
|
||||
# Enables hcaptcha-solver => https://github.com/JimmyLaurent/hcaptcha-solver
|
||||
#- CAPTCHA_SOLVER=hcaptcha-solver
|
||||
# Enables CaptchaHarvester => https://github.com/NoahCardoza/CaptchaHarvester
|
||||
#- CAPTCHA_SOLVER=harvester
|
||||
#- HARVESTER_ENDPOINT=https://127.0.0.1:5000/token
|
||||
ports:
|
||||
- 8191:8191
|
||||
restart: unless-stopped
|
||||
13
package-lock.json
generated
13
package-lock.json
generated
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "flaresolverr",
|
||||
"version": "1.2.0",
|
||||
"version": "1.2.1",
|
||||
"lockfileVersion": 1,
|
||||
"requires": true,
|
||||
"dependencies": {
|
||||
@@ -74,6 +74,12 @@
|
||||
"defer-to-connect": "^2.0.0"
|
||||
}
|
||||
},
|
||||
"@types/await-timeout": {
|
||||
"version": "0.3.1",
|
||||
"resolved": "https://registry.npmjs.org/@types/await-timeout/-/await-timeout-0.3.1.tgz",
|
||||
"integrity": "sha512-H5PzROT4KuP7XQDua13Iw8did//OCKAZ/3TL15DjvMzDonrk4HvhH1+tLko96f2guU6XaD3AoqRa49ZOwbwNig==",
|
||||
"dev": true
|
||||
},
|
||||
"@types/cacheable-request": {
|
||||
"version": "6.0.1",
|
||||
"resolved": "https://registry.npmjs.org/@types/cacheable-request/-/cacheable-request-6.0.1.tgz",
|
||||
@@ -288,6 +294,11 @@
|
||||
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
|
||||
"integrity": "sha1-x57Zf380y48robyXkLzDZkdLS3k="
|
||||
},
|
||||
"await-timeout": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/await-timeout/-/await-timeout-1.1.1.tgz",
|
||||
"integrity": "sha512-gsDXAS6XVc4Jt+7S92MPX6Noq69bdeXUPEaXd8dk3+yVr629LTDLxNt4j1ycBbrU+AStK2PhKIyNIM+xzWMVOQ=="
|
||||
},
|
||||
"aws-sign2": {
|
||||
"version": "0.7.0",
|
||||
"resolved": "https://registry.npmjs.org/aws-sign2/-/aws-sign2-0.7.0.tgz",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "flaresolverr",
|
||||
"version": "1.2.0",
|
||||
"version": "1.2.1",
|
||||
"description": "Proxy server to bypass Cloudflare protection.",
|
||||
"scripts": {
|
||||
"start": "node ./dist/index.js",
|
||||
@@ -20,6 +20,7 @@
|
||||
"url": "https://github.com/ngosang/FlareSolverr"
|
||||
},
|
||||
"dependencies": {
|
||||
"await-timeout": "^1.1.1",
|
||||
"console-log-level": "^1.4.1",
|
||||
"got": "^11.5.1",
|
||||
"hcaptcha-solver": "^1.0.2",
|
||||
@@ -29,6 +30,7 @@
|
||||
"uuid": "^8.2.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/await-timeout": "^0.3.1",
|
||||
"@types/node": "^14.0.23",
|
||||
"@types/puppeteer": "^3.0.1",
|
||||
"@types/uuid": "^8.0.0",
|
||||
|
||||
16
src/index.ts
16
src/index.ts
@@ -3,7 +3,7 @@ import { createServer, IncomingMessage, ServerResponse } from 'http';
|
||||
import { RequestContext } from './types'
|
||||
import Router, { BaseAPICall } from './routes'
|
||||
|
||||
const version: string = require('../package.json').version
|
||||
const version: string = "v" + require('../package.json').version
|
||||
const serverPort: number = Number(process.env.PORT) || 8191
|
||||
const serverHost: string = process.env.HOST || '0.0.0.0'
|
||||
|
||||
@@ -65,11 +65,19 @@ function validateIncomingRequest(ctx: RequestContext, params: BaseAPICall) {
|
||||
}
|
||||
|
||||
createServer((req: IncomingMessage, res: ServerResponse) => {
|
||||
const startTimestamp = Date.now()
|
||||
|
||||
// count the request for the log prefix
|
||||
log.incRequests()
|
||||
|
||||
const startTimestamp = Date.now()
|
||||
log.info(`Incoming request: ${req.method} ${req.url}`)
|
||||
|
||||
// show welcome message
|
||||
if (req.url == '/') {
|
||||
successResponse("FlareSolverr is ready!", null, res, startTimestamp);
|
||||
return;
|
||||
}
|
||||
|
||||
// get request body
|
||||
const bodyParts: any[] = []
|
||||
req.on('data', chunk => {
|
||||
bodyParts.push(chunk)
|
||||
@@ -102,5 +110,5 @@ createServer((req: IncomingMessage, res: ServerResponse) => {
|
||||
})
|
||||
})
|
||||
}).listen(serverPort, serverHost, () => {
|
||||
log.info(`FlareSolverr v${version} listening on http://${serverHost}:${serverPort}`)
|
||||
log.info(`FlareSolverr ${version} listening on http://${serverHost}:${serverPort}`)
|
||||
})
|
||||
|
||||
@@ -10,7 +10,7 @@ export default {
|
||||
},
|
||||
...require('console-log-level')(
|
||||
{
|
||||
level: process.env.LOG_LEVEL || 'debug',
|
||||
level: process.env.LOG_LEVEL || 'info',
|
||||
prefix(level: string) {
|
||||
return `${new Date().toISOString()} ${level.toUpperCase()} REQ-${requests}`
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ import { SetCookie, Request, Headers, HttpMethod, Overrides, Cookie } from 'pupp
|
||||
import { TimeoutError } from 'puppeteer/Errors'
|
||||
import getCaptchaSolver, { CaptchaType } from './captcha'
|
||||
import * as Puppeteer from "puppeteer-extra/dist/puppeteer";
|
||||
const Timeout = require('await-timeout');
|
||||
|
||||
export interface BaseAPICall {
|
||||
cmd: string
|
||||
@@ -69,7 +70,7 @@ type OverridesProps =
|
||||
'headers'
|
||||
|
||||
// We always set a Windows User-Agent because ARM builds are detected by CloudFlare
|
||||
const DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
|
||||
const DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
|
||||
const CHALLENGE_SELECTORS = ['#trk_jschal_js', '.ray_id', '.attack-box']
|
||||
const TOKEN_INPUT_NAMES = ['g-recaptcha-response', 'h-captcha-response']
|
||||
|
||||
@@ -119,9 +120,22 @@ async function interceptResponse(page: Puppeteer.Page, callback: (payload: Chall
|
||||
});
|
||||
}
|
||||
|
||||
async function resolveChallenge(ctx: RequestContext, { url, maxTimeout, proxy, download, returnOnlyCookies }: BaseRequestAPICall, page: Puppeteer.Page): Promise<ChallengeResolutionT | void> {
|
||||
async function resolveChallengeWithTimeout(ctx: RequestContext, params: BaseRequestAPICall, page: Puppeteer.Page) {
|
||||
const maxTimeout = params.maxTimeout || 60000
|
||||
const timer = new Timeout();
|
||||
try {
|
||||
const promise = resolveChallenge(ctx, params, page);
|
||||
return await Promise.race([
|
||||
promise,
|
||||
timer.set(maxTimeout, `Maximum timeout reached. maxTimeout=${maxTimeout} (ms)`)
|
||||
]);
|
||||
} finally {
|
||||
timer.clear();
|
||||
}
|
||||
}
|
||||
|
||||
async function resolveChallenge(ctx: RequestContext, { url, proxy, download, returnOnlyCookies }: BaseRequestAPICall, page: Puppeteer.Page): Promise<ChallengeResolutionT | void> {
|
||||
|
||||
maxTimeout = maxTimeout || 60000
|
||||
let status = 'ok'
|
||||
let message = ''
|
||||
|
||||
@@ -134,6 +148,8 @@ async function resolveChallenge(ctx: RequestContext, { url, maxTimeout, proxy, d
|
||||
log.debug(`Navigating to... ${url}`)
|
||||
let response = await page.goto(url, { waitUntil: 'domcontentloaded' })
|
||||
|
||||
log.html(await page.content())
|
||||
|
||||
// look for challenge
|
||||
if (response.headers().server.startsWith('cloudflare')) {
|
||||
log.info('Cloudflare detected')
|
||||
@@ -145,10 +161,12 @@ async function resolveChallenge(ctx: RequestContext, { url, maxTimeout, proxy, d
|
||||
|
||||
if (response.status() > 400) {
|
||||
// detect cloudflare wait 5s
|
||||
let selectorFoundCount = 0
|
||||
for (const selector of CHALLENGE_SELECTORS) {
|
||||
const cfChallengeElem = await page.$(selector)
|
||||
if (cfChallengeElem) {
|
||||
log.html(await page.content())
|
||||
selectorFoundCount++
|
||||
log.debug(`'${selector}' challenge element detected.`)
|
||||
log.debug('Waiting for Cloudflare challenge...')
|
||||
|
||||
let interceptingResult: ChallengeResolutionT;
|
||||
@@ -158,8 +176,7 @@ async function resolveChallenge(ctx: RequestContext, { url, maxTimeout, proxy, d
|
||||
});
|
||||
}
|
||||
|
||||
// TODO: find out why these pages hang sometimes
|
||||
while (Date.now() - ctx.startTimestamp < maxTimeout) {
|
||||
while (true) {
|
||||
await page.waitFor(1000)
|
||||
try {
|
||||
// catch exception timeout in waitForNavigation
|
||||
@@ -181,11 +198,7 @@ async function resolveChallenge(ctx: RequestContext, { url, maxTimeout, proxy, d
|
||||
|
||||
response = await page.reload({ waitUntil: 'domcontentloaded' })
|
||||
log.debug('Reloaded page...')
|
||||
}
|
||||
|
||||
if (Date.now() - ctx.startTimestamp >= maxTimeout) {
|
||||
ctx.errorResponse(`Maximum timeout reached. maxTimeout=${maxTimeout} (ms)`)
|
||||
return
|
||||
log.html(await page.content())
|
||||
}
|
||||
|
||||
log.debug('Validating HTML code...')
|
||||
@@ -194,6 +207,12 @@ async function resolveChallenge(ctx: RequestContext, { url, maxTimeout, proxy, d
|
||||
log.debug(`No '${selector}' challenge element detected.`)
|
||||
}
|
||||
}
|
||||
log.debug("Number of selector found: " + selectorFoundCount + ", total selector: " + CHALLENGE_SELECTORS.length)
|
||||
if (selectorFoundCount == 0)
|
||||
{
|
||||
await page.close()
|
||||
return ctx.errorResponse('No challenge selectors found, unable to proceed')
|
||||
}
|
||||
}
|
||||
|
||||
// it seems some captcha pages return 200 sometimes
|
||||
@@ -203,7 +222,6 @@ async function resolveChallenge(ctx: RequestContext, { url, maxTimeout, proxy, d
|
||||
const captchaStartTimestamp = Date.now()
|
||||
const challengeForm = await page.$('#challenge-form')
|
||||
if (challengeForm) {
|
||||
log.html(await page.content())
|
||||
const captchaTypeElm = await page.$('input[name="cf_captcha_kind"]')
|
||||
const cfCaptchaType: string = await captchaTypeElm.evaluate((e: any) => e.value)
|
||||
const captchaType: CaptchaType = (CaptchaType as any)[cfCaptchaType]
|
||||
@@ -280,6 +298,8 @@ async function resolveChallenge(ctx: RequestContext, { url, maxTimeout, proxy, d
|
||||
message = 'Captcha detected but no automatic solver is configured.'
|
||||
}
|
||||
}
|
||||
|
||||
log.debug("Response is: " + response.status())
|
||||
}
|
||||
|
||||
const payload: ChallengeResolutionT = {
|
||||
@@ -406,7 +426,7 @@ const browserRequest = async (ctx: RequestContext, params: BaseRequestAPICall) =
|
||||
|
||||
try {
|
||||
const page = await setupPage(ctx, params, session.browser)
|
||||
const data = await resolveChallenge(ctx, params, page)
|
||||
const data = await resolveChallengeWithTimeout(ctx, params, page)
|
||||
|
||||
if (data) {
|
||||
const { status } = data
|
||||
|
||||
Reference in New Issue
Block a user