Compare commits

...

15 Commits

Author SHA1 Message Date
ngosang
09c9404d5d Bump version 1.2.1 2020-12-20 02:55:02 +01:00
ngosang
9dd0478e69 Change version to match release tag / 1.2.0 => v1.2.0 2020-12-20 02:53:36 +01:00
ngosang
cd4f48721c CI/CD Publish release in GitHub repository. resolves #34 2020-12-20 02:51:28 +01:00
ngosang
89aed86390 Add welcome message in / endpoint 2020-12-20 01:57:34 +01:00
ngosang
a23fa0983f Rewrite request timeout handling (maxTimeout) resolves #42 2020-12-20 01:43:47 +01:00
Alexandre Beloin
d2b680520d Add http status for better logging 2020-12-17 16:01:27 -05:00
Alexandre Beloin
c3b2173f39 Return an error when no selectors are found, #25 2020-12-14 17:06:42 -05:00
Alexandre Beloin
37cd979bf7 Add issue template, fix #32 2020-12-14 16:27:06 -05:00
Alexandre Beloin
54d589464a Moving log.html right after loading the page and add one on reload, fix #30 2020-12-14 15:18:18 -05:00
Alexandre Beloin
7ca880da7c Update User-Agent to match chromium version, ref: #15 (#28) 2020-12-14 09:09:18 +01:00
ngosang
d4d7b93d7e Update install from source code documentation 2020-12-14 00:51:44 +01:00
Diego Heras
743058a37f Update readme to add Docker instructions (#20) 2020-12-13 21:14:07 +01:00
Diego Heras
87b5a6a1c8 Clean up readme (#19) 2020-12-13 20:46:05 +01:00
ngosang
08bec21dfc Add docker-compose 2020-12-13 20:44:10 +01:00
ngosang
f37ce039a1 Change default log level to info 2020-12-13 20:41:33 +01:00
9 changed files with 238 additions and 103 deletions

37
.github/ISSUE_TEMPLATE.md vendored Normal file
View File

@@ -0,0 +1,37 @@
**Please use the search bar** at the top of the page and make sure you are not creating an already submitted issue.
Check closed issues as well, because your issue may have already been fixed.
### Instructions on how to enable debug and html trace
[Follow the instructions from this wiki page](https://github.com/FlareSolverr/FlareSolverr/wiki/How-to-enable-debug-and-html-trace)
### Environment
**FlareSolverr Version**:
**Docker**: [yes/no]
**OS**:
**Last Working FlareSolverr Version**:
**Are you using a proxy or VPN?** [yes/no]
**Using Captcha Solver:** [yes/no]
**If using captcha solver, which one:**
### Description
[List steps to reproduce the error and details on what happens and what you expected to happen]
### Logged Error Messages
[Place any relevant error messages you noticed from the logs here.]
[Make sure you attach the full logs with your personal information removed in case we need more information]
### Screenshots
[Place any screenshots of the issue here if needed]

32
.github/workflows/release.yml vendored Normal file
View File

@@ -0,0 +1,32 @@
name: release
on:
push:
tags:
- 'v*.*.*'
jobs:
build:
name: Create Release
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v2
- name: Build Changelog
id: github_release
uses: mikepenz/release-changelog-builder-action@main
env:
GITHUB_TOKEN: ${{ secrets.GH_PAT }}
- name: Create Release
id: create_release
uses: actions/create-release@v1
env:
GITHUB_TOKEN: ${{ secrets.GH_PAT }}
with:
tag_name: ${{ github.ref }}
release_name: ${{ github.ref }}
body: ${{ steps.github_release.outputs.changelog }}
draft: false
prerelease: false

171
README.md
View File

@@ -1,33 +1,69 @@
# FlareSolverr # FlareSolverr
Proxy server to bypass Cloudflare protection [![GitHub issues](https://img.shields.io/github/issues/FlareSolverr/FlareSolverr.svg?maxAge=60&style=flat-square)](https://github.com/FlareSolverr/FlareSolverr/issues)
[![GitHub pull requests](https://img.shields.io/github/issues-pr/FlareSolverr/FlareSolverr.svg?maxAge=60&style=flat-square)](https://github.com/FlareSolverr/FlareSolverr/pulls)
[![Docker Pulls](https://img.shields.io/docker/pulls/flaresolverr/flaresolverr.svg?maxAge=60&style=flat-square)](https://hub.docker.com/r/flaresolverr/flaresolverr/)
FlareSolverr is a proxy server to bypass Cloudflare protection
:warning: This project is in beta state. Some things may not work and the API can change at any time. :warning: This project is in beta state. Some things may not work and the API can change at any time.
See the known issues section.
## How it works ## How it works
FlareSolverr starts a proxy server and it waits for user requests in an idle state using few resources. FlareSolverr starts a proxy server and it waits for user requests in an idle state using few resources.
When some request arrives, it uses [puppeteer](https://github.com/puppeteer/puppeteer) with the When some request arrives, it uses [puppeteer](https://github.com/puppeteer/puppeteer) with the
[stealth plugin](https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-stealth) [stealth plugin](https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-stealth)
to create a headless browser (Chrome). It opens the URL with user parameters and waits until the to create a headless browser (Chrome). It opens the URL with user parameters and waits until the Cloudflare challenge
Cloudflare challenge is solved (or timeout). The HTML code and the cookies are sent back to the is solved (or timeout). The HTML code and the cookies are sent back to the user, and those cookies can be used to
user and those cookies can be used to bypass Cloudflare using other HTTP clients. bypass Cloudflare using other HTTP clients.
**NOTE**: Web browsers consume a lot of memory. If you are running FlareSolverr on a machine with few RAM, **NOTE**: Web browsers consume a lot of memory. If you are running FlareSolverr on a machine with few RAM, do not make
do not make many requests at once. With each request a new browser is launched. many requests at once. With each request a new browser is launched.
(It is possible to use a permanent session. However, if you use sessions, you should make sure to close them as soon as you are done using them.)
It is also possible to use a permanent session. However, if you use sessions, you should make sure to close them as
soon as you are done using them.
## Installation ## Installation
It requires NodeJS. ### Docker
Run `PUPPETEER_PRODUCT=chrome npm install` to install FlareSolverr dependencies. It is recommended to install using a Docker container because the project depends on an external browser that is
already included within the image.
Docker images are available in:
* GitHub Registry => https://github.com/orgs/FlareSolverr/packages/container/package/flaresolverr
* DockerHub => https://hub.docker.com/r/flaresolverr/flaresolverr
Supported architectures are:
| Architecture | Tag |
| :----: | --- |
| x86-64 | linux/amd64 |
| ARM64 | linux/arm64 |
| ARM32 | linux/arm/v7 |
We provide a `docker-compose.yml` configuration file. Clone this repository and execute `docker-compose up -d` to start
the container.
If you prefer the `docker cli` execute the following command.
```bash
docker run -d \
--name=flaresolverr \
-e LOG_LEVEL=info \
--restart unless-stopped \
ghcr.io/flaresolverr/flaresolverr:latest
```
### From source code
This is the recommended way for Windows / MacOS users and for developers.
* Install [NodeJS](https://nodejs.org/)
* Clone this repository and open a shell in that path
* Run `npm install` command to install FlareSolverr dependencies
* Run `npm run build` command to compile TypeScript code
* Run `npm start` command to start FlareSolverr
## Usage ## Usage
First run `npm run build`. Once the TypeScript is compiled, you can use `npm start` to start FlareSolverr.
Example request: Example request:
```bash ```bash
curl -L -X POST 'http://localhost:8191/v1' \ curl -L -X POST 'http://localhost:8191/v1' \
@@ -35,7 +71,7 @@ curl -L -X POST 'http://localhost:8191/v1' \
--data-raw '{ --data-raw '{
"cmd": "request.get", "cmd": "request.get",
"url":"http://www.google.com/", "url":"http://www.google.com/",
"userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36", "userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleW...",
"maxTimeout": 60000, "maxTimeout": 60000,
"headers": { "headers": {
"X-Test": "Testing 123..." "X-Test": "Testing 123..."
@@ -47,23 +83,22 @@ curl -L -X POST 'http://localhost:8191/v1' \
#### + `sessions.create` #### + `sessions.create`
This will launch a new browser instance which will retain cookies until you destroy it This will launch a new browser instance which will retain cookies until you destroy it with `sessions.destroy`.
with `sessions.destroy`. This comes in handy so you don't have to keep solving challenges This comes in handy, so you don't have to keep solving challenges over and over and you won't need to keep sending
over and over and you won't need to keep sending cookies for the browser to use. cookies for the browser to use.
This also speeds up the requests since it won't have to launch a new browser instance for This also speeds up the requests since it won't have to launch a new browser instance for every request.
every request.
Parameter | Notes Parameter | Notes
|--|--| |--|--|
session | Optional. The session ID that you want to be assinged to the instance. If one isn't set a random UUID will be assigned. session | Optional. The session ID that you want to be assigned to the instance. If it isn't set, a random UUID will be assigned.
userAgent | Optional. Will be used by the headless browser. userAgent | Optional. Will be used by the headless browser.
#### + `sessions.list` #### + `sessions.list`
Returns a list of all the active sessions. More for debuging if you are curious to see Returns a list of all the active sessions. More for debugging if you are curious to see how many sessions are running.
how many sessions are running. You should always make sure to properly close each You should always make sure to properly close each session when you are done using them as too many may slow your
session when you are done using them as too many may slow your computer down. computer down.
Example response: Example response:
@@ -79,9 +114,8 @@ Example response:
#### + `sessions.destroy` #### + `sessions.destroy`
This will properly shutdown a browser instance and remove all files associaded with it This will properly shutdown a browser instance and remove all files associated with it to free up resources for a new
to free up resources for a new session. Whenever you no longer need to use a session you session. When you no longer need to use a session you should make sure to close it.
should make sure to close it.
Parameter | Notes Parameter | Notes
|--|--| |--|--|
@@ -117,14 +151,13 @@ Example response from running the `curl` above:
"content-length": "61587", "content-length": "61587",
"x-xss-protection": "0", "x-xss-protection": "0",
"x-frame-options": "SAMEORIGIN", "x-frame-options": "SAMEORIGIN",
"set-cookie": "1P_JAR=2020-07-16-04; expires=Sat, 15-Aug-2020 04:15:49 GMT; path=/; domain=.google.com; Secure; SameSite=none\nNID=204=QE3Ocq15XalczqjuDy52HeseG3zAZuJzID3R57g_oeQHyoV5DuvDhpWc4r9IcPoeIYmkr_ZTX_MNOU8IAbtXmVO7Bmq0adb-hpIHaTBIdBk3Ofifp4gO6vZleVuFYfj7ePkHeHdzGoX-en0FvKtd9iofX4O6RiAdEIAnpL7Wge4; expires=Fri, 15-Jan-2021 04:15:49 GMT; path=/; domain=.google.com; Secure; HttpOnly; SameSite=none", "set-cookie": "1P_JAR=2020-07-16-04; expires=Sat..."
"alt-svc": "h3-29=\":443\"; ma=2592000,h3-27=\":443\"; ma=2592000,h3-25=\":443\"; ma=2592000,h3-T050=\":443\"; ma=2592000,h3-Q050=\":443\"; ma=2592000,h3-Q046=\":443\"; ma=2592000,h3-Q043=\":443\"; ma=2592000,quic=\":443\"; ma=2592000; v=\"46,43\""
}, },
"response":"<!DOCTYPE html>...", "response":"<!DOCTYPE html>...",
"cookies": [ "cookies": [
{ {
"name": "NID", "name": "NID",
"value": "204=QE3Ocq15XalczqjuDy52HeseG3zAZuJzID3R57g_oeQHyoV5DuvDhpWc4r9IcPoeIYmkr_ZTX_MNOU8IAbtXmVO7Bmq0adb-hpIHaTBIdBk3Ofifp4gO6vZleVuFYfj7ePkHeHdzGoX-en0FvKtd9iofX4O6RiAdEIAnpL7Wge4", "value": "204=QE3Ocq15XalczqjuDy52HeseG3zAZuJzID3R57...",
"domain": ".google.com", "domain": ".google.com",
"path": "/", "path": "/",
"expires": 1610684149.307722, "expires": 1610684149.307722,
@@ -147,7 +180,7 @@ Example response from running the `curl` above:
"sameSite": "None" "sameSite": "None"
} }
], ],
"userAgent": "Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36" "userAgent": "Windows NT 10.0; Win64; x64) AppleWebKit/5..."
}, },
"status": "ok", "status": "ok",
"message": "", "message": "",
@@ -165,15 +198,14 @@ Parameter | Notes
|--|--| |--|--|
postData | Must be a string. If you want to POST a form, don't forget to set the `Content-Type` header to `application/x-www-form-urlencoded` or the server might not understand your request. postData | Must be a string. If you want to POST a form, don't forget to set the `Content-Type` header to `application/x-www-form-urlencoded` or the server might not understand your request.
## Downloading Images and PDFs (small files) ### Download small files
If you need to access an image/pdf or small file, you should pass the `download` parameter to If you need to access an image/pdf or small file, you should pass the `download` parameter to `request.get` setting it
`request.get` setting it to `true`. Rather than access the html and return text it will to `true`. Rather than access the html and return text it will return the buffer **base64** encoded which you will be
return a the buffer **base64** encoded which you will be able to decode and save the image/pdf. able to decode and save the image/pdf.
This method isn't recommended for videos or anything larger. As that should be streamed back to This method isn't recommended for videos or anything larger. As that should be streamed back to the client and at the
the client and at the moment there is nothing setup to do so. If this is something you need feel moment there is nothing setup to do so. If this is something you need feel free to create an issue and/or submit a PR.
free to create an issue and/or submit a PR.
## Environment variables ## Environment variables
@@ -182,40 +214,24 @@ To set the environment vars in Linux run `export LOG_LEVEL=debug` and then start
Name | Default | Notes Name | Default | Notes
|--|--|--| |--|--|--|
LOG_LEVEL | info | Used to change the verbosity of the logging. LOG_LEVEL | info | Used to change the verbosity of the logging.
LOG_HTML | false | Used for debugging. If `true` all html that passes through the proxy will be logged to the console. LOG_HTML | false | Used for debugging. If `true` all HTML that passes through the proxy will be logged to the console in `debug` level.
PORT | 8191 | Change this if you already have a process running on port `8191`. PORT | 8191 | Change this if you already have a process running on port `8191`.
HOST | 0.0.0.0 | This shouldn't need to be messed with but if you insist, it's here! HOST | 0.0.0.0 | This shouldn't need to be messed with but if you insist, it's here!
CAPTCHA_SOLVER | None | This is used to select which captcha solving method it used when a captcha is encounted. CAPTCHA_SOLVER | None | This is used to select which captcha solving method is used when a captcha is encountered.
HEADLESS | true | This is used to debug the browser by not running it in headless mode. HEADLESS | true | This is used to debug the browser by not running it in headless mode.
## Captcha Solvers ## Captcha Solvers
Sometimes CF not only gives mathmatical computations and browser tests, sometimes they also require Sometimes CF not only gives mathematical computations and browser tests, sometimes they also require the user to solve
the user to solve a captcha. If this is the case, FlareSolverr will return the captcha page. But that's a captcha. If this is the case, FlareSolverr will return the captcha page. But that's not very helpful to you is it?
not very helpful to you is it?
FlareSolverr can be customized to solve the captcha's automatically by setting the environment variable FlareSolverr can be customized to solve the captchas automatically by setting the environment variable `CAPTCHA_SOLVER`
`CAPTCHA_SOLVER` to the file name of one of the adapters inside the [/captcha](src/captcha) directory. to the file name of one of the adapters inside the [/captcha](src/captcha) directory.
### [CaptchaHarvester](https://github.com/NoahCardoza/CaptchaHarvester) ### hcaptcha-solver
This method makes use of the [CaptchaHarvester](https://github.com/NoahCardoza/CaptchaHarvester) project which allows users to collect thier own tokens from ReCaptcha V2/V3 and hCaptcha for free. This method makes use of the [hcaptcha-solver](https://github.com/JimmyLaurent/hcaptcha-solver) project which attempts
to solve hCaptcha by randomly selecting images.
To use this method you must set these ENV variables:
```bash
CAPTCHA_SOLVER=harvester
HARVESTER_ENDPOINT=https://127.0.0.1:5000/token
```
**Note**: above I set `HARVESTER_ENDPOINT` to the default configureation
of the captcha harvester's server, but that could change if
you customize the command line flags. Simply put, `HARVESTER_ENDPOINT`
should be set to the URI of the route that returns a token in plain text when called.
### [hcaptcha-solver](https://github.com/JimmyLaurent/hcaptcha-solver)
This method makes use of the [hcaptcha-solver](https://github.com/JimmyLaurent/hcaptcha-solver) project which attempts to solve hcaptcha by randomly selecting images.
To use this solver you must first install it and then set it as the `CAPTCHA_SOLVER`. To use this solver you must first install it and then set it as the `CAPTCHA_SOLVER`.
@@ -224,31 +240,22 @@ npm i hcaptcha-solver
CAPTCHA_SOLVER=hcaptcha-solver CAPTCHA_SOLVER=hcaptcha-solver
``` ```
## Docker ### CaptchaHarvester
You can edit environment variables in `./Dockerfile` and build your own image. This method makes use of the [CaptchaHarvester](https://github.com/NoahCardoza/CaptchaHarvester) project which allows
users to collect their own tokens from ReCaptcha V2/V3 and hCaptcha for free.
To use this method you must set these ENV variables:
```bash ```bash
docker build -t flaresolverr:latest . CAPTCHA_SOLVER=harvester
docker run --restart=always --name flaresolverr -p 8191:8191 -d flaresolverr:latest HARVESTER_ENDPOINT=https://127.0.0.1:5000/token
``` ```
## TypeScript **Note**: above I set `HARVESTER_ENDPOINT` to the default configuration of the captcha harvester's server, but that
could change if you customize the command line flags. Simply put, `HARVESTER_ENDPOINT` should be set to the URI of the
route that returns a token in plain text when called.
I'm quite new to TypeScript. If you spot any funny business or anything that is or isn't being ## Related projects
used properly feel free to submit a PR or open an issue.
## Known issues / Roadmap * C# implementation => https://github.com/FlareSolverr/FlareSolverrSharp
The current implementation seems to be working on the sites I have been testing them on. However, if you find it unable to access a site, open an issue and I'd be happy to investigate.
That being said, the project uses the [puppeteer stealth plugin](https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-stealth). If Cloudflare is able to detect the headless browser, it's more that projects domain to fix.
TODO:
* Fix remaining issues in the code (see TODOs in code)
* Make the maxTimeout more accurate (count the time to open the first page / maybe count the captcha solve time?)
* Hide sensitive information in logs
* Reduce Docker image size
* Docker image for ARM architecture
* Install instructions for Windows

18
docker-compose.yml Normal file
View File

@@ -0,0 +1,18 @@
---
version: "2.1"
services:
flaresolverr:
# DockerHub mirror flaresolverr/flaresolverr:latest
image: ghcr.io/flaresolverr/flaresolverr:latest
container_name: flaresolverr
environment:
# Used to change the verbosity of the logging
- LOG_LEVEL=info
# Enables hcaptcha-solver => https://github.com/JimmyLaurent/hcaptcha-solver
#- CAPTCHA_SOLVER=hcaptcha-solver
# Enables CaptchaHarvester => https://github.com/NoahCardoza/CaptchaHarvester
#- CAPTCHA_SOLVER=harvester
#- HARVESTER_ENDPOINT=https://127.0.0.1:5000/token
ports:
- 8191:8191
restart: unless-stopped

13
package-lock.json generated
View File

@@ -1,6 +1,6 @@
{ {
"name": "flaresolverr", "name": "flaresolverr",
"version": "1.2.0", "version": "1.2.1",
"lockfileVersion": 1, "lockfileVersion": 1,
"requires": true, "requires": true,
"dependencies": { "dependencies": {
@@ -74,6 +74,12 @@
"defer-to-connect": "^2.0.0" "defer-to-connect": "^2.0.0"
} }
}, },
"@types/await-timeout": {
"version": "0.3.1",
"resolved": "https://registry.npmjs.org/@types/await-timeout/-/await-timeout-0.3.1.tgz",
"integrity": "sha512-H5PzROT4KuP7XQDua13Iw8did//OCKAZ/3TL15DjvMzDonrk4HvhH1+tLko96f2guU6XaD3AoqRa49ZOwbwNig==",
"dev": true
},
"@types/cacheable-request": { "@types/cacheable-request": {
"version": "6.0.1", "version": "6.0.1",
"resolved": "https://registry.npmjs.org/@types/cacheable-request/-/cacheable-request-6.0.1.tgz", "resolved": "https://registry.npmjs.org/@types/cacheable-request/-/cacheable-request-6.0.1.tgz",
@@ -288,6 +294,11 @@
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
"integrity": "sha1-x57Zf380y48robyXkLzDZkdLS3k=" "integrity": "sha1-x57Zf380y48robyXkLzDZkdLS3k="
}, },
"await-timeout": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/await-timeout/-/await-timeout-1.1.1.tgz",
"integrity": "sha512-gsDXAS6XVc4Jt+7S92MPX6Noq69bdeXUPEaXd8dk3+yVr629LTDLxNt4j1ycBbrU+AStK2PhKIyNIM+xzWMVOQ=="
},
"aws-sign2": { "aws-sign2": {
"version": "0.7.0", "version": "0.7.0",
"resolved": "https://registry.npmjs.org/aws-sign2/-/aws-sign2-0.7.0.tgz", "resolved": "https://registry.npmjs.org/aws-sign2/-/aws-sign2-0.7.0.tgz",

View File

@@ -1,6 +1,6 @@
{ {
"name": "flaresolverr", "name": "flaresolverr",
"version": "1.2.0", "version": "1.2.1",
"description": "Proxy server to bypass Cloudflare protection.", "description": "Proxy server to bypass Cloudflare protection.",
"scripts": { "scripts": {
"start": "node ./dist/index.js", "start": "node ./dist/index.js",
@@ -20,6 +20,7 @@
"url": "https://github.com/ngosang/FlareSolverr" "url": "https://github.com/ngosang/FlareSolverr"
}, },
"dependencies": { "dependencies": {
"await-timeout": "^1.1.1",
"console-log-level": "^1.4.1", "console-log-level": "^1.4.1",
"got": "^11.5.1", "got": "^11.5.1",
"hcaptcha-solver": "^1.0.2", "hcaptcha-solver": "^1.0.2",
@@ -29,6 +30,7 @@
"uuid": "^8.2.0" "uuid": "^8.2.0"
}, },
"devDependencies": { "devDependencies": {
"@types/await-timeout": "^0.3.1",
"@types/node": "^14.0.23", "@types/node": "^14.0.23",
"@types/puppeteer": "^3.0.1", "@types/puppeteer": "^3.0.1",
"@types/uuid": "^8.0.0", "@types/uuid": "^8.0.0",

View File

@@ -3,7 +3,7 @@ import { createServer, IncomingMessage, ServerResponse } from 'http';
import { RequestContext } from './types' import { RequestContext } from './types'
import Router, { BaseAPICall } from './routes' import Router, { BaseAPICall } from './routes'
const version: string = require('../package.json').version const version: string = "v" + require('../package.json').version
const serverPort: number = Number(process.env.PORT) || 8191 const serverPort: number = Number(process.env.PORT) || 8191
const serverHost: string = process.env.HOST || '0.0.0.0' const serverHost: string = process.env.HOST || '0.0.0.0'
@@ -65,11 +65,19 @@ function validateIncomingRequest(ctx: RequestContext, params: BaseAPICall) {
} }
createServer((req: IncomingMessage, res: ServerResponse) => { createServer((req: IncomingMessage, res: ServerResponse) => {
const startTimestamp = Date.now()
// count the request for the log prefix // count the request for the log prefix
log.incRequests() log.incRequests()
const startTimestamp = Date.now()
log.info(`Incoming request: ${req.method} ${req.url}`) log.info(`Incoming request: ${req.method} ${req.url}`)
// show welcome message
if (req.url == '/') {
successResponse("FlareSolverr is ready!", null, res, startTimestamp);
return;
}
// get request body
const bodyParts: any[] = [] const bodyParts: any[] = []
req.on('data', chunk => { req.on('data', chunk => {
bodyParts.push(chunk) bodyParts.push(chunk)
@@ -102,5 +110,5 @@ createServer((req: IncomingMessage, res: ServerResponse) => {
}) })
}) })
}).listen(serverPort, serverHost, () => { }).listen(serverPort, serverHost, () => {
log.info(`FlareSolverr v${version} listening on http://${serverHost}:${serverPort}`) log.info(`FlareSolverr ${version} listening on http://${serverHost}:${serverPort}`)
}) })

View File

@@ -10,10 +10,10 @@ export default {
}, },
...require('console-log-level')( ...require('console-log-level')(
{ {
level: process.env.LOG_LEVEL || 'debug', level: process.env.LOG_LEVEL || 'info',
prefix(level: string) { prefix(level: string) {
return `${new Date().toISOString()} ${level.toUpperCase()} REQ-${requests}` return `${new Date().toISOString()} ${level.toUpperCase()} REQ-${requests}`
} }
} }
) )
} }

View File

@@ -6,6 +6,7 @@ import { SetCookie, Request, Headers, HttpMethod, Overrides, Cookie } from 'pupp
import { TimeoutError } from 'puppeteer/Errors' import { TimeoutError } from 'puppeteer/Errors'
import getCaptchaSolver, { CaptchaType } from './captcha' import getCaptchaSolver, { CaptchaType } from './captcha'
import * as Puppeteer from "puppeteer-extra/dist/puppeteer"; import * as Puppeteer from "puppeteer-extra/dist/puppeteer";
const Timeout = require('await-timeout');
export interface BaseAPICall { export interface BaseAPICall {
cmd: string cmd: string
@@ -69,7 +70,7 @@ type OverridesProps =
'headers' 'headers'
// We always set a Windows User-Agent because ARM builds are detected by CloudFlare // We always set a Windows User-Agent because ARM builds are detected by CloudFlare
const DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36" const DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
const CHALLENGE_SELECTORS = ['#trk_jschal_js', '.ray_id', '.attack-box'] const CHALLENGE_SELECTORS = ['#trk_jschal_js', '.ray_id', '.attack-box']
const TOKEN_INPUT_NAMES = ['g-recaptcha-response', 'h-captcha-response'] const TOKEN_INPUT_NAMES = ['g-recaptcha-response', 'h-captcha-response']
@@ -119,9 +120,22 @@ async function interceptResponse(page: Puppeteer.Page, callback: (payload: Chall
}); });
} }
async function resolveChallenge(ctx: RequestContext, { url, maxTimeout, proxy, download, returnOnlyCookies }: BaseRequestAPICall, page: Puppeteer.Page): Promise<ChallengeResolutionT | void> { async function resolveChallengeWithTimeout(ctx: RequestContext, params: BaseRequestAPICall, page: Puppeteer.Page) {
const maxTimeout = params.maxTimeout || 60000
const timer = new Timeout();
try {
const promise = resolveChallenge(ctx, params, page);
return await Promise.race([
promise,
timer.set(maxTimeout, `Maximum timeout reached. maxTimeout=${maxTimeout} (ms)`)
]);
} finally {
timer.clear();
}
}
async function resolveChallenge(ctx: RequestContext, { url, proxy, download, returnOnlyCookies }: BaseRequestAPICall, page: Puppeteer.Page): Promise<ChallengeResolutionT | void> {
maxTimeout = maxTimeout || 60000
let status = 'ok' let status = 'ok'
let message = '' let message = ''
@@ -134,6 +148,8 @@ async function resolveChallenge(ctx: RequestContext, { url, maxTimeout, proxy, d
log.debug(`Navigating to... ${url}`) log.debug(`Navigating to... ${url}`)
let response = await page.goto(url, { waitUntil: 'domcontentloaded' }) let response = await page.goto(url, { waitUntil: 'domcontentloaded' })
log.html(await page.content())
// look for challenge // look for challenge
if (response.headers().server.startsWith('cloudflare')) { if (response.headers().server.startsWith('cloudflare')) {
log.info('Cloudflare detected') log.info('Cloudflare detected')
@@ -145,10 +161,12 @@ async function resolveChallenge(ctx: RequestContext, { url, maxTimeout, proxy, d
if (response.status() > 400) { if (response.status() > 400) {
// detect cloudflare wait 5s // detect cloudflare wait 5s
let selectorFoundCount = 0
for (const selector of CHALLENGE_SELECTORS) { for (const selector of CHALLENGE_SELECTORS) {
const cfChallengeElem = await page.$(selector) const cfChallengeElem = await page.$(selector)
if (cfChallengeElem) { if (cfChallengeElem) {
log.html(await page.content()) selectorFoundCount++
log.debug(`'${selector}' challenge element detected.`)
log.debug('Waiting for Cloudflare challenge...') log.debug('Waiting for Cloudflare challenge...')
let interceptingResult: ChallengeResolutionT; let interceptingResult: ChallengeResolutionT;
@@ -158,8 +176,7 @@ async function resolveChallenge(ctx: RequestContext, { url, maxTimeout, proxy, d
}); });
} }
// TODO: find out why these pages hang sometimes while (true) {
while (Date.now() - ctx.startTimestamp < maxTimeout) {
await page.waitFor(1000) await page.waitFor(1000)
try { try {
// catch exception timeout in waitForNavigation // catch exception timeout in waitForNavigation
@@ -181,11 +198,7 @@ async function resolveChallenge(ctx: RequestContext, { url, maxTimeout, proxy, d
response = await page.reload({ waitUntil: 'domcontentloaded' }) response = await page.reload({ waitUntil: 'domcontentloaded' })
log.debug('Reloaded page...') log.debug('Reloaded page...')
} log.html(await page.content())
if (Date.now() - ctx.startTimestamp >= maxTimeout) {
ctx.errorResponse(`Maximum timeout reached. maxTimeout=${maxTimeout} (ms)`)
return
} }
log.debug('Validating HTML code...') log.debug('Validating HTML code...')
@@ -194,6 +207,12 @@ async function resolveChallenge(ctx: RequestContext, { url, maxTimeout, proxy, d
log.debug(`No '${selector}' challenge element detected.`) log.debug(`No '${selector}' challenge element detected.`)
} }
} }
log.debug("Number of selector found: " + selectorFoundCount + ", total selector: " + CHALLENGE_SELECTORS.length)
if (selectorFoundCount == 0)
{
await page.close()
return ctx.errorResponse('No challenge selectors found, unable to proceed')
}
} }
// it seems some captcha pages return 200 sometimes // it seems some captcha pages return 200 sometimes
@@ -203,7 +222,6 @@ async function resolveChallenge(ctx: RequestContext, { url, maxTimeout, proxy, d
const captchaStartTimestamp = Date.now() const captchaStartTimestamp = Date.now()
const challengeForm = await page.$('#challenge-form') const challengeForm = await page.$('#challenge-form')
if (challengeForm) { if (challengeForm) {
log.html(await page.content())
const captchaTypeElm = await page.$('input[name="cf_captcha_kind"]') const captchaTypeElm = await page.$('input[name="cf_captcha_kind"]')
const cfCaptchaType: string = await captchaTypeElm.evaluate((e: any) => e.value) const cfCaptchaType: string = await captchaTypeElm.evaluate((e: any) => e.value)
const captchaType: CaptchaType = (CaptchaType as any)[cfCaptchaType] const captchaType: CaptchaType = (CaptchaType as any)[cfCaptchaType]
@@ -280,6 +298,8 @@ async function resolveChallenge(ctx: RequestContext, { url, maxTimeout, proxy, d
message = 'Captcha detected but no automatic solver is configured.' message = 'Captcha detected but no automatic solver is configured.'
} }
} }
log.debug("Response is: " + response.status())
} }
const payload: ChallengeResolutionT = { const payload: ChallengeResolutionT = {
@@ -406,7 +426,7 @@ const browserRequest = async (ctx: RequestContext, params: BaseRequestAPICall) =
try { try {
const page = await setupPage(ctx, params, session.browser) const page = await setupPage(ctx, params, session.browser)
const data = await resolveChallenge(ctx, params, page) const data = await resolveChallengeWithTimeout(ctx, params, page)
if (data) { if (data) {
const { status } = data const { status } = data