Compare commits

...

76 Commits
v1.2.0 ... v1

Author SHA1 Message Date
ngosang
1b01caaa78 Bump version 1.2.9 2021-08-01 22:11:55 +02:00
ngosang
447c8f67a1 Improve "Execution context was destroyed" error handling 2021-08-01 22:10:53 +02:00
ngosang
9dae74bc28 Implement returnRawHtml parameter. resolves #172 resolves #165 2021-08-01 22:08:55 +02:00
ngosang
4199db5a41 Capture Docker stop signal. resolves #158 2021-08-01 21:37:45 +02:00
ngosang
2a4fae37c0 Reduce Docker image size 20 MB 2021-08-01 21:27:27 +02:00
ngosang
232ddca512 Fix page reload after challenge is solved. resolves #162 resolves #143 2021-08-01 20:34:38 +02:00
ngosang
8572fab781 Avoid loading images/css/fonts to speed up page load 2021-08-01 19:35:26 +02:00
ngosang
fdb3eae051 Improve Cloudflare IP ban detection 2021-08-01 19:32:09 +02:00
ngosang
6dd8206a10 Fix vulnerabilities 2021-08-01 19:15:24 +02:00
ngosang
c4e4d28c8d Bump version 1.2.8 2021-06-01 02:00:39 +02:00
ngosang
543ce89eb6 Improve old JS challenge waiting. Resolves #129 2021-06-01 01:59:57 +02:00
ngosang
0f30e17ef1 Bump version 1.2.7 2021-06-01 01:22:36 +02:00
ngosang
24f1b4ec6f Improvements in Cloudflare redirect detection. Resolves #140 2021-06-01 01:21:06 +02:00
ngosang
f3b30268c3 Fix installation instructions 2021-05-31 22:59:51 +02:00
ngosang
be4354c68d Bump version 1.2.6 2021-05-30 14:58:13 +02:00
ngosang
5242cf3359 Show an error in hcaptcha-solver. Resolves #132 2021-05-30 14:15:08 +02:00
ngosang
c6677f4d84 Handle new Cloudflare challenge. Resolves #135 Resolves #134 2021-05-30 13:40:17 +02:00
ngosang
805a34c9d6 Provide reference Systemd unit file. Resolves #72 2021-05-30 12:16:34 +02:00
ngosang
2f9fe05a76 Update issue template. Resolves #130 2021-05-30 11:44:28 +02:00
ngosang
8961d67a29 Regenerate package-lock.json lockfileVersion 2 2021-05-30 11:41:03 +02:00
ngosang
5da5156851 Fix EACCES: permission denied, open '/tmp/flaresolverr.txt'. Resolves #120 2021-05-30 11:38:20 +02:00
ngosang
05f8ef95d9 Configure timezone with TZ env var. Resolves #109 2021-05-30 11:28:43 +02:00
dependabot[bot]
10f8b83e83 Bump ws from 7.4.1 to 7.4.6 (#137)
Bumps [ws](https://github.com/websockets/ws) from 7.4.1 to 7.4.6.
- [Release notes](https://github.com/websockets/ws/releases)
- [Commits](https://github.com/websockets/ws/compare/7.4.1...7.4.6)

Signed-off-by: dependabot[bot] <support@github.com>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2021-05-30 09:30:00 +02:00
Arias800
6cf948d0e1 Return the redirected URL in the response (#126)
It adds the possibility for the user to get the final url after a redirection.
2021-05-30 09:29:21 +02:00
dependabot[bot]
dcdc70273f Bump hosted-git-info from 2.8.8 to 2.8.9 (#124)
Bumps [hosted-git-info](https://github.com/npm/hosted-git-info) from 2.8.8 to 2.8.9.
- [Release notes](https://github.com/npm/hosted-git-info/releases)
- [Changelog](https://github.com/npm/hosted-git-info/blob/v2.8.9/CHANGELOG.md)
- [Commits](https://github.com/npm/hosted-git-info/compare/v2.8.8...v2.8.9)

Signed-off-by: dependabot[bot] <support@github.com>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2021-05-30 09:28:09 +02:00
dependabot[bot]
e2dc39ee4e Bump lodash from 4.17.20 to 4.17.21 (#125)
Bumps [lodash](https://github.com/lodash/lodash) from 4.17.20 to 4.17.21.
- [Release notes](https://github.com/lodash/lodash/releases)
- [Commits](https://github.com/lodash/lodash/compare/4.17.20...4.17.21)

Signed-off-by: dependabot[bot] <support@github.com>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2021-05-30 09:27:55 +02:00
ngosang
340638ca54 Bump version 1.2.5 2021-04-05 05:27:37 +02:00
ngosang
05abe69df6 Fix memory regression, close test browser 2021-04-05 05:26:45 +02:00
ngosang
e596906c19 Fix release-docker GitHub action 2021-04-04 22:46:48 +02:00
ngosang
8a1b0ea05c Bump version 1.2.4 2021-04-04 22:42:03 +02:00
ngosang
916fbf2c9d Include license in release zips. resolves #75 2021-04-04 22:39:02 +02:00
ngosang
a85e9c2c8c Validate Chrome is working at startup 2021-04-04 22:37:53 +02:00
ngosang
71814a86bc Speedup Docker image build 2021-04-04 22:36:53 +02:00
ngosang
757ec4358a Add health check endpoint 2021-04-04 20:33:07 +02:00
ngosang
f278c7cf8e Update issue template 2021-04-04 19:53:54 +02:00
ngosang
b4c99d8426 Minor improvements in debug traces 2021-04-04 18:42:04 +02:00
ngosang
8aa7723f45 Validate environment variables at startup. resolves #101 2021-04-04 18:02:17 +02:00
ngosang
c48d342b9c Add FlareSolverr logo. resolves #23 2021-01-10 16:19:20 +01:00
ngosang
7c361af204 Bump version 1.2.3 2021-01-10 15:40:09 +01:00
ngosang
6400449344 CI/CD: Generate release changelog from commits. resolves #34 2021-01-10 15:39:10 +01:00
Diego Heras
69c4d9edfa Update README.md 2021-01-10 15:25:42 +01:00
ngosang
85428a32f4 Add donation links 2021-01-10 15:13:17 +01:00
ngosang
ea5e461fb4 Simplify docker-compose.yml 2021-01-10 15:08:39 +01:00
ngosang
a57510aa0d Allow to configure "none" captcha resolver 2021-01-10 15:04:18 +01:00
JoshDi
91d1f0cb4a Override docker-compose.yml variables via .env resolves #64 (#66) 2021-01-10 15:03:30 +01:00
ngosang
7376ef9bc9 Bump version 1.2.2 2021-01-09 00:57:24 +01:00
ngosang
de9c7bcf76 Add documentation for precompiled binaries installation 2021-01-09 00:54:33 +01:00
ngosang
bef9411e1c Add instructions to set environment variables in Windows 2021-01-09 00:40:38 +01:00
ngosang
27ad58b2c6 Build Windows and Linux binaries. resolves #18 2021-01-09 00:30:51 +01:00
Diego Heras
d038944089 Add release badge in the readme 2021-01-09 00:22:42 +01:00
ngosang
a8bc6f5468 CI/CD: Generate release changelog from commits. resolves #34 2021-01-09 00:13:05 +01:00
ngosang
39fdde9a74 Add a notice about captcha solvers 2021-01-08 18:33:44 +01:00
ngosang
8234cdb516 Add Chrome flag --disable-dev-shm-usage to fix crashes. resolves #45 2021-01-08 16:20:35 +01:00
ngosang
66fe775d27 Fix Docker CLI documentation 2021-01-08 16:18:55 +01:00
ngosang
ade05bb7a8 Add traces with captcha solver service. resolves #39 2021-01-08 16:01:28 +01:00
ngosang
5710c08581 Improve logic to detect Cloudflare captcha. resolves #48 2021-01-08 15:45:40 +01:00
ngosang
f1e829fd3a Move Cloudflare provider logic to his own class 2021-01-08 13:01:52 +01:00
ngosang
dfc4383b50 Simplify and document the "return only cookies" parameter 2021-01-08 12:54:04 +01:00
ngosang
d140e9369d Show message when debug log is enabled 2021-01-07 13:49:04 +01:00
Diego Heras
6677329842 Update readme to add more clarifications. resolves #53 (#60) 2021-01-07 12:55:47 +01:00
ilike2burnthing
0f40054a73 issue_template: typo fix (#52) 2020-12-31 14:07:24 +01:00
ngosang
09c9404d5d Bump version 1.2.1 2020-12-20 02:55:02 +01:00
ngosang
9dd0478e69 Change version to match release tag / 1.2.0 => v1.2.0 2020-12-20 02:53:36 +01:00
ngosang
cd4f48721c CI/CD Publish release in GitHub repository. resolves #34 2020-12-20 02:51:28 +01:00
ngosang
89aed86390 Add welcome message in / endpoint 2020-12-20 01:57:34 +01:00
ngosang
a23fa0983f Rewrite request timeout handling (maxTimeout) resolves #42 2020-12-20 01:43:47 +01:00
Alexandre Beloin
d2b680520d Add http status for better logging 2020-12-17 16:01:27 -05:00
Alexandre Beloin
c3b2173f39 Return an error when no selectors are found, #25 2020-12-14 17:06:42 -05:00
Alexandre Beloin
37cd979bf7 Add issue template, fix #32 2020-12-14 16:27:06 -05:00
Alexandre Beloin
54d589464a Moving log.html right after loading the page and add one on reload, fix #30 2020-12-14 15:18:18 -05:00
Alexandre Beloin
7ca880da7c Update User-Agent to match chromium version, ref: #15 (#28) 2020-12-14 09:09:18 +01:00
ngosang
d4d7b93d7e Update install from source code documentation 2020-12-14 00:51:44 +01:00
Diego Heras
743058a37f Update readme to add Docker instructions (#20) 2020-12-13 21:14:07 +01:00
Diego Heras
87b5a6a1c8 Clean up readme (#19) 2020-12-13 20:46:05 +01:00
ngosang
08bec21dfc Add docker-compose 2020-12-13 20:44:10 +01:00
ngosang
f37ce039a1 Change default log level to info 2020-12-13 20:41:33 +01:00
21 changed files with 6975 additions and 418 deletions

View File

@@ -1,6 +1,7 @@
node_modules
npm-debug.log
Dockerfile
.dockerignore
.git
.gitignore
.git/
.github/
.idea/
bin/
dist/
node_modules/
resources/

31
.github/ISSUE_TEMPLATE.md vendored Normal file
View File

@@ -0,0 +1,31 @@
**Please use the search bar** at the top of the page and make sure you are not creating an already submitted issue.
Check closed issues as well, because your issue may have already been fixed.
### How to enable debug and html traces
[Follow the instructions from this wiki page](https://github.com/FlareSolverr/FlareSolverr/wiki/How-to-enable-debug-and-html-trace)
### Environment
* **FlareSolverr version**:
* **Last working FlareSolverr version**:
* **Operating system**:
* **Are you using Docker**: [yes/no]
* **Are you using a proxy or VPN?** [yes/no]
* **Are you using Captcha Solver:** [yes/no]
* **If using captcha solver, which one:**
* **URL to test this issue:**
### Description
[List steps to reproduce the error and details on what happens and what you expected to happen]
### Logged Error Messages
[Place any relevant error messages you noticed from the logs here.]
[Make sure you attach the full logs with your personal information removed in case we need more information]
### Screenshots
[Place any screenshots of the issue here if needed]

View File

@@ -1,4 +1,4 @@
name: publish
name: release-docker
on:
push:
@@ -24,19 +24,19 @@ jobs:
tag-sha: false
-
name: Set up QEMU
uses: docker/setup-qemu-action@v1
uses: docker/setup-qemu-action@v1.0.1
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
-
name: Login to DockerHub
uses: docker/login-action@v1
uses: docker/login-action@v1
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
-
name: Login to GitHub Container Registry
uses: docker/login-action@v1
uses: docker/login-action@v1
with:
registry: ghcr.io
username: ${{ github.repository_owner }}

55
.github/workflows/release.yml vendored Normal file
View File

@@ -0,0 +1,55 @@
name: release
on:
push:
tags:
- 'v*.*.*'
jobs:
build:
name: Create release
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v2
with:
fetch-depth: 0 # get all commits, branches and tags (required for the changelog)
- name: Setup Node
uses: actions/setup-node@v2
with:
node-version: '14'
- name: Build artifacts
run: |
npm install
npm run build
npm run package
- name: Build changelog
id: github_changelog
run: |
changelog=$(git log $(git tag | tail -2 | head -1)..HEAD --no-merges --oneline)
changelog="${changelog//'%'/'%25'}"
changelog="${changelog//$'\n'/'%0A'}"
changelog="${changelog//$'\r'/'%0D'}"
echo "##[set-output name=changelog;]${changelog}"
- name: Create release
id: create_release
uses: actions/create-release@v1
env:
GITHUB_TOKEN: ${{ secrets.GH_PAT }}
with:
tag_name: ${{ github.ref }}
release_name: ${{ github.ref }}
body: ${{ steps.github_changelog.outputs.changelog }}
draft: false
prerelease: false
- name: Upload release artifacts
uses: alexellis/upload-assets@0.2.2
env:
GITHUB_TOKEN: ${{ secrets.GH_PAT }}
with:
asset_paths: '["./bin/*.zip"]'

3
.gitignore vendored
View File

@@ -121,3 +121,6 @@ dist
# Project Development
testing/
# Binaries
bin/

View File

@@ -22,8 +22,8 @@ ENV PUPPETEER_PRODUCT=chrome \
PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium-browser
RUN npm install && \
npm run build && \
rm -rf src tsconfig.json && \
npm prune --production
npm prune --production && \
rm -rf /home/node/.npm
EXPOSE 8191
ENTRYPOINT ["/usr/bin/dumb-init", "--"]

228
README.md
View File

@@ -1,33 +1,84 @@
# FlareSolverr
Proxy server to bypass Cloudflare protection
[![Latest release](https://img.shields.io/github/v/release/FlareSolverr/FlareSolverr)](https://github.com/FlareSolverr/FlareSolverr/releases)
[![Docker Pulls](https://img.shields.io/docker/pulls/flaresolverr/flaresolverr)](https://hub.docker.com/r/flaresolverr/flaresolverr/)
[![GitHub issues](https://img.shields.io/github/issues/FlareSolverr/FlareSolverr)](https://github.com/FlareSolverr/FlareSolverr/issues)
[![GitHub pull requests](https://img.shields.io/github/issues-pr/FlareSolverr/FlareSolverr)](https://github.com/FlareSolverr/FlareSolverr/pulls)
[![Donate PayPal](https://img.shields.io/badge/Donate-PayPal-green.svg)](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=X5NJLLX5GLTV6&source=url)
[![Donate Buy Me A Coffee](https://img.shields.io/badge/Donate-Buy%20me%20a%20coffee-yellow.svg)](https://www.buymeacoffee.com/ngosang)
[![Donate Bitcoin](https://img.shields.io/badge/Donate-Bitcoin-orange.svg)](https://en.cryptobadges.io/donate/13Hcv77AdnFWEUZ9qUpoPBttQsUT7q9TTh)
:warning: This project is in beta state. Some things may not work and the API can change at any time.
See the known issues section.
FlareSolverr is a proxy server to bypass Cloudflare protection.
## How it works
FlareSolverr starts a proxy server and it waits for user requests in an idle state using few resources.
When some request arrives, it uses [puppeteer](https://github.com/puppeteer/puppeteer) with the
[stealth plugin](https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-stealth)
to create a headless browser (Chrome). It opens the URL with user parameters and waits until the
Cloudflare challenge is solved (or timeout). The HTML code and the cookies are sent back to the
user and those cookies can be used to bypass Cloudflare using other HTTP clients.
to create a headless browser (Chrome). It opens the URL with user parameters and waits until the Cloudflare challenge
is solved (or timeout). The HTML code and the cookies are sent back to the user, and those cookies can be used to
bypass Cloudflare using other HTTP clients.
**NOTE**: Web browsers consume a lot of memory. If you are running FlareSolverr on a machine with few RAM,
do not make many requests at once. With each request a new browser is launched.
(It is possible to use a permanent session. However, if you use sessions, you should make sure to close them as soon as you are done using them.)
**NOTE**: Web browsers consume a lot of memory. If you are running FlareSolverr on a machine with little RAM, do not make
many requests at once. With each request a new browser is launched.
It is also possible to use a permanent session. However, if you use sessions, you should make sure to close them as
soon as you are done using them.
## Installation
It requires NodeJS.
### Docker
Run `PUPPETEER_PRODUCT=chrome npm install` to install FlareSolverr dependencies.
It is recommended to install using a Docker container because the project depends on an external browser that is
already included within the image.
Docker images are available in:
* GitHub Registry => https://github.com/orgs/FlareSolverr/packages/container/package/flaresolverr
* DockerHub => https://hub.docker.com/r/flaresolverr/flaresolverr
Supported architectures are:
| Architecture | Tag |
| :----: | --- |
| x86-64 | linux/amd64 |
| ARM64 | linux/arm64 |
| ARM32 | linux/arm/v7 |
We provide a `docker-compose.yml` configuration file. Clone this repository and execute `docker-compose up -d` to start
the container.
If you prefer the `docker cli` execute the following command.
```bash
docker run -d \
--name=flaresolverr \
-p 8191:8191 \
-e LOG_LEVEL=info \
--restart unless-stopped \
ghcr.io/flaresolverr/flaresolverr:latest
```
### Precompiled binaries
This is the recommended way for Windows users.
* Download the [FlareSolverr zip](https://github.com/FlareSolverr/FlareSolverr/releases) from the release's assets. It is available for Windows and Linux.
* Extract the zip file. FlareSolverr executable and chrome folder must be in the same directory.
* Execute FlareSolverr binary. In the environment variables section you can find how to change the configuration.
### From source code
This is the recommended way for macOS users and for developers.
* Install [NodeJS](https://nodejs.org/).
* Clone this repository and open a shell in that path.
* Run `npm install` command to install FlareSolverr dependencies.
* Run `node node_modules/puppeteer/install.js` to install Chromium.
* Run `npm run build` command to compile TypeScript code.
* Run `npm start` command to start FlareSolverr.
### Systemd service
We provide an example Systemd unit file `flaresolverr.service` as reference. You have to modify the file to suit your needs: paths, user and environment variables.
## Usage
First run `npm run build`. Once the TypeScript is compiled, you can use `npm start` to start FlareSolverr.
Example request:
```bash
curl -L -X POST 'http://localhost:8191/v1' \
@@ -35,7 +86,7 @@ curl -L -X POST 'http://localhost:8191/v1' \
--data-raw '{
"cmd": "request.get",
"url":"http://www.google.com/",
"userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36",
"userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleW...",
"maxTimeout": 60000,
"headers": {
"X-Test": "Testing 123..."
@@ -47,23 +98,22 @@ curl -L -X POST 'http://localhost:8191/v1' \
#### + `sessions.create`
This will launch a new browser instance which will retain cookies until you destroy it
with `sessions.destroy`. This comes in handy so you don't have to keep solving challenges
over and over and you won't need to keep sending cookies for the browser to use.
This will launch a new browser instance which will retain cookies until you destroy it with `sessions.destroy`.
This comes in handy, so you don't have to keep solving challenges over and over and you won't need to keep sending
cookies for the browser to use.
This also speeds up the requests since it won't have to launch a new browser instance for
every request.
This also speeds up the requests since it won't have to launch a new browser instance for every request.
Parameter | Notes
|--|--|
session | Optional. The session ID that you want to be assinged to the instance. If one isn't set a random UUID will be assigned.
session | Optional. The session ID that you want to be assigned to the instance. If it isn't set, a random UUID will be assigned.
userAgent | Optional. Will be used by the headless browser.
#### + `sessions.list`
Returns a list of all the active sessions. More for debuging if you are curious to see
how many sessions are running. You should always make sure to properly close each
session when you are done using them as too many may slow your computer down.
Returns a list of all the active sessions. More for debugging if you are curious to see how many sessions are running.
You should always make sure to properly close each session when you are done using them as too many may slow your
computer down.
Example response:
@@ -79,9 +129,8 @@ Example response:
#### + `sessions.destroy`
This will properly shutdown a browser instance and remove all files associaded with it
to free up resources for a new session. Whenever you no longer need to use a session you
should make sure to close it.
This will properly shut down a browser instance and remove all files associated with it to free up resources for a new
session. When you no longer need to use a session you should make sure to close it.
Parameter | Notes
|--|--|
@@ -94,8 +143,10 @@ Parameter | Notes
url | Mandatory
session | Optional. Will send the request from an existing browser instance. If one is not sent it will create a temporary instance that will be destroyed immediately after the request is completed.
headers | Optional. To specify user headers.
maxTimeout | Optional. Max timeout to solve the challenge
cookies | Optional. Will be used by the headless browser. Follow [this](https://github.com/puppeteer/puppeteer/blob/v3.3.0/docs/api.md#pagesetcookiecookies) format
maxTimeout | Optional, default value 60000. Max timeout to solve the challenge in milliseconds.
cookies | Optional. Will be used by the headless browser. Follow [this](https://github.com/puppeteer/puppeteer/blob/v3.3.0/docs/api.md#pagesetcookiecookies) format.
returnOnlyCookies | Optional, default false. Only returns the cookies. Response data, headers and other parts of the response are removed.
returnRawHtml | Optional, default false. The response data will be returned without JS processing. This is useful for JSON or plain text content.
Example response from running the `curl` above:
@@ -117,14 +168,13 @@ Example response from running the `curl` above:
"content-length": "61587",
"x-xss-protection": "0",
"x-frame-options": "SAMEORIGIN",
"set-cookie": "1P_JAR=2020-07-16-04; expires=Sat, 15-Aug-2020 04:15:49 GMT; path=/; domain=.google.com; Secure; SameSite=none\nNID=204=QE3Ocq15XalczqjuDy52HeseG3zAZuJzID3R57g_oeQHyoV5DuvDhpWc4r9IcPoeIYmkr_ZTX_MNOU8IAbtXmVO7Bmq0adb-hpIHaTBIdBk3Ofifp4gO6vZleVuFYfj7ePkHeHdzGoX-en0FvKtd9iofX4O6RiAdEIAnpL7Wge4; expires=Fri, 15-Jan-2021 04:15:49 GMT; path=/; domain=.google.com; Secure; HttpOnly; SameSite=none",
"alt-svc": "h3-29=\":443\"; ma=2592000,h3-27=\":443\"; ma=2592000,h3-25=\":443\"; ma=2592000,h3-T050=\":443\"; ma=2592000,h3-Q050=\":443\"; ma=2592000,h3-Q046=\":443\"; ma=2592000,h3-Q043=\":443\"; ma=2592000,quic=\":443\"; ma=2592000; v=\"46,43\""
"set-cookie": "1P_JAR=2020-07-16-04; expires=Sat..."
},
"response":"<!DOCTYPE html>...",
"cookies": [
{
"name": "NID",
"value": "204=QE3Ocq15XalczqjuDy52HeseG3zAZuJzID3R57g_oeQHyoV5DuvDhpWc4r9IcPoeIYmkr_ZTX_MNOU8IAbtXmVO7Bmq0adb-hpIHaTBIdBk3Ofifp4gO6vZleVuFYfj7ePkHeHdzGoX-en0FvKtd9iofX4O6RiAdEIAnpL7Wge4",
"value": "204=QE3Ocq15XalczqjuDy52HeseG3zAZuJzID3R57...",
"domain": ".google.com",
"path": "/",
"expires": 1610684149.307722,
@@ -147,7 +197,7 @@ Example response from running the `curl` above:
"sameSite": "None"
}
],
"userAgent": "Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
"userAgent": "Windows NT 10.0; Win64; x64) AppleWebKit/5..."
},
"status": "ok",
"message": "",
@@ -165,90 +215,74 @@ Parameter | Notes
|--|--|
postData | Must be a string. If you want to POST a form, don't forget to set the `Content-Type` header to `application/x-www-form-urlencoded` or the server might not understand your request.
## Downloading Images and PDFs (small files)
### Download small files
If you need to access an image/pdf or small file, you should pass the `download` parameter to
`request.get` setting it to `true`. Rather than access the html and return text it will
return a the buffer **base64** encoded which you will be able to decode and save the image/pdf.
If you need to access an image/pdf or small file, you should pass the `download` parameter to `request.get` setting it
to `true`. Rather than access the html and return text it will return the buffer **base64** encoded which you will be
able to decode and save the image/pdf.
This method isn't recommended for videos or anything larger. As that should be streamed back to
the client and at the moment there is nothing setup to do so. If this is something you need feel
free to create an issue and/or submit a PR.
This method isn't recommended for videos or anything larger, as that should be streamed back to the client and at the
moment there is nothing set up to do so. If this is something you need, feel free to create an issue and/or submit a PR.
## Environment variables
To set the environment vars in Linux run `export LOG_LEVEL=debug` and then start FlareSolverr in the same shell.
Name | Default | Notes
|--|--|--|
LOG_LEVEL | info | Used to change the verbosity of the logging.
LOG_HTML | false | Used for debugging. If `true` all html that passes through the proxy will be logged to the console.
PORT | 8191 | Change this if you already have a process running on port `8191`.
HOST | 0.0.0.0 | This shouldn't need to be messed with but if you insist, it's here!
CAPTCHA_SOLVER | None | This is used to select which captcha solving method it used when a captcha is encounted.
HEADLESS | true | This is used to debug the browser by not running it in headless mode.
LOG_LEVEL | info | Verbosity of the logging. Use `LOG_LEVEL=debug` for more information.
LOG_HTML | false | Only for debugging. If `true` all HTML that passes through the proxy will be logged to the console in `debug` level.
CAPTCHA_SOLVER | none | Captcha solving method. It is used when a captcha is encountered. See the Captcha Solvers section.
TZ | UTC | Timezone used in the logs and the web browser. Example: `TZ=Europe/London`.
HEADLESS | true | Only for debugging. To run the web browser in headless mode or visible.
PORT | 8191 | Listening port. You don't need to change this if you are running on Docker.
HOST | 0.0.0.0 | Listening interface. You don't need to change this if you are running on Docker.
Environment variables are set differently depending on the operating system. Some examples:
* Docker: Take a look at the Docker section in this document. Environment variables can be set in the `docker-compose.yml` file or in the Docker CLI command.
* Linux: Run `export LOG_LEVEL=debug` and then start FlareSolverr in the same shell.
* Windows: Open `cmd.exe`, run `set LOG_LEVEL=debug` and then start FlareSolverr in the same shell.
## Captcha Solvers
Sometimes CF not only gives mathmatical computations and browser tests, sometimes they also require
the user to solve a captcha. If this is the case, FlareSolverr will return the captcha page. But that's
not very helpful to you is it?
:warning: At this time none of the captcha solvers work. You can check the status in the open issues. Any help is welcome.
FlareSolverr can be customized to solve the captcha's automatically by setting the environment variable
`CAPTCHA_SOLVER` to the file name of one of the adapters inside the [/captcha](src/captcha) directory.
Sometimes Cloudflare not only gives mathematical computations and browser tests, but also requires the user to
solve a captcha.
If this is the case, FlareSolverr will return the error `Captcha detected but no automatic solver is configured.`
### [CaptchaHarvester](https://github.com/NoahCardoza/CaptchaHarvester)
FlareSolverr can be customized to solve the captchas automatically by setting the environment variable `CAPTCHA_SOLVER`
to the file name of one of the adapters inside the [/captcha](src/captcha) directory.
This method makes use of the [CaptchaHarvester](https://github.com/NoahCardoza/CaptchaHarvester) project which allows users to collect thier own tokens from ReCaptcha V2/V3 and hCaptcha for free.
### hcaptcha-solver
To use this method you must set these ENV variables:
This method makes use of the [hcaptcha-solver](https://github.com/JimmyLaurent/hcaptcha-solver) project.
NOTE: This solver works by picking random images, so it will fail in a lot of requests and it's hard to know if it is
working or not. In a real use case with Sonarr/Radarr + Jackett it is still useful because those apps make a new request
every 15 minutes. Eventually one of the requests is going to work and Jackett saves the cookie forever (until it stops
working).
To use this solver you must set the environment variable:
```bash
CAPTCHA_SOLVER=hcaptcha-solver
```
### CaptchaHarvester
This method makes use of the [CaptchaHarvester](https://github.com/NoahCardoza/CaptchaHarvester) project which allows
users to collect their own tokens from ReCaptcha V2/V3 and hCaptcha for free.
To use this method you must set these environment variables:
```bash
CAPTCHA_SOLVER=harvester
HARVESTER_ENDPOINT=https://127.0.0.1:5000/token
```
**Note**: above I set `HARVESTER_ENDPOINT` to the default configureation
of the captcha harvester's server, but that could change if
you customize the command line flags. Simply put, `HARVESTER_ENDPOINT`
should be set to the URI of the route that returns a token in plain text when called.
**Note**: above I set `HARVESTER_ENDPOINT` to the default configuration of the captcha harvester's server, but that
could change if you customize the command line flags. Simply put, `HARVESTER_ENDPOINT` should be set to the URI of the
route that returns a token in plain text when called.
### [hcaptcha-solver](https://github.com/JimmyLaurent/hcaptcha-solver)
## Related projects
This method makes use of the [hcaptcha-solver](https://github.com/JimmyLaurent/hcaptcha-solver) project which attempts to solve hcaptcha by randomly selecting images.
To use this solver you must first install it and then set it as the `CAPTCHA_SOLVER`.
```bash
npm i hcaptcha-solver
CAPTCHA_SOLVER=hcaptcha-solver
```
## Docker
You can edit environment variables in `./Dockerfile` and build your own image.
```bash
docker build -t flaresolverr:latest .
docker run --restart=always --name flaresolverr -p 8191:8191 -d flaresolverr:latest
```
## TypeScript
I'm quite new to TypeScript. If you spot any funny business or anything that is or isn't being
used properly feel free to submit a PR or open an issue.
## Known issues / Roadmap
The current implementation seems to be working on the sites I have been testing them on. However, if you find it unable to access a site, open an issue and I'd be happy to investigate.
That being said, the project uses the [puppeteer stealth plugin](https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-stealth). If Cloudflare is able to detect the headless browser, it's more that projects domain to fix.
TODO:
* Fix remaining issues in the code (see TODOs in code)
* Make the maxTimeout more accurate (count the time to open the first page / maybe count the captcha solve time?)
* Hide sensitive information in logs
* Reduce Docker image size
* Docker image for ARM architecture
* Install instructions for Windows
* C# implementation => https://github.com/FlareSolverr/FlareSolverrSharp

85
build-binaries.js Normal file
View File

@@ -0,0 +1,85 @@
const fs = require('fs')
const path = require('path')
const { execSync } = require('child_process')
const archiver = require('archiver')
const puppeteer = require('puppeteer')
const version = 'v' + require('./package.json').version;
/**
 * Release packaging script (run through `npm run package`).
 *
 * Steps:
 *  1. Compile the project into standalone executables with `pkg` (output in ./bin).
 *  2. For every build target, download the pinned Chrome revision with the
 *     puppeteer browser fetcher.
 *  3. Bundle the executable, the Chrome folder and the license into
 *     bin/flaresolverr-<version>-<target>.zip.
 *
 * Any failure rejects the async function; the trailing `.catch` logs the error
 * and exits with a non-zero code so that a CI job running this script fails
 * instead of silently producing no artifacts.
 */
(async () => {
  // One entry per release artifact:
  //   platform      - platform name given to the puppeteer browser fetcher
  //   version       - Chrome revision to download
  //   chromeFolder  - folder name inside the downloaded Chrome revision
  //   fsExec        - executable name produced by pkg inside ./bin
  //   fsZipExec     - executable name inside the zip
  //   fsZipName     - suffix of the zip file name
  //   fsLicenseName - license file name inside the zip
  const builds = [
    {
      platform: 'linux',
      version: 756035,
      chromeFolder: 'chrome-linux',
      fsExec: 'flaresolverr-linux',
      fsZipExec: 'flaresolverr',
      fsZipName: 'linux-x64',
      fsLicenseName: 'LICENSE'
    },
    {
      platform: 'win64',
      version: 756035,
      chromeFolder: 'chrome-win',
      fsExec: 'flaresolverr-win.exe',
      fsZipExec: 'flaresolverr.exe',
      fsZipName: 'windows-x64',
      fsLicenseName: 'LICENSE.txt'
    }
    // TODO: this is working but changes are required in session.ts to find chrome path
    // {
    //   platform: 'mac',
    //   version: 756035,
    //   chromeFolder: 'chrome-mac',
    //   fsExec: 'flaresolverr-macos',
    //   fsZipExec: 'flaresolverr',
    //   fsZipName: 'macos',
    //   fsLicenseName: 'LICENSE'
    // }
  ]

  // generate executables (start from a clean ./bin folder)
  console.log('Generating executables...')
  if (fs.existsSync('bin')) {
    fs.rmSync('bin', { recursive: true })
  }
  execSync('pkg -t node14-win-x64,node14-linux-x64 --out-path bin .')
  // execSync('pkg -t node14-win-x64,node14-mac-x64,node14-linux-x64 --out-path bin .')

  // download Chrome and zip together
  for (const build of builds) {
    console.log('Building ' + build.fsZipName + ' artifact')

    // download chrome
    console.log('Downloading Chrome...')
    const fetcher = puppeteer.createBrowserFetcher({
      platform: build.platform,
      path: path.join(__dirname, 'bin', 'puppeteer')
    })
    await fetcher.download(build.version)

    // compress in zip
    console.log('Compressing zip file...')
    const zipName = 'bin/flaresolverr-' + version + '-' + build.fsZipName + '.zip'
    const output = fs.createWriteStream(zipName)
    const archive = archiver('zip')

    // Settles once the destination file has been closed, or rejects on the
    // first stream error. Errors are routed into the promise chain instead of
    // being thrown from inside an event handler.
    const written = new Promise((resolve, reject) => {
      output.on('close', resolve)
      output.on('error', reject)
      archive.on('error', reject)
    })

    archive.pipe(output)
    archive.file('LICENSE', { name: 'flaresolverr/' + build.fsLicenseName })
    archive.file('bin/' + build.fsExec, { name: 'flaresolverr/' + build.fsZipExec })
    archive.directory(
      'bin/puppeteer/' + build.platform + '-' + build.version + '/' + build.chromeFolder,
      'flaresolverr/chrome'
    )
    if (build.platform === 'linux') {
      // the reference Systemd unit file is only useful on Linux
      archive.file('flaresolverr.service', { name: 'flaresolverr/flaresolverr.service' })
    }

    // Wait for both the archiver and the output file: the zip is only
    // guaranteed to be complete on disk once the write stream has closed.
    await Promise.all([archive.finalize(), written])
    console.log('File ' + zipName + ' created. Size: ' + archive.pointer() + ' bytes')
  }
})().catch((err) => {
  // Without this handler a failure would surface as an unhandled rejection,
  // which does not reliably produce a non-zero exit code on older Node versions.
  console.error(err)
  process.exit(1)
})

15
docker-compose.yml Normal file
View File

@@ -0,0 +1,15 @@
---
version: "2.1"
services:
flaresolverr:
# DockerHub mirror flaresolverr/flaresolverr:latest
image: ghcr.io/flaresolverr/flaresolverr:latest
container_name: flaresolverr
environment:
- LOG_LEVEL=${LOG_LEVEL:-info}
- LOG_HTML=${LOG_HTML:-false}
- CAPTCHA_SOLVER=${CAPTCHA_SOLVER:-none}
- TZ=Europe/London
ports:
- "${PORT:-8191}:8191"
restart: unless-stopped

19
flaresolverr.service Normal file
View File

@@ -0,0 +1,19 @@
[Unit]
Description=FlareSolverr
After=network.target
[Service]
SyslogIdentifier=flaresolverr
Restart=always
RestartSec=5
Type=simple
User=flaresolverr
Group=flaresolverr
Environment="LOG_LEVEL=info"
Environment="CAPTCHA_SOLVER=none"
WorkingDirectory=/opt/flaresolverr
ExecStart=/opt/flaresolverr/flaresolverr
TimeoutStopSec=30
[Install]
WantedBy=multi-user.target

6014
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,11 +1,12 @@
{
"name": "flaresolverr",
"version": "1.2.0",
"version": "1.2.9",
"description": "Proxy server to bypass Cloudflare protection.",
"scripts": {
"start": "node ./dist/index.js",
"build": "tsc",
"dev": "nodemon -e ts --exec ts-node src/index.ts"
"dev": "nodemon -e ts --exec ts-node src/index.ts",
"package": "node build-binaries.js"
},
"author": "Diego Heras (ngosang)",
"contributors": [
@@ -19,7 +20,16 @@
"type": "git",
"url": "https://github.com/ngosang/FlareSolverr"
},
"pkg": {
"assets": [
"node_modules/puppeteer-extra-plugin-stealth/**/*.*"
]
},
"bin": {
"flaresolverr": "dist/index.js"
},
"dependencies": {
"await-timeout": "^1.1.1",
"console-log-level": "^1.4.1",
"got": "^11.5.1",
"hcaptcha-solver": "^1.0.2",
@@ -29,9 +39,11 @@
"uuid": "^8.2.0"
},
"devDependencies": {
"@types/await-timeout": "^0.3.1",
"@types/node": "^14.0.23",
"@types/puppeteer": "^3.0.1",
"@types/uuid": "^8.0.0",
"archiver": "^5.2.0",
"eslint": "^7.5.0",
"eslint-config-airbnb-base": "^14.2.0",
"eslint-config-standard": "^14.1.1",
@@ -40,6 +52,7 @@
"eslint-plugin-promise": "^4.2.1",
"eslint-plugin-standard": "^4.0.1",
"nodemon": "^2.0.4",
"pkg": "^4.4.9",
"ts-node": "^8.10.2",
"typescript": "^3.9.7"
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.1 KiB

View File

@@ -0,0 +1,180 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
width="256"
height="256"
viewBox="0 0 256 256"
id="svg2"
version="1.1"
inkscape:version="0.91 r13725"
sodipodi:docname="flaresolverr_logo.svg"
inkscape:export-filename="C:\Users\Diego\Desktop\flaresolverr_logo.png"
inkscape:export-xdpi="90"
inkscape:export-ydpi="90">
<defs
id="defs4" />
<sodipodi:namedview
id="base"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:pageopacity="0.0"
inkscape:pageshadow="2"
inkscape:zoom="0.64"
inkscape:cx="-88.263072"
inkscape:cy="-93.571587"
inkscape:document-units="px"
inkscape:current-layer="layer1"
showgrid="false"
inkscape:window-width="2560"
inkscape:window-height="1377"
inkscape:window-x="-8"
inkscape:window-y="-8"
inkscape:window-maximized="1"
units="px"
showborder="true" />
<metadata
id="metadata7">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:title></dc:title>
</cc:Work>
</rdf:RDF>
</metadata>
<g
inkscape:label="Capa 1"
inkscape:groupmode="layer"
id="layer1"
transform="translate(0,-796.36219)">
<g
id="g4177"
transform="matrix(0.51436047,0,0,0.59495735,-334.60687,650.43877)">
<g
id="g4141" />
<g
id="g4143" />
<g
id="g4145" />
<g
id="g4147" />
<g
id="g4149" />
<g
id="g4151" />
<g
id="g4153" />
<g
id="g4155" />
<g
id="g4157" />
<g
id="g4159" />
<g
id="g4161" />
<g
id="g4163" />
<g
id="g4165" />
<g
id="g4167" />
<g
id="g4169" />
<g
id="g4263"
transform="matrix(0.94954959,0,0,0.94954959,-111.49858,393.65111)">
<g
id="g4269" />
<g
id="g4342"
transform="translate(736.24631,-345.97247)">
<path
style="fill:#9dc6fb"
inkscape:connector-curvature="0"
d="m 584.32729,454.42324 c -0.995,-51.995 -44.49,-93.257 -96.488,-91.376 -7.616,0.273 -14.792,-3.862 -18.446,-10.55 -22.605,-41.376 -66.519,-69.441 -116.989,-69.441 -51.757,0 -96.596,29.528 -118.647,72.648 -6.423,12.56 -19.224,10.9 -24.689,10.9 -40.126,0 -74.199,25.852 -86.512,61.804 -2.046,5.973 -6.938,10.463 -12.894,12.556 -22.389998,7.87 -38.250998,29.605 -37.275998,54.902 1.163,30.174 26.849,53.631 57.044998,53.631 l 359.817,0 c 52.291,0 96.08,-42.793 95.079,-95.074 z"
id="path4285" />
<path
style="fill:#80b4fb"
inkscape:connector-curvature="0"
d="m 190.59629,495.86724 c -0.975,-25.298 14.885,-47.033 37.276,-54.902 5.956,-2.094 10.848,-6.584 12.894,-12.556 12.313,-35.952 46.385,-61.804 86.512,-61.804 5.465,0 18.265,1.66 24.688,-10.9 13.005,-25.43 33.94,-46.125 59.541,-58.832 -17.812,-8.834 -37.873,-13.816 -59.103,-13.816 -51.757,0 -96.596,29.528 -118.647,72.648 -6.423,12.56 -19.224,10.9 -24.689,10.9 -40.126,0 -74.199,25.852 -86.512,61.804 -2.046,5.973 -6.938,10.463 -12.894,12.556 -22.389998,7.87 -38.250998,29.605 -37.275998,54.902 1.163,30.174 26.849,53.63 57.044998,53.63 l 118.21,0 c -30.196,0 -55.881,-23.457 -57.045,-53.63 z"
id="path4287" />
</g>
</g>
</g>
<g
id="g4241"
transform="matrix(0.1453379,0,0,0.1453379,47.012211,854.83732)"
style="fill:#4d4d4d">
<g
id="g4197"
style="fill:#4d4d4d;fill-opacity:1">
<path
id="path4201"
d="m 867.699,356.238 -31.5,-26.6 c -9.699,-8.2 -24,-7.8 -33.199,0.9 l -17.4,16.3 c -14.699,-7.1 -30.299,-12.1 -46.4,-15 l -4.898,-24 c -2.5,-12.4 -14,-21 -26.602,-20 l -41.1,3.5 c -12.6,1.1 -22.5,11.4 -22.9,24.1 l -0.799,24.4 c -15.801,5.7 -30.701,13.5 -44.301,23.3 l -20.799,-13.8 c -10.602,-7 -24.701,-5 -32.9,4.7 l -26.6,31.7 c -8.201,9.7 -7.801,24 0.898,33.2 l 18.201,19.399 c -6.301,14.2 -10.801,29.101 -13.4,44.4 l -26,5.3 c -12.4,2.5 -21,14 -20,26.601 l 3.5,41.1 c 1.1,12.6 11.4,22.5 24.1,22.9 l 28.1,0.899 c 5.102,13.4 11.801,26.101 19.9,38 l -15.699,23.7 c -7,10.6 -5,24.7 4.699,32.9 l 31.5,26.6 c 9.701,8.2 24,7.8 33.201,-0.9 l 20.6,-19.3 c 13.5,6.3 27.699,11 42.299,13.8 l 5.701,28.2 c 2.5,12.4 14,21 26.6,20 l 41.1,-3.5 c 12.6,-1.1 22.5,-11.399 22.9,-24.1 l 0.9,-27.601 c 15,-5.3 29.199,-12.5 42.299,-21.399 l 22.701,15 c 10.6,7 24.699,5 32.9,-4.7 l 26.6,-31.5 c 8.199,-9.7 7.799,-24 -0.9,-33.2 L 872.7,592.138 c 6.701,-14.2 11.602,-29.2 14.4,-44.601 l 25,-5.1 c 12.4,-2.5 21,-14 20,-26.601 l -3.5,-41.1 c -1.1,-12.6 -11.4,-22.5 -24.1,-22.9 l -25.1,-0.8 c -5.201,-14.6 -12.201,-28.399 -20.9,-41.2 l 13.699,-20.6 c 7.201,-10.598 5.201,-24.798 -4.5,-32.998 z M 712.801,593.837 c -44.4,3.801 -83.602,-29.3 -87.301,-73.699 -3.801,-44.4 29.301,-83.601 73.699,-87.301 44.4,-3.8 83.602,29.301 87.301,73.7 3.801,44.401 -29.301,83.601 -73.699,87.3 z"
inkscape:connector-curvature="0"
style="fill:#4d4d4d;fill-opacity:1" />
<path
id="path4203"
d="m 205,704.438 c -12.6,1.3 -22.3,11.899 -22.4,24.6 l -0.3,25.3 c -0.2,12.7 9.2,23.5 21.8,25.101 l 18.6,2.399 c 3.1,11.301 7.5,22.101 13.2,32.301 l -12,14.8 c -8,9.899 -7.4,24.1 1.5,33.2 l 17.7,18.1 c 8.9,9.1 23.1,10.1 33.2,2.3 l 14.899,-11.5 c 10.5,6.2 21.601,11.101 33.2,14.5 l 2,19.2 c 1.3,12.6 11.9,22.3 24.6,22.4 l 25.301,0.3 c 12.699,0.2 23.5,-9.2 25.1,-21.8 l 2.3,-18.2 c 12.601,-3.101 24.601,-7.8 36,-14 l 14,11.3 c 9.9,8 24.101,7.4 33.201,-1.5 l 18.1,-17.7 c 9.1,-8.899 10.1,-23.1 2.301,-33.2 L 496.6,818.438 c 6.6,-11 11.701,-22.7 15.201,-35 l 16.6,-1.7 c 12.6,-1.3 22.299,-11.9 22.4,-24.6 l 0.299,-25.301 c 0.201,-12.699 -9.199,-23.5 -21.799,-25.1 l -16.201,-2.1 c -3.1,-12.2 -7.699,-24 -13.699,-35 l 10.1,-12.4 c 8,-9.9 7.4,-24.1 -1.5,-33.2 l -17.699,-18.1 c -8.9,-9.101 -23.102,-10.101 -33.201,-2.3 l -12.101,9.3 c -11.399,-6.9 -23.6,-12.2 -36.399,-15.8 L 407,581.437 c -1.3,-12.601 -11.899,-22.3 -24.6,-22.4 l -25.3,-0.3 c -12.7,-0.2 -23.5,9.2 -25.101,21.8 l -2,15.601 c -13.199,3.399 -25.899,8.6 -37.699,15.399 l -12.5,-10.2 c -9.9,-8 -24.101,-7.399 -33.201,1.5 l -18.2,17.801 c -9.1,8.899 -10.1,23.1 -2.3,33.199 l 10.7,13.801 c -6.2,11 -11.1,22.699 -14.3,35 l -17.499,1.8 z m 163.3,-28.601 c 36.3,0.4 65.399,30.301 65,66.601 -0.4,36.3 -30.301,65.399 -66.601,65 -36.3,-0.4 -65.399,-30.3 -65,-66.601 0.401,-36.299 30.301,-65.399 66.601,-65 z"
inkscape:connector-curvature="0"
style="fill:#4d4d4d;fill-opacity:1" />
</g>
<g
id="g4205"
style="fill:#4d4d4d" />
<g
id="g4207"
style="fill:#4d4d4d" />
<g
id="g4209"
style="fill:#4d4d4d" />
<g
id="g4211"
style="fill:#4d4d4d" />
<g
id="g4213"
style="fill:#4d4d4d" />
<g
id="g4215"
style="fill:#4d4d4d" />
<g
id="g4217"
style="fill:#4d4d4d" />
<g
id="g4219"
style="fill:#4d4d4d" />
<g
id="g4221"
style="fill:#4d4d4d" />
<g
id="g4223"
style="fill:#4d4d4d" />
<g
id="g4225"
style="fill:#4d4d4d" />
<g
id="g4227"
style="fill:#4d4d4d" />
<g
id="g4229"
style="fill:#4d4d4d" />
<g
id="g4231"
style="fill:#4d4d4d" />
<g
id="g4233"
style="fill:#4d4d4d" />
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 8.0 KiB

View File

@@ -11,11 +11,15 @@ import { SolverOptions } from '.'
*/
export default async function solve({ url }: SolverOptions): Promise<string> {
// This solver is currently disabled: the function unconditionally throws, so the
// returned promise always rejects with the message below and `url` is never used.
throw new Error("hcaptcha-solver is not able to solve the new hCaptcha challenge. This issue is already reported #31.");
// Commented-out call into the hcaptcha-solver package. It is unreachable while the
// throw above is in place.
/*
try {
const token = await solveCaptcha(url)
return token
return await solveCaptcha(url)
} catch (e) {
console.error(e)
return null
}
*/
}

View File

@@ -1,3 +1,5 @@
import log from "../log";
export enum CaptchaType {
re = 'reCaptcha',
h = 'hCaptcha'
@@ -16,7 +18,9 @@ const captchaSolvers: { [key: string]: Solver } = {}
export default (): Solver => {
const method = process.env.CAPTCHA_SOLVER
if (!method) { return null }
if (!method || method.toLowerCase() == 'none') {
return null;
}
if (!(method in captchaSolvers)) {
try {
@@ -26,10 +30,12 @@ export default (): Solver => {
throw Error(`The solver '${method}' is not a valid captcha solving method.`)
} else {
console.error(e)
throw Error(`An error occured loading the solver '${method}'.`)
throw Error(`An error occurred loading the solver '${method}'.`)
}
}
}
log.info(`Using '${method}' to solve the captcha.`);
return captchaSolvers[method]
}
}

View File

@@ -1,12 +1,64 @@
const fs = require('fs');
const os = require('os');
const path = require('path');
const process = require('process')
import log from './log'
import { createServer, IncomingMessage, ServerResponse } from 'http';
import { RequestContext } from './types'
import Router, { BaseAPICall } from './routes'
import getCaptchaSolver from "./captcha";
import sessions from "./session";
import {v1 as UUIDv1} from "uuid";
const version: string = require('../package.json').version
const version: string = "v" + require('../package.json').version
const serverPort: number = Number(process.env.PORT) || 8191
const serverHost: string = process.env.HOST || '0.0.0.0'
/**
 * Checks the environment variables read at startup and terminates the process
 * with exit code 1 (after logging an error) as soon as one of them is invalid.
 *
 * Checked, in order: LOG_LEVEL, LOG_HTML, HEADLESS and CAPTCHA_SOLVER. Unset or
 * empty variables are accepted. HOST and PORT are not checked here.
 */
function validateEnvironmentVariables() {
  // ip and port variables are validated by nodejs

  // Logs the problem and stops the process with a failure exit code.
  const abort = (message: string) => {
    log.error(message);
    process.exit(1);
  };

  const allowedLogLevels = ['error', 'warn', 'info', 'verbose', 'debug'];
  const allowedBooleans = ['true', 'false'];

  const logLevel = process.env.LOG_LEVEL;
  if (logLevel && allowedLogLevels.indexOf(logLevel) < 0) {
    abort(`The environment variable 'LOG_LEVEL' is wrong. Check the documentation.`);
  }

  const logHtml = process.env.LOG_HTML;
  if (logHtml && allowedBooleans.indexOf(logHtml) < 0) {
    abort(`The environment variable 'LOG_HTML' is wrong. Check the documentation.`);
  }

  const headless = process.env.HEADLESS;
  if (headless && allowedBooleans.indexOf(headless) < 0) {
    abort(`The environment variable 'HEADLESS' is wrong. Check the documentation.`);
  }

  // CAPTCHA_SOLVER is validated by trying to resolve the solver; a throw means
  // the configured value cannot be used.
  try {
    getCaptchaSolver();
  } catch (e) {
    abort(`The environment variable 'CAPTCHA_SOLVER' is wrong. ${e.message}`);
  }
}
/**
 * Startup self-test: writes a small text file to the OS temp directory, opens
 * it through a one-time browser session and verifies that the browser returns
 * exactly the content that was written.
 *
 * Rejects if the session cannot be created, the page cannot be loaded, or the
 * body read back differs from the written content. The temporary file is
 * removed on both the success and the failure path, so repeated starts do not
 * accumulate `flaresolverr_<uuid>.txt` files in the temp directory.
 */
async function testChromeInstallation() {
  const sessionId = UUIDv1()
  // create a temporary file for testing
  log.debug("Testing Chrome installation...")
  const fileContent = `flaresolverr_${version}`
  const filePath = path.join(os.tmpdir(), `flaresolverr_${sessionId}.txt`)
  const fileUrl = `file://${filePath}`
  fs.writeFileSync(filePath, fileContent)

  try {
    // launch the browser
    const session = await sessions.create(sessionId, {
      userAgent: null,
      oneTimeSession: true
    })
    const page = await session.browser.newPage()
    const response = await page.goto(fileUrl, { waitUntil: 'domcontentloaded' })
    const responseBody = (await response.buffer()).toString().trim()
    if (responseBody != fileContent) {
      throw new Error("The response body does not match!")
    }
    await page.close()
    await sessions.destroy(sessionId)
  } finally {
    // Best-effort cleanup: a failure to delete the file must not hide the
    // result (or the error) of the browser test itself.
    try {
      fs.unlinkSync(filePath)
    } catch (e) {
      log.debug("Unable to remove temporary file " + filePath + ": " + e)
    }
  }
  log.debug("Test successful")
}
function errorResponse(errorMsg: string, res: ServerResponse, startTimestamp: number) {
log.error(errorMsg)
@@ -26,7 +78,7 @@ function errorResponse(errorMsg: string, res: ServerResponse, startTimestamp: nu
function successResponse(successMsg: string, extendedProperties: object, res: ServerResponse, startTimestamp: number) {
const endTimestamp = Date.now()
log.info(`Successful response in ${(endTimestamp - startTimestamp) / 1000} s`)
log.info(`Response in ${(endTimestamp - startTimestamp) / 1000} s`)
if (successMsg) { log.info(successMsg) }
const response = Object.assign({
@@ -64,43 +116,80 @@ function validateIncomingRequest(ctx: RequestContext, params: BaseAPICall) {
return true
}
createServer((req: IncomingMessage, res: ServerResponse) => {
// count the request for the log prefix
log.incRequests()
// init
log.info(`FlareSolverr ${version}`);
log.debug('Debug log enabled');
const startTimestamp = Date.now()
log.info(`Incoming request: ${req.method} ${req.url}`)
const bodyParts: any[] = []
req.on('data', chunk => {
bodyParts.push(chunk)
}).on('end', () => {
// parse params
const body = Buffer.concat(bodyParts).toString()
let params: BaseAPICall = null
try {
params = JSON.parse(body)
} catch (err) {
errorResponse('Body must be in JSON format', res, startTimestamp)
return
}
const ctx: RequestContext = {
req,
res,
startTimestamp,
errorResponse: (msg) => errorResponse(msg, res, startTimestamp),
successResponse: (msg, extendedProperties) => successResponse(msg, extendedProperties, res, startTimestamp)
}
// validate params
if (!validateIncomingRequest(ctx, params)) { return }
// process request
Router(ctx, params).catch(e => {
console.error(e)
ctx.errorResponse(e.message)
})
})
}).listen(serverPort, serverHost, () => {
log.info(`FlareSolverr v${version} listening on http://${serverHost}:${serverPort}`)
process.on('SIGTERM', () => {
// Capture signal on Docker Stop #158
log.info("Process interrupted")
process.exit(0)
})
validateEnvironmentVariables();
testChromeInstallation()
.catch(e => {
log.error("Error starting Chrome browser.", e);
process.exit(1);
})
.then(r =>
createServer((req: IncomingMessage, res: ServerResponse) => {
const startTimestamp = Date.now()
// health endpoint. this endpoint is special because it doesn't print traces
if (req.url == '/health') {
res.writeHead(200, {
'Content-Type': 'application/json'
})
res.write(JSON.stringify({"status": "ok"}))
res.end()
return;
}
// count the request for the log prefix
log.incRequests()
log.info(`Incoming request: ${req.method} ${req.url}`)
// show welcome message
if (req.url == '/') {
successResponse("FlareSolverr is ready!", null, res, startTimestamp);
return;
}
// get request body
const bodyParts: any[] = []
req.on('data', chunk => {
bodyParts.push(chunk)
}).on('end', () => {
// parse params
const body = Buffer.concat(bodyParts).toString()
let params: BaseAPICall = null
try {
params = JSON.parse(body)
} catch (err) {
errorResponse('Body must be in JSON format', res, startTimestamp)
return
}
const ctx: RequestContext = {
req,
res,
startTimestamp,
errorResponse: (msg) => errorResponse(msg, res, startTimestamp),
successResponse: (msg, extendedProperties) => successResponse(msg, extendedProperties, res, startTimestamp)
}
// validate params
if (!validateIncomingRequest(ctx, params)) { return }
// process request
Router(ctx, params).catch(e => {
console.error(e)
ctx.errorResponse(e.message)
})
})
}).listen(serverPort, serverHost, () => {
log.info(`Listening on http://${serverHost}:${serverPort}`);
})
)

View File

@@ -1,6 +1,25 @@
let requests = 0
const LOG_HTML: boolean = Boolean(process.env.LOG_HTML) || false
const LOG_HTML: boolean = process.env.LOG_HTML == 'true';
/**
 * Formats a date as `YYYY-MM-DDTHH:mm:ss±HH:MM` using the local time zone.
 *
 * Unlike Date.toISOString(), which always reports UTC, the result carries the
 * local wall-clock time followed by the local UTC offset. Milliseconds are not
 * included.
 */
function toIsoString(date: Date) {
  // Renders the integer part of the absolute value with at least two digits.
  const twoDigits = (value: number): string => {
    const whole = Math.floor(Math.abs(value));
    return whole < 10 ? `0${whole}` : `${whole}`;
  };

  // getTimezoneOffset() is positive west of UTC, so negate it to get the
  // conventional sign of the offset.
  const offsetMinutes = -date.getTimezoneOffset();
  const offsetSign = offsetMinutes >= 0 ? '+' : '-';

  const calendar = [
    String(date.getFullYear()),
    twoDigits(date.getMonth() + 1),
    twoDigits(date.getDate())
  ].join('-');
  const clock = [date.getHours(), date.getMinutes(), date.getSeconds()]
    .map((part) => twoDigits(part))
    .join(':');
  const zone = [offsetMinutes / 60, offsetMinutes % 60]
    .map((part) => twoDigits(part))
    .join(':');

  return `${calendar}T${clock}${offsetSign}${zone}`;
}
export default {
incRequests: () => { requests++ },
@@ -9,11 +28,11 @@ export default {
this.debug(html)
},
...require('console-log-level')(
{
level: process.env.LOG_LEVEL || 'debug',
{level: process.env.LOG_LEVEL || 'info',
prefix(level: string) {
return `${new Date().toISOString()} ${level.toUpperCase()} REQ-${requests}`
const req = (requests > 0) ? ` REQ-${requests}` : '';
return `${toIsoString(new Date())} ${level.toUpperCase()}${req}`
}
}
)
}
}

198
src/providers/cloudflare.ts Normal file
View File

@@ -0,0 +1,198 @@
import {Response} from 'puppeteer'
import {Page} from "puppeteer-extra/dist/puppeteer";
import log from "../log";
import getCaptchaSolver, {CaptchaType} from "../captcha";
/**
* This module contains the logic to solve protections provided by Cloudflare
**/
// CSS selectors whose presence on an error page marks a Cloudflare JavaScript challenge.
const CHALLENGE_SELECTORS = ['#trk_jschal_js', '.ray_id', '.attack-box', '#cf-please-wait'];
// Names of the textarea fields that receive the captcha token in the challenge form.
const TOKEN_INPUT_NAMES = ['g-recaptcha-response', 'h-captcha-response'];

/**
 * Detects a Cloudflare protection on an already loaded page and tries to get past it.
 *
 * Flow:
 *  1. If the response has no `server` header starting with "cloudflare", the
 *     response is returned untouched.
 *  2. If the page shows a Cloudflare error marker or "error code: 1020", an error is thrown.
 *  3. For status codes above 400 the known challenge selectors are polled until the
 *     challenge element disappears or is hidden.
 *  4. If a captcha form is present, the configured captcha solver is asked for a token,
 *     the token is written into the form and the form is submitted.
 *  5. Otherwise the URL is loaded again to obtain the real response.
 *
 * @param url      URL that was requested; reused when the page has to be loaded again.
 * @param page     Page that has already navigated to `url`.
 * @param response Response returned by that navigation.
 * @returns The original response when Cloudflare is not detected, otherwise the response
 *          of the navigation performed after the challenge.
 * @throws Error when the request is blocked, when a captcha is found but cannot be solved
 *         (no solver configured, unknown type, missing sitekey/token/fields/form), or when
 *         no challenge selector is found on an error page.
 */
export default async function resolveChallenge(url: string, page: Page, response: Response): Promise<Response> {
  // look for challenge and return fast if not detected
  // The "server" header is optional: a response without it is not served by Cloudflare
  // and must not make this check throw a TypeError.
  const serverHeader = response.headers().server;
  if (!serverHeader || !serverHeader.startsWith('cloudflare')) {
    log.info('Cloudflare not detected');
    return response;
  }
  log.info('Cloudflare detected');

  if (await page.$('span[data-translate="error"]') || (await page.content()).includes('error code: 1020')) {
    throw new Error('Cloudflare has blocked this request. Probably your IP is banned for this site, check in your web browser.')
  }

  let selectorFoundCount = 0;
  if (response.status() > 400) {
    // detect cloudflare wait 5s
    for (const selector of CHALLENGE_SELECTORS) {
      const cfChallengeElem = await page.$(selector)
      if (cfChallengeElem) {
        selectorFoundCount++
        log.debug(`Javascript challenge element '${selector}' detected.`)
        log.debug('Waiting for Cloudflare challenge...')

        // Poll once per second until the challenge element is gone or hidden.
        while (true) {
          try {
            // catch Execution context was destroyed
            const cfChallengeElem = await page.$(selector)
            if (!cfChallengeElem) {
              // solved!
              log.debug('Challenge element not found.')
              break
            } else {
              // new Cloudflare Challenge #cf-please-wait
              const displayStyle = await page.evaluate((selector) => {
                return getComputedStyle(document.querySelector(selector)).getPropertyValue("display");
              }, selector);
              if (displayStyle == "none") {
                // spinner is hidden, could be a captcha or not
                log.debug('Challenge element is hidden.')
                // wait until redirecting disappears
                while (true) {
                  try {
                    await page.waitFor(1000)
                    const displayStyle2 = await page.evaluate(() => {
                      return getComputedStyle(document.querySelector('#cf-spinner-redirecting')).getPropertyValue("display");
                    });
                    if (displayStyle2 == "none") {
                      break // hCaptcha detected
                    }
                  } catch (error) {
                    break // redirection completed
                  }
                }
                break
              } else {
                log.debug('Challenge element is visible.')
              }
            }
            log.debug('Found challenge element again.')
          } catch (error) {
            log.debug("Unexpected error: " + error);
            // A destroyed execution context means the page is navigating: keep polling.
            // Any other error ends the wait.
            if (!error.toString().includes("Execution context was destroyed")) {
              break
            }
          }

          log.debug('Waiting for Cloudflare challenge...')
          await page.waitFor(1000)
        }

        log.debug('Validating HTML code...')
        break
      } else {
        log.debug(`No '${selector}' challenge element detected.`)
      }
    }
    log.debug("Javascript challenge selectors found: " + selectorFoundCount + ", total selectors: " + CHALLENGE_SELECTORS.length)
  } else {
    // some sites use cloudflare but there is no challenge
    log.debug(`Javascript challenge not detected. Status code: ${response.status()}`);
    // Counted as "found" so that the no-selector error further down is not raised.
    selectorFoundCount = 1;
  }

  // it seems some captcha pages return 200 sometimes
  if (await page.$('input[name="cf_captcha_kind"]')) {
    log.info('Captcha challenge detected.');
    const captchaSolver = getCaptchaSolver()
    if (captchaSolver) {
      const captchaStartTimestamp = Date.now()
      const challengeForm = await page.$('#challenge-form')
      if (challengeForm) {
        const captchaTypeElm = await page.$('input[name="cf_captcha_kind"]')
        const cfCaptchaType: string = await captchaTypeElm.evaluate((e: any) => e.value)
        const captchaType: CaptchaType = (CaptchaType as any)[cfCaptchaType]
        if (!captchaType) {
          throw new Error('Unknown captcha type!');
        }

        let sitekey = null
        if (captchaType != 'hCaptcha' && process.env.CAPTCHA_SOLVER != 'hcaptcha-solver') {
          const sitekeyElem = await page.$('*[data-sitekey]')
          if (!sitekeyElem) {
            throw new Error('Could not find sitekey!');
          }
          sitekey = await sitekeyElem.evaluate((e) => e.getAttribute('data-sitekey'))
        }

        log.info('Waiting to receive captcha token to bypass challenge...')
        const token = await captchaSolver({
          url,
          sitekey,
          type: captchaType
        })
        log.debug(`Token received: ${token}`);
        if (!token) {
          throw new Error('Token solver failed to return a token.')
        }

        let responseFieldsFoundCount = 0;
        for (const name of TOKEN_INPUT_NAMES) {
          const input = await page.$(`textarea[name="${name}"]`)
          if (input) {
            responseFieldsFoundCount ++;
            log.debug(`Challenge response field '${name}' found in challenge form.`);
            await input.evaluate((e: HTMLTextAreaElement, token) => { e.value = token }, token);
          }
        }
        if (responseFieldsFoundCount == 0) {
          throw new Error('Challenge response field not found in challenge form.');
        }

        // ignore preset event listeners on the form
        await page.evaluate(() => {
          window.addEventListener('submit', (e) => { e.stopPropagation() }, true)
        })

        // it seems some sites obfuscate their challenge forms
        // TODO: look into how they do it and come up with a more solid solution
        try {
          // this element is added with js and we want to wait for all the js to load before submitting
          await page.waitForSelector('#challenge-form', { timeout: 10000 })
        } catch (err) {
          throw new Error("No '#challenge-form' element detected.");
        }

        // calculates the time it took to solve the captcha
        const captchaSolveTotalTime = Date.now() - captchaStartTimestamp

        // generates a random wait time
        const randomWaitTime = (Math.floor(Math.random() * 10) + 10) * 1000

        // waits, if any, time remaining to appear human but stay as fast as possible
        const timeLeft = randomWaitTime - captchaSolveTotalTime
        if (timeLeft > 0) {
          log.debug(`Waiting for '${timeLeft}' milliseconds.`);
          await page.waitFor(timeLeft);
        }

        // submit captcha response
        await challengeForm.evaluate((e: HTMLFormElement) => e.submit())
        response = await page.waitForNavigation({ waitUntil: 'domcontentloaded' })

        // The captcha form being shown again means the submitted token was rejected.
        if (await page.$('input[name="cf_captcha_kind"]')) {
          throw new Error('Captcha service failed to solve the challenge.');
        }
      }
    } else {
      throw new Error('Captcha detected but no automatic solver is configured.');
    }
  } else {
    if (selectorFoundCount == 0) {
      throw new Error('No challenge selectors found, unable to proceed')
    } else {
      // reload the page to make sure we get the real response
      // do not use page.reload() to avoid #162 #143
      response = await page.goto(url, { waitUntil: 'domcontentloaded' })
      await page.content()
      log.info('Challenge solved.');
    }
  }

  return response;
}

View File

@@ -1,11 +1,12 @@
import { v1 as UUIDv1 } from 'uuid'
import { SetCookie, Request, Response, Headers, HttpMethod, Overrides } from 'puppeteer'
import { Page, Browser } from "puppeteer-extra/dist/puppeteer";
const Timeout = require('await-timeout');
import log from './log'
import sessions, { SessionsCacheItem } from './session'
import { RequestContext } from './types'
import log from './log'
import { SetCookie, Request, Headers, HttpMethod, Overrides, Cookie } from 'puppeteer'
import { TimeoutError } from 'puppeteer/Errors'
import getCaptchaSolver, { CaptchaType } from './captcha'
import * as Puppeteer from "puppeteer-extra/dist/puppeteer";
import cloudflareProvider from './providers/cloudflare';
export interface BaseAPICall {
cmd: string
@@ -35,9 +36,9 @@ interface BaseRequestAPICall extends BaseAPICall {
proxy?: any, // TODO: use interface not any
download?: boolean
returnOnlyCookies?: boolean
returnRawHtml?: boolean
}
interface Routes {
[key: string]: (ctx: RequestContext, params: BaseAPICall) => void | Promise<void>
}
@@ -68,60 +69,27 @@ type OverridesProps =
'postData' |
'headers'
// We always set a Windows User-Agent because ARM builds are detected by CloudFlare
const DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
const CHALLENGE_SELECTORS = ['#trk_jschal_js', '.ray_id', '.attack-box']
const TOKEN_INPUT_NAMES = ['g-recaptcha-response', 'h-captcha-response']
// We always set a Windows User-Agent because ARM builds are detected by Cloudflare
const DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
async function interceptResponse(page: Puppeteer.Page, callback: (payload: ChallengeResolutionT) => any) {
const client = await page.target().createCDPSession();
await client.send('Fetch.enable', {
patterns: [
{
urlPattern: '*',
resourceType: 'Document',
requestStage: 'Response',
},
],
});
client.on('Fetch.requestPaused', async (e) => {
log.debug('Fetch.requestPaused. Checking if the response has valid cookies')
let headers = e.responseHeaders || []
let cookies = await page.cookies();
log.debug(cookies)
if (cookies.filter((c: Cookie) => c.name === 'cf_clearance').length > 0) {
log.debug('Aborting request and return cookies. valid cookies found')
await client.send('Fetch.failRequest', {requestId: e.requestId, errorReason: 'Aborted'})
let status = 'ok'
let message = ''
const payload: ChallengeResolutionT = {
status,
message,
result: {
url: page.url(),
status: e.status,
headers: headers.reduce((a: any, x: { name: any; value: any }) => ({ ...a, [x.name]: x.value }), {}),
response: null,
cookies: cookies,
userAgent: ''
}
}
callback(payload);
} else {
log.debug('Continuing request. no valid cookies found')
await client.send('Fetch.continueRequest', {requestId: e.requestId})
}
});
async function resolveChallengeWithTimeout(ctx: RequestContext, params: BaseRequestAPICall, page: Page) {
const maxTimeout = params.maxTimeout || 60000
const timer = new Timeout();
try {
const promise = resolveChallenge(ctx, params, page);
return await Promise.race([
promise,
timer.set(maxTimeout, `Maximum timeout reached. maxTimeout=${maxTimeout} (ms)`)
]);
} finally {
timer.clear();
}
}
async function resolveChallenge(ctx: RequestContext, { url, maxTimeout, proxy, download, returnOnlyCookies }: BaseRequestAPICall, page: Puppeteer.Page): Promise<ChallengeResolutionT | void> {
async function resolveChallenge(ctx: RequestContext,
{ url, proxy, download, returnOnlyCookies, returnRawHtml }: BaseRequestAPICall,
page: Page): Promise<ChallengeResolutionT | void> {
maxTimeout = maxTimeout || 60000
let status = 'ok'
let message = ''
@@ -132,154 +100,15 @@ async function resolveChallenge(ctx: RequestContext, { url, maxTimeout, proxy, d
}
log.debug(`Navigating to... ${url}`)
let response = await page.goto(url, { waitUntil: 'domcontentloaded' })
let response: Response = await page.goto(url, { waitUntil: 'domcontentloaded' })
log.html(await page.content())
// look for challenge
if (response.headers().server.startsWith('cloudflare')) {
log.info('Cloudflare detected')
if (await page.$('.cf-error-code')) {
await page.close()
return ctx.errorResponse('Cloudflare has blocked this request (Code 1020 Detected).')
}
if (response.status() > 400) {
// detect cloudflare wait 5s
for (const selector of CHALLENGE_SELECTORS) {
const cfChallengeElem = await page.$(selector)
if (cfChallengeElem) {
log.html(await page.content())
log.debug('Waiting for Cloudflare challenge...')
let interceptingResult: ChallengeResolutionT;
if (returnOnlyCookies) { //If we just want to get the cookies, intercept the response before we get the content/body (just cookies and headers)
await interceptResponse(page, async function(payload){
interceptingResult = payload;
});
}
// TODO: find out why these pages hang sometimes
while (Date.now() - ctx.startTimestamp < maxTimeout) {
await page.waitFor(1000)
try {
// catch exception timeout in waitForNavigation
response = await page.waitForNavigation({ waitUntil: 'domcontentloaded', timeout: 5000 })
} catch (error) { }
if (returnOnlyCookies && interceptingResult) {
await page.close();
return interceptingResult;
}
try {
// catch Execution context was destroyed
const cfChallengeElem = await page.$(selector)
if (!cfChallengeElem) { break }
log.debug('Found challenge element again...')
} catch (error)
{ }
response = await page.reload({ waitUntil: 'domcontentloaded' })
log.debug('Reloaded page...')
}
if (Date.now() - ctx.startTimestamp >= maxTimeout) {
ctx.errorResponse(`Maximum timeout reached. maxTimeout=${maxTimeout} (ms)`)
return
}
log.debug('Validating HTML code...')
break
} else {
log.debug(`No '${selector}' challenge element detected.`)
}
}
}
// it seems some captcha pages return 200 sometimes
if (await page.$('input[name="cf_captcha_kind"]')) {
const captchaSolver = getCaptchaSolver()
if (captchaSolver) {
const captchaStartTimestamp = Date.now()
const challengeForm = await page.$('#challenge-form')
if (challengeForm) {
log.html(await page.content())
const captchaTypeElm = await page.$('input[name="cf_captcha_kind"]')
const cfCaptchaType: string = await captchaTypeElm.evaluate((e: any) => e.value)
const captchaType: CaptchaType = (CaptchaType as any)[cfCaptchaType]
if (!captchaType) { return ctx.errorResponse('Unknown captcha type!') }
let sitekey = null
if (captchaType != 'hCaptcha' && process.env.CAPTCHA_SOLVER != 'hcaptcha-solver') {
const sitekeyElem = await page.$('*[data-sitekey]')
if (!sitekeyElem) { return ctx.errorResponse('Could not find sitekey!') }
sitekey = await sitekeyElem.evaluate((e) => e.getAttribute('data-sitekey'))
}
log.info('Waiting to receive captcha token to bypass challenge...')
const token = await captchaSolver({
url,
sitekey,
type: captchaType
})
if (!token) {
await page.close()
return ctx.errorResponse('Token solver failed to return a token.')
}
for (const name of TOKEN_INPUT_NAMES) {
const input = await page.$(`textarea[name="${name}"]`)
if (input) { await input.evaluate((e: HTMLTextAreaElement, token) => { e.value = token }, token) }
}
// ignore preset event listeners on the form
await page.evaluate(() => {
window.addEventListener('submit', (e) => { event.stopPropagation() }, true)
})
// it seems some sites obfuscate their challenge forms
// TODO: look into how they do it and come up with a more solid solution
try {
// this element is added with js and we want to wait for all the js to load before submitting
await page.waitForSelector('#challenge-form [type=submit]', { timeout: 5000 })
} catch (err) {
if (err instanceof TimeoutError) {
log.debug(`No '#challenge-form [type=submit]' element detected.`)
}
}
// calculates the time it took to solve the captcha
const captchaSolveTotalTime = Date.now() - captchaStartTimestamp
// generates a random wait time
const randomWaitTime = (Math.floor(Math.random() * 20) + 10) * 1000
// waits, if any, time remaining to appear human but stay as fast as possible
const timeLeft = randomWaitTime - captchaSolveTotalTime
if (timeLeft > 0) { await page.waitFor(timeLeft) }
let interceptingResult: ChallengeResolutionT;
if (returnOnlyCookies) { //If we just want to get the cookies, intercept the response before we get the content/body (just cookies and headers)
await interceptResponse(page, async function(payload){
interceptingResult = payload;
});
}
// submit captcha response
challengeForm.evaluate((e: HTMLFormElement) => e.submit())
response = await page.waitForNavigation({ waitUntil: 'domcontentloaded' })
if (returnOnlyCookies && interceptingResult) {
await page.close();
return interceptingResult;
}
}
} else {
status = 'warning'
message = 'Captcha detected but no automatic solver is configured.'
}
}
// Detect protection services and solve challenges
try {
response = await cloudflareProvider(url, page, response);
} catch (e) {
status = "error";
message = "Cloudflare " + e.toString();
}
const payload: ChallengeResolutionT = {
@@ -295,16 +124,26 @@ async function resolveChallenge(ctx: RequestContext, { url, maxTimeout, proxy, d
}
}
if (download) {
// for some reason we get an error unless we reload the page
// has something to do with a stale buffer and this is the quickest
// fix since I am short on time
response = await page.goto(url, { waitUntil: 'domcontentloaded' })
payload.result.response = (await response.buffer()).toString('base64')
if (returnOnlyCookies) {
payload.result.headers = null;
payload.result.userAgent = null;
} else {
payload.result.response = await page.content()
if (download) {
// for some reason we get an error unless we reload the page
// has something to do with a stale buffer and this is the quickest
// fix since I am short on time
response = await page.goto(url, { waitUntil: 'domcontentloaded' })
payload.result.response = (await response.buffer()).toString('base64')
} else if (returnRawHtml) {
payload.result.response = await response.text()
} else {
payload.result.response = await page.content()
}
}
// Add final url in result
payload.result.url = page.url();
// make sure the page is closed because if it isn't, an error will be thrown
// when a user uses a temporary session: the browser may be quit before
// the page is properly closed.
@@ -322,7 +161,7 @@ function mergeSessionWithParams({ defaults }: SessionsCacheItem, params: BaseReq
return copy
}
async function setupPage(ctx: RequestContext, params: BaseRequestAPICall, browser: Puppeteer.Browser): Promise<Puppeteer.Page> {
async function setupPage(ctx: RequestContext, params: BaseRequestAPICall, browser: Browser): Promise<Page> {
const page = await browser.newPage()
// merge session defaults with params
@@ -348,21 +187,26 @@ async function setupPage(ctx: RequestContext, params: BaseRequestAPICall, browse
}
if (headers) {
log.debug(`Adding custom headers: ${JSON.stringify(headers, null, 2)}`,)
log.debug(`Adding custom headers: ${JSON.stringify(headers)}`)
overrideResolvers.headers = request => Object.assign(request.headers(), headers)
}
if (cookies) {
log.debug(`Setting custom cookies: ${JSON.stringify(cookies, null, 2)}`,)
log.debug(`Setting custom cookies: ${JSON.stringify(cookies)}`)
await page.setCookie(...cookies)
}
// if any keys have been set on the object
if (Object.keys(overrideResolvers).length > 0) {
log.debug(overrideResolvers)
let callbackRunOnce = false
const callback = (request: Request) => {
// avoid loading resources to speed up page load
if(request.resourceType() == 'stylesheet' || request.resourceType() == 'font' || request.resourceType() == 'image') {
request.abort()
return
}
if (callbackRunOnce || !request.isNavigationRequest()) {
request.continue()
return
@@ -376,8 +220,7 @@ async function setupPage(ctx: RequestContext, params: BaseRequestAPICall, browse
overrides[key] = overrideResolvers[key](request)
});
log.debug(overrides)
log.debug(`Overrides: ${JSON.stringify(overrides)}`)
request.continue(overrides)
}
@@ -406,7 +249,7 @@ const browserRequest = async (ctx: RequestContext, params: BaseRequestAPICall) =
try {
const page = await setupPage(ctx, params, session.browser)
const data = await resolveChallenge(ctx, params, page)
const data = await resolveChallengeWithTimeout(ctx, params, page)
if (data) {
const { status } = data
@@ -421,7 +264,9 @@ const browserRequest = async (ctx: RequestContext, params: BaseRequestAPICall) =
log.error(error)
return ctx.errorResponse("Unable to process browser request. Error: " + error)
} finally {
if (oneTimeSession) { sessions.destroy(sessionId) }
if (oneTimeSession) {
await sessions.destroy(sessionId)
}
}
}
@@ -454,14 +299,6 @@ export const routes: Routes = {
await browserRequest(ctx, params)
},
'request.cookies': async (ctx, params: BaseRequestAPICall) => {
params.returnOnlyCookies = true
params.method = 'GET'
if (params.postData) {
return ctx.errorResponse('Cannot use "postBody" when sending a GET request.')
}
await browserRequest(ctx, params)
},
}
export default async function Router(ctx: RequestContext, params: BaseAPICall): Promise<void> {

View File

@@ -56,7 +56,11 @@ function prepareBrowserProfile(id: string): string {
export default {
create: async (id: string, { cookies, oneTimeSession, userAgent, headers, maxTimeout, proxy }: SessionCreateOptions): Promise<SessionsCacheItem> => {
let args = ['--no-sandbox', '--disable-setuid-sandbox'];
let args = [
'--no-sandbox',
'--disable-setuid-sandbox',
'--disable-dev-shm-usage' // issue #45
];
if (proxy && proxy.url) {
args.push(`--proxy-server=${proxy.url}`);
}
@@ -72,7 +76,13 @@ export default {
puppeteerOptions.userDataDir = prepareBrowserProfile(id)
}
log.debug('Launching headless browser...')
// if we are running inside executable binary, change chrome path
if (typeof (process as any).pkg !== 'undefined') {
const exe = process.platform === "win32" ? 'chrome.exe' : 'chrome';
puppeteerOptions.executablePath = path.join(path.dirname(process.execPath), 'chrome', exe)
}
log.debug('Launching browser...')
// TODO: maybe access env variable?
// TODO: sometimes browser instances are created and not connected to correctly.
@@ -137,4 +147,4 @@ export default {
},
get: (id: string): SessionsCacheItem | false => sessionCache[id] && sessionCache[id] || false
}
}