Compare commits

...

64 Commits

Author SHA1 Message Date
ngosang
c951ba2523 Bump version 2.2.1 2022-02-06 16:40:03 +01:00
ngosang
6c598d5360 Fix max timeout error in some pages 2022-02-06 16:35:52 +01:00
ngosang
2893f72237 Avoid crashing in NodeJS 17 due to Unhandled promise rejection 2022-02-06 13:31:30 +01:00
ngosang
cd221bbbf1 Improve proxy validation and debug traces 2022-02-06 13:07:11 +01:00
ngosang
68fb96f0d8 Remove @types/puppeteer dependency 2022-02-06 12:53:59 +01:00
ngosang
07724e598f Bump version 2.2.0 2022-01-31 00:20:44 +01:00
ngosang
56fc688517 Increase default BROWSER_TIMEOUT=40000 (40 seconds) 2022-01-30 23:24:15 +01:00
ngosang
0a438358d1 Fix Puppeter deprecation warnings 2022-01-30 23:23:06 +01:00
ngosang
0cbca1fb79 Update base Docker image Alpine 3.15 / NodeJS 16 2022-01-30 23:17:14 +01:00
ngosang
05dcae979c Build precompiled binaries with NodeJS 16 2022-01-30 23:09:28 +01:00
ngosang
fe6cfd75b8 Update Puppeter and other dependencies 2022-01-30 22:49:15 +01:00
ngosang
bb7e82e6c4 Add support for Custom CloudFlare challenge
EbookParadijs, Film-Paleis, MuziekFabriek and Puur-Hollands
2022-01-30 21:32:16 +01:00
ngosang
fdd1d245f4 Add support for DDoS-GUARD challenge 2022-01-30 20:36:38 +01:00
ngosang
bc6ac68e52 Bump version 2.1.0 2021-12-12 16:47:33 +01:00
simonfr
a9ab2569bc Add aarch64 to user agents to be replaced (#248)
Co-authored-by: Simon <simon@perols.dev>
2021-12-12 16:46:20 +01:00
ngosang
b1a6ad7688 Fix SOCKSv4 and SOCKSv5 proxy. resolves #214 #220 2021-12-12 14:29:38 +01:00
David Refoua
642d67b927 Remove redundant JSON key (postData) (#242) 2021-12-12 12:38:10 +01:00
ngosang
c4ef6a472e Make test URL configurable with TEST_URL env var. resolves #240 2021-12-12 12:35:05 +01:00
ngosang
a24b665bd1 Bypass new Cloudflare protection 2021-12-12 12:35:05 +01:00
Diego Heras
6576e1908d Update donation links 2021-12-04 23:43:30 +01:00
ngosang
8e518d7267 Bump version 2.0.2 2021-10-31 22:46:12 +01:00
ngosang
3005ba3629 Fix SOCKS5 proxy. Resolves #214 2021-10-31 22:39:32 +01:00
ngosang
176c69d1e8 Replace Firefox ERS with a newer version 2021-10-31 22:22:28 +01:00
ngosang
7a1cf7dd80 Catch startup exceptions and give some advices 2021-10-31 22:12:55 +01:00
ngosang
456dfc222e Add env var BROWSER_TIMEOUT for slow systems 2021-10-31 21:56:25 +01:00
ngosang
23fde49f2b Fix NPM warning in Docker images 2021-10-31 21:38:57 +01:00
ngosang
78daf24bc3 Bump version 2.0.1 2021-10-24 16:38:15 +02:00
ngosang
47c83ded58 Check user home dir before testing web browser installation 2021-10-24 15:52:03 +02:00
ngosang
35890cade4 Bump version 2.0.0 2021-10-20 18:37:39 +02:00
ngosang
753e8e1be8 Set puppeteer timeout half of maxTimeout param. Resolves #180 2021-10-20 18:28:30 +02:00
ngosang
a6628d0cda Add test for blocked IP 2021-10-20 18:06:25 +02:00
ngosang
a79a5f2b42 Avoid reloading the page in case of error 2021-10-20 18:06:15 +02:00
ngosang
1e463bb3e2 Improve Cloudflare detection 2021-10-20 18:05:59 +02:00
ngosang
02204a84d3 Fix version 2021-10-20 03:39:24 +02:00
ngosang
95d178b37a Fix browser preferences and proxy 2021-10-20 01:00:54 +02:00
ngosang
c4f890f9a1 Fix request.post method and clean error traces 2021-10-20 01:00:35 +02:00
ngosang
d16b982bb9 Use Firefox ESR for Docker images 2021-10-18 22:52:38 +02:00
ngosang
075b53ee24 Improve Firefox start time and code clean up 2021-10-18 21:45:21 +02:00
ngosang
356b893c18 Improve bad request management and tests 2021-10-18 19:27:21 +02:00
ngosang
a841d67745 Build native packages with Firefox 2021-10-18 11:13:49 +02:00
ngosang
2408a75a70 Update readme 2021-10-18 01:11:31 +02:00
ngosang
77a87c79fd Improve Docker image and clean TODOs 2021-10-18 00:23:28 +02:00
ngosang
cfd158462f Add proxy support 2021-10-18 00:02:30 +02:00
ngosang
ccfe21c15a Implement request.post method for Firefox 2021-10-17 22:05:15 +02:00
ngosang
a5b3e08e1f Code clean up, remove returnRawHtml, download, headers params 2021-10-17 20:43:36 +02:00
ngosang
a0e897067a Remove outdated chaptcha solvers 2021-10-17 18:25:44 +02:00
ngosang
744de4d158 Refactor the app to use Express server and Jest for tests 2021-10-17 18:00:19 +02:00
ngosang
0459f2642d Fix Cloudflare resolver for Linux ARM builds 2021-10-16 20:26:10 +02:00
ngosang
ca3f84f458 Fix Cloudflare resolver 2021-10-16 19:32:52 +02:00
ngosang
5dd563e003 Replace Chrome web browser with Firefox 2021-10-16 19:16:25 +02:00
ngosang
78c10d6b24 Remove userAgent parameter since any modification is detected by CF 2021-10-16 17:46:04 +02:00
ngosang
3de2e44bfd Update dependencies 2021-10-16 17:29:58 +02:00
ngosang
7738f7a360 Remove Puppeter steath plugin 2021-10-16 16:54:56 +02:00
ngosang
1b01caaa78 Bump version 1.2.9 2021-08-01 22:11:55 +02:00
ngosang
447c8f67a1 Improve "Execution context was destroyed" error handling 2021-08-01 22:10:53 +02:00
ngosang
9dae74bc28 Implement returnRawHtml parameter. resolves #172 resolves #165 2021-08-01 22:08:55 +02:00
ngosang
4199db5a41 Capture Docker stop signal. resolves #158 2021-08-01 21:37:45 +02:00
ngosang
2a4fae37c0 Reduce Docker image size 20 MB 2021-08-01 21:27:27 +02:00
ngosang
232ddca512 Fix page reload after challenge is solved. resolves #162 resolves #143 2021-08-01 20:34:38 +02:00
ngosang
8572fab781 Avoid loading images/css/fonts to speed up page load 2021-08-01 19:35:26 +02:00
ngosang
fdb3eae051 Improve Cloudflare IP ban detection 2021-08-01 19:32:09 +02:00
ngosang
6dd8206a10 Fix vulnerabilities 2021-08-01 19:15:24 +02:00
ngosang
c4e4d28c8d Bump version 1.2.8 2021-06-01 02:00:39 +02:00
ngosang
543ce89eb6 Improve old JS challenge waiting. Resolves #129 2021-06-01 01:59:57 +02:00
23 changed files with 10906 additions and 6072 deletions

View File

@@ -11,6 +11,7 @@ Check closed issues as well, because your issue may have already been fixed.
* **Last working FlareSolverr version**:
* **Operating system**:
* **Are you using Docker**: [yes/no]
* **FlareSolverr User-Agent (see log traces or / endpoint)**:
* **Are you using a proxy or VPN?** [yes/no]
* **Are you using Captcha Solver:** [yes/no]
* **If using captcha solver, which one:**

View File

@@ -1,13 +1,14 @@
FROM --platform=${TARGETPLATFORM:-linux/amd64} node:15.2.1-alpine3.11
FROM --platform=${TARGETPLATFORM:-linux/amd64} node:16-alpine3.15
# Print build information
ARG TARGETPLATFORM
ARG BUILDPLATFORM
RUN printf "I am running on ${BUILDPLATFORM:-linux/amd64}, building for ${TARGETPLATFORM:-linux/amd64}\n$(uname -a)\n"
# Install Chromium, dumb-init and remove all locales but en-US
RUN apk add --no-cache chromium dumb-init && \
find /usr/lib/chromium/locales -type f ! -name 'en-US.*' -delete
# Install the web browser (package firefox-esr is available too)
RUN apk update && \
apk add --no-cache firefox dumb-init && \
rm -Rf /var/cache
# Copy FlareSolverr code
USER node
@@ -16,15 +17,18 @@ WORKDIR /home/node/flaresolverr
COPY --chown=node:node package.json package-lock.json tsconfig.json ./
COPY --chown=node:node src ./src/
# Install package. Skip installing Chrome, we will use the installed package.
ENV PUPPETEER_PRODUCT=chrome \
# Install package. Skip installing the browser, we will use the installed package.
ENV PUPPETEER_PRODUCT=firefox \
PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true \
PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium-browser
PUPPETEER_EXECUTABLE_PATH=/usr/bin/firefox
RUN npm install && \
npm run build && \
rm -rf src tsconfig.json && \
npm prune --production
npm prune --production && \
rm -rf /home/node/.npm
EXPOSE 8191
ENTRYPOINT ["/usr/bin/dumb-init", "--"]
CMD ["npm", "start"]
CMD ["node", "./dist/server.js"]
# docker build -t flaresolverr:custom .
# docker run -p 8191:8191 -e LOG_LEVEL=debug flaresolverr:custom

View File

@@ -5,8 +5,8 @@
[![GitHub issues](https://img.shields.io/github/issues/FlareSolverr/FlareSolverr)](https://github.com/FlareSolverr/FlareSolverr/issues)
[![GitHub pull requests](https://img.shields.io/github/issues-pr/FlareSolverr/FlareSolverr)](https://github.com/FlareSolverr/FlareSolverr/pulls)
[![Donate PayPal](https://img.shields.io/badge/Donate-PayPal-green.svg)](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=X5NJLLX5GLTV6&source=url)
[![Donate Buy Me A Coffee](https://img.shields.io/badge/Donate-Buy%20me%20a%20coffee-yellow.svg)](https://www.buymeacoffee.com/ngosang)
[![Donate Bitcoin](https://img.shields.io/badge/Donate-Bitcoin-orange.svg)](https://en.cryptobadges.io/donate/13Hcv77AdnFWEUZ9qUpoPBttQsUT7q9TTh)
[![Donate Bitcoin](https://en.cryptobadges.io/badge/micro/13Hcv77AdnFWEUZ9qUpoPBttQsUT7q9TTh)](https://en.cryptobadges.io/donate/13Hcv77AdnFWEUZ9qUpoPBttQsUT7q9TTh)
[![Donate Ethereum](https://en.cryptobadges.io/badge/micro/0x0D1549BbB00926BF3D92c1A8A58695e982f1BE2E)](https://en.cryptobadges.io/donate/0x0D1549BbB00926BF3D92c1A8A58695e982f1BE2E)
FlareSolverr is a proxy server to bypass Cloudflare protection.
@@ -15,7 +15,7 @@ FlareSolverr is a proxy server to bypass Cloudflare protection.
FlareSolverr starts a proxy server and it waits for user requests in an idle state using few resources.
When some request arrives, it uses [puppeteer](https://github.com/puppeteer/puppeteer) with the
[stealth plugin](https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-stealth)
to create a headless browser (Chrome). It opens the URL with user parameters and waits until the Cloudflare challenge
to create a headless browser (Firefox). It opens the URL with user parameters and waits until the Cloudflare challenge
is solved (or timeout). The HTML code and the cookies are sent back to the user, and those cookies can be used to
bypass Cloudflare using other HTTP clients.
@@ -60,18 +60,19 @@ docker run -d \
This is the recommended way for Windows users.
* Download the [FlareSolverr zip](https://github.com/FlareSolverr/FlareSolverr/releases) from the release's assets. It is available for Windows and Linux.
* Extract the zip file. FlareSolverr executable and chrome folder must be in the same directory.
* Extract the zip file. FlareSolverr executable and firefox folder must be in the same directory.
* Execute FlareSolverr binary. In the environment variables section you can find how to change the configuration.
### From source code
This is the recommended way for macOS users and for developers.
* Install [NodeJS](https://nodejs.org/).
* Install [NodeJS](https://nodejs.org/) 16.
* Clone this repository and open a shell in that path.
* Run `export PUPPETEER_PRODUCT=firefox` (Linux/macOS) or `set PUPPETEER_PRODUCT=firefox` (Windows).
* Run `npm install` command to install FlareSolverr dependencies.
* Run `node node_modules/puppeteer/install.js` to install Chromium.
* Run `npm run build` command to compile TypeScript code.
* Run `npm start` command to start FlareSolverr.
* Run `npm start` command to compile TypeScript code and start FlareSolverr.
If you get errors related to firefox not installed try running `node node_modules/puppeteer/install.js` to install Firefox.
### Systemd service
@@ -86,11 +87,7 @@ curl -L -X POST 'http://localhost:8191/v1' \
--data-raw '{
"cmd": "request.get",
"url":"http://www.google.com/",
"userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleW...",
"maxTimeout": 60000,
"headers": {
"X-Test": "Testing 123..."
}
"maxTimeout": 60000
}'
```
@@ -107,7 +104,6 @@ This also speeds up the requests since it won't have to launch a new browser ins
Parameter | Notes
|--|--|
session | Optional. The session ID that you want to be assigned to the instance. If it isn't set, a random UUID will be assigned.
userAgent | Optional. Will be used by the headless browser.
#### + `sessions.list`
@@ -142,10 +138,12 @@ Parameter | Notes
|--|--|
url | Mandatory
session | Optional. Will send the request from an existing browser instance. If one is not sent it will create a temporary instance that will be destroyed immediately after the request is completed.
headers | Optional. To specify user headers.
maxTimeout | Optional, default value 60000. Max timeout to solve the challenge in milliseconds.
cookies | Optional. Will be used by the headless browser. Follow [this](https://github.com/puppeteer/puppeteer/blob/v3.3.0/docs/api.md#pagesetcookiecookies) format.
returnOnlyCookies | Optional, default false. Only returns the cookies. Response data, headers and other parts of the response are removed.
proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. You must include the proxy schema in the URL: `http://`, `socks4://` or `socks5://`. Authorization (username/password) is not supported.
:warning: If you want to use Cloudflare clearance cookie in your scripts, make sure you use the FlareSolverr User-Agent too. If they don't match you will see the challenge.
Example response from running the `curl` above:
@@ -212,16 +210,7 @@ This is the same as `request.get` but it takes one more param:
Parameter | Notes
|--|--|
postData | Must be a string. If you want to POST a form, don't forget to set the `Content-Type` header to `application/x-www-form-urlencoded` or the server might not understand your request.
### Download small files
If you need to access an image/pdf or small file, you should pass the `download` parameter to `request.get` setting it
to `true`. Rather than access the html and return text it will return the buffer **base64** encoded which you will be
able to decode and save the image/pdf.
This method isn't recommended for videos or anything larger. As that should be streamed back to the client and at the
moment there is nothing setup to do so. If this is something you need feel free to create an issue and/or submit a PR.
postData | Must be a string with `application/x-www-form-urlencoded`. Eg: `a=b&c=d`
## Environment variables
@@ -232,6 +221,8 @@ LOG_HTML | false | Only for debugging. If `true` all HTML that passes through th
CAPTCHA_SOLVER | none | Captcha solving method. It is used when a captcha is encountered. See the Captcha Solvers section.
TZ | UTC | Timezone used in the logs and the web browser. Example: `TZ=Europe/London`.
HEADLESS | true | Only for debugging. To run the web browser in headless mode or visible.
BROWSER_TIMEOUT | 40000 | If you are experiencing errors/timeouts because your system is slow, you can try to increase this value. Remember to increase the `maxTimeout` parameter too.
TEST_URL | https://www.google.com | FlareSolverr makes a request on start to make sure the web browser is working. You can change that URL if it is blocked in your country.
PORT | 8191 | Listening port. You don't need to change this if you are running on Docker.
HOST | 0.0.0.0 | Listening interface. You don't need to change this if you are running on Docker.
@@ -251,37 +242,6 @@ If this is the case, FlareSolverr will return the error `Captcha detected but no
FlareSolverr can be customized to solve the captchas automatically by setting the environment variable `CAPTCHA_SOLVER`
to the file name of one of the adapters inside the [/captcha](src/captcha) directory.
### hcaptcha-solver
This method makes use of the [hcaptcha-solver](https://github.com/JimmyLaurent/hcaptcha-solver) project.
NOTE: This solver works picking random images so it will fail in a lot of requests and it's hard to know if it is
working or not. In a real use case with Sonarr/Radarr + Jackett it is still useful because those apps make a new request
each 15 minutes. Eventually one of the requests is going to work and Jackett saves the cookie forever (until it stops
working).
To use this solver you must set the environment variable:
```bash
CAPTCHA_SOLVER=hcaptcha-solver
```
### CaptchaHarvester
This method makes use of the [CaptchaHarvester](https://github.com/NoahCardoza/CaptchaHarvester) project which allows
users to collect their own tokens from ReCaptcha V2/V3 and hCaptcha for free.
To use this method you must set these environment variables:
```bash
CAPTCHA_SOLVER=harvester
HARVESTER_ENDPOINT=https://127.0.0.1:5000/token
```
**Note**: above I set `HARVESTER_ENDPOINT` to the default configuration of the captcha harvester's server, but that
could change if you customize the command line flags. Simply put, `HARVESTER_ENDPOINT` should be set to the URI of the
route that returns a token in plain text when called.
## Related projects
* C# implementation => https://github.com/FlareSolverr/FlareSolverrSharp

View File

@@ -2,15 +2,39 @@ const fs = require('fs')
const path = require('path')
const { execSync } = require('child_process')
const archiver = require('archiver')
const https = require('https')
const puppeteer = require('puppeteer')
const version = 'v' + require('./package.json').version;
/**
 * Fetch the current Firefox Nightly version from Mozilla's product-details service.
 *
 * @returns {Promise<string>} Resolves with the `FIREFOX_NIGHTLY` value found in
 *   firefox_versions.json. Rejects when the request fails, when the server answers
 *   with a status code >= 400, or when the payload cannot be parsed or does not
 *   contain a version string.
 */
function getFirefoxNightlyVersion() {
  const firefoxVersions = 'https://product-details.mozilla.org/1.0/firefox_versions.json';
  return new Promise((resolve, reject) => {
    https
      .get(firefoxVersions, (r) => {
        if (r.statusCode >= 400) {
          // Discard the unread body so the underlying socket is released
          r.resume();
          return reject(new Error(`Got status code ${r.statusCode}`));
        }
        let data = '';
        // Decode as text so multi-byte characters split across chunks are not corrupted
        r.setEncoding('utf8');
        r.on('data', (chunk) => {
          data += chunk;
        });
        r.on('error', reject);
        r.on('end', () => {
          let nightly;
          try {
            nightly = JSON.parse(data).FIREFOX_NIGHTLY;
          } catch {
            return reject(new Error('Firefox version not found'));
          }
          // A valid JSON document without the expected key must not resolve to undefined
          if (typeof nightly !== 'string' || nightly === '') {
            return reject(new Error('Firefox version not found'));
          }
          return resolve(nightly);
        });
      })
      .on('error', reject);
  });
}
(async () => {
const builds = [
{
platform: 'linux',
version: 756035,
chromeFolder: 'chrome-linux',
firefoxFolder: 'firefox',
fsExec: 'flaresolverr-linux',
fsZipExec: 'flaresolverr',
fsZipName: 'linux-x64',
@@ -18,18 +42,16 @@ const version = 'v' + require('./package.json').version;
},
{
platform: 'win64',
version: 756035,
chromeFolder: 'chrome-win',
firefoxFolder: 'firefox',
fsExec: 'flaresolverr-win.exe',
fsZipExec: 'flaresolverr.exe',
fsZipName: 'windows-x64',
fsLicenseName: 'LICENSE.txt'
}
// TODO: this is working but changes are required in session.ts to find chrome path
// todo: this has to be build in macOS (hdiutil is required). changes required in sessions.ts too
// {
// platform: 'mac',
// version: 756035,
// chromeFolder: 'chrome-mac',
// firefoxFolder: 'firefox',
// fsExec: 'flaresolverr-macos',
// fsZipExec: 'flaresolverr',
// fsZipName: 'macos',
@@ -42,20 +64,24 @@ const version = 'v' + require('./package.json').version;
if (fs.existsSync('bin')) {
fs.rmSync('bin', { recursive: true })
}
execSync('pkg -t node14-win-x64,node14-linux-x64 --out-path bin .')
// execSync('pkg -t node14-win-x64,node14-mac-x64,node14-linux-x64 --out-path bin .')
execSync('./node_modules/.bin/pkg -t node16-win-x64,node16-linux-x64 --out-path bin .')
// execSync('./node_modules/.bin/pkg -t node16-win-x64,node16-mac-x64,node16-linux-x64 --out-path bin .')
// download Chrome and zip together
// get firefox revision
const revision = await getFirefoxNightlyVersion();
// download firefox and zip together
for (const os of builds) {
console.log('Building ' + os.fsZipName + ' artifact')
// download chrome
console.log('Downloading Chrome...')
// download firefox
console.log(`Downloading firefox ${revision} for ${os.platform} ...`)
const f = puppeteer.createBrowserFetcher({
product: 'firefox',
platform: os.platform,
path: path.join(__dirname, 'bin', 'puppeteer')
})
await f.download(os.version)
await f.download(revision)
// compress in zip
console.log('Compressing zip file...')
@@ -75,7 +101,7 @@ const version = 'v' + require('./package.json').version;
archive.file('LICENSE', { name: 'flaresolverr/' + os.fsLicenseName })
archive.file('bin/' + os.fsExec, { name: 'flaresolverr/' + os.fsZipExec })
archive.directory('bin/puppeteer/' + os.platform + '-' + os.version + '/' + os.chromeFolder, 'flaresolverr/chrome')
archive.directory('bin/puppeteer/' + os.platform + '-' + revision + '/' + os.firefoxFolder, 'flaresolverr/firefox')
if (os.platform === 'linux') {
archive.file('flaresolverr.service', { name: 'flaresolverr/flaresolverr.service' })
}

12
jest.config.js Normal file
View File

@@ -0,0 +1,12 @@
// Jest configuration: runs the TypeScript tests located under ./src/ through ts-jest.
module.exports = {
// A list of paths to directories that Jest should use to search for files in
roots: [
"./src/"
],
// Compile TypeScript: files ending in .ts / .tsx are transformed with ts-jest
transform: {
'^.+\\.(ts|tsx)$': 'ts-jest'
},
// Default value for FlareSolverr maxTimeout is 60000, so the per-test timeout
// is set 10 seconds above it (value in milliseconds)
testTimeout: 70000
}

14379
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,59 +1,45 @@
{
"name": "flaresolverr",
"version": "1.2.7",
"version": "2.2.1",
"description": "Proxy server to bypass Cloudflare protection.",
"scripts": {
"start": "node ./dist/index.js",
"start": "tsc && node ./dist/server.js",
"build": "tsc",
"dev": "nodemon -e ts --exec ts-node src/index.ts",
"package": "node build-binaries.js"
"dev": "nodemon -e ts --exec ts-node src/server.ts",
"package": "tsc && node build-binaries.js",
"test": "jest --runInBand"
},
"author": "Diego Heras (ngosang)",
"contributors": [
{
"name": "Noah Cardoza",
"url": "https://github.com/NoahCardoza/CloudProxy.git"
}
],
"license": "MIT",
"repository": {
"type": "git",
"url": "https://github.com/ngosang/FlareSolverr"
},
"pkg": {
"assets": [
"node_modules/puppeteer-extra-plugin-stealth/**/*.*"
]
},
"bin": {
"flaresolverr": "dist/index.js"
"flaresolverr": "dist/server.js"
},
"dependencies": {
"await-timeout": "^1.1.1",
"body-parser": "^1.19.0",
"console-log-level": "^1.4.1",
"got": "^11.5.1",
"hcaptcha-solver": "^1.0.2",
"puppeteer": "^3.3.0",
"puppeteer-extra": "^3.1.15",
"puppeteer-extra-plugin-stealth": "^2.6.5",
"uuid": "^8.2.0"
"express": "^4.17.1",
"puppeteer": "^13.1.2",
"uuid": "^8.3.2"
},
"devDependencies": {
"@types/await-timeout": "^0.3.1",
"@types/node": "^14.0.23",
"@types/puppeteer": "^3.0.1",
"@types/uuid": "^8.0.0",
"archiver": "^5.2.0",
"eslint": "^7.5.0",
"eslint-config-airbnb-base": "^14.2.0",
"eslint-config-standard": "^14.1.1",
"eslint-plugin-import": "^2.22.0",
"eslint-plugin-node": "^11.1.0",
"eslint-plugin-promise": "^4.2.1",
"eslint-plugin-standard": "^4.0.1",
"nodemon": "^2.0.4",
"pkg": "^4.4.9",
"ts-node": "^8.10.2",
"typescript": "^3.9.7"
"@types/body-parser": "^1.19.1",
"@types/express": "^4.17.13",
"@types/jest": "^27.0.2",
"@types/node": "^16.11.7",
"@types/supertest": "^2.0.11",
"@types/uuid": "^8.3.1",
"archiver": "^5.3.0",
"nodemon": "^2.0.13",
"pkg": "^5.5.2",
"supertest": "^6.1.6",
"ts-jest": "^27.0.7",
"ts-node": "^10.3.0",
"typescript": "^4.4.4"
}
}

83
src/app.ts Normal file
View File

@@ -0,0 +1,83 @@
import log from './services/log'
import {NextFunction, Request, Response} from 'express';
import {getUserAgent} from "./services/sessions";
import {controllerV1} from "./controllers/v1";
const express = require('express');
const app = express();
const bodyParser = require('body-parser');
const version: string = 'v' + require('../package.json').version
// Parse JSON request bodies (limit 50mb). The verify hook copies the raw buffer
// it receives into req.body.
const jsonParserOptions = {
  limit: '50mb',
  verify: (req: Request, res: Response, buf: any) => {
    req.body = buf;
  }
};
app.use(bodyParser.json(jsonParserOptions));
// Access log: traces every incoming request except the ones sent to /health
app.use(function(req: Request, res: Response, next: NextFunction) {
  if (req.url == '/health') {
    next();
    return;
  }

  // each traced request increments the counter used for the log prefix
  log.incRequests()

  // append the body to the trace for POST requests that carry one
  let bodyTrace = "";
  if (req.method == 'POST' && req.body) {
    let serialized;
    try {
      serialized = JSON.stringify(req.body)
    } catch(e) {
      // fall back to the default string conversion when the body can't be serialized
      serialized = req.body
    }
    bodyTrace = " body: " + serialized
  }

  log.info(`Incoming request => ${req.method} ${req.url}${bodyTrace}`);
  next();
});
// *********************************************************************************************************************
// Routes
// Welcome endpoint: reports readiness, the package version and the browser User-Agent
app.get("/", (req: Request, res: Response) => {
  const welcome = {
    "msg": "FlareSolverr is ready!",
    "version": version,
    "userAgent": getUserAgent()
  };
  res.send(welcome);
});
// Health endpoint. It is excluded from the access log, so it doesn't print traces
app.get("/health", (req: Request, res: Response) => {
  const health = {"status": "ok"};
  res.send(health);
});
// Controller v1
// Express 4 does not handle promises returned by route handlers, so a rejection
// here would become an unhandled promise rejection. Catch it and hand it to the
// error middleware instead.
app.post("/v1", async (req: Request, res: Response, next: NextFunction) => {
  try {
    await controllerV1(req, res);
  } catch (e) {
    if (res.headersSent) {
      // the response is already on the wire, nothing else can be sent to the client
      log.error('Unexpected error after sending the response: ' + e)
    } else {
      next(e);
    }
  }
});
// *********************************************************************************************************************
// Fallback for any path or HTTP verb not matched by the routes registered before it
app.use((req: Request, res: Response) => {
  const notFound = {"error": "Unknown resource or HTTP verb"};
  res.status(404);
  res.send(notFound);
})
// Errors
// Express error middleware (identified by its four parameters). Logs the error and
// returns it to the client as a single-line message.
app.use(function (err: any, req: Request, res: Response, next: NextFunction) {
  if (!err) {
    next()
    return
  }
  // String.replace with a string pattern only removes the first match; use a
  // global regex so every CR / LF is stripped and the message stays on one line
  const msg = ('Invalid request: ' + err).replace(/[\r\n]/g, "")
  log.error(msg)
  res.send({"error": msg})
})
module.exports = app;

View File

@@ -1,31 +0,0 @@
import got from 'got'
import { sleep } from '../utils'
/*
This method uses the captcha-harvester project:
https://github.com/NoahCardoza/CaptchaHarvester
While the function must take url/sitekey/type args,
they aren't used because the harvester server must
be preconfigured.
ENV:
HARVESTER_ENDPOINT: This must be the full path
to the /token endpoint of the harvester.
E.G. "https://127.0.0.1:5000/token"
*/
/**
 * Poll the captcha harvester until it returns a token.
 *
 * @returns the response body of the `HARVESTER_ENDPOINT` request.
 * @throws Error when `HARVESTER_ENDPOINT` is not set, and rethrows any request
 *   error that is not an HTTP 418 response.
 */
export default async function solve(): Promise<string> {
  const endpoint = process.env.HARVESTER_ENDPOINT
  if (!endpoint) { throw Error('ENV variable `HARVESTER_ENDPOINT` must be set.') }
  while (true) {
    try {
      // reuse the validated value instead of reading the environment again
      const response = await got.get(endpoint, {
        https: { rejectUnauthorized: false }
      })
      return response.body
    } catch (e) {
      // Only a 418 answer triggers a retry (presumably "no token available yet" -
      // confirm against the harvester docs). Errors without a response, such as
      // connection failures, are rethrown as they are instead of crashing on
      // `e.response.statusCode`.
      if (e?.response?.statusCode !== 418) { throw e }
    }
    await sleep(3000)
  }
}

View File

@@ -1,25 +0,0 @@
const solveCaptcha = require('hcaptcha-solver');
import { SolverOptions } from '.'
/*
This method uses the hcaptcha-solver project:
https://github.com/JimmyLaurent/hcaptcha-solver
TODO: allow user pass custom options to the solver.
ENV:
There are no other variables that must be set to get this to work
*/
/**
 * hcaptcha-solver adapter. It is currently disabled: every call rejects with an
 * explanatory error.
 */
export default async function solve({ url }: SolverOptions): Promise<string> {
  const reason = "hcaptcha-solver is not able to solve the new hCaptcha challenge. This issue is already reported #31.";
  throw new Error(reason);
  // Previous implementation, kept for reference:
  //
  //   try {
  //     return await solveCaptcha(url)
  //   } catch (e) {
  //     console.error(e)
  //     return null
  //   }
}

View File

@@ -1,4 +1,4 @@
import log from "../log";
import log from "../services/log";
export enum CaptchaType {
re = 'reCaptcha',

178
src/controllers/v1.ts Normal file
View File

@@ -0,0 +1,178 @@
import {Request, Response} from 'express';
import {Protocol} from "devtools-protocol";
import log from '../services/log'
import {browserRequest, ChallengeResolutionResultT, ChallengeResolutionT} from "../services/solver";
import {SessionCreateOptions} from "../services/sessions";
const sessions = require('../services/sessions')
const version: string = 'v' + require('../../package.json').version
// Maps a command name (the 'cmd' request parameter) to the async handler that
// receives the request params and fills in the response object.
interface V1Routes {
[key: string]: (params: V1RequestBase, response: V1ResponseBase) => Promise<void>
}
// Proxy settings accepted in the 'proxy' request parameter. All fields are optional.
export interface Proxy {
url?: string
username?: string
password?: string
}
// Parameters shared by every v1 command.
export interface V1RequestBase {
// name of the command to execute, used as the key into the routes table
cmd: string
cookies?: Protocol.Network.CookieParam[],
// controllerV1 replaces a missing value, or one below 1, with 60000
maxTimeout?: number
proxy?: Proxy
// session ID the command refers to
session: string
headers?: Record<string, string> // deprecated v2, not used
userAgent?: string // deprecated v2, not used
}
// Parameters of the 'sessions.*' commands; adds nothing to the base request.
interface V1RequestSession extends V1RequestBase {
}
// Parameters of the 'request.get' and 'request.post' commands.
export interface V1Request extends V1RequestBase {
url: string
// overwritten by the route handlers with 'GET' or 'POST'
method?: string
// rejected by 'request.get', mandatory for 'request.post'
postData?: string
returnOnlyCookies?: boolean
download?: boolean // deprecated v2, not used
returnRawHtml?: boolean // deprecated v2, not used
}
// Fields present in every v1 response.
export interface V1ResponseBase {
// "ok" or "error" for the session commands and failures; the request commands
// copy the status reported by the solver
status: string
message: string
// Date.now() values taken when controllerV1 starts and right before it responds
startTimestamp: number
endTimestamp: number
// 'v' + the version field of package.json
version: string
}
// Response of the 'request.*' commands.
export interface V1ResponseSolution extends V1ResponseBase {
solution: ChallengeResolutionResultT
}
// Response of 'sessions.create'.
export interface V1ResponseSession extends V1ResponseBase {
session: string
}
// Response of 'sessions.list'.
export interface V1ResponseSessions extends V1ResponseBase {
sessions: string[]
}
// Logs a warning for each request parameter that was removed in v2 but is still being sent.
function warnRemovedRequestParams(params: V1Request): void {
  if (params.returnRawHtml) {
    log.warn("Request parameter 'returnRawHtml' was removed in FlareSolverr v2.")
  }
  if (params.download) {
    log.warn("Request parameter 'download' was removed in FlareSolverr v2.")
  }
}

// Runs the request through the browser and copies the outcome into the response.
async function resolveBrowserRequest(params: V1Request, response: V1ResponseSolution): Promise<void> {
  warnRemovedRequestParams(params)
  const result: ChallengeResolutionT = await browserRequest(params)
  response.status = result.status;
  response.message = result.message;
  response.solution = result.result;
  if (response.message) {
    log.info(response.message)
  }
}

/**
 * Handlers of the v1 API, indexed by the value of the 'cmd' request parameter.
 * Each handler fills in the response object it receives and signals failures by throwing.
 */
export const routes: V1Routes = {
  // Creates a persistent browser session
  'sessions.create': async (params: V1RequestSession, response: V1ResponseSession): Promise<void> => {
    const options: SessionCreateOptions = {
      oneTimeSession: false,
      cookies: params.cookies,
      maxTimeout: params.maxTimeout,
      proxy: params.proxy
    }
    const { sessionId, browser } = await sessions.create(params.session, options)
    if (browser) {
      response.status = "ok";
      response.message = "Session created successfully.";
      response.session = sessionId
    } else {
      throw Error('Error creating session.')
    }
  },
  // Returns the value of sessions.list()
  'sessions.list': async (params: V1RequestSession, response: V1ResponseSessions): Promise<void> => {
    response.status = "ok";
    response.message = "";
    response.sessions = sessions.list();
  },
  // Destroys the session identified by params.session
  'sessions.destroy': async (params: V1RequestSession, response: V1ResponseBase): Promise<void> => {
    if (await sessions.destroy(params.session)) {
      response.status = "ok";
      response.message = "The session has been removed.";
    } else {
      throw Error('This session does not exist.')
    }
  },
  // GET request; a request body is not allowed
  'request.get': async (params: V1Request, response: V1ResponseSolution): Promise<void> => {
    params.method = 'GET'
    if (params.postData) {
      // the message names the actual request parameter ('postData')
      throw Error('Cannot use "postData" when sending a GET request.')
    }
    await resolveBrowserRequest(params, response)
  },
  // POST request; the body is mandatory
  'request.post': async (params: V1Request, response: V1ResponseSolution): Promise<void> => {
    params.method = 'POST'
    if (!params.postData) {
      // the message names the actual request parameter ('postData')
      throw Error('Must send param "postData" when sending a POST request.')
    }
    await resolveBrowserRequest(params, response)
  },
}
/**
 * Entry point of the v1 API: validates the request body, dispatches the command
 * named by 'cmd' and sends the response.
 *
 * Errors raised while validating or executing the command are not propagated;
 * they are reported to the client as an HTTP 500 response with status "error".
 *
 * @param req Express request; its body carries the command parameters
 * @param res Express response used to send the result
 */
export async function controllerV1(req: Request, res: Response): Promise<void> {
  const response: V1ResponseBase = {
    status: null,
    message: null,
    startTimestamp: Date.now(),
    endTimestamp: 0,
    version: version
  }

  try {
    const params: V1RequestBase = req.body
    // do some validations
    if (!params.cmd) {
      throw Error("Request parameter 'cmd' is mandatory.")
    }
    if (params.headers) {
      log.warn("Request parameter 'headers' was removed in FlareSolverr v2.")
    }
    if (params.userAgent) {
      log.warn("Request parameter 'userAgent' was removed in FlareSolverr v2.")
    }

    // set default values
    if (!params.maxTimeout || params.maxTimeout < 1) {
      params.maxTimeout = 60000;
    }

    // execute the command
    // Only own keys are valid commands. A plain lookup would also match members
    // inherited from Object.prototype (e.g. 'constructor' or 'toString').
    const route = Object.prototype.hasOwnProperty.call(routes, params.cmd)
      ? routes[params.cmd]
      : undefined
    if (route) {
      await route(params, response)
    } else {
      throw Error(`The command '${params.cmd}' is invalid.`)
    }
  } catch (e) {
    res.status(500)
    response.status = "error";
    response.message = e.toString();
    log.error(response.message)
  }

  response.endTimestamp = Date.now()
  log.info(`Response in ${(response.endTimestamp - response.startTimestamp) / 1000} s`)
  res.send(response)
}

View File

@@ -1,186 +0,0 @@
const fs = require('fs');
const os = require('os');
const path = require('path');
import log from './log'
import { createServer, IncomingMessage, ServerResponse } from 'http';
import { RequestContext } from './types'
import Router, { BaseAPICall } from './routes'
import getCaptchaSolver from "./captcha";
import sessions from "./session";
import {v1 as UUIDv1} from "uuid";
const version: string = "v" + require('../package.json').version
const serverPort: number = Number(process.env.PORT) || 8191
const serverHost: string = process.env.HOST || '0.0.0.0'
/**
 * Checks the environment variables that accept a fixed set of values, plus the
 * captcha solver configuration. Logs an error and exits with code 1 on the
 * first invalid value.
 */
function validateEnvironmentVariables() {
  // ip and port variables are validated by nodejs
  const allowedValues: [string, string[]][] = [
    ['LOG_LEVEL', ['error', 'warn', 'info', 'verbose', 'debug']],
    ['LOG_HTML', ['true', 'false']],
    ['HEADLESS', ['true', 'false']]
  ]
  for (const [name, allowed] of allowedValues) {
    const value = process.env[name]
    // unset / empty variables are accepted
    if (value && !allowed.includes(value)) {
      log.error(`The environment variable '${name}' is wrong. Check the documentation.`);
      process.exit(1);
    }
  }

  // getCaptchaSolver throws when the configured solver is not valid
  try {
    getCaptchaSolver();
  } catch (e) {
    log.error(`The environment variable 'CAPTCHA_SOLVER' is wrong. ${e.message}`);
    process.exit(1);
  }
}
/**
 * Verifies that the browser can be launched and can load a page.
 *
 * A small text file is written to the OS temp directory, opened through a
 * one-time browser session and its content compared with what was written.
 *
 * The session and the temporary file are always cleaned up, even when the
 * check fails.
 *
 * @throws Error when the loaded content does not match, or any error raised
 *         while launching the browser or navigating.
 */
async function testChromeInstallation() {
  const sessionId = UUIDv1()
  // create a temporary file for testing
  log.debug("Testing Chrome installation...")
  const fileContent = `flaresolverr_${version}`
  const filePath = path.join(os.tmpdir(), `flaresolverr_${sessionId}.txt`)
  const fileUrl = `file://${filePath}`
  fs.writeFileSync(filePath, fileContent)
  try {
    // launch the browser
    const session = await sessions.create(sessionId, {
      userAgent: null,
      oneTimeSession: true
    })
    try {
      const page = await session.browser.newPage()
      const response = await page.goto(fileUrl, { waitUntil: 'domcontentloaded' })
      const responseBody = (await response.buffer()).toString().trim()
      if (responseBody != fileContent) {
        throw new Error("The response body does not match!")
      }
      await page.close()
    } finally {
      // release the browser even when the check above throws
      await sessions.destroy(sessionId)
    }
  } finally {
    // best-effort removal of the temporary file; a failure here must not hide the real result
    try {
      fs.unlinkSync(filePath)
    } catch (unlinkError) {
      log.debug("Unable to remove temporary file: " + unlinkError)
    }
  }
  log.debug("Test successful")
}
/**
 * Logs an error and answers the request with HTTP 500 and a JSON error document.
 *
 * @param errorMsg       text placed in the `message` field and written to the error log
 * @param res            HTTP response to write to
 * @param startTimestamp time (ms since epoch) at which the request was received
 */
function errorResponse(errorMsg: string, res: ServerResponse, startTimestamp: number) {
  log.error(errorMsg)

  const payload = JSON.stringify({
    status: 'error',
    message: errorMsg,
    startTimestamp,
    endTimestamp: Date.now(),
    version
  })
  const headers = { 'Content-Type': 'application/json' }

  res.writeHead(500, headers)
  res.write(payload)
  res.end()
}
/**
 * Answers the request with HTTP 200 and a JSON document describing the result.
 *
 * The elapsed time is always logged; the message is logged only when it is not empty.
 *
 * @param successMsg         text for the `message` field ('' is sent when falsy)
 * @param extendedProperties extra fields merged over the base document (may be null)
 * @param res                HTTP response to write to
 * @param startTimestamp     time (ms since epoch) at which the request was received
 */
function successResponse(successMsg: string, extendedProperties: object, res: ServerResponse, startTimestamp: number) {
  const endTimestamp = Date.now()
  const elapsedSeconds = (endTimestamp - startTimestamp) / 1000
  log.info(`Response in ${elapsedSeconds} s`)
  if (successMsg) {
    log.info(successMsg)
  }

  const document = {
    status: 'ok',
    message: successMsg || '',
    startTimestamp,
    endTimestamp,
    version,
    // caller supplied fields win over the base fields
    ...(extendedProperties || {})
  }

  res.writeHead(200, { 'Content-Type': 'application/json' })
  res.write(JSON.stringify(document))
  res.end()
}
/**
 * Checks that an incoming API call is a POST to /v1 carrying a `cmd` parameter.
 *
 * When a check fails an error response is sent through `ctx` and `false` is
 * returned; the caller must then stop processing the request.
 *
 * @param ctx    request context (provides `req` and `errorResponse`)
 * @param params parsed JSON body; may be `null` or a non-object because any
 *               valid JSON document is accepted by the parser
 * @returns `true` when the request can be routed, `false` otherwise
 */
function validateIncomingRequest(ctx: RequestContext, params: BaseAPICall) {
  log.info(`Params: ${JSON.stringify(params)}`)

  if (ctx.req.method !== 'POST') {
    ctx.errorResponse('Only the POST method is allowed')
    return false
  }

  if (ctx.req.url !== '/v1') {
    ctx.errorResponse('Only /v1 endpoint is allowed')
    return false
  }

  // a body of `null` is valid JSON; guard it so reading `cmd` cannot throw
  if (!params || !params.cmd) {
    ctx.errorResponse("Parameter 'cmd' is mandatory")
    return false
  }

  return true
}
// init
// Startup sequence: print the version banner, validate the environment variables,
// run the browser self-test and then start the HTTP server.
log.info(`FlareSolverr ${version}`);
log.debug('Debug log enabled');
validateEnvironmentVariables();
testChromeInstallation()
.catch(e => {
// the browser self-test failed: log the cause and terminate the process
log.error("Error starting Chrome browser.", e);
process.exit(1);
})
// NOTE(review): this `.then` is chained after the `.catch`, so the server is only
// skipped on failure because the handler above exits the process
.then(r =>
createServer((req: IncomingMessage, res: ServerResponse) => {
const startTimestamp = Date.now()
// health endpoint. this endpoint is special because it doesn't print traces
if (req.url == '/health') {
res.writeHead(200, {
'Content-Type': 'application/json'
})
res.write(JSON.stringify({"status": "ok"}))
res.end()
return;
}
// count the request for the log prefix
log.incRequests()
log.info(`Incoming request: ${req.method} ${req.url}`)
// show welcome message
if (req.url == '/') {
successResponse("FlareSolverr is ready!", null, res, startTimestamp);
return;
}
// get request body
// the chunks are collected as they arrive and joined once the stream ends
const bodyParts: any[] = []
req.on('data', chunk => {
bodyParts.push(chunk)
}).on('end', () => {
// parse params
const body = Buffer.concat(bodyParts).toString()
let params: BaseAPICall = null
try {
params = JSON.parse(body)
} catch (err) {
errorResponse('Body must be in JSON format', res, startTimestamp)
return
}
// the context binds the response helpers to this request's `res` and start time
const ctx: RequestContext = {
req,
res,
startTimestamp,
errorResponse: (msg) => errorResponse(msg, res, startTimestamp),
successResponse: (msg, extendedProperties) => successResponse(msg, extendedProperties, res, startTimestamp)
}
// validate params
// validateIncomingRequest has already sent the error response when it returns false
if (!validateIncomingRequest(ctx, params)) { return }
// process request
// any rejection from the route handler is reported to the client as an error response
Router(ctx, params).catch(e => {
console.error(e)
ctx.errorResponse(e.message)
})
})
}).listen(serverPort, serverHost, () => {
log.info(`Listening on http://${serverHost}:${serverPort}`);
})
)

View File

@@ -1,194 +1,147 @@
import {Response} from 'puppeteer'
import {Page} from "puppeteer-extra/dist/puppeteer";
import {Page, HTTPResponse} from 'puppeteer'
import log from "../log";
import getCaptchaSolver, {CaptchaType} from "../captcha";
import log from "../services/log";
/**
* This class contains the logic to solve protections provided by CloudFlare
**/
**/
const CHALLENGE_SELECTORS = ['#trk_jschal_js', '.ray_id', '.attack-box', '#cf-please-wait'];
const TOKEN_INPUT_NAMES = ['g-recaptcha-response', 'h-captcha-response'];
const BAN_SELECTORS = ['.text-gray-600'];
const CHALLENGE_SELECTORS = [
'#trk_jschal_js', '.ray_id', '.attack-box', '#cf-please-wait', // CloudFlare
'#link-ddg', // DDoS-GUARD
'td.info #js_info' // Custom CloudFlare for EbookParadijs, Film-Paleis, MuziekFabriek and Puur-Hollands
];
const CAPTCHA_SELECTORS = ['input[name="cf_captcha_kind"]'];
export default async function resolveChallenge(url: string, page: Page, response: Response): Promise<Response> {
export default async function resolveChallenge(url: string, page: Page, response: HTTPResponse): Promise<HTTPResponse> {
// look for challenge and return fast if not detected
if (!response.headers().server.startsWith('cloudflare')) {
let cfDetected = response.headers().server && response.headers().server.startsWith('cloudflare');
if (cfDetected) {
if (response.status() == 403 || response.status() == 503) {
cfDetected = true; // Defected CloudFlare and DDoS-GUARD
} else if (response.headers().vary && response.headers().vary.trim() == 'Accept-Encoding,User-Agent' &&
response.headers()['content-encoding'] && response.headers()['content-encoding'].trim() == 'br') {
cfDetected = true; // Detected Custom CloudFlare for EbookParadijs, Film-Paleis, MuziekFabriek and Puur-Hollands
} else {
cfDetected = false;
}
}
if (cfDetected) {
log.info('Cloudflare detected');
} else {
log.info('Cloudflare not detected');
return response;
}
log.info('Cloudflare detected');
if (await page.$('.cf-error-code')) {
throw new Error('Cloudflare has blocked this request (Code 1020 Detected).')
if (await findAnySelector(page, BAN_SELECTORS)) {
throw new Error('Cloudflare has blocked this request. Probably your IP is banned for this site, check in your web browser.')
}
let selectorFoundCount = 0;
if (response.status() > 400) {
// detect cloudflare wait 5s
for (const selector of CHALLENGE_SELECTORS) {
const cfChallengeElem = await page.$(selector)
if (cfChallengeElem) {
selectorFoundCount++
log.debug(`Javascript challenge element '${selector}' detected.`)
log.debug('Waiting for Cloudflare challenge...')
// find Cloudflare selectors
let selectorFound = false;
let selector: string = await findAnySelector(page, CHALLENGE_SELECTORS)
if (selector) {
selectorFound = true;
log.debug(`Javascript challenge element '${selector}' detected.`)
log.debug('Waiting for Cloudflare challenge...')
while (true) {
try {
// catch Execution context was destroyed
const cfChallengeElem = await page.$(selector)
if (!cfChallengeElem) {
// solved!
log.debug('Challenge element not found.')
break
} else {
// new Cloudflare Challenge #cf-please-wait
const displayStyle = await page.evaluate((selector) => {
return getComputedStyle(document.querySelector(selector)).getPropertyValue("display");
}, selector);
if (displayStyle == "none") {
// spinner is hidden, could be a captcha or not
log.debug('Challenge element is hidden.')
// wait until redirecting disappears
while (true) {
try {
await page.waitFor(1000)
const displayStyle2 = await page.evaluate(() => {
return getComputedStyle(document.querySelector('#cf-spinner-redirecting')).getPropertyValue("display");
});
if (displayStyle2 == "none") {
break // hCaptcha detected
}
} catch (error) {
break // redirection completed
}
while (true) {
try {
selector = await findAnySelector(page, CHALLENGE_SELECTORS)
if (!selector) {
// solved!
log.debug('Challenge element not found')
break
} else {
log.debug(`Javascript challenge element '${selector}' detected.`)
// new Cloudflare Challenge #cf-please-wait
const displayStyle = await page.evaluate((selector) => {
return getComputedStyle(document.querySelector(selector)).getPropertyValue("display");
}, selector);
if (displayStyle == "none") {
// spinner is hidden, could be a captcha or not
log.debug('Challenge element is hidden')
// wait until redirecting disappears
while (true) {
try {
await page.waitForTimeout(1000)
const displayStyle2 = await page.evaluate(() => {
return getComputedStyle(document.querySelector('#cf-spinner-redirecting')).getPropertyValue("display");
});
if (displayStyle2 == "none") {
break // hCaptcha detected
}
break
} else {
log.debug('Challenge element is visible.')
} catch (error) {
break // redirection completed
}
}
log.debug('Found challenge element again.')
} catch (error)
{
log.debug("Unexpected error: " + error);
break
} else {
log.debug('Challenge element is visible')
}
log.debug('Waiting for Cloudflare challenge...')
await page.waitFor(1000)
}
log.debug('Found challenge element again')
log.debug('Validating HTML code...')
break
} else {
log.debug(`No '${selector}' challenge element detected.`)
} catch (error)
{
log.debug("Unexpected error: " + error);
if (!error.toString().includes("Execution context was destroyed")) {
break
}
}
log.debug('Waiting for Cloudflare challenge...')
await page.waitForTimeout(1000)
}
log.debug("Javascript challenge selectors found: " + selectorFoundCount + ", total selectors: " + CHALLENGE_SELECTORS.length)
log.debug('Validating HTML code...')
} else {
// some sites use cloudflare but there is no challenge
log.debug(`Javascript challenge not detected. Status code: ${response.status()}`);
selectorFoundCount = 1;
log.debug(`No challenge element detected.`)
}
// it seems some captcha pages return 200 sometimes
if (await page.$('input[name="cf_captcha_kind"]')) {
log.info('Captcha challenge detected.');
const captchaSolver = getCaptchaSolver()
if (captchaSolver) {
const captchaStartTimestamp = Date.now()
const challengeForm = await page.$('#challenge-form')
if (challengeForm) {
const captchaTypeElm = await page.$('input[name="cf_captcha_kind"]')
const cfCaptchaType: string = await captchaTypeElm.evaluate((e: any) => e.value)
const captchaType: CaptchaType = (CaptchaType as any)[cfCaptchaType]
if (!captchaType) {
throw new Error('Unknown captcha type!');
}
// check for CAPTCHA challenge
if (await findAnySelector(page, CAPTCHA_SELECTORS)) {
log.info('CAPTCHA challenge detected');
throw new Error('FlareSolverr can not resolve CAPTCHA challenges. Since the captcha doesn\'t always appear, you may have better luck with the next request.');
let sitekey = null
if (captchaType != 'hCaptcha' && process.env.CAPTCHA_SOLVER != 'hcaptcha-solver') {
const sitekeyElem = await page.$('*[data-sitekey]')
if (!sitekeyElem) {
throw new Error('Could not find sitekey!');
}
sitekey = await sitekeyElem.evaluate((e) => e.getAttribute('data-sitekey'))
}
log.info('Waiting to receive captcha token to bypass challenge...')
const token = await captchaSolver({
url,
sitekey,
type: captchaType
})
log.debug(`Token received: ${token}`);
if (!token) {
throw new Error('Token solver failed to return a token.')
}
let responseFieldsFoundCount = 0;
for (const name of TOKEN_INPUT_NAMES) {
const input = await page.$(`textarea[name="${name}"]`)
if (input) {
responseFieldsFoundCount ++;
log.debug(`Challenge response field '${name}' found in challenge form.`);
await input.evaluate((e: HTMLTextAreaElement, token) => { e.value = token }, token);
}
}
if (responseFieldsFoundCount == 0) {
throw new Error('Challenge response field not found in challenge form.');
}
// ignore preset event listeners on the form
await page.evaluate(() => {
window.addEventListener('submit', (e) => { e.stopPropagation() }, true)
})
// it seems some sites obfuscate their challenge forms
// TODO: look into how they do it and come up with a more solid solution
try {
// this element is added with js and we want to wait for all the js to load before submitting
await page.waitForSelector('#challenge-form', { timeout: 10000 })
} catch (err) {
throw new Error("No '#challenge-form' element detected.");
}
// calculates the time it took to solve the captcha
const captchaSolveTotalTime = Date.now() - captchaStartTimestamp
// generates a random wait time
const randomWaitTime = (Math.floor(Math.random() * 10) + 10) * 1000
// waits, if any, time remaining to appear human but stay as fast as possible
const timeLeft = randomWaitTime - captchaSolveTotalTime
if (timeLeft > 0) {
log.debug(`Waiting for '${timeLeft}' milliseconds.`);
await page.waitFor(timeLeft);
}
// submit captcha response
challengeForm.evaluate((e: HTMLFormElement) => e.submit())
response = await page.waitForNavigation({ waitUntil: 'domcontentloaded' })
if (await page.$('input[name="cf_captcha_kind"]')) {
throw new Error('Captcha service failed to solve the challenge.');
}
}
} else {
throw new Error('Captcha detected but no automatic solver is configured.');
}
// const captchaSolver = getCaptchaSolver()
// if (captchaSolver) {
// // to-do: get the params
// log.info('Waiting to receive captcha token to bypass challenge...')
// const token = await captchaSolver({
// url,
// sitekey,
// type: captchaType
// })
// log.debug(`Token received: ${token}`);
// // to-do: send the token
// }
// } else {
// throw new Error('Captcha detected but no automatic solver is configured.');
// }
} else {
if (selectorFoundCount == 0)
if (!selectorFound)
{
throw new Error('No challenge selectors found, unable to proceed')
throw new Error('No challenge selectors found, unable to proceed.')
} else {
// reload the page to make sure we get the real response
response = await page.reload()
log.info('Challenge solved.');
log.info('Challenge solved');
}
}
return response;
}
/**
 * Returns the first selector of `selectors` that matches an element in `page`.
 *
 * Selectors are probed one at a time, in order, and probing stops at the
 * first match.
 *
 * @param page      page to query with `page.$`
 * @param selectors CSS selectors to try, in priority order
 * @returns the matching selector, or `null` when none of them matches
 */
async function findAnySelector(page: Page, selectors: string[]) {
  let index = 0
  while (index < selectors.length) {
    const candidate = selectors[index]
    const element = await page.$(candidate)
    if (element) {
      return candidate;
    }
    index += 1
  }
  return null;
}

View File

@@ -1,298 +0,0 @@
import { v1 as UUIDv1 } from 'uuid'
import { SetCookie, Request, Response, Headers, HttpMethod, Overrides } from 'puppeteer'
import { Page, Browser } from "puppeteer-extra/dist/puppeteer";
const Timeout = require('await-timeout');
import log from './log'
import sessions, { SessionsCacheItem } from './session'
import { RequestContext } from './types'
import cloudflareProvider from './providers/cloudflare';
/** Minimal shape of every API call: `cmd` is the key Router looks up in `routes`. */
export interface BaseAPICall {
cmd: string
}
/** API call that may refer to a browser session by id. */
interface BaseSessionsAPICall extends BaseAPICall {
session?: string
}
/**
 * Parameters of the 'sessions.create' command. Everything except `session`
 * is passed to `sessions.create` as the session options.
 */
interface SessionsCreateAPICall extends BaseSessionsAPICall {
userAgent?: string,
cookies?: SetCookie[],
headers?: Headers
maxTimeout?: number
proxy?: any
}
/** Parameters of the 'request.get' and 'request.post' commands. */
interface BaseRequestAPICall extends BaseAPICall {
// address the browser navigates to
url: string
// set by the route handlers ('GET' or 'POST')
method?: HttpMethod
// request body; mandatory for 'request.post', rejected for 'request.get'
postData?: string
// id of an existing session; when undefined a one-time session is created for the request
session?: string
userAgent?: string
// upper bound in ms for resolving the challenge; 60000 is used when falsy
maxTimeout?: number
cookies?: SetCookie[],
headers?: Headers
// `username` / `password` are used to authenticate the page when `username` is set
proxy?: any, // TODO: use interface not any
// when true the page is loaded again and its body is returned base64 encoded
download?: boolean
// when true headers and user agent are nulled and no response body is returned
returnOnlyCookies?: boolean
}
/** Map of command name to its handler. */
interface Routes {
[key: string]: (ctx: RequestContext, params: BaseAPICall) => void | Promise<void>
}
/** The `result` part of a resolved challenge, built from the final page and response. */
interface ChallengeResolutionResultT {
url: string
status: number,
headers?: Headers,
response: string,
cookies: object[]
userAgent: string
}
/** Outcome of a challenge resolution: `status` is 'ok' or 'error', `message` describes the error. */
interface ChallengeResolutionT {
status?: string
message: string
result: ChallengeResolutionResultT
}
/** Functions that compute the override values applied to the first navigation request. */
interface OverrideResolvers {
method?: (request: Request) => HttpMethod,
postData?: (request: Request) => string,
headers?: (request: Request) => Headers
}
/** Keys of OverrideResolvers. */
type OverridesProps =
'method' |
'postData' |
'headers'
// We always set a Windows User-Agent because ARM builds are detected by Cloudflare
const DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
/**
 * Runs `resolveChallenge` but gives up after `params.maxTimeout` milliseconds
 * (60000 when the parameter is missing or falsy).
 *
 * The challenge and a timer are raced against each other; the timer is always
 * cleared afterwards, whichever settles first.
 *
 * @returns whatever `resolveChallenge` resolves to
 */
async function resolveChallengeWithTimeout(ctx: RequestContext, params: BaseRequestAPICall, page: Page) {
  const timeoutMs = params.maxTimeout || 60000
  const watchdog = new Timeout();
  try {
    const challenge = resolveChallenge(ctx, params, page);
    const deadline = watchdog.set(timeoutMs, `Maximum timeout reached. maxTimeout=${timeoutMs} (ms)`)
    return await Promise.race([challenge, deadline]);
  } finally {
    watchdog.clear();
  }
}
/**
 * Navigates `page` to `url`, lets the Cloudflare provider solve any challenge
 * and builds the result payload (final url, status, headers, body, cookies and
 * user agent).
 *
 * A failure of the Cloudflare provider is not thrown: it is reported through
 * `status: "error"` and `message` in the returned payload.
 *
 * The page is closed before returning. If anything else throws (navigation,
 * payload building) the page is closed on a best-effort basis as well, so a
 * failed request does not leave a tab open in a long-lived session.
 *
 * @param ctx  request context (unused here, kept for the common signature)
 * @param page page prepared by `setupPage`
 * @returns the payload describing the resolved page
 * @throws any error raised by navigation or while reading the page
 */
async function resolveChallenge(ctx: RequestContext, { url, proxy, download, returnOnlyCookies }: BaseRequestAPICall, page: Page): Promise<ChallengeResolutionT | void> {
  try {
    let status = 'ok'
    let message = ''

    if (proxy) {
      log.debug("Apply proxy");
      if (proxy.username)
        await page.authenticate({ username: proxy.username, password: proxy.password });
    }

    log.debug(`Navigating to... ${url}`)
    let response: Response = await page.goto(url, { waitUntil: 'domcontentloaded' })
    log.html(await page.content())

    // Detect protection services and solve challenges
    try {
      response = await cloudflareProvider(url, page, response);
    } catch (e) {
      status = "error";
      message = "Cloudflare " + e.toString();
    }

    const payload: ChallengeResolutionT = {
      status,
      message,
      result: {
        url: page.url(),
        status: response.status(),
        headers: response.headers(),
        response: null,
        cookies: await page.cookies(),
        userAgent: await page.evaluate(() => navigator.userAgent)
      }
    }

    if (returnOnlyCookies) {
      payload.result.headers = null;
      payload.result.userAgent = null;
    } else {
      if (download) {
        // for some reason we get an error unless we reload the page
        // has something to do with a stale buffer and this is the quickest
        // fix since I am short on time
        response = await page.goto(url, { waitUntil: 'domcontentloaded' })
        payload.result.response = (await response.buffer()).toString('base64')
      } else {
        payload.result.response = await page.content()
      }
    }

    // Add final url in result (it may have changed after the download reload)
    payload.result.url = page.url();

    // make sure the page is closed because if it isn't an error will be thrown
    // when a user uses a temporary session: the browser may be quit before
    // the page is properly closed.
    await page.close()

    return payload
  } catch (e) {
    // do not leave the tab open when the request failed; errors while closing
    // are ignored so that the original error is the one reported
    if (!page.isClosed()) {
      await page.close().catch((): void => undefined)
    }
    throw e
  }
}
/**
 * Combines the defaults stored with a session and the parameters of the
 * current request. Request parameters win over the defaults; the `headers`
 * objects are merged key by key instead of being replaced as a whole.
 *
 * @returns a new object; neither input is modified
 */
function mergeSessionWithParams({ defaults }: SessionsCacheItem, params: BaseRequestAPICall): BaseRequestAPICall {
  // custom merging logic for headers: request headers override session headers
  const sessionHeaders = defaults.headers || {}
  const requestHeaders = params.headers || {}
  const headers = { ...sessionHeaders, ...requestHeaders }

  return { ...defaults, ...params, headers }
}
/**
 * Opens a new page in `browser` and configures it from the request parameters:
 * user agent (custom or the default Windows one), cookies, and - through
 * request interception - method, body and extra headers.
 *
 * The overrides are applied only to the first navigation request; every other
 * request is passed through unchanged.
 *
 * @returns the configured page, not yet navigated
 */
async function setupPage(ctx: RequestContext, params: BaseRequestAPICall, browser: Browser): Promise<Page> {
  const page = await browser.newPage()

  // merge session defaults with params
  const { method, postData, userAgent, headers, cookies } = params
  const resolvers: OverrideResolvers = {}

  if (method !== 'GET') {
    log.debug(`Setting method to ${method}`)
    resolvers.method = () => method
  }

  if (postData) {
    log.debug(`Setting body data to ${postData}`)
    resolvers.postData = () => postData
  }

  if (userAgent) {
    log.debug(`Using custom UA: ${userAgent}`)
  }
  await page.setUserAgent(userAgent ? userAgent : DEFAULT_USER_AGENT)

  if (headers) {
    log.debug(`Adding custom headers: ${JSON.stringify(headers)}`)
    resolvers.headers = (request) => Object.assign(request.headers(), headers)
  }

  if (cookies) {
    log.debug(`Setting custom cookies: ${JSON.stringify(cookies)}`)
    await page.setCookie(...cookies)
  }

  // nothing to override: no need to intercept requests
  if (Object.keys(resolvers).length === 0) {
    return page
  }

  let overridesApplied = false
  const onRequest = (request: Request) => {
    const passThrough = overridesApplied || !request.isNavigationRequest()
    if (passThrough) {
      request.continue()
      return
    }

    overridesApplied = true
    const overrides: Overrides = {}
    for (const key of Object.keys(resolvers) as OverridesProps[]) {
      // @ts-ignore
      overrides[key] = resolvers[key](request)
    }

    log.debug(`Overrides: ${JSON.stringify(overrides)}`)
    request.continue(overrides)
  }

  await page.setRequestInterception(true)
  page.on('request', onRequest)

  return page
}
/**
 * Executes a browser request for the 'request.*' commands.
 *
 * When the call names no session a one-time session is created and destroyed
 * again once the request has finished; otherwise the existing session is
 * looked up and an error response is sent when it does not exist.
 */
const browserRequest = async (ctx: RequestContext, params: BaseRequestAPICall) => {
  const oneTimeSession = params.session === undefined
  const sessionId = params.session || UUIDv1()

  const session = oneTimeSession
    ? await sessions.create(sessionId, { userAgent: params.userAgent, oneTimeSession })
    : sessions.get(sessionId)

  if (session === false) {
    return ctx.errorResponse('This session does not exist. Use \'list_sessions\' to see all the existing sessions.')
  }

  params = mergeSessionWithParams(session, params)

  try {
    const page = await setupPage(ctx, params, session.browser)
    const data = await resolveChallengeWithTimeout(ctx, params, page)
    if (!data) {
      return
    }

    // the status travels at the top level of the response, not inside `data`
    const { status } = data
    delete data.status

    const sessionPart = oneTimeSession ? {} : { session: sessionId }
    const statusPart = status ? { status } : {}
    ctx.successResponse(data.message, {
      ...sessionPart,
      ...statusPart,
      solution: data.result
    })
  } catch (error) {
    log.error(error)
    return ctx.errorResponse("Unable to process browser request. Error: " + error)
  } finally {
    if (oneTimeSession) {
      await sessions.destroy(sessionId)
    }
  }
}
/**
 * Handlers of the API commands, keyed by the value of the `cmd` parameter.
 * Every handler answers the request through `ctx.successResponse` or
 * `ctx.errorResponse`.
 */
export const routes: Routes = {
  // creates a browser session; a random id is generated when none is given
  'sessions.create': async (ctx, { session, ...options }: SessionsCreateAPICall) => {
    session = session || UUIDv1()
    const { browser } = await sessions.create(session, options)
    if (browser) {
      ctx.successResponse('Session created successfully.', { session })
    } else {
      // always answer, otherwise the client would wait forever
      ctx.errorResponse('The session could not be created.')
    }
  },
  // lists the ids of the existing sessions
  'sessions.list': (ctx) => {
    ctx.successResponse(null, { sessions: sessions.list() })
  },
  // closes a session
  'sessions.destroy': async (ctx, { session }: BaseSessionsAPICall) => {
    if (await sessions.destroy(session)) { return ctx.successResponse('The session has been removed.') }
    ctx.errorResponse('This session does not exist.')
  },
  // GET request through the browser; a body is not allowed
  'request.get': async (ctx, params: BaseRequestAPICall) => {
    params.method = 'GET'
    if (params.postData) {
      // the parameter read above is 'postData', name it correctly in the message
      return ctx.errorResponse('Cannot use "postData" when sending a GET request.')
    }
    await browserRequest(ctx, params)
  },
  // POST request through the browser; a body is mandatory
  'request.post': async (ctx, params: BaseRequestAPICall) => {
    params.method = 'POST'
    if (!params.postData) {
      return ctx.errorResponse('Must send param "postData" when sending a POST request.')
    }
    await browserRequest(ctx, params)
  },
}
/**
 * Dispatches an API call to the handler registered in `routes` for `params.cmd`.
 *
 * Only the commands defined in `routes` itself are accepted: inherited object
 * members such as `toString` or `constructor` are not handlers and are
 * rejected like any other unknown command.
 *
 * @param ctx    request context used to send the response
 * @param params parsed request parameters
 */
export default async function Router(ctx: RequestContext, params: BaseAPICall): Promise<void> {
  const isKnownCommand = Object.prototype.hasOwnProperty.call(routes, params.cmd)
  const route = isKnownCommand ? routes[params.cmd] : undefined
  if (route) { return await route(ctx, params) }
  return ctx.errorResponse(`The command '${params.cmd}' is invalid.`)
}

63
src/server.ts Normal file
View File

@@ -0,0 +1,63 @@
import log from './services/log'
import {testWebBrowserInstallation} from "./services/sessions";
const app = require("./app");
const version: string = 'v' + require('../package.json').version
const serverPort: number = Number(process.env.PORT) || 8191
const serverHost: string = process.env.HOST || '0.0.0.0'
/**
 * Validates the optional environment variables read by the service.
 *
 * LOG_LEVEL must be one of the supported level names; LOG_HTML and HEADLESS
 * must be 'true' or 'false'. An invalid value is logged and the process exits
 * with code 1.
 */
function validateEnvironmentVariables() {
  // ip and port variables are validated by nodejs
  const abort = (reason: string) => {
    log.error(reason);
    process.exit(1);
  }
  const isUnsupported = (value: string, allowed: string[]) => allowed.indexOf(value) < 0
  const booleanWords = ['true', 'false']

  const { LOG_LEVEL, LOG_HTML, HEADLESS } = process.env
  if (LOG_LEVEL && isUnsupported(LOG_LEVEL, ['error', 'warn', 'info', 'verbose', 'debug'])) {
    abort(`The environment variable 'LOG_LEVEL' is wrong. Check the documentation.`)
  }
  if (LOG_HTML && isUnsupported(LOG_HTML, booleanWords)) {
    abort(`The environment variable 'LOG_HTML' is wrong. Check the documentation.`)
  }
  if (HEADLESS && isUnsupported(HEADLESS, booleanWords)) {
    abort(`The environment variable 'HEADLESS' is wrong. Check the documentation.`)
  }
  // todo: fix resolvers
  // try {
  //   getCaptchaSolver();
  // } catch (e) {
  //   log.error(`The environment variable 'CAPTCHA_SOLVER' is wrong. ${e.message}`);
  //   process.exit(1);
  // }
}
// Init
log.info(`FlareSolverr ${version}`);
log.debug('Debug log enabled');

// Capture signal on Docker Stop #158
process.on('SIGTERM', () => {
  log.info("Process interrupted")
  process.exit(0)
})

// Avoid crashing in NodeJS 17 due to UnhandledPromiseRejectionWarning: Unhandled promise rejection.
process.on('uncaughtException', (err) => {
  log.error(err)
})

validateEnvironmentVariables();

// The HTTP server is started only after the web browser test has passed;
// any failure is logged and terminates the process with exit code 1.
const startServer = async (): Promise<void> => {
  try {
    await testWebBrowserInstallation()
    // Start server
    app.listen(serverPort, serverHost, () => {
      log.info(`Listening on http://${serverHost}:${serverPort}`);
    })
  } catch (e) {
    log.error(e);
    const msg: string = "" + e;
    if (msg.includes('while trying to connect to the browser!')) {
      log.error(`It seems that the system is too slow to run FlareSolverr.
If you are running with Docker, try to remove CPU limits in the container.
If not, try setting the 'BROWSER_TIMEOUT' environment variable and the 'maxTimeout' parameter to higher values.`);
    }
    process.exit(1);
  }
}
startServer()

View File

@@ -22,10 +22,13 @@ function toIsoString(date: Date) {
}
export default {
incRequests: () => { requests++ },
incRequests: () => {
requests++
},
html(html: string) {
if (LOG_HTML)
this.debug(html)
if (LOG_HTML) {
this.debug(html)
}
},
...require('console-log-level')(
{level: process.env.LOG_LEVEL || 'info',

194
src/services/sessions.ts Normal file
View File

@@ -0,0 +1,194 @@
import {v1 as UUIDv1} from 'uuid'
import * as path from 'path'
import {Browser} from 'puppeteer'
import {Protocol} from "devtools-protocol";
import log from './log'
import {Proxy} from "../controllers/v1";
const os = require('os');
const fs = require('fs');
const puppeteer = require('puppeteer');
/** A running browser together with the id it is cached under. */
export interface SessionsCacheItem {
sessionId: string
browser: Browser
}
/** Map of session id to its cached session. */
interface SessionsCache {
[key: string]: SessionsCacheItem
}
/**
 * Options accepted by `create`.
 * NOTE(review): in this file `create` only reads `proxy`; the other fields are not used by it.
 */
export interface SessionCreateOptions {
oneTimeSession: boolean
cookies?: Protocol.Network.CookieParam[],
maxTimeout?: number
// proxy applied to the browser through the Firefox preferences
proxy?: Proxy
}
// all the sessions created and not yet destroyed, keyed by session id
const sessionCache: SessionsCache = {}
// user agent detected by testWebBrowserInstallation and returned by getUserAgent
let webBrowserUserAgent: string;
/**
 * Builds the Firefox preferences passed to Puppeteer when launching the browser.
 *
 * Besides a fixed set of defaults, the proxy preferences are added when
 * `proxy.url` is set. The proxy type is taken from the URL scheme only:
 * `socks4://` (and variants starting with `socks4`) selects SOCKS v4, any
 * other scheme starting with `socks` selects SOCKS v5, everything else -
 * including a URL without scheme - is configured as an HTTP proxy. A host name
 * that merely contains the word "socks" is therefore not mistaken for a SOCKS
 * proxy.
 *
 * @param proxy proxy configuration, may be undefined. Expected url format: protocol://ip:port
 * @returns the preferences object
 * @throws Error when the proxy url has no host or no valid port (1-65535)
 */
function buildExtraPrefsFirefox(proxy: Proxy): object {
  // Default configurations are defined here
  // https://github.com/puppeteer/puppeteer/blob/v3.3.0/src/Launcher.ts#L481
  const extraPrefsFirefox = {
    // Disable newtabpage
    "browser.newtabpage.enabled": false,
    "browser.startup.homepage": "about:blank",

    // Do not warn when closing all open tabs
    "browser.tabs.warnOnClose": false,

    // Disable telemetry
    "toolkit.telemetry.reportingpolicy.firstRun": false,

    // Disable first-run welcome page
    "startup.homepage_welcome_url": "about:blank",
    "startup.homepage_welcome_url.additional": "",

    // Detected !
    // // Disable images to speed up load
    // "permissions.default.image": 2,

    // Limit content processes to 1
    "dom.ipc.processCount": 1
  }

  // proxy.url format => http://<host>:<port>
  if (proxy && proxy.url) {
    log.debug(`Using proxy: ${proxy.url}`)

    // the scheme decides the proxy type; look at the start of the url only
    const schemeMatch = /^([a-z][a-z0-9+.-]*):\/\//i.exec(proxy.url)
    const scheme = schemeMatch ? schemeMatch[1].toLowerCase() : ''

    const [host, portStr] = proxy.url.replace(/.+:\/\//g, '').split(':');
    const port = parseInt(portStr, 10);
    if (!host || !portStr || !port || port < 1 || port > 65535) {
      throw new Error("Proxy configuration is invalid! Use the format: protocol://ip:port")
    }

    const proxyPrefs = {
      "network.proxy.type": 1,
      "network.proxy.share_proxy_settings": true
    }
    if (scheme.indexOf("socks") === 0) {
      // SOCKSv4 & SOCKSv5
      Object.assign(proxyPrefs, {
        "network.proxy.socks": host,
        "network.proxy.socks_port": port,
        "network.proxy.socks_remote_dns": true,
        "network.proxy.socks_version": scheme.indexOf("socks4") === 0 ? 4 : 5
      });
    } else {
      // HTTP
      Object.assign(proxyPrefs, {
        "network.proxy.ftp": host,
        "network.proxy.ftp_port": port,
        "network.proxy.http": host,
        "network.proxy.http_port": port,
        "network.proxy.ssl": host,
        "network.proxy.ssl_port": port
      });
    }

    // merge objects
    Object.assign(extraPrefsFirefox, proxyPrefs);
  }

  return extraPrefsFirefox;
}
/**
 * Returns the user agent stored in `webBrowserUserAgent`.
 * The value is assigned by testWebBrowserInstallation; it is undefined until that has run.
 */
export function getUserAgent() {
return webBrowserUserAgent
}
/**
 * Checks that the web browser can be launched and can load a page, and
 * records the browser's user agent for later requests.
 *
 * Steps: verify that the user home directory is accessible (it is used by
 * Firefox), open TEST_URL (default https://www.google.com) in a one-time
 * session and read `navigator.userAgent`. A Linux ARM / aarch64 user agent is
 * rewritten to Linux x86_64 because the ARM one is detected.
 *
 * The session is destroyed even when the page cannot be loaded, so a failed
 * test does not leave a browser running.
 *
 * @throws any error raised by the file system check, the browser launch or the navigation
 */
export async function testWebBrowserInstallation(): Promise<void> {
  log.info("Testing web browser installation...")

  // check user home dir. this dir will be used by Firefox
  const homeDir = os.homedir();
  fs.accessSync(homeDir, fs.constants.F_OK | fs.constants.R_OK | fs.constants.W_OK | fs.constants.X_OK);
  log.debug("FlareSolverr user home directory is OK: " + homeDir)

  // test web browser
  const testUrl = process.env.TEST_URL || "https://www.google.com";
  log.debug("Test URL: " + testUrl)
  const session = await create(null, {
    oneTimeSession: true
  })
  try {
    const page = await session.browser.newPage()
    const pageTimeout = Number(process.env.BROWSER_TIMEOUT) || 40000
    await page.goto(testUrl, {waitUntil: 'domcontentloaded', timeout: pageTimeout})
    webBrowserUserAgent = await page.evaluate(() => navigator.userAgent)

    // replace Linux ARM user-agent because it's detected
    // (case-insensitive match, independent of the process locale)
    if (/linux (arm|aarch64)/i.test(webBrowserUserAgent)) {
      webBrowserUserAgent = webBrowserUserAgent.replace(/linux \w+;/i, 'Linux x86_64;')
    }

    log.info("FlareSolverr User-Agent: " + webBrowserUserAgent)
    await page.close()
  } finally {
    // always release the browser, also when the navigation above fails
    await destroy(session.sessionId)
  }

  log.info("Test successful")
}
/**
 * Launches a Firefox browser through Puppeteer and registers it in the
 * session cache.
 *
 * @param session id for the new session; a random id is generated when falsy
 * @param options session options; only `proxy` is used here
 * @returns the cached session item (id and browser)
 * @throws Error when the browser cannot be launched or the proxy configuration is invalid
 */
export async function create(session: string, options: SessionCreateOptions): Promise<SessionsCacheItem> {
  log.debug('Creating new session...')

  const sessionId = session || UUIDv1()

  // NOTE: cookies can't be set in the session, you need to open the page first

  const launchOptions: any = {
    product: 'firefox',
    headless: process.env.HEADLESS !== 'false',
    timeout: Number(process.env.BROWSER_TIMEOUT) || 40000,
    extraPrefsFirefox: buildExtraPrefsFirefox(options.proxy)
  }

  // if we are running inside executable binary, change browser path
  const runningAsBinary = typeof (process as any).pkg !== 'undefined'
  if (runningAsBinary) {
    const exeName = process.platform === "win32" ? 'firefox.exe' : 'firefox';
    const binaryDir = path.dirname(process.execPath)
    launchOptions.executablePath = path.join(binaryDir, 'firefox', exeName)
  }

  log.debug('Launching web browser...')
  const browser: Browser = await puppeteer.launch(launchOptions)
  if (!browser) {
    throw Error(`Failed to launch web browser.`)
  }

  const cacheItem: SessionsCacheItem = { sessionId, browser }
  sessionCache[sessionId] = cacheItem

  return cacheItem
}
/** Returns the ids of all the sessions currently held in the session cache. */
export function list(): string[] {
return Object.keys(sessionCache)
}
/**
 * Closes the browser of a session and removes the session from the cache.
 *
 * The lookup only considers ids that were really stored in the cache, so it
 * also works for sessions whose id collides with an object member name such
 * as `hasOwnProperty`. The cache entry is removed even when closing the
 * browser fails, so a broken session does not stay listed.
 *
 * @param id session id
 * @returns `true` when the session existed and was removed, `false` otherwise
 * @throws any error raised while closing the browser
 */
export async function destroy(id: string): Promise<boolean>{
  if (!id || !Object.prototype.hasOwnProperty.call(sessionCache, id)) {
    return false
  }

  const { browser } = sessionCache[id]
  if (!browser) {
    return false
  }

  try {
    await browser.close()
  } finally {
    delete sessionCache[id]
  }
  return true
}
/**
 * Returns the cached session with the given id.
 *
 * Only sessions stored in the cache are returned; ids that match an inherited
 * object member (for example `toString`) are treated as unknown.
 *
 * @param id session id
 * @returns the session item, or `undefined` when no session has that id
 */
export function get(id: string): SessionsCacheItem {
  return Object.prototype.hasOwnProperty.call(sessionCache, id) ? sessionCache[id] : undefined
}

223
src/services/solver.ts Normal file
View File

@@ -0,0 +1,223 @@
import {Page, HTTPResponse} from 'puppeteer'
const Timeout = require('await-timeout');
import log from './log'
import {SessionCreateOptions, SessionsCacheItem} from "./sessions";
import {V1Request} from "../controllers/v1";
import cloudflareProvider from '../providers/cloudflare';
const sessions = require('./sessions')
/** The `result` part of a resolved challenge, as filled in by resolveChallenge. */
export interface ChallengeResolutionResultT {
// url of the page after navigation
url: string
// HTTP status of the last response
status: number,
// response headers; set to null when only cookies are requested
headers?: Record<string, string>,
// HTML content of the page; null when only cookies are requested
response: string,
// cookies of the page
cookies: object[]
// user agent used by the browser; set to null when only cookies are requested
userAgent: string
}
/** Outcome of a challenge resolution. */
export interface ChallengeResolutionT {
// 'ok', or 'error' when the protection provider failed
status?: string
// empty, or the description of the provider error
message: string
result: ChallengeResolutionResultT
}
/**
 * Runs `resolveChallenge` but gives up after `params.maxTimeout` milliseconds.
 *
 * The challenge and a timer are raced against each other; the timer is always
 * cleared afterwards, whichever settles first.
 *
 * @returns whatever `resolveChallenge` resolves to
 */
async function resolveChallengeWithTimeout(params: V1Request, session: SessionsCacheItem) {
  const watchdog = new Timeout();
  try {
    const challenge = resolveChallenge(params, session);
    const deadline = watchdog.set(params.maxTimeout, `Maximum timeout reached. maxTimeout=${params.maxTimeout} (ms)`)
    return await Promise.race([challenge, deadline]);
  } finally {
    watchdog.clear();
  }
}
/**
 * Opens a page in the session's browser, navigates to `params.url`, lets the
 * Cloudflare provider solve any challenge and builds the result payload.
 *
 * A failure of the provider is not thrown: it is reported through
 * `status: "error"` and `message` in the returned payload.
 *
 * The page is closed before returning. When an unexpected error occurs the
 * page is closed on a best-effort basis as well, so that failed requests do
 * not leave tabs open in a long-lived session.
 *
 * @param params  request parameters (url, maxTimeout, cookies, proxy, returnOnlyCookies...)
 * @param session session whose browser is used
 * @returns the payload describing the resolved page
 * @throws any unexpected error; it is logged before being rethrown
 */
async function resolveChallenge(params: V1Request, session: SessionsCacheItem): Promise<ChallengeResolutionT | void> {
  let page: Page = null
  try {
    let status = 'ok'
    let message = ''

    page = await session.browser.newPage()

    // the Puppeter timeout should be half the maxTimeout because we reload the page and wait for challenge
    // the user can set a really high maxTimeout if he wants to
    await page.setDefaultNavigationTimeout(params.maxTimeout / 2)

    // the user-agent is changed just for linux arm build
    await page.setUserAgent(sessions.getUserAgent())

    // set the proxy
    if (params.proxy) {
      log.debug(`Using proxy: ${params.proxy.url}`);
      // todo: credentials are not working
      // if (params.proxy.username) {
      //   await page.authenticate({
      //     username: params.proxy.username,
      //     password: params.proxy.password
      //   });
      // }
    }

    // go to the page
    log.debug(`Navigating to... ${params.url}`)
    let response: HTTPResponse = await gotoPage(params, page);

    // set cookies
    if (params.cookies) {
      for (const cookie of params.cookies) {
        // the other fields in the cookie can cause issues
        await page.setCookie({
          "name": cookie.name,
          "value": cookie.value
        })
      }
      // reload the page
      response = await gotoPage(params, page);
    }

    // log html in debug mode
    log.html(await page.content())

    // detect protection services and solve challenges
    try {
      response = await cloudflareProvider(params.url, page, response);

      // if the response is ok
      // reload the page to be sure we get the real page
      log.debug("Reloading the page")
      try {
        response = await gotoPage(params, page);
      } catch (e) {
        log.warn("Page not reloaded (do not report!): Cause: " + e.toString())
      }
    } catch (e) {
      status = "error";
      message = "Cloudflare " + e.toString();
    }

    const payload: ChallengeResolutionT = {
      status,
      message,
      result: {
        url: page.url(),
        status: response.status(),
        headers: response.headers(),
        response: null,
        cookies: await page.cookies(),
        userAgent: sessions.getUserAgent()
      }
    }

    if (params.returnOnlyCookies) {
      payload.result.headers = null;
      payload.result.userAgent = null;
    } else {
      payload.result.response = await page.content()
    }

    // make sure the page is closed because if it isn't an error will be thrown
    // when a user uses a temporary session: the browser may be quit before
    // the page is properly closed.
    await page.close()

    return payload
  } catch (e) {
    log.error("Unexpected error: " + e);
    // do not leave the tab open; a failure while closing must not replace the original error
    if (page && !page.isClosed()) {
      try {
        await page.close()
      } catch (closeError) {
        log.debug("Unable to close the page: " + closeError)
      }
    }
    throw e;
  }
}
/**
 * Serializes a value as a JavaScript string literal that is safe to embed in an inline <script>.
 *
 * The value is converted with String() (the same coercion a template literal applies) and
 * quoted with JSON.stringify, which escapes quotes, backslashes and line breaks. "<" is
 * escaped as well so the data can never contain a literal "</script>" or "<!--".
 */
function toInlineScriptLiteral(value: unknown): string {
  return JSON.stringify(String(value))
    .replace(/</g, '\\u003c')
    .replace(/\u2028/g, '\\u2028')
    .replace(/\u2029/g, '\\u2029')
}

/**
 * Builds the HTML document used by the "post hack": a page whose script creates a form with
 * one hidden field per key/value pair of the url-encoded postData and submits it to `url`.
 */
function buildPostFormHtml(url: string, postData: string): string {
  return `
<!DOCTYPE html>
<html>
<body>
<script>
function parseQuery(queryString) {
  var query = {};
  var pairs = (queryString[0] === '?' ? queryString.substr(1) : queryString).split('&');
  for (var i = 0; i < pairs.length; i++) {
    // split on the first '=' only, so values that contain '=' are kept whole
    var separator = pairs[i].indexOf('=');
    var name = separator === -1 ? pairs[i] : pairs[i].substring(0, separator);
    var value = separator === -1 ? '' : pairs[i].substring(separator + 1);
    query[decodeURIComponent(name)] = decodeURIComponent(value);
  }
  return query;
}
const form = document.createElement('form');
form.method = 'POST';
form.action = ${toInlineScriptLiteral(url)};
const params = parseQuery(${toInlineScriptLiteral(postData)});
for (const key in params) {
  if (params.hasOwnProperty(key)) {
    const hiddenField = document.createElement('input');
    hiddenField.type = 'hidden';
    hiddenField.name = key;
    hiddenField.value = params[key];
    form.appendChild(hiddenField);
  }
}
document.body.appendChild(form);
form.submit();
</script>
</body>
</html>
`
}

/**
 * Navigates the page to params.url and, for POST requests, submits params.postData
 * through an auto-submitting form injected into the page.
 *
 * The first navigation gets a third of params.maxTimeout; if it fails for any reason it is
 * retried once with a 2 second timeout, and an error from that retry is propagated.
 *
 * @param params request being processed (url, method, postData and maxTimeout are read)
 * @param page page to navigate
 * @returns the response of the initial navigation
 *          (NOTE(review): for POST requests this is still the response of the first GET)
 */
async function gotoPage(params: V1Request, page: Page): Promise<HTTPResponse> {
  const pageTimeout = params.maxTimeout / 3;
  let response: HTTPResponse
  try {
    response = await page.goto(params.url, {waitUntil: 'domcontentloaded', timeout: pageTimeout});
  } catch (e) {
    // retry
    response = await page.goto(params.url, {waitUntil: 'domcontentloaded', timeout: 2000});
  }

  if (params.method === 'POST') {
    // post hack: the url and the post data are embedded as escaped literals, so quotes,
    // line breaks or "</script>" in user input cannot break (or inject into) the script
    await page.setContent(buildPostFormHtml(params.url, params.postData));
    await page.waitForTimeout(2000)
    try {
      await page.waitForNavigation({waitUntil: 'domcontentloaded', timeout: 2000})
    } catch (e) {
      // deliberate best effort: the navigation may already have finished during the wait above
    }
  }
  return response
}
/**
 * Entry point of the solver: processes a request inside a browser session.
 *
 * When the request names no session, a one-time session is created for it and destroyed
 * again once the request has finished (successfully or not). Otherwise the existing
 * session with that id is used and left alive.
 *
 * @param params request to process
 * @returns the outcome of the challenge resolution
 * @throws Error when the named session does not exist, or when the resolution fails
 *         (the cause is appended to "Unable to process browser request. ")
 */
export async function browserRequest(params: V1Request): Promise<ChallengeResolutionT> {
  const oneTimeSession = params.session === undefined;

  let session: SessionsCacheItem;
  if (oneTimeSession) {
    const options: SessionCreateOptions = {
      oneTimeSession,
      cookies: params.cookies,
      maxTimeout: params.maxTimeout,
      proxy: params.proxy
    };
    session = await sessions.create(null, options);
  } else {
    session = sessions.get(params.session);
  }

  if (!session) {
    throw Error("This session does not exist. Use 'list_sessions' to see all the existing sessions.")
  }

  try {
    return await resolveChallengeWithTimeout(params, session)
  } catch (error) {
    throw Error("Unable to process browser request. " + error)
  } finally {
    // temporary sessions never outlive the request they were created for
    if (oneTimeSession) {
      await sessions.destroy(session.sessionId)
    }
  }
}

View File

@@ -1,150 +0,0 @@
import * as os from 'os'
import * as path from 'path'
import * as fs from 'fs'
import puppeteer from 'puppeteer-extra'
import { LaunchOptions, Headers, SetCookie } from 'puppeteer'
import log from './log'
import { deleteFolderRecursive, sleep, removeEmptyFields } from './utils'
import * as Puppeteer from "puppeteer-extra/dist/puppeteer";
/**
 * Default values kept with a cached session (see the `defaults` field of SessionsCacheItem).
 */
interface SessionPageDefaults {
  // default request headers for the session, when provided
  headers?: Headers
  // default User-Agent for the session, when provided
  userAgent?: string
}
/**
 * One entry of the session cache: a launched browser plus the data that belongs to it.
 */
export interface SessionsCacheItem {
  // browser instance launched for this session
  browser: Puppeteer.Browser
  // browser profile folder; only set for sessions that are not one-time sessions
  userDataDir?: string
  // defaults given when the session was created (undefined values are stripped)
  defaults: SessionPageDefaults
}
/**
 * Map of session id to cached session.
 */
interface SessionsCache {
  [key: string]: SessionsCacheItem
}
/**
 * Options accepted when a session is created. Every field is optional.
 */
interface SessionCreateOptions {
  /** When true no browser profile folder is prepared for the session. */
  oneTimeSession?: boolean;
  /** Stored in the session defaults. */
  userAgent?: string;
  /** Cookies set in the browser right after it has been launched. */
  cookies?: SetCookie[];
  /** Stored in the session defaults. */
  headers?: Headers;
  /** Stored in the session defaults. */
  maxTimeout?: number;
  /** Proxy settings; only `proxy.url` is read, and passed to the browser as --proxy-server. */
  proxy?: any;
}
// in-memory registry of the sessions created by this module, keyed by session id
const sessionCache: SessionsCache = {}

// setting "user-agent-override" evasion is not working for us because it can't be changed
// in each request. we set the user-agent in the browser args instead
// register the stealth plugin once, at module load, before any browser is launched
puppeteer.use(require('puppeteer-extra-plugin-stealth')())
/**
 * Computes the browser profile folder of a session: a folder named
 * `puppeteer_chrome_profile_<id>` inside the OS temporary directory.
 * Nothing is created on disk here.
 */
function userDataDirFromId(id: string): string {
  const profileFolder = `/puppeteer_chrome_profile_${id}`
  return path.join(os.tmpdir(), profileFolder)
}
/**
 * Makes sure the browser profile folder of a session exists and returns its path.
 * The folder (and any missing parent) is created only when it is not there yet.
 */
function prepareBrowserProfile(id: string): string {
  // TODO: maybe pass SessionCreateOptions for loading later?
  const profileDir = userDataDirFromId(id)
  const alreadyThere = fs.existsSync(profileDir)
  if (alreadyThere) {
    return profileDir
  }
  fs.mkdirSync(profileDir, { recursive: true })
  return profileDir
}
/**
 * Session service: launches one browser per session and keeps the sessions in an
 * in-memory cache keyed by session id.
 */
export default {
  /**
   * Launches a browser, optionally preloads cookies into it, and stores the session in
   * the cache under `id`.
   *
   * @param id session id, used as cache key and (unless oneTimeSession) as profile folder suffix
   * @param options see SessionCreateOptions
   * @returns the cached session
   * @throws Error when the browser could not be launched after 3 attempts, any launch error
   *         other than "Failed to launch the browser process!", or a cookie preload error
   */
  create: async (id: string, { cookies, oneTimeSession, userAgent, headers, maxTimeout, proxy }: SessionCreateOptions): Promise<SessionsCacheItem> => {
    const args = [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage' // issue #45
    ];
    if (proxy && proxy.url) {
      args.push(`--proxy-server=${proxy.url}`);
    }

    const puppeteerOptions: LaunchOptions = {
      product: 'chrome',
      headless: process.env.HEADLESS !== 'false',
      args
    }

    if (!oneTimeSession) {
      log.debug('Creating userDataDir for session.')
      puppeteerOptions.userDataDir = prepareBrowserProfile(id)
    }

    // if we are running inside executable binary, change chrome path
    if (typeof (process as any).pkg !== 'undefined') {
      const exe = process.platform === "win32" ? 'chrome.exe' : 'chrome';
      puppeteerOptions.executablePath = path.join(path.dirname(process.execPath), 'chrome', exe)
    }

    log.debug('Launching browser...')

    // TODO: maybe access env variable?
    // TODO: sometimes browser instances are created and not connected to correctly.
    //       how do we handle/quit those instances inside Docker?
    // try exactly as many times as the error message below announces
    const maxLaunchTries = 3
    let browser: Puppeteer.Browser | undefined;
    for (let attempt = 1; attempt <= maxLaunchTries && !browser; attempt++) {
      try {
        browser = await puppeteer.launch(puppeteerOptions)
      } catch (e) {
        // only the generic launch failure is worth a retry
        if (e.message !== 'Failed to launch the browser process!')
          throw e
        log.warn('Failed to open browser, trying again...')
      }
    }

    if (!browser) { throw Error(`Failed to launch browser ${maxLaunchTries} times in a row.`) }

    if (cookies) {
      try {
        // the page is only a vehicle to set the cookies, so it is closed afterwards
        const page = await browser.newPage()
        try {
          await page.setCookie(...cookies)
        } finally {
          await page.close()
        }
      } catch (e) {
        // the session is not cached yet: close the browser so it is not left orphaned
        try {
          await browser.close()
        } catch (closeError) {
          log.warn('Unable to close the browser after a cookie error: ' + closeError)
        }
        throw e
      }
    }

    sessionCache[id] = {
      browser: browser,
      userDataDir: puppeteerOptions.userDataDir,
      defaults: removeEmptyFields({
        userAgent,
        headers,
        maxTimeout
      })
    }

    return sessionCache[id]
  },

  /** Returns the ids of all cached sessions. */
  list: (): string[] => Object.keys(sessionCache),

  // TODO: create a sessions.close that doesn't rm the userDataDir

  /**
   * Closes the browser of a session, removes the session from the cache and deletes its
   * profile folder (if it has one).
   *
   * @returns true when the session existed and was destroyed, false when `id` is unknown
   * @throws Error when the profile folder cannot be deleted
   */
  destroy: async (id: string): Promise<boolean> => {
    const session = sessionCache[id]
    // unknown id: report it with the return value instead of crashing on the lookup
    if (!session || !session.browser) {
      return false
    }

    const { browser, userDataDir } = session
    await browser.close()
    delete sessionCache[id]
    if (userDataDir) {
      const userDataDirPath = userDataDirFromId(id)
      try {
        // for some reason this keeps an error from being thrown in Windows, figures
        await sleep(5000)
        deleteFolderRecursive(userDataDirPath)
      } catch (e) {
        console.error(e)
        throw Error(`Error deleting browser session folder. ${e.message}`)
      }
    }
    return true
  },

  /** Returns the cached session with the given id, or false when there is none. */
  get: (id: string): SessionsCacheItem | false => sessionCache[id] || false
}

625
src/tests/app.test.ts Normal file
View File

@@ -0,0 +1,625 @@
// noinspection DuplicatedCode
import {Response} from "superagent";
import {V1ResponseBase, V1ResponseSession, V1ResponseSessions, V1ResponseSolution} from "../controllers/v1"
const request = require("supertest");
const app = require("../app");
const sessions = require('../services/sessions');
const version: string = 'v' + require('../../package.json').version
// Local proxies used by the 'proxy' tests (see the setup notes inside those tests)
const proxyUrl = "http://127.0.0.1:8888";
const proxySocksUrl = "socks5://127.0.0.1:1080";

// Site without any protection
const googleUrl = "https://www.google.com";
// Endpoint that records POST requests
const postUrl = "https://ptsv2.com/t/qv4j3-1634496523";

// Sites used by the protection tests: Cloudflare JS challenge, Cloudflare CAPTCHA,
// Cloudflare block page, DDoS-GUARD and the custom CloudFlare challenge
const cfUrl = "https://pirateiro.com/torrents/?search=harry";
const cfCaptchaUrl = "https://idope.se";
const cfBlockedUrl = "https://www.torrentmafya.org/table.php";
const ddgUrl = "https://www.erai-raws.info/feed/?type=magnet";
const ccfUrl = "https://www.muziekfabriek.org";
// Runs once before the suite: calls the sessions service's web browser installation test
beforeAll(async () => {
  await sessions.testWebBrowserInstallation();
});

// Runs after every test: destroys, one by one, every session that is still registered
afterEach(async () => {
  for (const sessionId of sessions.list()) {
    await sessions.destroy(sessionId);
  }
});
// The index endpoint only answers GET requests
describe("Test '/' path", () => {
  test("GET method should return OK ", async () => {
    const res: Response = await request(app).get("/");
    const {statusCode, body} = res;
    expect(statusCode).toBe(200);
    expect(body.msg).toBe("FlareSolverr is ready!");
    expect(body.version).toBe(version);
    expect(body.userAgent).toContain("Firefox/")
  });

  test("POST method should fail", async () => {
    const res: Response = await request(app).post("/");
    const {statusCode, body} = res;
    expect(statusCode).toBe(404);
    expect(body.error).toBe("Unknown resource or HTTP verb");
  });
});
// The health endpoint reports a plain "ok" status
describe("Test '/health' path", () => {
  test("GET method should return OK", async () => {
    const res: Response = await request(app).get("/health");
    const {statusCode, body} = res;
    expect(statusCode).toBe(200);
    expect(body.status).toBe("ok");
  });
});
// Unknown routes are answered with a 404 and an error message
describe("Test '/wrong' path", () => {
  test("GET method should fail", async () => {
    const res: Response = await request(app).get("/wrong");
    const {statusCode, body} = res;
    expect(statusCode).toBe(404);
    expect(body.error).toBe("Unknown resource or HTTP verb");
  });
});
// Tests of the '/v1' API: request.get / request.post against unprotected and protected
// sites, request options (cookies, returnOnlyCookies, proxy, maxTimeout) and session commands.
describe("Test '/v1' path", () => {

  // Returns the value of the cookie called `name` in a solution's cookie list.
  // Throws (and so fails the calling test) when that cookie is not present.
  const cookieValue = (cookies: unknown, name: string): string =>
    (cookies as any[]).filter((cookie) => cookie.name === name)[0].value

  test("Cmd 'request.bad' should fail", async () => {
    const payload = {
      "cmd": "request.bad",
      "url": googleUrl
    }
    const response: Response = await request(app).post("/v1").send(payload);
    expect(response.statusCode).toBe(500);

    const apiResponse: V1ResponseBase = response.body;
    expect(apiResponse.status).toBe("error");
    expect(apiResponse.message).toBe("Error: The command 'request.bad' is invalid.");
    expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
    expect(apiResponse.endTimestamp).toBeGreaterThanOrEqual(apiResponse.startTimestamp);
    expect(apiResponse.version).toBe(version);
  });

  test("Cmd 'request.get' should return OK with no Cloudflare", async () => {
    const payload = {
      "cmd": "request.get",
      "url": googleUrl
    }
    const response: Response = await request(app).post("/v1").send(payload);
    expect(response.statusCode).toBe(200);

    const apiResponse: V1ResponseSolution = response.body;
    expect(apiResponse.status).toBe("ok");
    expect(apiResponse.message).toBe("");
    expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
    expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
    expect(apiResponse.version).toBe(version);

    const solution = apiResponse.solution;
    expect(solution.url).toContain(googleUrl)
    expect(solution.status).toBe(200);
    expect(Object.keys(solution.headers).length).toBeGreaterThan(0)
    expect(solution.response).toContain("<!DOCTYPE html>")
    expect(Object.keys(solution.cookies).length).toBeGreaterThan(0)
    expect(solution.userAgent).toContain("Firefox/")
  });

  test("Cmd 'request.get' should return OK with Cloudflare JS", async () => {
    const payload = {
      "cmd": "request.get",
      "url": cfUrl
    }
    const response: Response = await request(app).post("/v1").send(payload);
    expect(response.statusCode).toBe(200);

    const apiResponse: V1ResponseSolution = response.body;
    expect(apiResponse.status).toBe("ok");
    expect(apiResponse.message).toBe("");
    expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
    expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
    expect(apiResponse.version).toBe(version);

    const solution = apiResponse.solution;
    expect(solution.url).toContain(cfUrl)
    expect(solution.status).toBe(200);
    expect(Object.keys(solution.headers).length).toBeGreaterThan(0)
    expect(solution.response).toContain("<!DOCTYPE html>")
    expect(Object.keys(solution.cookies).length).toBeGreaterThan(0)
    expect(solution.userAgent).toContain("Firefox/")

    const cfCookie: string = cookieValue(solution.cookies, "cf_clearance")
    expect(cfCookie.length).toBeGreaterThan(30)
  });

  test("Cmd 'request.get' should return fail with Cloudflare CAPTCHA", async () => {
    const payload = {
      "cmd": "request.get",
      "url": cfCaptchaUrl
    }
    const response: Response = await request(app).post("/v1").send(payload);
    expect(response.statusCode).toBe(200);

    const apiResponse: V1ResponseSolution = response.body;
    expect(apiResponse.status).toBe("error");
    expect(apiResponse.message).toBe("Cloudflare Error: FlareSolverr can not resolve CAPTCHA challenges. Since the captcha doesn't always appear, you may have better luck with the next request.");
    expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
    expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
    expect(apiResponse.version).toBe(version);
    // solution is filled but not useful
    expect(apiResponse.solution.url).toContain(cfCaptchaUrl)
  });

  test("Cmd 'request.post' should return fail with Cloudflare Blocked", async () => {
    const payload = {
      "cmd": "request.post",
      "url": cfBlockedUrl,
      "postData": "test1=test2"
    }
    const response: Response = await request(app).post("/v1").send(payload);
    expect(response.statusCode).toBe(200);

    const apiResponse: V1ResponseSolution = response.body;
    expect(apiResponse.status).toBe("error");
    expect(apiResponse.message).toBe("Cloudflare Error: Cloudflare has blocked this request. Probably your IP is banned for this site, check in your web browser.");
    expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
    expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
    expect(apiResponse.version).toBe(version);
    // solution is filled but not useful
    expect(apiResponse.solution.url).toContain(cfBlockedUrl)
  });

  test("Cmd 'request.get' should return OK with DDoS-GUARD JS", async () => {
    const payload = {
      "cmd": "request.get",
      "url": ddgUrl
    }
    const response: Response = await request(app).post("/v1").send(payload);
    expect(response.statusCode).toBe(200);

    const apiResponse: V1ResponseSolution = response.body;
    expect(apiResponse.status).toBe("ok");
    expect(apiResponse.message).toBe("");
    expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
    expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
    expect(apiResponse.version).toBe(version);

    const solution = apiResponse.solution;
    expect(solution.url).toContain(ddgUrl)
    expect(solution.status).toBe(200);
    expect(Object.keys(solution.headers).length).toBeGreaterThan(0)
    expect(solution.response).toContain("<rss version")
    expect(Object.keys(solution.cookies).length).toBeGreaterThan(0)
    expect(solution.userAgent).toContain("Firefox/")

    const ddgCookie: string = cookieValue(solution.cookies, "__ddg1")
    expect(ddgCookie.length).toBeGreaterThan(10)
  });

  test("Cmd 'request.get' should return OK with Custom CloudFlare JS", async () => {
    const payload = {
      "cmd": "request.get",
      "url": ccfUrl
    }
    const response: Response = await request(app).post("/v1").send(payload);
    expect(response.statusCode).toBe(200);

    const apiResponse: V1ResponseSolution = response.body;
    expect(apiResponse.status).toBe("ok");
    expect(apiResponse.message).toBe("");
    expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
    expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
    expect(apiResponse.version).toBe(version);

    const solution = apiResponse.solution;
    expect(solution.url).toContain(ccfUrl)
    expect(solution.status).toBe(200);
    expect(Object.keys(solution.headers).length).toBeGreaterThan(0)
    expect(solution.response).toContain("<html><head>")
    expect(Object.keys(solution.cookies).length).toBeGreaterThan(0)
    expect(solution.userAgent).toContain("Firefox/")

    const ccfCookie: string = cookieValue(solution.cookies, "ct_anti_ddos_key")
    expect(ccfCookie.length).toBeGreaterThan(10)
  });

  test("Cmd 'request.get' should return OK with 'cookies' param", async () => {
    const payload = {
      "cmd": "request.get",
      "url": googleUrl,
      "cookies": [
        {
          "name": "testcookie1",
          "value": "testvalue1"
        },
        {
          "name": "testcookie2",
          "value": "testvalue2"
        }
      ]
    }
    const response: Response = await request(app).post("/v1").send(payload);
    expect(response.statusCode).toBe(200);

    const apiResponse: V1ResponseSolution = response.body;
    expect(apiResponse.status).toBe("ok");

    const solution = apiResponse.solution;
    expect(solution.url).toContain(googleUrl)
    expect(Object.keys(solution.cookies).length).toBeGreaterThan(1)
    const cookie1: string = cookieValue(solution.cookies, "testcookie1")
    expect(cookie1).toBe("testvalue1")
    const cookie2: string = cookieValue(solution.cookies, "testcookie2")
    expect(cookie2).toBe("testvalue2")
  });

  test("Cmd 'request.get' should return OK with 'returnOnlyCookies' param", async () => {
    const payload = {
      "cmd": "request.get",
      "url": googleUrl,
      "returnOnlyCookies": true
    }
    const response: Response = await request(app).post("/v1").send(payload);
    expect(response.statusCode).toBe(200);

    const apiResponse: V1ResponseSolution = response.body;

    const solution = apiResponse.solution;
    expect(solution.url).toContain(googleUrl)
    expect(solution.status).toBe(200);
    expect(solution.headers).toBe(null)
    expect(solution.response).toBe(null)
    expect(Object.keys(solution.cookies).length).toBeGreaterThan(0)
    expect(solution.userAgent).toBe(null)
  });

  test("Cmd 'request.get' should return OK with HTTP 'proxy' param", async () => {
    /*
    To configure TinyProxy in local:
       * sudo vim /etc/tinyproxy/tinyproxy.conf
          * edit => LogFile "/tmp/tinyproxy.log"
          * edit => Syslog Off
       * sudo tinyproxy -d
       * sudo tail -f /tmp/tinyproxy.log
    */
    const payload = {
      "cmd": "request.get",
      "url": googleUrl,
      "proxy": {
        "url": proxyUrl
      }
    }
    const response: Response = await request(app).post("/v1").send(payload);
    expect(response.statusCode).toBe(200);

    const apiResponse: V1ResponseSolution = response.body;
    expect(apiResponse.status).toBe("ok");

    const solution = apiResponse.solution;
    expect(solution.url).toContain(googleUrl)
    expect(solution.status).toBe(200);
  });

  // todo: credentials are not working
  test.skip("Cmd 'request.get' should return OK with HTTP 'proxy' param with credentials", async () => {
    /*
    To configure TinyProxy in local:
       * sudo vim /etc/tinyproxy/tinyproxy.conf
          * edit => LogFile "/tmp/tinyproxy.log"
          * edit => Syslog Off
          * add => BasicAuth testuser testpass
       * sudo tinyproxy -d
       * sudo tail -f /tmp/tinyproxy.log
    */
    const payload = {
      "cmd": "request.get",
      "url": googleUrl,
      "proxy": {
        "url": proxyUrl,
        "username": "testuser",
        "password": "testpass"
      }
    }
    const response: Response = await request(app).post("/v1").send(payload);
    expect(response.statusCode).toBe(200);

    const apiResponse: V1ResponseSolution = response.body;
    expect(apiResponse.status).toBe("ok");

    const solution = apiResponse.solution;
    expect(solution.url).toContain(googleUrl)
    // the status is a number: it has to be compared with toBe (toContain only works
    // on strings and iterables, so it could never pass once this test is enabled)
    expect(solution.status).toBe(200)
  });

  test("Cmd 'request.get' should return OK with SOCKSv5 'proxy' param", async () => {
    /*
    To configure Dante in local:
       * https://linuxhint.com/set-up-a-socks5-proxy-on-ubuntu-with-dante/
       * sudo vim /etc/sockd.conf
       * sudo systemctl restart sockd.service
       * curl --socks5 socks5://127.0.0.1:1080 https://www.google.com
    */
    const payload = {
      "cmd": "request.get",
      "url": googleUrl,
      "proxy": {
        "url": proxySocksUrl
      }
    }
    const response: Response = await request(app).post("/v1").send(payload);
    expect(response.statusCode).toBe(200);

    const apiResponse: V1ResponseSolution = response.body;
    expect(apiResponse.status).toBe("ok");

    const solution = apiResponse.solution;
    expect(solution.url).toContain(googleUrl)
    expect(solution.status).toBe(200);
  });

  test("Cmd 'request.get' should fail with wrong 'proxy' param", async () => {
    const payload = {
      "cmd": "request.get",
      "url": googleUrl,
      "proxy": {
        "url": "http://127.0.0.1:43210"
      }
    }
    const response: Response = await request(app).post("/v1").send(payload);
    expect(response.statusCode).toBe(500);

    const apiResponse: V1ResponseSolution = response.body;
    expect(apiResponse.status).toBe("error");
    expect(apiResponse.message).toBe("Error: Unable to process browser request. Error: NS_ERROR_PROXY_CONNECTION_REFUSED at https://www.google.com");
  });

  test("Cmd 'request.get' should return fail with timeout", async () => {
    const payload = {
      "cmd": "request.get",
      "url": googleUrl,
      "maxTimeout": 10
    }
    const response: Response = await request(app).post("/v1").send(payload);
    expect(response.statusCode).toBe(500);

    const apiResponse: V1ResponseBase = response.body;
    expect(apiResponse.status).toBe("error");
    expect(apiResponse.message).toBe("Error: Unable to process browser request. Error: Maximum timeout reached. maxTimeout=10 (ms)");
    expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
    expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
    expect(apiResponse.version).toBe(version);
  });

  test("Cmd 'request.get' should return fail with bad domain", async () => {
    const payload = {
      "cmd": "request.get",
      "url": "https://www.google.combad"
    }
    const response: Response = await request(app).post("/v1").send(payload);
    expect(response.statusCode).toBe(500);

    const apiResponse: V1ResponseBase = response.body;
    expect(apiResponse.status).toBe("error");
    expect(apiResponse.message).toBe("Error: Unable to process browser request. Error: NS_ERROR_UNKNOWN_HOST at https://www.google.combad");
  });

  test("Cmd 'request.get' should accept deprecated params", async () => {
    const payload = {
      "cmd": "request.get",
      "url": googleUrl,
      "userAgent": "Test User-Agent" // was removed in v2, not used
    }
    const response: Response = await request(app).post("/v1").send(payload);
    expect(response.statusCode).toBe(200);

    const apiResponse: V1ResponseSolution = response.body;
    expect(apiResponse.status).toBe("ok");

    const solution = apiResponse.solution;
    expect(solution.url).toContain(googleUrl)
    expect(solution.status).toBe(200);
    expect(solution.userAgent).toContain("Firefox/")
  });

  test("Cmd 'request.post' should return OK with no Cloudflare", async () => {
    const payload = {
      "cmd": "request.post",
      "url": postUrl + '/post',
      "postData": "param1=value1&param2=value2"
    }
    const response: Response = await request(app).post("/v1").send(payload);
    expect(response.statusCode).toBe(200);

    const apiResponse: V1ResponseSolution = response.body;
    expect(apiResponse.status).toBe("ok");
    expect(apiResponse.message).toBe("");
    expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
    expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
    expect(apiResponse.version).toBe(version);

    const solution = apiResponse.solution;
    expect(solution.url).toContain(postUrl)
    expect(solution.status).toBe(200);
    expect(Object.keys(solution.headers).length).toBeGreaterThan(0)
    expect(solution.response).toContain(" I hope you have a lovely day!")
    expect(Object.keys(solution.cookies).length).toBe(0)
    expect(solution.userAgent).toContain("Firefox/")

    // check that the POST was recorded: the page at postUrl must contain the
    // current date and hour (UTC, formatted as "YYYY-MM-DD HH")
    const payload2 = {
      "cmd": "request.get",
      "url": postUrl
    }
    const response2: Response = await request(app).post("/v1").send(payload2);
    expect(response2.statusCode).toBe(200);

    const apiResponse2: V1ResponseSolution = response2.body;
    expect(apiResponse2.status).toBe("ok");

    const solution2 = apiResponse2.solution;
    expect(solution2.status).toBe(200);
    expect(solution2.response).toContain(new Date().toISOString().split(':')[0].replace('T', ' '))
  });

  test("Cmd 'request.post' should fail without 'postData' param", async () => {
    const payload = {
      "cmd": "request.post",
      "url": googleUrl
    }
    const response: Response = await request(app).post("/v1").send(payload);
    expect(response.statusCode).toBe(500);

    const apiResponse: V1ResponseBase = response.body;
    expect(apiResponse.status).toBe("error");
    expect(apiResponse.message).toBe("Error: Must send param \"postBody\" when sending a POST request.");
    expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
    expect(apiResponse.endTimestamp).toBeGreaterThanOrEqual(apiResponse.startTimestamp);
    expect(apiResponse.version).toBe(version);
  });

  test("Cmd 'sessions.create' should return OK", async () => {
    const payload = {
      "cmd": "sessions.create"
    }
    const response: Response = await request(app).post("/v1").send(payload);
    expect(response.statusCode).toBe(200);

    const apiResponse: V1ResponseSession = response.body;
    expect(apiResponse.status).toBe("ok");
    expect(apiResponse.message).toBe("Session created successfully.");
    expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
    expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
    expect(apiResponse.version).toBe(version);
    expect(apiResponse.session.length).toBe(36);
  });

  test("Cmd 'sessions.create' should return OK with session", async () => {
    const payload = {
      "cmd": "sessions.create",
      "session": "2bc6bb20-2f56-11ec-9543-test"
    }
    const response: Response = await request(app).post("/v1").send(payload);
    expect(response.statusCode).toBe(200);

    const apiResponse: V1ResponseSession = response.body;
    expect(apiResponse.status).toBe("ok");
    expect(apiResponse.message).toBe("Session created successfully.");
    expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
    expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
    expect(apiResponse.version).toBe(version);
    expect(apiResponse.session).toBe("2bc6bb20-2f56-11ec-9543-test");
  });

  test("Cmd 'sessions.list' should return OK", async () => {
    // create one session for testing
    const payload0 = {
      "cmd": "sessions.create"
    }
    const response0: Response = await request(app).post("/v1").send(payload0);
    expect(response0.statusCode).toBe(200);

    const payload = {
      "cmd": "sessions.list"
    }
    const response: Response = await request(app).post("/v1").send(payload);
    expect(response.statusCode).toBe(200);

    const apiResponse: V1ResponseSessions = response.body;
    expect(apiResponse.status).toBe("ok");
    expect(apiResponse.message).toBe("");
    expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
    expect(apiResponse.endTimestamp).toBeGreaterThanOrEqual(apiResponse.startTimestamp);
    expect(apiResponse.version).toBe(version);
    expect(apiResponse.sessions.length).toBeGreaterThan(0)
  });

  test("Cmd 'sessions.destroy' should return OK", async () => {
    // create one session for testing
    const payload0 = {
      "cmd": "sessions.create"
    }
    const response0: Response = await request(app).post("/v1").send(payload0);
    expect(response0.statusCode).toBe(200);
    const apiResponse0: V1ResponseSession = response0.body;
    const sessionId0 = apiResponse0.session

    const payload = {
      "cmd": "sessions.destroy",
      "session": sessionId0
    }
    const response: Response = await request(app).post("/v1").send(payload);
    expect(response.statusCode).toBe(200);

    const apiResponse: V1ResponseBase = response.body;
    expect(apiResponse.status).toBe("ok");
    expect(apiResponse.message).toBe("The session has been removed.");
    expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
    expect(apiResponse.endTimestamp).toBeGreaterThanOrEqual(apiResponse.startTimestamp);
    expect(apiResponse.version).toBe(version);
  });

  test("Cmd 'sessions.destroy' should fail", async () => {
    const payload = {
      "cmd": "sessions.destroy",
      "session": "bad-session"
    }
    const response: Response = await request(app).post("/v1").send(payload);
    expect(response.statusCode).toBe(500);

    const apiResponse: V1ResponseBase = response.body;
    expect(apiResponse.status).toBe("error");
    expect(apiResponse.message).toBe("Error: This session does not exist.");
    expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
    expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
    expect(apiResponse.version).toBe(version);
  });

  test("Cmd 'request.get' should use session", async () => {
    // create one session for testing
    const payload0 = {
      "cmd": "sessions.create"
    }
    const response0: Response = await request(app).post("/v1").send(payload0);
    expect(response0.statusCode).toBe(200);
    const apiResponse0: V1ResponseSession = response0.body;
    const sessionId0 = apiResponse0.session

    // first request should solve the challenge
    const payload = {
      "cmd": "request.get",
      "url": cfUrl,
      "session": sessionId0
    }
    const response: Response = await request(app).post("/v1").send(payload);
    expect(response.statusCode).toBe(200);

    const apiResponse: V1ResponseSolution = response.body;
    expect(apiResponse.status).toBe("ok");
    const cfCookie: string = cookieValue(apiResponse.solution.cookies, "cf_clearance")
    expect(cfCookie.length).toBeGreaterThan(30)

    // second request should have the same cookie
    const response2: Response = await request(app).post("/v1").send(payload);
    expect(response2.statusCode).toBe(200);

    const apiResponse2: V1ResponseSolution = response2.body;
    expect(apiResponse2.status).toBe("ok");
    const cfCookie2: string = cookieValue(apiResponse2.solution.cookies, "cf_clearance")
    expect(cfCookie2.length).toBeGreaterThan(30)
    expect(cfCookie2).toBe(cfCookie)
  });

});

View File

@@ -1,9 +0,0 @@
import { IncomingMessage, ServerResponse } from 'http';
/**
 * Everything a handler needs to answer one HTTP request.
 */
export interface RequestContext {
  /** Incoming HTTP request. */
  req: IncomingMessage;
  /** HTTP response to write to. */
  res: ServerResponse;
  /** Timestamp taken when the request started. */
  startTimestamp: number;
  /** Sends an error response carrying the given message. */
  errorResponse: (msg: string) => void;
  /** Sends a success response carrying the given message and optional extra properties. */
  successResponse: (msg: string, extendedProperties?: object) => void;
}

View File

@@ -1,31 +0,0 @@
import * as fs from 'fs'
import * as Path from 'path'
import { promisify } from 'util'
/**
 * Promise-returning version of setTimeout, built with util.promisify:
 * `await sleep(ms)` resumes after roughly `ms` milliseconds.
 */
export const sleep = promisify(setTimeout)
// recursive fs.rmdir needs node version 12:
// https://github.com/ngosang/FlareSolverr/issues/5#issuecomment-655572712
/**
 * Deletes a folder together with everything inside it. Does nothing when the path
 * does not exist.
 *
 * Entries are inspected with lstat, so a symbolic link is removed as a file and the
 * folder it may point to is never entered.
 *
 * @param path folder to delete
 */
export function deleteFolderRecursive(path: string) {
  if (!fs.existsSync(path)) {
    return
  }

  for (const entry of fs.readdirSync(path)) {
    const entryPath = Path.join(path, entry)
    const isFolder = fs.lstatSync(entryPath).isDirectory()
    if (isFolder) {
      // empty the sub-folder first
      deleteFolderRecursive(entryPath)
    } else {
      fs.unlinkSync(entryPath)
    }
  }

  // the folder is empty now and can be removed itself
  fs.rmdirSync(path)
}
/**
 * Returns a shallow copy of `o` without the properties whose value is `undefined`.
 * Other "empty" values (null, 0, '', false) are kept. The input is not modified.
 */
export const removeEmptyFields = (o: Record<string, any>): typeof o => {
  const cleaned: typeof o = {}
  for (const key in o) {
    const value = o[key]
    if (value === undefined) {
      continue
    }
    cleaned[key] = value
  }
  return cleaned
}