mirror of
https://github.com/FlareSolverr/FlareSolverr.git
synced 2025-12-05 17:18:19 +01:00
Compare commits
36 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
78daf24bc3 | ||
|
|
47c83ded58 | ||
|
|
35890cade4 | ||
|
|
753e8e1be8 | ||
|
|
a6628d0cda | ||
|
|
a79a5f2b42 | ||
|
|
1e463bb3e2 | ||
|
|
02204a84d3 | ||
|
|
95d178b37a | ||
|
|
c4f890f9a1 | ||
|
|
d16b982bb9 | ||
|
|
075b53ee24 | ||
|
|
356b893c18 | ||
|
|
a841d67745 | ||
|
|
2408a75a70 | ||
|
|
77a87c79fd | ||
|
|
cfd158462f | ||
|
|
ccfe21c15a | ||
|
|
a5b3e08e1f | ||
|
|
a0e897067a | ||
|
|
744de4d158 | ||
|
|
0459f2642d | ||
|
|
ca3f84f458 | ||
|
|
5dd563e003 | ||
|
|
78c10d6b24 | ||
|
|
3de2e44bfd | ||
|
|
7738f7a360 | ||
|
|
1b01caaa78 | ||
|
|
447c8f67a1 | ||
|
|
9dae74bc28 | ||
|
|
4199db5a41 | ||
|
|
2a4fae37c0 | ||
|
|
232ddca512 | ||
|
|
8572fab781 | ||
|
|
fdb3eae051 | ||
|
|
6dd8206a10 |
1
.github/ISSUE_TEMPLATE.md
vendored
1
.github/ISSUE_TEMPLATE.md
vendored
@@ -11,6 +11,7 @@ Check closed issues as well, because your issue may have already been fixed.
|
||||
* **Last working FlareSolverr version**:
|
||||
* **Operating system**:
|
||||
* **Are you using Docker**: [yes/no]
|
||||
* **FlareSolverr User-Agent (see log traces or / endpoint)**:
|
||||
* **Are you using a proxy or VPN?** [yes/no]
|
||||
* **Are you using Captcha Solver:** [yes/no]
|
||||
* **If using captcha solver, which one:**
|
||||
|
||||
22
Dockerfile
22
Dockerfile
@@ -1,13 +1,14 @@
|
||||
FROM --platform=${TARGETPLATFORM:-linux/amd64} node:15.2.1-alpine3.11
|
||||
FROM --platform=${TARGETPLATFORM:-linux/amd64} node:16-alpine3.14
|
||||
|
||||
# Print build information
|
||||
ARG TARGETPLATFORM
|
||||
ARG BUILDPLATFORM
|
||||
RUN printf "I am running on ${BUILDPLATFORM:-linux/amd64}, building for ${TARGETPLATFORM:-linux/amd64}\n$(uname -a)\n"
|
||||
|
||||
# Install Chromium, dumb-init and remove all locales but en-US
|
||||
RUN apk add --no-cache chromium dumb-init && \
|
||||
find /usr/lib/chromium/locales -type f ! -name 'en-US.*' -delete
|
||||
# Install the web browser (package firefox is available too)
|
||||
RUN apk update && \
|
||||
apk add --no-cache firefox-esr dumb-init && \
|
||||
rm -Rf /var/cache
|
||||
|
||||
# Copy FlareSolverr code
|
||||
USER node
|
||||
@@ -16,15 +17,18 @@ WORKDIR /home/node/flaresolverr
|
||||
COPY --chown=node:node package.json package-lock.json tsconfig.json ./
|
||||
COPY --chown=node:node src ./src/
|
||||
|
||||
# Install package. Skip installing Chrome, we will use the installed package.
|
||||
ENV PUPPETEER_PRODUCT=chrome \
|
||||
# Install package. Skip installing the browser, we will use the installed package.
|
||||
ENV PUPPETEER_PRODUCT=firefox \
|
||||
PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true \
|
||||
PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium-browser
|
||||
PUPPETEER_EXECUTABLE_PATH=/usr/bin/firefox
|
||||
RUN npm install && \
|
||||
npm run build && \
|
||||
rm -rf src tsconfig.json && \
|
||||
npm prune --production
|
||||
npm prune --production && \
|
||||
rm -rf /home/node/.npm
|
||||
|
||||
EXPOSE 8191
|
||||
ENTRYPOINT ["/usr/bin/dumb-init", "--"]
|
||||
CMD ["npm", "start"]
|
||||
|
||||
# docker build -t flaresolverr:custom .
|
||||
# docker run -p 8191:8191 -e LOG_LEVEL=debug flaresolverr:custom
|
||||
|
||||
60
README.md
60
README.md
@@ -15,7 +15,7 @@ FlareSolverr is a proxy server to bypass Cloudflare protection.
|
||||
FlareSolverr starts a proxy server and it waits for user requests in an idle state using few resources.
|
||||
When some request arrives, it uses [puppeteer](https://github.com/puppeteer/puppeteer) with the
|
||||
[stealth plugin](https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-stealth)
|
||||
to create a headless browser (Chrome). It opens the URL with user parameters and waits until the Cloudflare challenge
|
||||
to create a headless browser (Firefox). It opens the URL with user parameters and waits until the Cloudflare challenge
|
||||
is solved (or timeout). The HTML code and the cookies are sent back to the user, and those cookies can be used to
|
||||
bypass Cloudflare using other HTTP clients.
|
||||
|
||||
@@ -60,7 +60,7 @@ docker run -d \
|
||||
|
||||
This is the recommended way for Windows users.
|
||||
* Download the [FlareSolverr zip](https://github.com/FlareSolverr/FlareSolverr/releases) from the release's assets. It is available for Windows and Linux.
|
||||
* Extract the zip file. FlareSolverr executable and chrome folder must be in the same directory.
|
||||
* Extract the zip file. FlareSolverr executable and firefox folder must be in the same directory.
|
||||
* Execute FlareSolverr binary. In the environment variables section you can find how to change the configuration.
|
||||
|
||||
### From source code
|
||||
@@ -68,8 +68,9 @@ This is the recommended way for Windows users.
|
||||
This is the recommended way for macOS users and for developers.
|
||||
* Install [NodeJS](https://nodejs.org/).
|
||||
* Clone this repository and open a shell in that path.
|
||||
* Run `export PUPPETEER_PRODUCT=firefox` (Linux/macOS) or `set PUPPETEER_PRODUCT=firefox` (Windows).
|
||||
* Run `npm install` command to install FlareSolverr dependencies.
|
||||
* Run `node node_modules/puppeteer/install.js` to install Chromium.
|
||||
* Run `node node_modules/puppeteer/install.js` to install Firefox.
|
||||
* Run `npm run build` command to compile TypeScript code.
|
||||
* Run `npm start` command to start FlareSolverr.
|
||||
|
||||
@@ -86,11 +87,7 @@ curl -L -X POST 'http://localhost:8191/v1' \
|
||||
--data-raw '{
|
||||
"cmd": "request.get",
|
||||
"url":"http://www.google.com/",
|
||||
"userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleW...",
|
||||
"maxTimeout": 60000,
|
||||
"headers": {
|
||||
"X-Test": "Testing 123..."
|
||||
}
|
||||
"maxTimeout": 60000
|
||||
}'
|
||||
```
|
||||
|
||||
@@ -107,7 +104,6 @@ This also speeds up the requests since it won't have to launch a new browser ins
|
||||
Parameter | Notes
|
||||
|--|--|
|
||||
session | Optional. The session ID that you want to be assigned to the instance. If it isn't set, a random UUID will be assigned.
|
||||
userAgent | Optional. Will be used by the headless browser.
|
||||
|
||||
#### + `sessions.list`
|
||||
|
||||
@@ -142,10 +138,12 @@ Parameter | Notes
|
||||
|--|--|
|
||||
url | Mandatory
|
||||
session | Optional. Will send the request from an existing browser instance. If one is not sent it will create a temporary instance that will be destroyed immediately after the request is completed.
|
||||
headers | Optional. To specify user headers.
|
||||
maxTimeout | Optional, default value 60000. Max timeout to solve the challenge in milliseconds.
|
||||
cookies | Optional. Will be used by the headless browser. Follow [this](https://github.com/puppeteer/puppeteer/blob/v3.3.0/docs/api.md#pagesetcookiecookies) format.
|
||||
returnOnlyCookies | Optional, default false. Only returns the cookies. Response data, headers and other parts of the response are removed.
|
||||
proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. Authorization (username/password) is not supported.
|
||||
|
||||
:warning: If you want to use Cloudflare clearance cookie in your scripts, make sure you use the FlareSolverr User-Agent too. If they don't match you will see the challenge.
|
||||
|
||||
Example response from running the `curl` above:
|
||||
|
||||
@@ -212,16 +210,7 @@ This is the same as `request.get` but it takes one more param:
|
||||
|
||||
Parameter | Notes
|
||||
|--|--|
|
||||
postData | Must be a string. If you want to POST a form, don't forget to set the `Content-Type` header to `application/x-www-form-urlencoded` or the server might not understand your request.
|
||||
|
||||
### Download small files
|
||||
|
||||
If you need to access an image/pdf or small file, you should pass the `download` parameter to `request.get` setting it
|
||||
to `true`. Rather than access the html and return text it will return the buffer **base64** encoded which you will be
|
||||
able to decode and save the image/pdf.
|
||||
|
||||
This method isn't recommended for videos or anything larger. As that should be streamed back to the client and at the
|
||||
moment there is nothing setup to do so. If this is something you need feel free to create an issue and/or submit a PR.
|
||||
postData | Must be a string with `application/x-www-form-urlencoded`. Eg: `"postData": "a=b&c=d"`
|
||||
|
||||
## Environment variables
|
||||
|
||||
@@ -251,37 +240,6 @@ If this is the case, FlareSolverr will return the error `Captcha detected but no
|
||||
FlareSolverr can be customized to solve the captchas automatically by setting the environment variable `CAPTCHA_SOLVER`
|
||||
to the file name of one of the adapters inside the [/captcha](src/captcha) directory.
|
||||
|
||||
### hcaptcha-solver
|
||||
|
||||
This method makes use of the [hcaptcha-solver](https://github.com/JimmyLaurent/hcaptcha-solver) project.
|
||||
|
||||
NOTE: This solver works picking random images so it will fail in a lot of requests and it's hard to know if it is
|
||||
working or not. In a real use case with Sonarr/Radarr + Jackett it is still useful because those apps make a new request
|
||||
each 15 minutes. Eventually one of the requests is going to work and Jackett saves the cookie forever (until it stops
|
||||
working).
|
||||
|
||||
To use this solver you must set the environment variable:
|
||||
|
||||
```bash
|
||||
CAPTCHA_SOLVER=hcaptcha-solver
|
||||
```
|
||||
|
||||
### CaptchaHarvester
|
||||
|
||||
This method makes use of the [CaptchaHarvester](https://github.com/NoahCardoza/CaptchaHarvester) project which allows
|
||||
users to collect their own tokens from ReCaptcha V2/V3 and hCaptcha for free.
|
||||
|
||||
To use this method you must set these environment variables:
|
||||
|
||||
```bash
|
||||
CAPTCHA_SOLVER=harvester
|
||||
HARVESTER_ENDPOINT=https://127.0.0.1:5000/token
|
||||
```
|
||||
|
||||
**Note**: above I set `HARVESTER_ENDPOINT` to the default configuration of the captcha harvester's server, but that
|
||||
could change if you customize the command line flags. Simply put, `HARVESTER_ENDPOINT` should be set to the URI of the
|
||||
route that returns a token in plain text when called.
|
||||
|
||||
## Related projects
|
||||
|
||||
* C# implementation => https://github.com/FlareSolverr/FlareSolverrSharp
|
||||
|
||||
@@ -2,15 +2,39 @@ const fs = require('fs')
|
||||
const path = require('path')
|
||||
const { execSync } = require('child_process')
|
||||
const archiver = require('archiver')
|
||||
const https = require('https')
|
||||
const puppeteer = require('puppeteer')
|
||||
const version = 'v' + require('./package.json').version;
|
||||
|
||||
/**
 * Fetches the current Firefox Nightly version string from Mozilla's
 * product-details service.
 *
 * @returns {Promise<string>} Resolves with the `FIREFOX_NIGHTLY` value of the
 *   published versions document.
 * @throws {Error} Rejects when the request or the response stream fails, the
 *   server answers with a status code >= 400, the body is not valid JSON, or
 *   the document has no `FIREFOX_NIGHTLY` entry.
 */
function getFirefoxNightlyVersion() {
  const firefoxVersions = 'https://product-details.mozilla.org/1.0/firefox_versions.json';
  return new Promise((resolve, reject) => {
    let data = '';
    https
      .get(firefoxVersions, (r) => {
        if (r.statusCode >= 400) {
          // The body will never be read; discard it so the response is consumed.
          r.resume();
          return reject(new Error(`Got status code ${r.statusCode}`));
        }
        r.on('error', reject);
        r.on('data', (chunk) => {
          data += chunk;
        });
        r.on('end', () => {
          let versions;
          try {
            versions = JSON.parse(data);
          } catch {
            return reject(new Error('Firefox version not found'));
          }
          // A well-formed document without the key would otherwise resolve with
          // undefined, which the caller would then use as the revision.
          if (!versions || !versions.FIREFOX_NIGHTLY) {
            return reject(new Error('Firefox version not found'));
          }
          return resolve(versions.FIREFOX_NIGHTLY);
        });
      })
      .on('error', reject);
  });
}
|
||||
|
||||
(async () => {
|
||||
const builds = [
|
||||
{
|
||||
platform: 'linux',
|
||||
version: 756035,
|
||||
chromeFolder: 'chrome-linux',
|
||||
firefoxFolder: 'firefox',
|
||||
fsExec: 'flaresolverr-linux',
|
||||
fsZipExec: 'flaresolverr',
|
||||
fsZipName: 'linux-x64',
|
||||
@@ -18,18 +42,16 @@ const version = 'v' + require('./package.json').version;
|
||||
},
|
||||
{
|
||||
platform: 'win64',
|
||||
version: 756035,
|
||||
chromeFolder: 'chrome-win',
|
||||
firefoxFolder: 'firefox',
|
||||
fsExec: 'flaresolverr-win.exe',
|
||||
fsZipExec: 'flaresolverr.exe',
|
||||
fsZipName: 'windows-x64',
|
||||
fsLicenseName: 'LICENSE.txt'
|
||||
}
|
||||
// TODO: this is working but changes are required in session.ts to find chrome path
|
||||
// todo: this has to be build in macOS (hdiutil is required). changes required in sessions.ts too
|
||||
// {
|
||||
// platform: 'mac',
|
||||
// version: 756035,
|
||||
// chromeFolder: 'chrome-mac',
|
||||
// firefoxFolder: 'firefox',
|
||||
// fsExec: 'flaresolverr-macos',
|
||||
// fsZipExec: 'flaresolverr',
|
||||
// fsZipName: 'macos',
|
||||
@@ -42,20 +64,24 @@ const version = 'v' + require('./package.json').version;
|
||||
if (fs.existsSync('bin')) {
|
||||
fs.rmSync('bin', { recursive: true })
|
||||
}
|
||||
execSync('pkg -t node14-win-x64,node14-linux-x64 --out-path bin .')
|
||||
// execSync('pkg -t node14-win-x64,node14-mac-x64,node14-linux-x64 --out-path bin .')
|
||||
execSync('./node_modules/.bin/pkg -t node14-win-x64,node14-linux-x64 --out-path bin .')
|
||||
// execSync('./node_modules/.bin/pkg -t node14-win-x64,node14-mac-x64,node14-linux-x64 --out-path bin .')
|
||||
|
||||
// download Chrome and zip together
|
||||
// get firefox revision
|
||||
const revision = await getFirefoxNightlyVersion();
|
||||
|
||||
// download firefox and zip together
|
||||
for (const os of builds) {
|
||||
console.log('Building ' + os.fsZipName + ' artifact')
|
||||
|
||||
// download chrome
|
||||
console.log('Downloading Chrome...')
|
||||
// download firefox
|
||||
console.log(`Downloading firefox ${revision} for ${os.platform} ...`)
|
||||
const f = puppeteer.createBrowserFetcher({
|
||||
product: 'firefox',
|
||||
platform: os.platform,
|
||||
path: path.join(__dirname, 'bin', 'puppeteer')
|
||||
})
|
||||
await f.download(os.version)
|
||||
await f.download(revision)
|
||||
|
||||
// compress in zip
|
||||
console.log('Compressing zip file...')
|
||||
@@ -75,7 +101,7 @@ const version = 'v' + require('./package.json').version;
|
||||
|
||||
archive.file('LICENSE', { name: 'flaresolverr/' + os.fsLicenseName })
|
||||
archive.file('bin/' + os.fsExec, { name: 'flaresolverr/' + os.fsZipExec })
|
||||
archive.directory('bin/puppeteer/' + os.platform + '-' + os.version + '/' + os.chromeFolder, 'flaresolverr/chrome')
|
||||
archive.directory('bin/puppeteer/' + os.platform + '-' + revision + '/' + os.firefoxFolder, 'flaresolverr/firefox')
|
||||
if (os.platform === 'linux') {
|
||||
archive.file('flaresolverr.service', { name: 'flaresolverr/flaresolverr.service' })
|
||||
}
|
||||
|
||||
12
jest.config.js
Normal file
12
jest.config.js
Normal file
@@ -0,0 +1,12 @@
|
||||
module.exports = {
|
||||
// A list of paths to directories that Jest should use to search for files in
|
||||
roots: [
|
||||
"./src/"
|
||||
],
|
||||
// Compile Typescript
|
||||
transform: {
|
||||
'^.+\\.(ts|tsx)$': 'ts-jest'
|
||||
},
|
||||
// Default value for FlareSolverr maxTimeout is 60000
|
||||
testTimeout: 70000
|
||||
}
|
||||
14819
package-lock.json
generated
14819
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
59
package.json
59
package.json
@@ -1,59 +1,46 @@
|
||||
{
|
||||
"name": "flaresolverr",
|
||||
"version": "1.2.8",
|
||||
"version": "2.0.1",
|
||||
"description": "Proxy server to bypass Cloudflare protection.",
|
||||
"scripts": {
|
||||
"start": "node ./dist/index.js",
|
||||
"start": "node ./dist/server.js",
|
||||
"build": "tsc",
|
||||
"dev": "nodemon -e ts --exec ts-node src/index.ts",
|
||||
"package": "node build-binaries.js"
|
||||
"dev": "nodemon -e ts --exec ts-node src/server.ts",
|
||||
"package": "node build-binaries.js",
|
||||
"test": "jest --runInBand"
|
||||
},
|
||||
"author": "Diego Heras (ngosang)",
|
||||
"contributors": [
|
||||
{
|
||||
"name": "Noah Cardoza",
|
||||
"url": "https://github.com/NoahCardoza/CloudProxy.git"
|
||||
}
|
||||
],
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/ngosang/FlareSolverr"
|
||||
},
|
||||
"pkg": {
|
||||
"assets": [
|
||||
"node_modules/puppeteer-extra-plugin-stealth/**/*.*"
|
||||
]
|
||||
},
|
||||
"bin": {
|
||||
"flaresolverr": "dist/index.js"
|
||||
"flaresolverr": "dist/server.js"
|
||||
},
|
||||
"dependencies": {
|
||||
"await-timeout": "^1.1.1",
|
||||
"body-parser": "^1.19.0",
|
||||
"console-log-level": "^1.4.1",
|
||||
"got": "^11.5.1",
|
||||
"hcaptcha-solver": "^1.0.2",
|
||||
"express": "^4.17.1",
|
||||
"puppeteer": "^3.3.0",
|
||||
"puppeteer-extra": "^3.1.15",
|
||||
"puppeteer-extra-plugin-stealth": "^2.6.5",
|
||||
"uuid": "^8.2.0"
|
||||
"uuid": "^8.3.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/await-timeout": "^0.3.1",
|
||||
"@types/node": "^14.0.23",
|
||||
"@types/puppeteer": "^3.0.1",
|
||||
"@types/uuid": "^8.0.0",
|
||||
"archiver": "^5.2.0",
|
||||
"eslint": "^7.5.0",
|
||||
"eslint-config-airbnb-base": "^14.2.0",
|
||||
"eslint-config-standard": "^14.1.1",
|
||||
"eslint-plugin-import": "^2.22.0",
|
||||
"eslint-plugin-node": "^11.1.0",
|
||||
"eslint-plugin-promise": "^4.2.1",
|
||||
"eslint-plugin-standard": "^4.0.1",
|
||||
"nodemon": "^2.0.4",
|
||||
"pkg": "^4.4.9",
|
||||
"ts-node": "^8.10.2",
|
||||
"typescript": "^3.9.7"
|
||||
"@types/body-parser": "^1.19.1",
|
||||
"@types/express": "^4.17.13",
|
||||
"@types/jest": "^27.0.2",
|
||||
"@types/node": "^14.17.27",
|
||||
"@types/puppeteer": "^3.0.6",
|
||||
"@types/supertest": "^2.0.11",
|
||||
"@types/uuid": "^8.3.1",
|
||||
"archiver": "^5.3.0",
|
||||
"nodemon": "^2.0.13",
|
||||
"pkg": "^5.3.3",
|
||||
"supertest": "^6.1.6",
|
||||
"ts-jest": "^27.0.7",
|
||||
"ts-node": "^10.3.0",
|
||||
"typescript": "^4.4.4"
|
||||
}
|
||||
}
|
||||
|
||||
83
src/app.ts
Normal file
83
src/app.ts
Normal file
@@ -0,0 +1,83 @@
|
||||
import log from './services/log'
|
||||
import {NextFunction, Request, Response} from 'express';
|
||||
import {getUserAgent} from "./services/sessions";
|
||||
import {controllerV1} from "./controllers/v1";
|
||||
|
||||
const express = require('express');
|
||||
const app = express();
|
||||
const bodyParser = require('body-parser');
|
||||
const version: string = 'v' + require('../package.json').version
|
||||
|
||||
// Convert request objects to JSON
|
||||
app.use(bodyParser.json({
|
||||
limit: '50mb',
|
||||
verify(req: Request, res: Response, buf: any) {
|
||||
req.body = buf;
|
||||
}
|
||||
}));
|
||||
|
||||
// Access log
|
||||
app.use(function(req: Request, res: Response, next: NextFunction) {
|
||||
if (req.url != '/health') {
|
||||
// count the request for the log prefix
|
||||
log.incRequests()
|
||||
// build access message
|
||||
let body = "";
|
||||
if (req.method == 'POST' && req.body) {
|
||||
body += " body: "
|
||||
try {
|
||||
body += JSON.stringify(req.body)
|
||||
} catch(e) {
|
||||
body += req.body
|
||||
}
|
||||
}
|
||||
log.info(`Incoming request => ${req.method} ${req.url}${body}`);
|
||||
}
|
||||
next();
|
||||
});
|
||||
|
||||
// *********************************************************************************************************************
|
||||
// Routes
|
||||
|
||||
// Show welcome message
|
||||
app.get("/", ( req: Request, res: Response ) => {
|
||||
res.send({
|
||||
"msg": "FlareSolverr is ready!",
|
||||
"version": version,
|
||||
"userAgent": getUserAgent()
|
||||
});
|
||||
});
|
||||
|
||||
// Health endpoint. this endpoint is special because it doesn't print traces
|
||||
app.get("/health", ( req: Request, res: Response ) => {
|
||||
res.send({
|
||||
"status": "ok"
|
||||
});
|
||||
});
|
||||
|
||||
// Controller v1
|
||||
app.post("/v1", async( req: Request, res: Response ) => {
|
||||
await controllerV1(req, res);
|
||||
});
|
||||
|
||||
// *********************************************************************************************************************
|
||||
|
||||
// Unknown paths or verbs
|
||||
app.use(function (req : Request, res : Response) {
|
||||
res.status(404)
|
||||
.send({"error": "Unknown resource or HTTP verb"})
|
||||
})
|
||||
|
||||
// Errors
|
||||
/**
 * Express error-handling middleware (four-argument signature).
 *
 * When an error is passed down the middleware chain it is logged and returned
 * to the client as `{"error": "Invalid request: ..."}` on a single line.
 * When there is no error, control is handed to the next handler.
 */
app.use(function (err: any, req: Request, res: Response, next: NextFunction) {
  if (err) {
    // Use a global regex: String.replace with a string pattern removes only the
    // first match, which left later line breaks in the logged/returned message.
    const msg = ('Invalid request: ' + err).replace(/[\r\n]/g, "")
    log.error(msg)
    res.send({"error": msg})
  } else {
    next()
  }
})
|
||||
|
||||
module.exports = app;
|
||||
@@ -1,31 +0,0 @@
|
||||
import got from 'got'
|
||||
import { sleep } from '../utils'
|
||||
|
||||
/*
|
||||
This method uses the captcha-harvester project:
|
||||
https://github.com/NoahCardoza/CaptchaHarvester
|
||||
|
||||
While the function must take url/sitekey/type args,
|
||||
they aren't used because the harvester server must
|
||||
be preconfigured.
|
||||
|
||||
ENV:
|
||||
HARVESTER_ENDPOINT: This must be the full path
|
||||
to the /token endpoint of the harvester.
|
||||
E.G. "https://127.0.0.1:5000/token"
|
||||
*/
|
||||
|
||||
export default async function solve(): Promise<string> {
|
||||
const endpoint = process.env.HARVESTER_ENDPOINT
|
||||
if (!endpoint) { throw Error('ENV variable `HARVESTER_ENDPOINT` must be set.') }
|
||||
while (true) {
|
||||
try {
|
||||
return (await got.get(process.env.HARVESTER_ENDPOINT, {
|
||||
https: { rejectUnauthorized: false }
|
||||
})).body
|
||||
} catch (e) {
|
||||
if (e.response.statusCode !== 418) { throw e }
|
||||
}
|
||||
await sleep(3000)
|
||||
}
|
||||
}
|
||||
@@ -1,25 +0,0 @@
|
||||
const solveCaptcha = require('hcaptcha-solver');
|
||||
import { SolverOptions } from '.'
|
||||
/*
|
||||
This method uses the hcaptcha-solver project:
|
||||
https://github.com/JimmyLaurent/hcaptcha-solver
|
||||
|
||||
TODO: allow user pass custom options to the solver.
|
||||
|
||||
ENV:
|
||||
There are no other variables that must be set to get this to work
|
||||
*/
|
||||
|
||||
export default async function solve({ url }: SolverOptions): Promise<string> {
|
||||
throw new Error("hcaptcha-solver is not able to solve the new hCaptcha challenge. This issue is already reported #31.");
|
||||
|
||||
/*
|
||||
try {
|
||||
return await solveCaptcha(url)
|
||||
} catch (e) {
|
||||
console.error(e)
|
||||
return null
|
||||
}
|
||||
*/
|
||||
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
import log from "../log";
|
||||
import log from "../services/log";
|
||||
|
||||
export enum CaptchaType {
|
||||
re = 'reCaptcha',
|
||||
|
||||
179
src/controllers/v1.ts
Normal file
179
src/controllers/v1.ts
Normal file
@@ -0,0 +1,179 @@
|
||||
// todo: avoid puppeteer objects
|
||||
import {SetCookie, Headers, HttpMethod} from 'puppeteer'
|
||||
import {Request, Response} from 'express';
|
||||
|
||||
import log from '../services/log'
|
||||
import {browserRequest, ChallengeResolutionResultT, ChallengeResolutionT} from "../services/solver";
|
||||
import {SessionCreateOptions} from "../services/sessions";
|
||||
const sessions = require('../services/sessions')
|
||||
const version: string = 'v' + require('../../package.json').version
|
||||
|
||||
interface V1Routes {
|
||||
[key: string]: (params: V1RequestBase, response: V1ResponseBase) => Promise<void>
|
||||
}
|
||||
|
||||
export interface Proxy {
|
||||
url?: string
|
||||
username?: string
|
||||
password?: string
|
||||
}
|
||||
|
||||
export interface V1RequestBase {
|
||||
cmd: string
|
||||
cookies?: SetCookie[],
|
||||
maxTimeout?: number
|
||||
proxy?: Proxy
|
||||
session: string
|
||||
headers?: Headers // deprecated v2, not used
|
||||
userAgent?: string // deprecated v2, not used
|
||||
}
|
||||
|
||||
interface V1RequestSession extends V1RequestBase {
|
||||
}
|
||||
|
||||
export interface V1Request extends V1RequestBase {
|
||||
url: string
|
||||
method?: HttpMethod
|
||||
postData?: string
|
||||
returnOnlyCookies?: boolean
|
||||
download?: boolean // deprecated v2, not used
|
||||
returnRawHtml?: boolean // deprecated v2, not used
|
||||
}
|
||||
|
||||
export interface V1ResponseBase {
|
||||
status: string
|
||||
message: string
|
||||
startTimestamp: number
|
||||
endTimestamp: number
|
||||
version: string
|
||||
}
|
||||
|
||||
export interface V1ResponseSolution extends V1ResponseBase {
|
||||
solution: ChallengeResolutionResultT
|
||||
}
|
||||
|
||||
export interface V1ResponseSession extends V1ResponseBase {
|
||||
session: string
|
||||
}
|
||||
|
||||
export interface V1ResponseSessions extends V1ResponseBase {
|
||||
sessions: string[]
|
||||
}
|
||||
|
||||
/**
 * Logs a warning for each request parameter that is still declared on
 * V1Request but is marked there as deprecated and unused in v2.
 */
function warnRemovedRequestParams(params: V1Request): void {
  if (params.returnRawHtml) {
    log.warn("Request parameter 'returnRawHtml' was removed in FlareSolverr v2.")
  }
  if (params.download) {
    log.warn("Request parameter 'download' was removed in FlareSolverr v2.")
  }
}

/**
 * Passes the request to browserRequest and copies its status, message and
 * result into the response. A non-empty message is also written to the log.
 */
async function resolveRequest(params: V1Request, response: V1ResponseSolution): Promise<void> {
  const result: ChallengeResolutionT = await browserRequest(params)

  response.status = result.status;
  response.message = result.message;
  response.solution = result.result;
  if (response.message) {
    log.info(response.message)
  }
}

/**
 * Command table of the v1 API: maps the `cmd` request parameter to its handler.
 * Each handler fills in the given response object and signals failure by throwing.
 */
export const routes: V1Routes = {
  // Creates a session that is not one-time and reports its ID.
  'sessions.create': async (params: V1RequestSession, response: V1ResponseSession): Promise<void> => {
    const options: SessionCreateOptions = {
      oneTimeSession: false,
      cookies: params.cookies,
      maxTimeout: params.maxTimeout,
      proxy: params.proxy
    }
    const { sessionId, browser } = await sessions.create(params.session, options)
    if (browser) {
      response.status = "ok";
      response.message = "Session created successfully.";
      response.session = sessionId
    } else {
      throw Error('Error creating session.')
    }
  },
  // Returns the value of sessions.list().
  'sessions.list': async (params: V1RequestSession, response: V1ResponseSessions): Promise<void> => {
    response.status = "ok";
    response.message = "";
    response.sessions = sessions.list();
  },
  // Destroys the session named in the request; throws when destroy reports failure.
  'sessions.destroy': async (params: V1RequestSession, response: V1ResponseBase): Promise<void> => {
    if (await sessions.destroy(params.session)) {
      response.status = "ok";
      response.message = "The session has been removed.";
    } else {
      throw Error('This session does not exist.')
    }
  },
  // Performs a GET request; a request body is rejected.
  'request.get': async (params: V1Request, response: V1ResponseSolution): Promise<void> => {
    params.method = 'GET'
    if (params.postData) {
      // The message names the actual request field (postData, not "postBody").
      throw Error('Cannot use "postData" when sending a GET request.')
    }
    warnRemovedRequestParams(params)
    await resolveRequest(params, response)
  },
  // Performs a POST request; a request body is mandatory.
  'request.post': async (params: V1Request, response: V1ResponseSolution): Promise<void> => {
    params.method = 'POST'
    if (!params.postData) {
      // The message names the actual request field (postData, not "postBody").
      throw Error('Must send param "postData" when sending a POST request.')
    }
    warnRemovedRequestParams(params)
    await resolveRequest(params, response)
  },
}
|
||||
|
||||
export async function controllerV1(req: Request, res: Response): Promise<void> {
|
||||
const response: V1ResponseBase = {
|
||||
status: null,
|
||||
message: null,
|
||||
startTimestamp: Date.now(),
|
||||
endTimestamp: 0,
|
||||
version: version
|
||||
}
|
||||
|
||||
try {
|
||||
const params: V1RequestBase = req.body
|
||||
// do some validations
|
||||
if (!params.cmd) {
|
||||
throw Error("Request parameter 'cmd' is mandatory.")
|
||||
}
|
||||
if (params.headers) {
|
||||
log.warn("Request parameter 'headers' was removed in FlareSolverr v2.")
|
||||
}
|
||||
if (params.userAgent) {
|
||||
log.warn("Request parameter 'userAgent' was removed in FlareSolverr v2.")
|
||||
}
|
||||
|
||||
// set default values
|
||||
if (!params.maxTimeout || params.maxTimeout < 1) {
|
||||
params.maxTimeout = 60000;
|
||||
}
|
||||
|
||||
// execute the command
|
||||
const route = routes[params.cmd]
|
||||
if (route) {
|
||||
await route(params, response)
|
||||
} else {
|
||||
throw Error(`The command '${params.cmd}' is invalid.`)
|
||||
}
|
||||
} catch (e) {
|
||||
res.status(500)
|
||||
response.status = "error";
|
||||
response.message = e.toString();
|
||||
log.error(response.message)
|
||||
}
|
||||
|
||||
response.endTimestamp = Date.now()
|
||||
log.info(`Response in ${(response.endTimestamp - response.startTimestamp) / 1000} s`)
|
||||
res.send(response)
|
||||
}
|
||||
186
src/index.ts
186
src/index.ts
@@ -1,186 +0,0 @@
|
||||
const fs = require('fs');
|
||||
const os = require('os');
|
||||
const path = require('path');
|
||||
import log from './log'
|
||||
import { createServer, IncomingMessage, ServerResponse } from 'http';
|
||||
import { RequestContext } from './types'
|
||||
import Router, { BaseAPICall } from './routes'
|
||||
import getCaptchaSolver from "./captcha";
|
||||
import sessions from "./session";
|
||||
import {v1 as UUIDv1} from "uuid";
|
||||
|
||||
const version: string = "v" + require('../package.json').version
|
||||
const serverPort: number = Number(process.env.PORT) || 8191
|
||||
const serverHost: string = process.env.HOST || '0.0.0.0'
|
||||
|
||||
function validateEnvironmentVariables() {
|
||||
// ip and port variables are validated by nodejs
|
||||
if (process.env.LOG_LEVEL && ['error', 'warn', 'info', 'verbose', 'debug'].indexOf(process.env.LOG_LEVEL) == -1) {
|
||||
log.error(`The environment variable 'LOG_LEVEL' is wrong. Check the documentation.`);
|
||||
process.exit(1);
|
||||
}
|
||||
if (process.env.LOG_HTML && ['true', 'false'].indexOf(process.env.LOG_HTML) == -1) {
|
||||
log.error(`The environment variable 'LOG_HTML' is wrong. Check the documentation.`);
|
||||
process.exit(1);
|
||||
}
|
||||
if (process.env.HEADLESS && ['true', 'false'].indexOf(process.env.HEADLESS) == -1) {
|
||||
log.error(`The environment variable 'HEADLESS' is wrong. Check the documentation.`);
|
||||
process.exit(1);
|
||||
}
|
||||
try {
|
||||
getCaptchaSolver();
|
||||
} catch (e) {
|
||||
log.error(`The environment variable 'CAPTCHA_SOLVER' is wrong. ${e.message}`);
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
async function testChromeInstallation() {
|
||||
const sessionId = UUIDv1()
|
||||
// create a temporary file for testing
|
||||
log.debug("Testing Chrome installation...")
|
||||
const fileContent = `flaresolverr_${version}`
|
||||
const filePath = path.join(os.tmpdir(), `flaresolverr_${sessionId}.txt`)
|
||||
const fileUrl = `file://${filePath}`
|
||||
fs.writeFileSync(filePath, fileContent)
|
||||
// launch the browser
|
||||
const session = await sessions.create(sessionId, {
|
||||
userAgent: null,
|
||||
oneTimeSession: true
|
||||
})
|
||||
const page = await session.browser.newPage()
|
||||
const response = await page.goto(fileUrl, { waitUntil: 'domcontentloaded' })
|
||||
const responseBody = (await response.buffer()).toString().trim()
|
||||
if (responseBody != fileContent) {
|
||||
throw new Error("The response body does not match!")
|
||||
}
|
||||
await page.close()
|
||||
await sessions.destroy(sessionId)
|
||||
log.debug("Test successful")
|
||||
}
|
||||
|
||||
function errorResponse(errorMsg: string, res: ServerResponse, startTimestamp: number) {
|
||||
log.error(errorMsg)
|
||||
const response = {
|
||||
status: 'error',
|
||||
message: errorMsg,
|
||||
startTimestamp,
|
||||
endTimestamp: Date.now(),
|
||||
version
|
||||
}
|
||||
res.writeHead(500, {
|
||||
'Content-Type': 'application/json'
|
||||
})
|
||||
res.write(JSON.stringify(response))
|
||||
res.end()
|
||||
}
|
||||
|
||||
/**
 * Answers the request with a JSON success payload (HTTP 200) and logs the
 * elapsed time.
 *
 * @param successMsg optional message; logged when truthy and returned in the
 *        `message` field (empty string when falsy)
 * @param extendedProperties extra properties merged over the base payload; may be null
 * @param res HTTP response to write to; it is ended by this function
 * @param startTimestamp epoch milliseconds at which the request started
 */
function successResponse(successMsg: string, extendedProperties: object, res: ServerResponse, startTimestamp: number) {
  const endTimestamp = Date.now()
  const elapsedSeconds = (endTimestamp - startTimestamp) / 1000
  log.info(`Response in ${elapsedSeconds} s`)
  if (successMsg) {
    log.info(successMsg)
  }

  // properties supplied by the caller win over the base fields
  const payload = {
    status: 'ok',
    message: successMsg || '',
    startTimestamp,
    endTimestamp,
    version,
    ...(extendedProperties || {})
  }

  const headers = { 'Content-Type': 'application/json' }
  res.writeHead(200, headers)
  res.write(JSON.stringify(payload))
  res.end()
}
|
||||
|
||||
/**
 * Checks that an incoming API call is well formed and sends the error
 * response itself when it is not.
 *
 * @param ctx request context; `ctx.errorResponse` is used to reject the call
 * @param params parsed JSON body of the request
 * @returns true when the request may be routed, false when an error response
 *          has already been sent
 */
function validateIncomingRequest(ctx: RequestContext, params: BaseAPICall) {
  log.info(`Params: ${JSON.stringify(params)}`)

  if (ctx.req.method !== 'POST') {
    ctx.errorResponse('Only the POST method is allowed')
    return false
  }

  if (ctx.req.url !== '/v1') {
    ctx.errorResponse('Only /v1 endpoint is allowed')
    return false
  }

  // the body is any valid JSON value; a literal `null` would make `params.cmd`
  // throw, so it is rejected here like any other body without 'cmd'
  if (!params || !params.cmd) {
    ctx.errorResponse("Parameter 'cmd' is mandatory")
    return false
  }

  return true
}
|
||||
|
||||
// init
log.info(`FlareSolverr ${version}`);
log.debug('Debug log enabled');
validateEnvironmentVariables();

/**
 * Handles one HTTP request: answers the health and welcome endpoints directly
 * and forwards everything else, once the body has been read and parsed as
 * JSON, to the router.
 */
function handleRequest(req: IncomingMessage, res: ServerResponse) {
  const startTimestamp = Date.now()

  // health endpoint. this endpoint is special because it doesn't print traces
  if (req.url === '/health') {
    res.writeHead(200, { 'Content-Type': 'application/json' })
    res.write(JSON.stringify({"status": "ok"}))
    res.end()
    return;
  }

  // count the request for the log prefix
  log.incRequests()
  log.info(`Incoming request: ${req.method} ${req.url}`)

  // show welcome message
  if (req.url === '/') {
    successResponse("FlareSolverr is ready!", null, res, startTimestamp);
    return;
  }

  // get request body
  const bodyParts: any[] = []
  req.on('data', chunk => {
    bodyParts.push(chunk)
  })
  req.on('end', () => {
    // parse params
    const body = Buffer.concat(bodyParts).toString()
    let params: BaseAPICall = null
    try {
      params = JSON.parse(body)
    } catch (err) {
      errorResponse('Body must be in JSON format', res, startTimestamp)
      return
    }

    const ctx: RequestContext = {
      req,
      res,
      startTimestamp,
      errorResponse: (msg) => errorResponse(msg, res, startTimestamp),
      successResponse: (msg, extendedProperties) => successResponse(msg, extendedProperties, res, startTimestamp)
    }

    // validate params
    if (!validateIncomingRequest(ctx, params)) {
      return
    }

    // process request
    Router(ctx, params).catch(e => {
      console.error(e)
      ctx.errorResponse(e.message)
    })
  })
}

testChromeInstallation()
  .catch(e => {
    // without a working browser there is nothing to serve
    log.error("Error starting Chrome browser.", e);
    process.exit(1);
  })
  .then(() =>
    createServer(handleRequest).listen(serverPort, serverHost, () => {
      log.info(`Listening on http://${serverHost}:${serverPort}`);
    })
  )
|
||||
@@ -1,195 +1,141 @@
|
||||
import {Response} from 'puppeteer'
|
||||
import {Page} from "puppeteer-extra/dist/puppeteer";
|
||||
import {Page, Response} from 'puppeteer'
|
||||
|
||||
import log from "../log";
|
||||
import getCaptchaSolver, {CaptchaType} from "../captcha";
|
||||
import log from "../services/log";
|
||||
|
||||
/**
|
||||
* This class contains the logic to solve protections provided by CloudFlare
|
||||
**/
|
||||
**/
|
||||
|
||||
const BAN_SELECTORS = ['span[data-translate="error"]'];
|
||||
const CHALLENGE_SELECTORS = ['#trk_jschal_js', '.ray_id', '.attack-box', '#cf-please-wait'];
|
||||
const TOKEN_INPUT_NAMES = ['g-recaptcha-response', 'h-captcha-response'];
|
||||
const CAPTCHA_SELECTORS = ['input[name="cf_captcha_kind"]'];
|
||||
|
||||
export default async function resolveChallenge(url: string, page: Page, response: Response): Promise<Response> {
|
||||
|
||||
// look for challenge and return fast if not detected
|
||||
if (!response.headers().server.startsWith('cloudflare')) {
|
||||
if (response.headers().server &&
|
||||
response.headers().server.startsWith('cloudflare') &&
|
||||
(response.status() == 403 || response.status() == 503)) {
|
||||
log.info('Cloudflare detected');
|
||||
} else {
|
||||
log.info('Cloudflare not detected');
|
||||
return response;
|
||||
}
|
||||
log.info('Cloudflare detected');
|
||||
|
||||
if (await page.$('.cf-error-code')) {
|
||||
throw new Error('Cloudflare has blocked this request (Code 1020 Detected).')
|
||||
if (await findAnySelector(page, BAN_SELECTORS)) {
|
||||
throw new Error('Cloudflare has blocked this request. Probably your IP is banned for this site, check in your web browser.')
|
||||
}
|
||||
|
||||
let selectorFoundCount = 0;
|
||||
let selectorFound = false;
|
||||
if (response.status() > 400) {
|
||||
// detect cloudflare wait 5s
|
||||
for (const selector of CHALLENGE_SELECTORS) {
|
||||
const cfChallengeElem = await page.$(selector)
|
||||
if (cfChallengeElem) {
|
||||
selectorFoundCount++
|
||||
log.debug(`Javascript challenge element '${selector}' detected.`)
|
||||
log.debug('Waiting for Cloudflare challenge...')
|
||||
|
||||
while (true) {
|
||||
try {
|
||||
// catch Execution context was destroyed
|
||||
const cfChallengeElem = await page.$(selector)
|
||||
if (!cfChallengeElem) {
|
||||
// solved!
|
||||
log.debug('Challenge element not found.')
|
||||
// find Cloudflare selectors
|
||||
let selector: string = await findAnySelector(page, CHALLENGE_SELECTORS)
|
||||
if (selector) {
|
||||
selectorFound = true;
|
||||
log.debug(`Javascript challenge element '${selector}' detected.`)
|
||||
log.debug('Waiting for Cloudflare challenge...')
|
||||
|
||||
while (true) {
|
||||
try {
|
||||
|
||||
selector = await findAnySelector(page, CHALLENGE_SELECTORS)
|
||||
if (!selector) {
|
||||
// solved!
|
||||
log.debug('Challenge element not found')
|
||||
break
|
||||
} else {
|
||||
log.debug(`Javascript challenge element '${selector}' detected.`)
|
||||
|
||||
// new Cloudflare Challenge #cf-please-wait
|
||||
const displayStyle = await page.evaluate((selector) => {
|
||||
return getComputedStyle(document.querySelector(selector)).getPropertyValue("display");
|
||||
}, selector);
|
||||
if (displayStyle == "none") {
|
||||
// spinner is hidden, could be a captcha or not
|
||||
log.debug('Challenge element is hidden')
|
||||
// wait until redirecting disappears
|
||||
while (true) {
|
||||
try {
|
||||
await page.waitFor(1000)
|
||||
const displayStyle2 = await page.evaluate(() => {
|
||||
return getComputedStyle(document.querySelector('#cf-spinner-redirecting')).getPropertyValue("display");
|
||||
});
|
||||
if (displayStyle2 == "none") {
|
||||
break // hCaptcha detected
|
||||
}
|
||||
} catch (error) {
|
||||
break // redirection completed
|
||||
}
|
||||
}
|
||||
break
|
||||
} else {
|
||||
// new Cloudflare Challenge #cf-please-wait
|
||||
const displayStyle = await page.evaluate((selector) => {
|
||||
return getComputedStyle(document.querySelector(selector)).getPropertyValue("display");
|
||||
}, selector);
|
||||
if (displayStyle == "none") {
|
||||
// spinner is hidden, could be a captcha or not
|
||||
log.debug('Challenge element is hidden.')
|
||||
// wait until redirecting disappears
|
||||
while (true) {
|
||||
try {
|
||||
await page.waitFor(1000)
|
||||
const displayStyle2 = await page.evaluate(() => {
|
||||
return getComputedStyle(document.querySelector('#cf-spinner-redirecting')).getPropertyValue("display");
|
||||
});
|
||||
if (displayStyle2 == "none") {
|
||||
break // hCaptcha detected
|
||||
}
|
||||
} catch (error) {
|
||||
break // redirection completed
|
||||
}
|
||||
}
|
||||
break
|
||||
} else {
|
||||
log.debug('Challenge element is visible.')
|
||||
}
|
||||
log.debug('Challenge element is visible')
|
||||
}
|
||||
log.debug('Found challenge element again.')
|
||||
} catch (error)
|
||||
{
|
||||
log.debug("Unexpected error: " + error);
|
||||
}
|
||||
log.debug('Found challenge element again')
|
||||
|
||||
} catch (error)
|
||||
{
|
||||
log.debug("Unexpected error: " + error);
|
||||
if (!error.toString().includes("Execution context was destroyed")) {
|
||||
break
|
||||
}
|
||||
|
||||
log.debug('Waiting for Cloudflare challenge...')
|
||||
await page.waitFor(1000)
|
||||
}
|
||||
|
||||
log.debug('Validating HTML code...')
|
||||
break
|
||||
} else {
|
||||
log.debug(`No '${selector}' challenge element detected.`)
|
||||
log.debug('Waiting for Cloudflare challenge...')
|
||||
await page.waitFor(1000)
|
||||
}
|
||||
|
||||
log.debug('Validating HTML code...')
|
||||
} else {
|
||||
log.debug(`No challenge element detected.`)
|
||||
}
|
||||
log.debug("Javascript challenge selectors found: " + selectorFoundCount + ", total selectors: " + CHALLENGE_SELECTORS.length)
|
||||
|
||||
} else {
|
||||
// some sites use cloudflare but there is no challenge
|
||||
log.debug(`Javascript challenge not detected. Status code: ${response.status()}`);
|
||||
selectorFoundCount = 1;
|
||||
selectorFound = true;
|
||||
}
|
||||
|
||||
// it seems some captcha pages return 200 sometimes
|
||||
if (await page.$('input[name="cf_captcha_kind"]')) {
|
||||
log.info('Captcha challenge detected.');
|
||||
const captchaSolver = getCaptchaSolver()
|
||||
if (captchaSolver) {
|
||||
const captchaStartTimestamp = Date.now()
|
||||
const challengeForm = await page.$('#challenge-form')
|
||||
if (challengeForm) {
|
||||
const captchaTypeElm = await page.$('input[name="cf_captcha_kind"]')
|
||||
const cfCaptchaType: string = await captchaTypeElm.evaluate((e: any) => e.value)
|
||||
const captchaType: CaptchaType = (CaptchaType as any)[cfCaptchaType]
|
||||
if (!captchaType) {
|
||||
throw new Error('Unknown captcha type!');
|
||||
}
|
||||
// check for CAPTCHA challenge
|
||||
if (await findAnySelector(page, CAPTCHA_SELECTORS)) {
|
||||
log.info('CAPTCHA challenge detected');
|
||||
throw new Error('FlareSolverr can not resolve CAPTCHA challenges. Since the captcha doesn\'t always appear, you may have better luck with the next request.');
|
||||
|
||||
let sitekey = null
|
||||
if (captchaType != 'hCaptcha' && process.env.CAPTCHA_SOLVER != 'hcaptcha-solver') {
|
||||
const sitekeyElem = await page.$('*[data-sitekey]')
|
||||
if (!sitekeyElem) {
|
||||
throw new Error('Could not find sitekey!');
|
||||
}
|
||||
sitekey = await sitekeyElem.evaluate((e) => e.getAttribute('data-sitekey'))
|
||||
}
|
||||
|
||||
log.info('Waiting to receive captcha token to bypass challenge...')
|
||||
const token = await captchaSolver({
|
||||
url,
|
||||
sitekey,
|
||||
type: captchaType
|
||||
})
|
||||
log.debug(`Token received: ${token}`);
|
||||
if (!token) {
|
||||
throw new Error('Token solver failed to return a token.')
|
||||
}
|
||||
|
||||
let responseFieldsFoundCount = 0;
|
||||
for (const name of TOKEN_INPUT_NAMES) {
|
||||
const input = await page.$(`textarea[name="${name}"]`)
|
||||
if (input) {
|
||||
responseFieldsFoundCount ++;
|
||||
log.debug(`Challenge response field '${name}' found in challenge form.`);
|
||||
await input.evaluate((e: HTMLTextAreaElement, token) => { e.value = token }, token);
|
||||
}
|
||||
}
|
||||
if (responseFieldsFoundCount == 0) {
|
||||
throw new Error('Challenge response field not found in challenge form.');
|
||||
}
|
||||
|
||||
// ignore preset event listeners on the form
|
||||
await page.evaluate(() => {
|
||||
window.addEventListener('submit', (e) => { e.stopPropagation() }, true)
|
||||
})
|
||||
|
||||
// it seems some sites obfuscate their challenge forms
|
||||
// TODO: look into how they do it and come up with a more solid solution
|
||||
try {
|
||||
// this element is added with js and we want to wait for all the js to load before submitting
|
||||
await page.waitForSelector('#challenge-form', { timeout: 10000 })
|
||||
} catch (err) {
|
||||
throw new Error("No '#challenge-form' element detected.");
|
||||
}
|
||||
|
||||
// calculates the time it took to solve the captcha
|
||||
const captchaSolveTotalTime = Date.now() - captchaStartTimestamp
|
||||
|
||||
// generates a random wait time
|
||||
const randomWaitTime = (Math.floor(Math.random() * 10) + 10) * 1000
|
||||
|
||||
// waits, if any, time remaining to appear human but stay as fast as possible
|
||||
const timeLeft = randomWaitTime - captchaSolveTotalTime
|
||||
if (timeLeft > 0) {
|
||||
log.debug(`Waiting for '${timeLeft}' milliseconds.`);
|
||||
await page.waitFor(timeLeft);
|
||||
}
|
||||
|
||||
// submit captcha response
|
||||
challengeForm.evaluate((e: HTMLFormElement) => e.submit())
|
||||
response = await page.waitForNavigation({ waitUntil: 'domcontentloaded' })
|
||||
|
||||
if (await page.$('input[name="cf_captcha_kind"]')) {
|
||||
throw new Error('Captcha service failed to solve the challenge.');
|
||||
}
|
||||
}
|
||||
} else {
|
||||
throw new Error('Captcha detected but no automatic solver is configured.');
|
||||
}
|
||||
// const captchaSolver = getCaptchaSolver()
|
||||
// if (captchaSolver) {
|
||||
// // to-do: get the params
|
||||
// log.info('Waiting to receive captcha token to bypass challenge...')
|
||||
// const token = await captchaSolver({
|
||||
// url,
|
||||
// sitekey,
|
||||
// type: captchaType
|
||||
// })
|
||||
// log.debug(`Token received: ${token}`);
|
||||
// // to-do: send the token
|
||||
// }
|
||||
// } else {
|
||||
// throw new Error('Captcha detected but no automatic solver is configured.');
|
||||
// }
|
||||
} else {
|
||||
if (selectorFoundCount == 0)
|
||||
if (!selectorFound)
|
||||
{
|
||||
throw new Error('No challenge selectors found, unable to proceed')
|
||||
throw new Error('No challenge selectors found, unable to proceed.')
|
||||
} else {
|
||||
// reload the page to make sure we get the real response
|
||||
response = await page.reload()
|
||||
await page.content()
|
||||
log.info('Challenge solved.');
|
||||
log.info('Challenge solved');
|
||||
}
|
||||
}
|
||||
|
||||
return response;
|
||||
}
|
||||
|
||||
/**
 * Looks up the given selectors in the page, one after another in array order.
 *
 * @param page page that is queried with `page.$`
 * @param selectors CSS selectors to try
 * @returns the first selector that matches an element, or null when none does
 */
async function findAnySelector(page: Page, selectors: string[]) {
  for (let index = 0; index < selectors.length; index++) {
    const candidate = selectors[index]
    const element = await page.$(candidate)
    if (element) {
      return candidate;
    }
  }
  return null;
}
|
||||
|
||||
298
src/routes.ts
298
src/routes.ts
@@ -1,298 +0,0 @@
|
||||
import { v1 as UUIDv1 } from 'uuid'
|
||||
import { SetCookie, Request, Response, Headers, HttpMethod, Overrides } from 'puppeteer'
|
||||
import { Page, Browser } from "puppeteer-extra/dist/puppeteer";
|
||||
const Timeout = require('await-timeout');
|
||||
|
||||
import log from './log'
|
||||
import sessions, { SessionsCacheItem } from './session'
|
||||
import { RequestContext } from './types'
|
||||
import cloudflareProvider from './providers/cloudflare';
|
||||
|
||||
export interface BaseAPICall {
|
||||
cmd: string
|
||||
}
|
||||
|
||||
interface BaseSessionsAPICall extends BaseAPICall {
|
||||
session?: string
|
||||
}
|
||||
|
||||
interface SessionsCreateAPICall extends BaseSessionsAPICall {
|
||||
userAgent?: string,
|
||||
cookies?: SetCookie[],
|
||||
headers?: Headers
|
||||
maxTimeout?: number
|
||||
proxy?: any
|
||||
}
|
||||
|
||||
interface BaseRequestAPICall extends BaseAPICall {
|
||||
url: string
|
||||
method?: HttpMethod
|
||||
postData?: string
|
||||
session?: string
|
||||
userAgent?: string
|
||||
maxTimeout?: number
|
||||
cookies?: SetCookie[],
|
||||
headers?: Headers
|
||||
proxy?: any, // TODO: use interface not any
|
||||
download?: boolean
|
||||
returnOnlyCookies?: boolean
|
||||
}
|
||||
|
||||
|
||||
interface Routes {
|
||||
[key: string]: (ctx: RequestContext, params: BaseAPICall) => void | Promise<void>
|
||||
}
|
||||
|
||||
interface ChallengeResolutionResultT {
|
||||
url: string
|
||||
status: number,
|
||||
headers?: Headers,
|
||||
response: string,
|
||||
cookies: object[]
|
||||
userAgent: string
|
||||
}
|
||||
|
||||
interface ChallengeResolutionT {
|
||||
status?: string
|
||||
message: string
|
||||
result: ChallengeResolutionResultT
|
||||
}
|
||||
|
||||
interface OverrideResolvers {
|
||||
method?: (request: Request) => HttpMethod,
|
||||
postData?: (request: Request) => string,
|
||||
headers?: (request: Request) => Headers
|
||||
}
|
||||
|
||||
type OverridesProps =
|
||||
'method' |
|
||||
'postData' |
|
||||
'headers'
|
||||
|
||||
// We always set a Windows User-Agent because ARM builds are detected by Cloudflare
|
||||
const DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
|
||||
|
||||
/**
 * Runs `resolveChallenge` in a race against a timer.
 *
 * @param ctx request context, passed through to `resolveChallenge`
 * @param params request parameters; `maxTimeout` (ms) falls back to 60000 when falsy
 * @param page page the challenge is resolved in
 * @returns whatever settles first: the challenge resolution or the timer
 *          (NOTE(review): the timer is presumably an await-timeout rejection
 *          carrying the message below - confirm against that library)
 */
async function resolveChallengeWithTimeout(ctx: RequestContext, params: BaseRequestAPICall, page: Page) {
  const maxTimeout = params.maxTimeout || 60000
  const timeoutMessage = `Maximum timeout reached. maxTimeout=${maxTimeout} (ms)`
  const timer = new Timeout();
  try {
    const challenge = resolveChallenge(ctx, params, page);
    const deadline = timer.set(maxTimeout, timeoutMessage)
    return await Promise.race([challenge, deadline]);
  } finally {
    // the timer is cleared whichever promise settled first
    timer.clear();
  }
}
|
||||
|
||||
/**
 * Navigates the page to `url`, lets the Cloudflare provider deal with any
 * protection it detects and builds the API payload from the final page state.
 *
 * A failure of the Cloudflare provider does not reject: it is reported through
 * `status: "error"` and `message` in the returned payload.
 *
 * The page is closed before this function settles, on success and on failure.
 *
 * @param ctx request context (not used by this function)
 * @param params `url` to load, optional `proxy` credentials, `download` to
 *        return the raw body as base64 and `returnOnlyCookies` to omit body,
 *        headers and user agent
 * @param page page to navigate; it is closed by this function
 * @throws Error when the navigation yields no response; navigation errors are propagated
 */
async function resolveChallenge(ctx: RequestContext, { url, proxy, download, returnOnlyCookies }: BaseRequestAPICall, page: Page): Promise<ChallengeResolutionT | void> {
  try {
    let status = 'ok'
    let message = ''

    if (proxy) {
      log.debug("Apply proxy");
      if (proxy.username) {
        await page.authenticate({ username: proxy.username, password: proxy.password });
      }
    }

    log.debug(`Navigating to... ${url}`)
    let response: Response = await page.goto(url, { waitUntil: 'domcontentloaded' })
    if (!response) {
      // fail with a clear message instead of a TypeError further down
      throw new Error(`No response received while navigating to ${url}`)
    }
    log.html(await page.content())

    // Detect protection services and solve challenges
    try {
      response = await cloudflareProvider(url, page, response);
    } catch (e) {
      status = "error";
      message = "Cloudflare " + e.toString();
    }

    const payload: ChallengeResolutionT = {
      status,
      message,
      result: {
        url: page.url(),
        status: response.status(),
        headers: response.headers(),
        response: null,
        cookies: await page.cookies(),
        userAgent: await page.evaluate(() => navigator.userAgent)
      }
    }

    if (returnOnlyCookies) {
      payload.result.headers = null;
      payload.result.userAgent = null;
    } else {
      if (download) {
        // for some reason we get an error unless we reload the page
        // has something to do with a stale buffer and this is the quickest
        // fix since I am short on time
        response = await page.goto(url, { waitUntil: 'domcontentloaded' })
        payload.result.response = (await response.buffer()).toString('base64')
      } else {
        payload.result.response = await page.content()
      }
    }

    // Add final url in result
    payload.result.url = page.url();

    return payload
  } finally {
    // make sure the page is closed, also on failure: when a temporary session
    // is used the browser may be quit before the page is properly closed.
    // A failing close must not replace the result or the original error.
    try {
      await page.close()
    } catch (closeError) {
      log.debug("Unable to close the page: " + closeError)
    }
  }
}
|
||||
|
||||
/**
 * Combines the defaults stored in a session with the parameters of a request.
 *
 * Request parameters override session defaults. `headers` are merged key by
 * key (request values win), so the result always carries a `headers` object,
 * possibly empty.
 *
 * @param session session whose `defaults` are used as the base
 * @param params parameters of the current request
 * @returns a new parameters object; neither input is modified
 */
function mergeSessionWithParams({ defaults }: SessionsCacheItem, params: BaseRequestAPICall): BaseRequestAPICall {
  // custom merging logic
  const headers = { ...(defaults.headers || {}), ...(params.headers || {}) }

  return { ...defaults, ...params, headers }
}
|
||||
|
||||
/**
 * Opens a new page in the browser and applies the request parameters to it:
 * user agent, cookies and, through request interception, the HTTP method,
 * body and extra headers of the first navigation request.
 *
 * @param ctx request context (not used by this function)
 * @param params request parameters to apply
 * @param browser browser in which the page is opened
 * @returns the configured page
 */
async function setupPage(ctx: RequestContext, params: BaseRequestAPICall, browser: Browser): Promise<Page> {
  const page = await browser.newPage()
  const { method, postData, userAgent, headers, cookies } = params

  // one resolver per request property that has to be overridden
  const overrideResolvers: OverrideResolvers = {}

  if (method !== 'GET') {
    log.debug(`Setting method to ${method}`)
    overrideResolvers.method = () => method
  }

  if (postData) {
    log.debug(`Setting body data to ${postData}`)
    overrideResolvers.postData = () => postData
  }

  if (userAgent) {
    log.debug(`Using custom UA: ${userAgent}`)
  }
  await page.setUserAgent(userAgent ? userAgent : DEFAULT_USER_AGENT)

  if (headers) {
    log.debug(`Adding custom headers: ${JSON.stringify(headers)}`)
    overrideResolvers.headers = (request) => Object.assign(request.headers(), headers)
  }

  if (cookies) {
    log.debug(`Setting custom cookies: ${JSON.stringify(cookies)}`)
    await page.setCookie(...cookies)
  }

  // nothing to override: no request interception needed
  const overrideKeys = Object.keys(overrideResolvers) as OverridesProps[]
  if (overrideKeys.length === 0) {
    return page
  }

  // the overrides are applied to the first navigation request only
  let navigationHandled = false
  const onRequest = (request: Request) => {
    if (navigationHandled || !request.isNavigationRequest()) {
      request.continue()
      return
    }

    navigationHandled = true
    const overrides: Overrides = {}
    for (const key of overrideKeys) {
      // @ts-ignore
      overrides[key] = overrideResolvers[key](request)
    }

    log.debug(`Overrides: ${JSON.stringify(overrides)}`)
    request.continue(overrides)
  }

  await page.setRequestInterception(true)
  page.on('request', onRequest)

  return page
}
|
||||
|
||||
/**
 * Serves a `request.*` command: picks (or creates) the browser session, loads
 * the URL, resolves the challenge and sends the API response.
 *
 * When the request names no session, a one-time session is created and
 * destroyed again once the request is finished.
 *
 * @param ctx request context used to send the response
 * @param params request parameters
 */
const browserRequest = async (ctx: RequestContext, params: BaseRequestAPICall) => {
  const oneTimeSession = params.session === undefined
  const sessionId = params.session || UUIDv1()
  const session = oneTimeSession
    ? await sessions.create(sessionId, { userAgent: params.userAgent, oneTimeSession })
    : sessions.get(sessionId)

  if (session === false) {
    return ctx.errorResponse('This session does not exist. Use \'list_sessions\' to see all the existing sessions.')
  }

  params = mergeSessionWithParams(session, params)

  try {
    const page = await setupPage(ctx, params, session.browser)
    const data = await resolveChallengeWithTimeout(ctx, params, page)
    if (!data) {
      return
    }

    // 'status' travels in the extended properties, not in the data itself
    const { status } = data
    delete data.status

    const sessionInfo = oneTimeSession ? {} : { session: sessionId }
    const statusInfo = status ? { status } : {}
    ctx.successResponse(data.message, {
      ...sessionInfo,
      ...statusInfo,
      solution: data.result
    })
  } catch (error) {
    log.error(error)
    return ctx.errorResponse("Unable to process browser request. Error: " + error)
  } finally {
    // one-time sessions do not outlive the request
    if (oneTimeSession) {
      await sessions.destroy(sessionId)
    }
  }
}
|
||||
|
||||
/**
 * Command handlers of the API, keyed by the value of the `cmd` parameter.
 * Every handler sends its own response through the request context.
 */
export const routes: Routes = {
  // creates a browser session; a session id is generated when none is given
  'sessions.create': async (ctx, { session, ...options }: SessionsCreateAPICall) => {
    session = session || UUIDv1()
    const { browser } = await sessions.create(session, options)
    if (browser) { ctx.successResponse('Session created successfully.', { session }) }
  },
  // lists the existing sessions
  'sessions.list': (ctx) => {
    ctx.successResponse(null, { sessions: sessions.list() })
  },
  // destroys a session
  'sessions.destroy': async (ctx, { session }: BaseSessionsAPICall) => {
    if (await sessions.destroy(session)) { return ctx.successResponse('The session has been removed.') }
    ctx.errorResponse('This session does not exist.')
  },
  // loads a URL with a GET request; 'postData' is not allowed
  'request.get': async (ctx, params: BaseRequestAPICall) => {
    params.method = 'GET'
    if (params.postData) {
      // the message names the parameter that is actually checked
      return ctx.errorResponse('Cannot use "postData" when sending a GET request.')
    }
    await browserRequest(ctx, params)
  },
  // loads a URL with a POST request; 'postData' is mandatory
  'request.post': async (ctx, params: BaseRequestAPICall) => {
    params.method = 'POST'

    if (!params.postData) {
      // the message names the parameter that is actually checked
      return ctx.errorResponse('Must send param "postData" when sending a POST request.')
    }

    await browserRequest(ctx, params)
  },
}
|
||||
|
||||
/**
 * Dispatches an API call to the handler registered for `params.cmd`.
 *
 * Only commands defined in `routes` itself are accepted. Names that merely
 * exist on `Object.prototype` (for example 'constructor' or 'toString') are
 * reported as invalid instead of being invoked as handlers.
 *
 * @param ctx request context used by the handlers to send the response
 * @param params parsed API call
 */
export default async function Router(ctx: RequestContext, params: BaseAPICall): Promise<void> {
  const isKnownCommand = Object.prototype.hasOwnProperty.call(routes, params.cmd)
  const route = isKnownCommand ? routes[params.cmd] : undefined
  if (route) { return await route(ctx, params) }
  return ctx.errorResponse(`The command '${params.cmd}' is invalid.`)
}
|
||||
49
src/server.ts
Normal file
49
src/server.ts
Normal file
@@ -0,0 +1,49 @@
|
||||
import log from './services/log'
|
||||
import {testWebBrowserInstallation} from "./services/sessions";
|
||||
|
||||
const app = require("./app");
|
||||
const version: string = 'v' + require('../package.json').version
|
||||
const serverPort: number = Number(process.env.PORT) || 8191
|
||||
const serverHost: string = process.env.HOST || '0.0.0.0'
|
||||
|
||||
/**
 * Validates the environment variables read by the service. On the first
 * variable that is set to a value outside its allowed list, an error is
 * logged and the process exits with code 1. Unset or empty variables are
 * accepted.
 */
function validateEnvironmentVariables() {
  // ip and port variables are validated by nodejs
  const logLevels = ['error', 'warn', 'info', 'verbose', 'debug']
  const booleans = ['true', 'false']

  // true when the variable is set (non-empty) and its value is not allowed
  const isWrong = (value: string | undefined, allowed: string[]): boolean =>
    value !== undefined && value !== '' && allowed.indexOf(value) === -1

  if (isWrong(process.env.LOG_LEVEL, logLevels)) {
    log.error(`The environment variable 'LOG_LEVEL' is wrong. Check the documentation.`);
    process.exit(1);
  }
  if (isWrong(process.env.LOG_HTML, booleans)) {
    log.error(`The environment variable 'LOG_HTML' is wrong. Check the documentation.`);
    process.exit(1);
  }
  if (isWrong(process.env.HEADLESS, booleans)) {
    log.error(`The environment variable 'HEADLESS' is wrong. Check the documentation.`);
    process.exit(1);
  }
  // todo: fix resolvers
  // try {
  //   getCaptchaSolver();
  // } catch (e) {
  //   log.error(`The environment variable 'CAPTCHA_SOLVER' is wrong. ${e.message}`);
  //   process.exit(1);
  // }
}
|
||||
|
||||
// Init
log.info(`FlareSolverr ${version}`);
log.debug('Debug log enabled');

process.on('SIGTERM', () => {
  // Capture signal on Docker Stop #158
  log.info("Process interrupted")
  process.exit(0)
})

validateEnvironmentVariables();

testWebBrowserInstallation()
  .catch(e => {
    // without a working web browser the service cannot do anything useful:
    // report the failure and stop instead of leaving the rejection unhandled
    log.error("Error starting the web browser.", e);
    process.exit(1);
  })
  .then(() => {
    // Start server
    app.listen(serverPort, serverHost, () => {
      log.info(`Listening on http://${serverHost}:${serverPort}`);
    })
  })
|
||||
@@ -22,10 +22,13 @@ function toIsoString(date: Date) {
|
||||
}
|
||||
|
||||
export default {
|
||||
incRequests: () => { requests++ },
|
||||
incRequests: () => {
|
||||
requests++
|
||||
},
|
||||
html(html: string) {
|
||||
if (LOG_HTML)
|
||||
this.debug(html)
|
||||
if (LOG_HTML) {
|
||||
this.debug(html)
|
||||
}
|
||||
},
|
||||
...require('console-log-level')(
|
||||
{level: process.env.LOG_LEVEL || 'info',
|
||||
161
src/services/sessions.ts
Normal file
161
src/services/sessions.ts
Normal file
@@ -0,0 +1,161 @@
|
||||
import {v1 as UUIDv1} from 'uuid'
|
||||
import * as path from 'path'
|
||||
import {SetCookie, Browser} from 'puppeteer'
|
||||
|
||||
import log from './log'
|
||||
import {Proxy} from "../controllers/v1";
|
||||
|
||||
const os = require('os');
|
||||
const fs = require('fs');
|
||||
const puppeteer = require('puppeteer');
|
||||
|
||||
export interface SessionsCacheItem {
|
||||
sessionId: string
|
||||
browser: Browser
|
||||
}
|
||||
|
||||
interface SessionsCache {
|
||||
[key: string]: SessionsCacheItem
|
||||
}
|
||||
|
||||
export interface SessionCreateOptions {
|
||||
oneTimeSession: boolean
|
||||
cookies?: SetCookie[],
|
||||
maxTimeout?: number
|
||||
proxy?: Proxy
|
||||
}
|
||||
|
||||
const sessionCache: SessionsCache = {}
|
||||
let webBrowserUserAgent: string;
|
||||
|
||||
/**
 * Builds the Firefox preferences used when launching the browser.
 *
 * The base preferences disable the new-tab page, the close-tabs warning,
 * first-run telemetry, the welcome page and image loading. When a proxy URL
 * is given, the proxy preferences for all protocols are added on top.
 *
 * The proxy URL is parsed with the WHATWG URL parser, so credentials and
 * paths do not end up in the host name. A URL without a scheme
 * (`host:port`) is read as `http://host:port`. When the port is omitted,
 * 80 is used for http and 443 for https.
 *
 * @param proxy optional proxy settings; only `proxy.url` is read here
 * @returns the preferences object
 * @throws TypeError when the proxy URL cannot be parsed
 * @throws Error when the proxy URL has no host, or no port and no known default
 */
function buildExtraPrefsFirefox(proxy: Proxy): object {
  // Default configurations are defined here
  // https://github.com/puppeteer/puppeteer/blob/v3.3.0/src/Launcher.ts#L481
  const extraPrefsFirefox = {
    // Disable newtabpage
    "browser.newtabpage.enabled": false,
    "browser.startup.homepage": "about:blank",

    // Do not warn when closing all open tabs
    "browser.tabs.warnOnClose": false,

    // Disable telemetry
    "toolkit.telemetry.reportingpolicy.firstRun": false,

    // Disable first-run welcome page
    "startup.homepage_welcome_url": "about:blank",
    "startup.homepage_welcome_url.additional": "",

    // Disable images to speed up load
    "permissions.default.image": 2
  }

  // proxy.url format => http://<host>:<port>
  if (proxy && proxy.url) {
    const hasScheme = /^[a-z][a-z0-9+.-]*:\/\//i.test(proxy.url)
    const parsedUrl = new URL(hasScheme ? proxy.url : `http://${proxy.url}`)
    const host = parsedUrl.hostname
    if (!host) {
      throw new Error(`The proxy URL '${proxy.url}' does not contain a host.`)
    }

    // the URL parser reports the default port of http/https as an empty string
    let port: number
    if (parsedUrl.port !== '') {
      port = parseInt(parsedUrl.port, 10)
    } else if (parsedUrl.protocol === 'http:') {
      port = 80
    } else if (parsedUrl.protocol === 'https:') {
      port = 443
    } else {
      throw new Error(`The proxy URL '${proxy.url}' does not contain a port.`)
    }

    const proxyPrefs = {
      // Proxy configuration
      "network.proxy.ftp": host,
      "network.proxy.ftp_port": port,
      "network.proxy.http": host,
      "network.proxy.http_port": port,
      "network.proxy.share_proxy_settings": true,
      "network.proxy.socks": host,
      "network.proxy.socks_port": port,
      "network.proxy.ssl": host,
      "network.proxy.ssl_port": port,
      "network.proxy.type": 1
    }

    // merge objects
    Object.assign(extraPrefsFirefox, proxyPrefs);
  }

  return extraPrefsFirefox;
}
|
||||
|
||||
/**
 * Returns the user agent stored in the module-level `webBrowserUserAgent`
 * variable (assigned by `testWebBrowserInstallation`; undefined until then).
 */
export function getUserAgent() {
  const userAgent = webBrowserUserAgent
  return userAgent
}
|
||||
|
||||
/**
 * Checks at start-up that the web browser can be used and records its
 * user agent.
 *
 * Verifies that the user home directory is accessible, launches a one-time
 * browser session, loads a page and stores `navigator.userAgent` in the
 * module-level `webBrowserUserAgent`. A "Linux arm" platform token in the
 * user agent is replaced with "Linux x86_64".
 *
 * The test session is destroyed even when the navigation fails.
 *
 * @throws when the home directory is not accessible or the browser cannot be
 *         launched or used
 */
export async function testWebBrowserInstallation(): Promise<void> {
  log.info("Testing web browser installation...")

  // check user home dir. this dir will be used by Firefox
  const homeDir = os.homedir();
  fs.accessSync(homeDir, fs.constants.F_OK | fs.constants.R_OK | fs.constants.W_OK | fs.constants.X_OK);
  log.debug("FlareSolverr user home directory is OK: " + homeDir)

  // test web browser
  const session = await create(null, {
    oneTimeSession: true
  })
  try {
    const page = await session.browser.newPage()
    await page.goto("https://www.google.com")
    webBrowserUserAgent = await page.evaluate(() => navigator.userAgent)

    // replace Linux ARM user-agent because it's detected
    if (webBrowserUserAgent.toLowerCase().includes('linux arm')) {
      webBrowserUserAgent = webBrowserUserAgent.replace(/linux arm[^;]+;/i, 'Linux x86_64;')
    }

    log.info("FlareSolverr User-Agent: " + webBrowserUserAgent)
    await page.close()
  } finally {
    // do not leave the test browser running when the check fails
    await destroy(session.sessionId)
  }

  log.info("Test successful")
}
|
||||
|
||||
/**
 * Launches a web browser and registers it in the session cache.
 *
 * When a session with the same id is already registered, it is replaced and
 * the browser of the replaced session is closed so that it is not left
 * running without any reference to it.
 *
 * @param session id of the session; a new id is generated when falsy
 * @param options session options; only `options.proxy` is read here
 * @returns the cache entry of the new session
 * @throws Error when the browser cannot be launched
 */
export async function create(session: string, options: SessionCreateOptions): Promise<SessionsCacheItem> {
  const sessionId = session || UUIDv1()

  // NOTE: cookies can't be set in the session, you need to open the page first

  const puppeteerOptions: any = {
    product: 'firefox',
    headless: process.env.HEADLESS !== 'false',
  }

  puppeteerOptions.extraPrefsFirefox = buildExtraPrefsFirefox(options.proxy)

  // if we are running inside executable binary, change browser path
  if (typeof (process as any).pkg !== 'undefined') {
    const exe = process.platform === "win32" ? 'firefox.exe' : 'firefox';
    puppeteerOptions.executablePath = path.join(path.dirname(process.execPath), 'firefox', exe)
  }

  log.debug('Launching web browser...')
  let browser: Browser = await puppeteer.launch(puppeteerOptions)
  if (!browser) {
    throw new Error(`Failed to launch web browser.`)
  }

  // remember the session that is about to be replaced, if any
  const replaced = Object.prototype.hasOwnProperty.call(sessionCache, sessionId)
    ? sessionCache[sessionId]
    : undefined

  sessionCache[sessionId] = {
    sessionId: sessionId,
    browser: browser
  }

  if (replaced && replaced.browser) {
    // best effort: the new session is already usable at this point
    try {
      await replaced.browser.close()
    } catch (closeError) {
      log.debug("Unable to close the replaced web browser: " + closeError)
    }
  }

  return sessionCache[sessionId]
}
|
||||
|
||||
/**
 * Lists the ids of the sessions currently held in the session cache.
 *
 * @returns a new array with the session ids
 */
export function list(): string[] {
  const sessionIds = Object.keys(sessionCache)
  return sessionIds
}
|
||||
|
||||
/**
 * Closes the browser of a session and removes the session from the cache.
 *
 * @param id id of the session to destroy
 * @returns true when the session existed and was removed, false when the id
 *          is empty, unknown or the session holds no browser
 */
export async function destroy(id: string): Promise<boolean>{
  if (!id || !sessionCache.hasOwnProperty(id)) {
    return false
  }

  const entry = sessionCache[id]
  if (!entry.browser) {
    return false
  }

  // the entry is only removed once the browser has been closed
  await entry.browser.close()
  delete sessionCache[id]
  return true
}
|
||||
|
||||
/**
 * Looks up a session in the session cache.
 *
 * Only entries stored on the cache itself are returned: ids that merely match an inherited
 * `Object.prototype` member (for example "constructor" or "toString") are reported as missing.
 *
 * @param id Identifier of the session.
 * @returns The cached session, or `undefined` when no session is stored under `id`.
 */
export function get(id: string): SessionsCacheItem | undefined {
  if (!Object.prototype.hasOwnProperty.call(sessionCache, id)) {
    return undefined
  }
  return sessionCache[id]
}
|
||||
216
src/services/solver.ts
Normal file
216
src/services/solver.ts
Normal file
@@ -0,0 +1,216 @@
|
||||
import {Response, Headers, Page} from 'puppeteer'
|
||||
const Timeout = require('await-timeout');
|
||||
|
||||
import log from './log'
|
||||
import {SessionCreateOptions, SessionsCacheItem} from "./sessions";
|
||||
import {V1Request} from "../controllers/v1";
|
||||
import cloudflareProvider from '../providers/cloudflare';
|
||||
|
||||
const sessions = require('./sessions')
|
||||
|
||||
/**
 * Page data collected once the challenge handling has finished.
 */
export interface ChallengeResolutionResultT {
  /** URL reported by the page after navigation. */
  url: string;
  /** HTTP status code of the last navigation response. */
  status: number;
  /** Headers of the last navigation response; cleared when only cookies are requested. */
  headers?: Headers;
  /** HTML content of the page; left empty when only cookies are requested. */
  response: string;
  /** Cookies held by the page. */
  cookies: object[];
  /** User-Agent used for the request; cleared when only cookies are requested. */
  userAgent: string;
}
|
||||
|
||||
/**
 * Outcome of a browser request: a status flag, a human readable message and the page data.
 */
export interface ChallengeResolutionT {
  /** Outcome flag, e.g. 'ok' or "error". */
  status?: string;
  /** Empty on success, otherwise a description of what went wrong. */
  message: string;
  /** Data read from the page. */
  result: ChallengeResolutionResultT;
}
|
||||
|
||||
/**
 * Runs `resolveChallenge` and gives up once `params.maxTimeout` milliseconds have elapsed.
 *
 * The challenge promise is raced against a timer; whichever settles first decides the outcome.
 * The timer is always cleared afterwards.
 *
 * @param params Request parameters; `maxTimeout` is the time budget in milliseconds.
 * @param session Session whose browser is used to resolve the challenge.
 * @returns Whatever `resolveChallenge` resolves with when it settles before the timer.
 */
async function resolveChallengeWithTimeout(params: V1Request, session: SessionsCacheItem) {
  const watchdog = new Timeout();
  try {
    const challenge = resolveChallenge(params, session);
    const deadline = watchdog.set(params.maxTimeout, `Maximum timeout reached. maxTimeout=${params.maxTimeout} (ms)`);
    const winner = await Promise.race([challenge, deadline]);
    return winner;
  } finally {
    watchdog.clear();
  }
}
|
||||
|
||||
/**
 * Opens a page in the session's browser, navigates to the requested URL, lets the Cloudflare
 * provider deal with any challenge and collects the resulting page data.
 *
 * The page is closed on every exit path. Closing it only on success would leave one page open
 * per failed request, which accumulates in sessions that are reused across requests.
 *
 * @param params Request parameters (url, method, cookies, proxy, maxTimeout, returnOnlyCookies).
 * @param session Session providing the browser.
 * @returns The resolution payload. A failure reported by the Cloudflare provider does not throw:
 *          it is returned with `status` "error" and a message starting with "Cloudflare ".
 * @throws Any other error raised while driving the page, after logging it.
 */
async function resolveChallenge(params: V1Request, session: SessionsCacheItem): Promise<ChallengeResolutionT | void> {
  // declared outside the try block so the failure handler can still close the page
  let page: Page | undefined
  try {
    let status = 'ok'
    let message = ''

    page = await session.browser.newPage()

    // the Puppeteer timeout should be half the maxTimeout because we reload the page and wait for the challenge
    // the user can set a really high maxTimeout if they want to
    await page.setDefaultNavigationTimeout(params.maxTimeout / 2)

    // the user-agent is changed just for linux arm build
    await page.setUserAgent(sessions.getUserAgent())

    // set the proxy
    if (params.proxy) {
      log.debug(`Using proxy: ${params.proxy.url}`);
      // todo: credentials are not working
      // if (params.proxy.username) {
      //   await page.authenticate({
      //     username: params.proxy.username,
      //     password: params.proxy.password
      //   });
      // }
    }

    // go to the page
    log.debug(`Navigating to... ${params.url}`)
    let response: Response = await gotoPage(params, page);

    // set cookies
    if (params.cookies) {
      for (const cookie of params.cookies) {
        // the other fields in the cookie can cause issues
        await page.setCookie({
          "name": cookie.name,
          "value": cookie.value
        })
      }
      // reload the page so the request is sent with the cookies
      response = await gotoPage(params, page);
    }

    // log html in debug mode
    log.html(await page.content())

    // detect protection services and solve challenges
    try {
      response = await cloudflareProvider(params.url, page, response);

      // if the response is ok
      // reload the page to be sure we get the real page
      log.debug("Reloading the page")
      response = await gotoPage(params, page);
    } catch (e) {
      status = "error";
      message = "Cloudflare " + e.toString();
    }

    const payload: ChallengeResolutionT = {
      status,
      message,
      result: {
        url: page.url(),
        status: response.status(),
        headers: response.headers(),
        response: null,
        cookies: await page.cookies(),
        userAgent: sessions.getUserAgent()
      }
    }

    if (params.returnOnlyCookies) {
      payload.result.headers = null;
      payload.result.userAgent = null;
    } else {
      payload.result.response = await page.content()
    }

    // make sure the page is closed because if it isn't an error will be thrown:
    // when a user uses a temporary session, the browser may be quit before
    // the page is properly closed.
    const finishedPage = page
    page = undefined // the failure handler below must not try to close it a second time
    await finishedPage.close()

    return payload
  } catch (e) {
    log.error("Unexpected error: " + e);
    if (page) {
      // best effort: the browser may already be gone, and the original error is the one that matters
      try {
        await page.close()
      } catch (closeError) {
        log.debug("Unable to close the page: " + closeError)
      }
    }
    throw e;
  }
}
|
||||
|
||||
/**
 * Serializes a value as a JavaScript string literal that is safe to embed in an inline
 * `<script>` element: quotes and backslashes are escaped by JSON.stringify, and `<` is
 * written as `\u003c` so the text can never contain `</script>` or `<!--`.
 */
function toInlineScriptLiteral(value: string): string {
  return JSON.stringify(String(value))
    .replace(/</g, '\\u003c')
    .replace(/\u2028/g, '\\u2028')
    .replace(/\u2029/g, '\\u2029')
}

/**
 * Navigates the page to `params.url`, using GET by default or a form submission for POST.
 *
 * For POST requests the URL is first loaded normally, then the page content is replaced by
 * a generated document whose script builds a hidden form from `params.postData`
 * (a `key=value&key=value` string) and submits it to `params.url`.
 *
 * @param params Request parameters; `method`, `url` and (for POST) `postData` are read.
 * @param page Page to drive.
 * @returns The response of the initial `page.goto` navigation. For POST requests this is the
 *          response of the preliminary load, not of the form submission.
 */
async function gotoPage(params: V1Request, page: Page): Promise<Response> {
  const response: Response = await page.goto(params.url, {waitUntil: 'domcontentloaded'});
  if (params.method != 'POST') {
    return response
  }

  // post hack
  // the page was first requested without the POST data (see above); now submit the form from inside it.
  // The URL and the POST data are embedded as escaped string literals so that quotes, backslashes
  // or markup in them cannot break out of the generated script.
  await page.setContent(
    `
<!DOCTYPE html>
<html>
<body>
<script>

  function parseQuery(queryString) {
    var query = {};
    var pairs = (queryString[0] === '?' ? queryString.substr(1) : queryString).split('&');
    for (var i = 0; i < pairs.length; i++) {
      // split on the first '=' only, so values that contain '=' are kept whole
      var pair = pairs[i];
      var separator = pair.indexOf('=');
      var key = separator === -1 ? pair : pair.substring(0, separator);
      var value = separator === -1 ? '' : pair.substring(separator + 1);
      query[decodeURIComponent(key)] = decodeURIComponent(value);
    }
    return query;
  }

  const form = document.createElement('form');
  form.method = 'POST';
  form.action = ${toInlineScriptLiteral(params.url)};

  const params = parseQuery(${toInlineScriptLiteral(params.postData)});
  for (const key in params) {
    if (params.hasOwnProperty(key)) {
      const hiddenField = document.createElement('input');
      hiddenField.type = 'hidden';
      hiddenField.name = key;
      hiddenField.value = params[key];
      form.appendChild(hiddenField);
    }
  }

  document.body.appendChild(form);
  form.submit();

</script>
</body>
</html>
`
  );
  await page.waitFor(2000)
  try {
    await page.waitForNavigation({waitUntil: 'domcontentloaded', timeout: 2000})
  } catch (e) {
    // best effort: the navigation triggered by the form may already have finished during the wait above
  }

  return response
}
|
||||
|
||||
/**
 * Resolves a request in a web browser, either inside an existing session or inside a
 * throw-away session that is destroyed once the request has finished.
 *
 * @param params Request parameters; when `params.session` is `undefined` a one-time session is used.
 * @returns The challenge resolution produced for the request.
 * @throws Error when the referenced session does not exist, or (prefixed with
 *         "Unable to process browser request. ") when resolving the request fails.
 */
export async function browserRequest(params: V1Request): Promise<ChallengeResolutionT> {
  const oneTimeSession = params.session === undefined;

  let session: SessionsCacheItem
  if (oneTimeSession) {
    const options: SessionCreateOptions = {
      oneTimeSession: oneTimeSession,
      cookies: params.cookies,
      maxTimeout: params.maxTimeout,
      proxy: params.proxy
    }
    session = await sessions.create(null, options)
  } else {
    session = sessions.get(params.session)
  }

  if (!session) {
    throw Error('This session does not exist. Use \'list_sessions\' to see all the existing sessions.')
  }

  try {
    const resolution = await resolveChallengeWithTimeout(params, session)
    return resolution
  } catch (error) {
    throw Error("Unable to process browser request. " + error)
  } finally {
    // a one-time session only lives for the duration of this request
    if (oneTimeSession) {
      await sessions.destroy(session.sessionId)
    }
  }
}
|
||||
150
src/session.ts
150
src/session.ts
@@ -1,150 +0,0 @@
|
||||
import * as os from 'os'
|
||||
import * as path from 'path'
|
||||
import * as fs from 'fs'
|
||||
|
||||
import puppeteer from 'puppeteer-extra'
|
||||
import { LaunchOptions, Headers, SetCookie } from 'puppeteer'
|
||||
|
||||
import log from './log'
|
||||
import { deleteFolderRecursive, sleep, removeEmptyFields } from './utils'
|
||||
import * as Puppeteer from "puppeteer-extra/dist/puppeteer";
|
||||
|
||||
/** Default values kept alongside a session's browser. */
interface SessionPageDefaults {
  /** Default User-Agent of the session. */
  userAgent?: string;
  /** Default headers of the session. */
  headers?: Headers;
}
|
||||
|
||||
/** Entry of the session cache: the running browser plus what is needed to manage it. */
export interface SessionsCacheItem {
  /** Browser instance launched for the session. */
  browser: Puppeteer.Browser;
  /** Profile directory of the browser; only set when one was created for the session. */
  userDataDir?: string;
  /** Defaults recorded when the session was created. */
  defaults: SessionPageDefaults;
}
|
||||
|
||||
/** Dictionary of the known sessions, indexed by session id. */
interface SessionsCache {
  [sessionId: string]: SessionsCacheItem;
}
|
||||
|
||||
/** Options accepted when a session is created. */
interface SessionCreateOptions {
  /** When true, no profile directory is prepared for the session. */
  oneTimeSession?: boolean;
  /** User-Agent recorded in the session defaults. */
  userAgent?: string;
  /** Cookies set on a page right after the browser is launched. */
  cookies?: SetCookie[];
  /** Headers recorded in the session defaults. */
  headers?: Headers;
  /** Timeout recorded in the session defaults. */
  maxTimeout?: number;
  /** Proxy settings; `proxy.url` is passed to the browser as `--proxy-server`. */
  proxy?: any;
}
|
||||
|
||||
// in-memory registry of the launched browsers, indexed by session id
const sessionCache: SessionsCache = {}

// setting "user-agent-override" evasion is not working for us because it can't be changed
// in each request. we set the user-agent in the browser args instead
const stealthPlugin = require('puppeteer-extra-plugin-stealth')
puppeteer.use(stealthPlugin())
|
||||
|
||||
/**
 * Builds the path of the browser profile directory that belongs to a session.
 *
 * @param id Session identifier.
 * @returns A path inside the operating system's temporary directory.
 */
function userDataDirFromId(id: string): string {
  const tempRoot = os.tmpdir()
  const profileFolder = `/puppeteer_chrome_profile_${id}`
  return path.join(tempRoot, profileFolder)
}
|
||||
|
||||
/**
 * Makes sure the profile directory of a session exists.
 *
 * @param id Session identifier.
 * @returns The path of the profile directory.
 */
function prepareBrowserProfile(id: string): string {
  // TODO: maybe pass SessionCreateOptions for loading later?
  const profileDir = userDataDirFromId(id)

  const alreadyCreated = fs.existsSync(profileDir)
  if (!alreadyCreated) {
    fs.mkdirSync(profileDir, { recursive: true })
  }

  return profileDir
}
|
||||
|
||||
export default {
  /**
   * Launches a Chrome instance and registers it in the session cache under `id`.
   *
   * The launch is attempted up to three times when Puppeteer reports
   * 'Failed to launch the browser process!'; any other launch error is rethrown immediately.
   *
   * @param id Identifier to register the session under.
   * @param options Session options (cookies, proxy, defaults, one-time flag).
   * @returns The cache entry stored for the session.
   * @throws Error when the browser could not be launched.
   */
  create: async (id: string, { cookies, oneTimeSession, userAgent, headers, maxTimeout, proxy }: SessionCreateOptions): Promise<SessionsCacheItem> => {
    const args = [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage' // issue #45
    ];
    if (proxy && proxy.url) {
      args.push(`--proxy-server=${proxy.url}`);
    }

    const puppeteerOptions: LaunchOptions = {
      product: 'chrome',
      headless: process.env.HEADLESS !== 'false',
      args
    }

    if (!oneTimeSession) {
      log.debug('Creating userDataDir for session.')
      puppeteerOptions.userDataDir = prepareBrowserProfile(id)
    }

    // if we are running inside executable binary, change chrome path
    if (typeof (process as any).pkg !== 'undefined') {
      const exe = process.platform === "win32" ? 'chrome.exe' : 'chrome';
      puppeteerOptions.executablePath = path.join(path.dirname(process.execPath), 'chrome', exe)
    }

    log.debug('Launching browser...')

    // TODO: maybe access env variable?
    // TODO: sometimes browser instances are created and not connected to correctly.
    //       how do we handle/quit those instances inside Docker?
    // exactly three attempts, matching the error message below
    const maxLaunchAttempts = 3
    let browser: Puppeteer.Browser | undefined;

    for (let attempt = 1; attempt <= maxLaunchAttempts; attempt++) {
      try {
        browser = await puppeteer.launch(puppeteerOptions)
        break
      } catch (e) {
        if (e.message !== 'Failed to launch the browser process!')
          throw e
        log.warn('Failed to open browser, trying again...')
      }
    }

    if (!browser) { throw Error(`Failed to launch browser 3 times in a row.`) }

    if (cookies) {
      // NOTE(review): this page is left open after the cookies are set; confirm whether that is intended
      const page = await browser.newPage()
      await page.setCookie(...cookies)
    }

    sessionCache[id] = {
      browser: browser,
      userDataDir: puppeteerOptions.userDataDir,
      defaults: removeEmptyFields({
        userAgent,
        headers,
        maxTimeout
      })
    }

    return sessionCache[id]
  },

  /** Lists the identifiers of the cached sessions. */
  list: (): string[] => Object.keys(sessionCache),

  // TODO: create a sessions.close that doesn't rm the userDataDir

  /**
   * Closes the browser of a session, removes it from the cache and deletes its profile directory.
   *
   * @param id Identifier of the session.
   * @returns `true` when the session was destroyed, `false` when no such session (or browser) exists.
   * @throws Error when the profile directory cannot be deleted.
   */
  destroy: async (id: string): Promise<boolean> => {
    // an unknown id must yield `false`, not a TypeError from destructuring `undefined`
    const entry = Object.prototype.hasOwnProperty.call(sessionCache, id) ? sessionCache[id] : undefined
    if (!entry || !entry.browser) {
      return false
    }

    const { browser, userDataDir } = entry
    await browser.close()
    delete sessionCache[id]
    if (userDataDir) {
      const userDataDirPath = userDataDirFromId(id)
      try {
        // for some reason this keeps an error from being thrown in Windows, figures
        await sleep(5000)
        deleteFolderRecursive(userDataDirPath)
      } catch (e) {
        console.error(e)
        throw Error(`Error deleting browser session folder. ${e.message}`)
      }
    }
    return true
  },

  /**
   * Looks up a cached session.
   *
   * @param id Identifier of the session.
   * @returns The cached session, or `false` when no session is stored under `id`.
   */
  get: (id: string): SessionsCacheItem | false =>
    (Object.prototype.hasOwnProperty.call(sessionCache, id) && sessionCache[id]) || false
}
|
||||
538
src/tests/app.test.ts
Normal file
538
src/tests/app.test.ts
Normal file
@@ -0,0 +1,538 @@
|
||||
// noinspection DuplicatedCode
|
||||
|
||||
import {Response} from "superagent";
|
||||
import {V1ResponseBase, V1ResponseSession, V1ResponseSessions, V1ResponseSolution} from "../controllers/v1"
|
||||
|
||||
const request = require("supertest");
|
||||
const app = require("../app");
|
||||
const sessions = require('../services/sessions');
|
||||
const version: string = 'v' + require('../../package.json').version
|
||||
|
||||
const proxyUrl = "http://127.0.0.1:8888"
|
||||
const googleUrl = "https://www.google.com";
|
||||
const postUrl = "https://ptsv2.com/t/qv4j3-1634496523";
|
||||
const cfUrl = "https://pirateiro.com/torrents/?search=harry";
|
||||
const cfCaptchaUrl = "https://idope.se"
|
||||
const cfBlockedUrl = "https://www.torrentmafya.org/table.php"
|
||||
|
||||
// Before any test runs, check the web browser installation through the sessions service
beforeAll(async function initSession() {
  // Init session
  await sessions.testWebBrowserInstallation();
});
|
||||
|
||||
// After every test, destroy whatever sessions are still registered, one at a time
afterEach(async function cleanSessions() {
  // Clean sessions
  for (const sessionId of sessions.list()) {
    await sessions.destroy(sessionId);
  }
});
|
||||
|
||||
// The root endpoint answers GET with the service banner and rejects other verbs
describe("Test '/' path", () => {
  test("GET method should return OK ", async () => {
    const res: Response = await request(app).get("/");
    expect(res.statusCode).toBe(200);

    const banner = res.body;
    expect(banner.msg).toBe("FlareSolverr is ready!");
    expect(banner.version).toBe(version);
    expect(banner.userAgent).toContain("Firefox/")
  });

  test("POST method should fail", async () => {
    const res: Response = await request(app).post("/");
    expect(res.statusCode).toBe(404);

    const failure = res.body;
    expect(failure.error).toBe("Unknown resource or HTTP verb");
  });
});
|
||||
|
||||
// The health endpoint answers GET with a status flag
describe("Test '/health' path", () => {
  test("GET method should return OK", async () => {
    const res: Response = await request(app).get("/health");
    expect(res.statusCode).toBe(200);

    const health = res.body;
    expect(health.status).toBe("ok");
  });
});
|
||||
|
||||
// Unknown routes are answered with a 404 and an explanatory error
describe("Test '/wrong' path", () => {
  test("GET method should fail", async () => {
    const res: Response = await request(app).get("/wrong");
    expect(res.statusCode).toBe(404);

    const failure = res.body;
    expect(failure.error).toBe("Unknown resource or HTTP verb");
  });
});
|
||||
|
||||
describe("Test '/v1' path", () => {
|
||||
test("Cmd 'request.bad' should fail", async () => {
|
||||
const payload = {
|
||||
"cmd": "request.bad",
|
||||
"url": googleUrl
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(500);
|
||||
|
||||
const apiResponse: V1ResponseBase = response.body;
|
||||
expect(apiResponse.status).toBe("error");
|
||||
expect(apiResponse.message).toBe("Error: The command 'request.bad' is invalid.");
|
||||
expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
|
||||
expect(apiResponse.endTimestamp).toBeGreaterThanOrEqual(apiResponse.startTimestamp);
|
||||
expect(apiResponse.version).toBe(version);
|
||||
});
|
||||
|
||||
test("Cmd 'request.get' should return OK with no Cloudflare", async () => {
|
||||
const payload = {
|
||||
"cmd": "request.get",
|
||||
"url": googleUrl
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSolution = response.body;
|
||||
expect(apiResponse.status).toBe("ok");
|
||||
expect(apiResponse.message).toBe("");
|
||||
expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
|
||||
expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
|
||||
expect(apiResponse.version).toBe(version);
|
||||
|
||||
const solution = apiResponse.solution;
|
||||
expect(solution.url).toContain(googleUrl)
|
||||
expect(solution.status).toBe(200);
|
||||
expect(Object.keys(solution.headers).length).toBeGreaterThan(0)
|
||||
expect(solution.response).toContain("<!DOCTYPE html>")
|
||||
expect(Object.keys(solution.cookies).length).toBeGreaterThan(0)
|
||||
expect(solution.userAgent).toContain("Firefox/")
|
||||
});
|
||||
|
||||
test("Cmd 'request.get' should return OK with Cloudflare JS", async () => {
|
||||
const payload = {
|
||||
"cmd": "request.get",
|
||||
"url": cfUrl
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSolution = response.body;
|
||||
expect(apiResponse.status).toBe("ok");
|
||||
expect(apiResponse.message).toBe("");
|
||||
expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
|
||||
expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
|
||||
expect(apiResponse.version).toBe(version);
|
||||
|
||||
const solution = apiResponse.solution;
|
||||
expect(solution.url).toContain(cfUrl)
|
||||
expect(solution.status).toBe(200);
|
||||
expect(Object.keys(solution.headers).length).toBeGreaterThan(0)
|
||||
expect(solution.response).toContain("<!DOCTYPE html>")
|
||||
expect(Object.keys(solution.cookies).length).toBeGreaterThan(0)
|
||||
expect(solution.userAgent).toContain("Firefox/")
|
||||
|
||||
const cfCookie: string = (solution.cookies as any[]).filter(function(cookie) {
|
||||
return cookie.name == "cf_clearance";
|
||||
})[0].value
|
||||
expect(cfCookie.length).toBeGreaterThan(30)
|
||||
});
|
||||
|
||||
test("Cmd 'request.get' should return fail with Cloudflare CAPTCHA", async () => {
|
||||
const payload = {
|
||||
"cmd": "request.get",
|
||||
"url": cfCaptchaUrl
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSolution = response.body;
|
||||
expect(apiResponse.status).toBe("error");
|
||||
expect(apiResponse.message).toBe("Cloudflare Error: FlareSolverr can not resolve CAPTCHA challenges. Since the captcha doesn't always appear, you may have better luck with the next request.");
|
||||
expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
|
||||
expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
|
||||
expect(apiResponse.version).toBe(version);
|
||||
// solution is filled but not useful
|
||||
expect(apiResponse.solution.url).toContain(cfCaptchaUrl)
|
||||
});
|
||||
|
||||
test("Cmd 'request.post' should return fail with Cloudflare Blocked", async () => {
|
||||
const payload = {
|
||||
"cmd": "request.post",
|
||||
"url": cfBlockedUrl,
|
||||
"postData": "test1=test2"
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSolution = response.body;
|
||||
expect(apiResponse.status).toBe("error");
|
||||
expect(apiResponse.message).toBe("Cloudflare Error: Cloudflare has blocked this request. Probably your IP is banned for this site, check in your web browser.");
|
||||
expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
|
||||
expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
|
||||
expect(apiResponse.version).toBe(version);
|
||||
// solution is filled but not useful
|
||||
expect(apiResponse.solution.url).toContain(cfBlockedUrl)
|
||||
});
|
||||
|
||||
test("Cmd 'request.get' should return OK with 'cookies' param", async () => {
|
||||
const payload = {
|
||||
"cmd": "request.get",
|
||||
"url": googleUrl,
|
||||
"cookies": [
|
||||
{
|
||||
"name": "testcookie1",
|
||||
"value": "testvalue1"
|
||||
},
|
||||
{
|
||||
"name": "testcookie2",
|
||||
"value": "testvalue2"
|
||||
}
|
||||
]
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSolution = response.body;
|
||||
expect(apiResponse.status).toBe("ok");
|
||||
|
||||
const solution = apiResponse.solution;
|
||||
expect(solution.url).toContain(googleUrl)
|
||||
expect(Object.keys(solution.cookies).length).toBeGreaterThan(1)
|
||||
const cookie1: string = (solution.cookies as any[]).filter(function(cookie) {
|
||||
return cookie.name == "testcookie1";
|
||||
})[0].value
|
||||
expect(cookie1).toBe("testvalue1")
|
||||
const cookie2: string = (solution.cookies as any[]).filter(function(cookie) {
|
||||
return cookie.name == "testcookie2";
|
||||
})[0].value
|
||||
expect(cookie2).toBe("testvalue2")
|
||||
});
|
||||
|
||||
test("Cmd 'request.get' should return OK with 'returnOnlyCookies' param", async () => {
|
||||
const payload = {
|
||||
"cmd": "request.get",
|
||||
"url": googleUrl,
|
||||
"returnOnlyCookies": true
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSolution = response.body;
|
||||
|
||||
const solution = apiResponse.solution;
|
||||
expect(solution.url).toContain(googleUrl)
|
||||
expect(solution.status).toBe(200);
|
||||
expect(solution.headers).toBe(null)
|
||||
expect(solution.response).toBe(null)
|
||||
expect(Object.keys(solution.cookies).length).toBeGreaterThan(0)
|
||||
expect(solution.userAgent).toBe(null)
|
||||
});
|
||||
|
||||
test("Cmd 'request.get' should return OK with 'proxy' param", async () => {
|
||||
/*
|
||||
To configure TinyProxy in local:
|
||||
* sudo vim /etc/tinyproxy/tinyproxy.conf
|
||||
* edit => LogFile "/tmp/tinyproxy.log"
|
||||
* edit => Syslog Off
|
||||
* sudo tinyproxy -d
|
||||
* sudo tail -f /tmp/tinyproxy.log
|
||||
*/
|
||||
const payload = {
|
||||
"cmd": "request.get",
|
||||
"url": googleUrl,
|
||||
"proxy": {
|
||||
"url": proxyUrl
|
||||
}
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSolution = response.body;
|
||||
expect(apiResponse.status).toBe("ok");
|
||||
|
||||
const solution = apiResponse.solution;
|
||||
expect(solution.url).toContain(googleUrl)
|
||||
expect(solution.status).toBe(200);
|
||||
});
|
||||
|
||||
// todo: credentials are not working
|
||||
test.skip("Cmd 'request.get' should return OK with 'proxy' param with credentials", async () => {
|
||||
/*
|
||||
To configure TinyProxy in local:
|
||||
* sudo vim /etc/tinyproxy/tinyproxy.conf
|
||||
* edit => LogFile "/tmp/tinyproxy.log"
|
||||
* edit => Syslog Off
|
||||
* add => BasicAuth testuser testpass
|
||||
* sudo tinyproxy -d
|
||||
* sudo tail -f /tmp/tinyproxy.log
|
||||
*/
|
||||
const payload = {
|
||||
"cmd": "request.get",
|
||||
"url": googleUrl,
|
||||
"proxy": {
|
||||
"url": proxyUrl,
|
||||
"username": "testuser",
|
||||
"password": "testpass"
|
||||
}
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSolution = response.body;
|
||||
expect(apiResponse.status).toBe("ok");
|
||||
|
||||
const solution = apiResponse.solution;
|
||||
expect(solution.url).toContain(googleUrl)
|
||||
expect(solution.status).toContain(200)
|
||||
});
|
||||
|
||||
test("Cmd 'request.get' should fail with wrong 'proxy' param", async () => {
|
||||
const payload = {
|
||||
"cmd": "request.get",
|
||||
"url": googleUrl,
|
||||
"proxy": {
|
||||
"url": "http://127.0.0.1:43210"
|
||||
}
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(500);
|
||||
|
||||
const apiResponse: V1ResponseSolution = response.body;
|
||||
expect(apiResponse.status).toBe("error");
|
||||
expect(apiResponse.message).toBe("Error: Unable to process browser request. Error: NS_ERROR_PROXY_CONNECTION_REFUSED at https://www.google.com");
|
||||
});
|
||||
|
||||
test("Cmd 'request.get' should return fail with timeout", async () => {
|
||||
const payload = {
|
||||
"cmd": "request.get",
|
||||
"url": googleUrl,
|
||||
"maxTimeout": 10
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(500);
|
||||
|
||||
const apiResponse: V1ResponseBase = response.body;
|
||||
expect(apiResponse.status).toBe("error");
|
||||
expect(apiResponse.message).toBe("Error: Unable to process browser request. Error: Maximum timeout reached. maxTimeout=10 (ms)");
|
||||
expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
|
||||
expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
|
||||
expect(apiResponse.version).toBe(version);
|
||||
});
|
||||
|
||||
test("Cmd 'request.get' should return fail with bad domain", async () => {
|
||||
const payload = {
|
||||
"cmd": "request.get",
|
||||
"url": "https://www.google.combad"
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(500);
|
||||
|
||||
const apiResponse: V1ResponseBase = response.body;
|
||||
expect(apiResponse.status).toBe("error");
|
||||
expect(apiResponse.message).toBe("Error: Unable to process browser request. Error: NS_ERROR_UNKNOWN_HOST at https://www.google.combad");
|
||||
});
|
||||
|
||||
test("Cmd 'request.get' should accept deprecated params", async () => {
|
||||
const payload = {
|
||||
"cmd": "request.get",
|
||||
"url": googleUrl,
|
||||
"userAgent": "Test User-Agent" // was removed in v2, not used
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSolution = response.body;
|
||||
expect(apiResponse.status).toBe("ok");
|
||||
|
||||
const solution = apiResponse.solution;
|
||||
expect(solution.url).toContain(googleUrl)
|
||||
expect(solution.status).toBe(200);
|
||||
expect(solution.userAgent).toContain("Firefox/")
|
||||
});
|
||||
|
||||
test("Cmd 'request.post' should return OK with no Cloudflare", async () => {
|
||||
const payload = {
|
||||
"cmd": "request.post",
|
||||
"url": postUrl + '/post',
|
||||
"postData": "param1=value1¶m2=value2"
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSolution = response.body;
|
||||
expect(apiResponse.status).toBe("ok");
|
||||
expect(apiResponse.message).toBe("");
|
||||
expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
|
||||
expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
|
||||
expect(apiResponse.version).toBe(version);
|
||||
|
||||
const solution = apiResponse.solution;
|
||||
expect(solution.url).toContain(postUrl)
|
||||
expect(solution.status).toBe(200);
|
||||
expect(Object.keys(solution.headers).length).toBeGreaterThan(0)
|
||||
expect(solution.response).toContain(" I hope you have a lovely day!")
|
||||
expect(Object.keys(solution.cookies).length).toBe(0)
|
||||
expect(solution.userAgent).toContain("Firefox/")
|
||||
|
||||
// check that we sent the date
|
||||
const payload2 = {
|
||||
"cmd": "request.get",
|
||||
"url": postUrl
|
||||
}
|
||||
const response2: Response = await request(app).post("/v1").send(payload2);
|
||||
expect(response2.statusCode).toBe(200);
|
||||
|
||||
const apiResponse2: V1ResponseSolution = response2.body;
|
||||
expect(apiResponse2.status).toBe("ok");
|
||||
|
||||
const solution2 = apiResponse2.solution;
|
||||
expect(solution2.status).toBe(200);
|
||||
expect(solution2.response).toContain(new Date().toISOString().split(':')[0].replace('T', ' '))
|
||||
});
|
||||
|
||||
test("Cmd 'request.post' should fail without 'postData' param", async () => {
|
||||
const payload = {
|
||||
"cmd": "request.post",
|
||||
"url": googleUrl
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(500);
|
||||
|
||||
const apiResponse: V1ResponseBase = response.body;
|
||||
expect(apiResponse.status).toBe("error");
|
||||
expect(apiResponse.message).toBe("Error: Must send param \"postBody\" when sending a POST request.");
|
||||
expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
|
||||
expect(apiResponse.endTimestamp).toBeGreaterThanOrEqual(apiResponse.startTimestamp);
|
||||
expect(apiResponse.version).toBe(version);
|
||||
});
|
||||
|
||||
test("Cmd 'sessions.create' should return OK", async () => {
|
||||
const payload = {
|
||||
"cmd": "sessions.create"
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSession = response.body;
|
||||
expect(apiResponse.status).toBe("ok");
|
||||
expect(apiResponse.message).toBe("Session created successfully.");
|
||||
expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
|
||||
expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
|
||||
expect(apiResponse.version).toBe(version);
|
||||
expect(apiResponse.session.length).toBe(36);
|
||||
});
|
||||
|
||||
test("Cmd 'sessions.create' should return OK with session", async () => {
|
||||
const payload = {
|
||||
"cmd": "sessions.create",
|
||||
"session": "2bc6bb20-2f56-11ec-9543-test"
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSession = response.body;
|
||||
expect(apiResponse.status).toBe("ok");
|
||||
expect(apiResponse.message).toBe("Session created successfully.");
|
||||
expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
|
||||
expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
|
||||
expect(apiResponse.version).toBe(version);
|
||||
expect(apiResponse.session).toBe("2bc6bb20-2f56-11ec-9543-test");
|
||||
});
|
||||
|
||||
test("Cmd 'sessions.list' should return OK", async () => {
|
||||
// create one session for testing
|
||||
const payload0 = {
|
||||
"cmd": "sessions.create"
|
||||
}
|
||||
const response0: Response = await request(app).post("/v1").send(payload0);
|
||||
expect(response0.statusCode).toBe(200);
|
||||
|
||||
const payload = {
|
||||
"cmd": "sessions.list"
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSessions = response.body;
|
||||
expect(apiResponse.status).toBe("ok");
|
||||
expect(apiResponse.message).toBe("");
|
||||
expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
|
||||
expect(apiResponse.endTimestamp).toBeGreaterThanOrEqual(apiResponse.startTimestamp);
|
||||
expect(apiResponse.version).toBe(version);
|
||||
expect(apiResponse.sessions.length).toBeGreaterThan(0)
|
||||
});
|
||||
|
||||
test("Cmd 'sessions.destroy' should return OK", async () => {
  // Set-up: open a session so there is something to destroy.
  const createResponse: Response = await request(app)
    .post("/v1")
    .send({ "cmd": "sessions.create" });
  expect(createResponse.statusCode).toBe(200);
  const created: V1ResponseSession = createResponse.body;
  const createdSessionId = created.session

  // Destroy the session that was just created.
  const destroyResponse: Response = await request(app)
    .post("/v1")
    .send({ "cmd": "sessions.destroy", "session": createdSessionId });
  expect(destroyResponse.statusCode).toBe(200);

  // Verify the response envelope.
  const outcome: V1ResponseBase = destroyResponse.body;
  expect(outcome.status).toBe("ok");
  expect(outcome.message).toBe("The session has been removed.");
  expect(outcome.startTimestamp).toBeGreaterThan(1000);
  expect(outcome.endTimestamp).toBeGreaterThanOrEqual(outcome.startTimestamp);
  expect(outcome.version).toBe(version);
});
|
||||
|
||||
test("Cmd 'sessions.destroy' should fail", async () => {
  // Ask the API to destroy a session id that was never created.
  const payload = {
    "cmd": "sessions.destroy",
    "session": "bad-session"
  }
  const response: Response = await request(app).post("/v1").send(payload);
  expect(response.statusCode).toBe(500);

  const apiResponse: V1ResponseBase = response.body;
  expect(apiResponse.status).toBe("error");
  expect(apiResponse.message).toBe("Error: This session does not exist.");
  expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
  // The failure can be reported within the same millisecond it started, so
  // allow equal timestamps (as the other session tests do) to avoid flakiness.
  expect(apiResponse.endTimestamp).toBeGreaterThanOrEqual(apiResponse.startTimestamp);
  expect(apiResponse.version).toBe(version);
});
|
||||
|
||||
test("Cmd 'request.get' should use session", async () => {
  // Returns the value of the "cf_clearance" cookie from a solution response.
  // Fails with a readable assertion (instead of a TypeError on `undefined`)
  // when the cookie is not present.
  const clearanceCookieOf = (apiResponse: V1ResponseSolution): string => {
    const cookie = (apiResponse.solution.cookies as any[]).find(
      (candidate) => candidate.name === "cf_clearance"
    );
    expect(cookie).toBeDefined();
    return cookie.value;
  };

  // create one session for testing
  const payload0 = {
    "cmd": "sessions.create"
  }
  const response0: Response = await request(app).post("/v1").send(payload0);
  expect(response0.statusCode).toBe(200);
  const apiResponse0: V1ResponseSession = response0.body;
  const sessionId0 = apiResponse0.session

  // first request should solve the challenge
  const payload = {
    "cmd": "request.get",
    "url": cfUrl,
    "session": sessionId0
  }
  const response: Response = await request(app).post("/v1").send(payload);
  expect(response.statusCode).toBe(200);

  const apiResponse: V1ResponseSolution = response.body;
  expect(apiResponse.status).toBe("ok");
  const cfCookie: string = clearanceCookieOf(apiResponse);
  expect(cfCookie.length).toBeGreaterThan(30)

  // second request should have the same cookie
  const response2: Response = await request(app).post("/v1").send(payload);
  expect(response2.statusCode).toBe(200);

  const apiResponse2: V1ResponseSolution = response2.body;
  expect(apiResponse2.status).toBe("ok");
  const cfCookie2: string = clearanceCookieOf(apiResponse2);
  expect(cfCookie2.length).toBeGreaterThan(30)
  expect(cfCookie2).toBe(cfCookie)
});
|
||||
|
||||
});
|
||||
@@ -1,9 +0,0 @@
|
||||
import { IncomingMessage, ServerResponse } from 'http';
|
||||
|
||||
/**
 * Bundle of per-request objects and reply callbacks.
 */
export interface RequestContext {
  /** The incoming HTTP request. */
  req: IncomingMessage

  /** The HTTP response object associated with `req`. */
  res: ServerResponse

  /** Numeric start timestamp (presumably when handling began; unit not visible here — confirm with the producer). */
  startTimestamp: number

  /** Callback that takes an error message. */
  errorResponse: (msg: string) => void

  /** Callback that takes a message and, optionally, an object of extra properties. */
  successResponse: (msg: string, extendedProperties?: object) => void
}
|
||||
31
src/utils.ts
31
src/utils.ts
@@ -1,31 +0,0 @@
|
||||
import * as fs from 'fs'
|
||||
import * as Path from 'path'
|
||||
import { promisify } from 'util'
|
||||
|
||||
/** Promise-returning form of `setTimeout`, created with `util.promisify`. */
export const sleep = promisify(setTimeout)
|
||||
|
||||
/**
 * Synchronously removes a directory together with everything inside it.
 *
 * Done by hand because recursive fs.rmdir needs node version 12:
 * https://github.com/ngosang/FlareSolverr/issues/5#issuecomment-655572712
 *
 * @param path Directory to remove. Nothing happens when it does not exist.
 */
export function deleteFolderRecursive(path: string) {
  if (!fs.existsSync(path)) {
    return
  }

  for (const entryName of fs.readdirSync(path)) {
    const entryPath = Path.join(path, entryName)
    const isFolder = fs.lstatSync(entryPath).isDirectory()
    if (isFolder) {
      // empty the sub-folder (and remove it) before moving on
      deleteFolderRecursive(entryPath)
    } else {
      fs.unlinkSync(entryPath)
    }
  }

  // the folder is empty at this point, so a plain rmdir succeeds
  fs.rmdirSync(path)
}
|
||||
|
||||
/**
 * Returns a new object holding the own enumerable properties of `o` whose
 * value is not `undefined`. Other "empty" values (`null`, `0`, `''`, `false`)
 * are kept. The input object is not modified.
 *
 * Only own properties are copied: an unguarded `for...in` would also pick up
 * enumerable properties inherited through the prototype chain and turn them
 * into own fields of the result.
 *
 * @param o Source object.
 * @returns A shallow copy of `o` without its `undefined` fields.
 */
export const removeEmptyFields = (o: Record<string, any>): typeof o => {
  const r: typeof o = {}
  for (const k of Object.keys(o)) {
    const value = o[k]
    if (value !== undefined) {
      r[k] = value
    }
  }
  return r
}
|
||||
Reference in New Issue
Block a user