mirror of
https://github.com/FlareSolverr/FlareSolverr.git
synced 2025-12-05 17:18:19 +01:00
Compare commits
16 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7376ef9bc9 | ||
|
|
de9c7bcf76 | ||
|
|
bef9411e1c | ||
|
|
27ad58b2c6 | ||
|
|
d038944089 | ||
|
|
a8bc6f5468 | ||
|
|
39fdde9a74 | ||
|
|
8234cdb516 | ||
|
|
66fe775d27 | ||
|
|
ade05bb7a8 | ||
|
|
5710c08581 | ||
|
|
f1e829fd3a | ||
|
|
dfc4383b50 | ||
|
|
d140e9369d | ||
|
|
6677329842 | ||
|
|
0f40054a73 |
2
.github/ISSUE_TEMPLATE.md
vendored
2
.github/ISSUE_TEMPLATE.md
vendored
@@ -17,7 +17,7 @@ Check closed issues as well, because your issue may have already been fixed.
|
||||
|
||||
**Are you using a proxy or VPN?** [yes/no]
|
||||
|
||||
**Using Captcha Solver:** [yse/no]
|
||||
**Using Captcha Solver:** [yes/no]
|
||||
|
||||
**If using captcha solver, which one:**
|
||||
|
||||
|
||||
39
.github/workflows/release.yml
vendored
39
.github/workflows/release.yml
vendored
@@ -7,19 +7,35 @@ on:
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: Create Release
|
||||
name: Create release
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0 # get all commits, branches and tags (required for the changelog)
|
||||
|
||||
- name: Build Changelog
|
||||
id: github_release
|
||||
uses: mikepenz/release-changelog-builder-action@main
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GH_PAT }}
|
||||
- name: Setup Node
|
||||
uses: actions/setup-node@v2
|
||||
with:
|
||||
node-version: '14'
|
||||
|
||||
- name: Create Release
|
||||
- name: Build artifacts
|
||||
run: |
|
||||
npm install
|
||||
npm run build
|
||||
npm run package
|
||||
|
||||
- name: Build changelog
|
||||
id: github_changelog
|
||||
run: |
|
||||
changelog=$(git log $(git describe --tags --abbrev=0)..HEAD --no-merges --oneline)
|
||||
changelog="${changelog//'%'/'%25'}"
|
||||
changelog="${changelog//$'\n'/'%0A'}"
|
||||
changelog="${changelog//$'\r'/'%0D'}"
|
||||
echo "##[set-output name=changelog;]${changelog}"
|
||||
|
||||
- name: Create release
|
||||
id: create_release
|
||||
uses: actions/create-release@v1
|
||||
env:
|
||||
@@ -27,6 +43,13 @@ jobs:
|
||||
with:
|
||||
tag_name: ${{ github.ref }}
|
||||
release_name: ${{ github.ref }}
|
||||
body: ${{ steps.github_release.outputs.changelog }}
|
||||
body: ${{ steps.github_changelog.outputs.changelog }}
|
||||
draft: false
|
||||
prerelease: false
|
||||
|
||||
- name: Upload release artifacts
|
||||
uses: alexellis/upload-assets@0.2.2
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GH_PAT }}
|
||||
with:
|
||||
asset_paths: '["./bin/*.zip"]'
|
||||
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -121,3 +121,6 @@ dist
|
||||
|
||||
# Project Development
|
||||
testing/
|
||||
|
||||
# Binaries
|
||||
bin/
|
||||
|
||||
53
README.md
53
README.md
@@ -1,8 +1,9 @@
|
||||
# FlareSolverr
|
||||
|
||||
[](https://github.com/FlareSolverr/FlareSolverr/issues)
|
||||
[](https://github.com/FlareSolverr/FlareSolverr/pulls)
|
||||
[](https://hub.docker.com/r/flaresolverr/flaresolverr/)
|
||||
[](https://github.com/FlareSolverr/FlareSolverr/releases)
|
||||
[](https://hub.docker.com/r/flaresolverr/flaresolverr/)
|
||||
[](https://github.com/FlareSolverr/FlareSolverr/issues)
|
||||
[](https://github.com/FlareSolverr/FlareSolverr/pulls)
|
||||
|
||||
FlareSolverr is a proxy server to bypass Cloudflare protection
|
||||
|
||||
@@ -48,14 +49,22 @@ If you prefer the `docker cli` execute the following command.
|
||||
```bash
|
||||
docker run -d \
|
||||
--name=flaresolverr \
|
||||
-p 8191:8191 \
|
||||
-e LOG_LEVEL=info \
|
||||
--restart unless-stopped \
|
||||
ghcr.io/flaresolverr/flaresolverr:latest
|
||||
```
|
||||
|
||||
### Precompiled binaries
|
||||
|
||||
This is the recommended way for Windows users.
|
||||
* Download the [FlareSolverr zip](https://github.com/FlareSolverr/FlareSolverr/releases) from the release's assets. It is available for Windows and Linux.
|
||||
* Extract the zip file. FlareSolverr executable and chrome folder must be in the same directory.
|
||||
* Execute FlareSolverr binary. In the environment variables section you can find how to change the configuration.
|
||||
|
||||
### From source code
|
||||
|
||||
This is the recommended way for Windows / MacOS users and for developers.
|
||||
This is the recommended way for MacOS users and for developers.
|
||||
* Install [NodeJS](https://nodejs.org/)
|
||||
* Clone this repository and open a shell in that path
|
||||
* Run `npm install` command to install FlareSolverr dependencies
|
||||
@@ -128,8 +137,9 @@ Parameter | Notes
|
||||
url | Mandatory
|
||||
session | Optional. Will send the request from an existing browser instance. If one is not sent it will create a temporary instance that will be destroyed immediately after the request is completed.
|
||||
headers | Optional. To specify user headers.
|
||||
maxTimeout | Optional. Max timeout to solve the challenge
|
||||
cookies | Optional. Will be used by the headless browser. Follow [this](https://github.com/puppeteer/puppeteer/blob/v3.3.0/docs/api.md#pagesetcookiecookies) format
|
||||
maxTimeout | Optional, default value 60000. Max timeout to solve the challenge in milliseconds.
|
||||
cookies | Optional. Will be used by the headless browser. Follow [this](https://github.com/puppeteer/puppeteer/blob/v3.3.0/docs/api.md#pagesetcookiecookies) format.
|
||||
returnOnlyCookies | Optional, default false. Only returns the cookies. Response data, headers and other parts of the response are removed.
|
||||
|
||||
Example response from running the `curl` above:
|
||||
|
||||
@@ -209,43 +219,52 @@ moment there is nothing setup to do so. If this is something you need feel free
|
||||
|
||||
## Environment variables
|
||||
|
||||
To set the environment vars in Linux run `export LOG_LEVEL=debug` and then start FlareSolverr in the same shell.
|
||||
|
||||
Name | Default | Notes
|
||||
|--|--|--|
|
||||
LOG_LEVEL | info | Used to change the verbosity of the logging.
|
||||
LOG_LEVEL | info | Used to change the verbosity of the logging. Use `LOG_LEVEL=debug` for more information.
|
||||
LOG_HTML | false | Used for debugging. If `true` all HTML that passes through the proxy will be logged to the console in `debug` level.
|
||||
PORT | 8191 | Change this if you already have a process running on port `8191`.
|
||||
HOST | 0.0.0.0 | This shouldn't need to be messed with but if you insist, it's here!
|
||||
CAPTCHA_SOLVER | None | This is used to select which captcha solving method is used when a captcha is encountered.
|
||||
HEADLESS | true | This is used to debug the browser by not running it in headless mode.
|
||||
|
||||
Environment variables are set differently depending on the operating system. Some examples:
|
||||
* Docker: Take a look at the Docker section in this document. Environment variables can be set in the `docker-compose.yml` file or in the Docker CLI command.
|
||||
* Linux: Run `export LOG_LEVEL=debug` and then start FlareSolverr in the same shell.
|
||||
* Windows: Open `cmd.exe`, run `set LOG_LEVEL=debug` and then start FlareSolverr in the same shell.
|
||||
|
||||
## Captcha Solvers
|
||||
|
||||
Sometimes CF not only gives mathematical computations and browser tests, sometimes they also require the user to solve
|
||||
a captcha. If this is the case, FlareSolverr will return the captcha page. But that's not very helpful to you is it?
|
||||
:warning: At this time none of the captcha solvers work. You can check the status in the open issues. Any help is welcome.
|
||||
|
||||
Sometimes Cloudflare not only gives mathematical computations and browser tests, but also requires the user to
|
||||
solve a captcha.
|
||||
If this is the case, FlareSolverr will return the error `Captcha detected but no automatic solver is configured.`
|
||||
|
||||
FlareSolverr can be customized to solve the captchas automatically by setting the environment variable `CAPTCHA_SOLVER`
|
||||
to the file name of one of the adapters inside the [/captcha](src/captcha) directory.
|
||||
|
||||
### hcaptcha-solver
|
||||
|
||||
This method makes use of the [hcaptcha-solver](https://github.com/JimmyLaurent/hcaptcha-solver) project which attempts
|
||||
to solve hCaptcha by randomly selecting images.
|
||||
This method makes use of the [hcaptcha-solver](https://github.com/JimmyLaurent/hcaptcha-solver) project.
|
||||
|
||||
To use this solver you must first install it and then set it as the `CAPTCHA_SOLVER`.
|
||||
NOTE: This solver works by picking random images, so it will fail in a lot of requests and it's hard to know if it is
|
||||
working or not. In a real use case with Sonarr/Radarr + Jackett it is still useful because those apps make a new request
|
||||
every 15 minutes. Eventually one of the requests is going to work and Jackett saves the cookie forever (until it stops
|
||||
working).
|
||||
|
||||
To use this solver you must set the environment variable:
|
||||
|
||||
```bash
|
||||
npm i hcaptcha-solver
|
||||
CAPTCHA_SOLVER=hcaptcha-solver
|
||||
```
|
||||
|
||||
### CaptchaHarvester
|
||||
|
||||
This method makes use of the [CaptchaHarvester](https://github.com/NoahCardoza/CaptchaHarvester) project which allows
|
||||
users to collect thier own tokens from ReCaptcha V2/V3 and hCaptcha for free.
|
||||
users to collect their own tokens from ReCaptcha V2/V3 and hCaptcha for free.
|
||||
|
||||
To use this method you must set these ENV variables:
|
||||
To use this method you must set these environment variables:
|
||||
|
||||
```bash
|
||||
CAPTCHA_SOLVER=harvester
|
||||
|
||||
78
build-binaries.js
Normal file
78
build-binaries.js
Normal file
@@ -0,0 +1,78 @@
|
||||
const fs = require('fs')
|
||||
const path = require('path')
|
||||
const {execSync} = require('child_process')
|
||||
const archiver = require('archiver')
|
||||
const puppeteer = require('puppeteer')
|
||||
const version = 'v' + require('./package.json').version;
|
||||
|
||||
/**
 * Build script entry point.
 *
 * 1. Generates the standalone executables with `pkg` into `bin/`.
 * 2. For every entry in `builds`, downloads the listed Chrome revision with
 *    Puppeteer's browser fetcher into `bin/puppeteer`.
 * 3. Zips the executable and the Chrome folder together as
 *    `bin/flaresolverr-<version>-<fsZipName>.zip`.
 *
 * Each zip is fully written before the next platform is processed, and any
 * failure (download, archiving, file write) terminates the script with exit
 * code 1 so a calling build step can detect it.
 */
(async () => {
  const builds = [
    {
      platform: 'linux',
      version: 756035,
      chromeFolder: 'chrome-linux',
      fsExec: 'flaresolverr-linux',
      fsZipExec: 'flaresolverr',
      fsZipName: 'linux-x64'
    },
    {
      platform: 'win64',
      version: 756035,
      chromeFolder: 'chrome-win',
      fsExec: 'flaresolverr-win.exe',
      fsZipExec: 'flaresolverr.exe',
      fsZipName: 'windows-x64'
    }
    // TODO: this is working but changes are required in session.ts to find chrome path
    // {
    //   platform: 'mac',
    //   version: 756035,
    //   chromeFolder: 'chrome-mac',
    //   fsExec: 'flaresolverr-macos',
    //   fsZipExec: 'flaresolverr',
    //   fsZipName: 'macos'
    // }
  ]

  // generate executables (start from a clean output folder)
  console.log('Generating executables...')
  if (fs.existsSync('bin')) {
    fs.rmdirSync('bin', {recursive: true})
  }
  execSync('pkg -t node14-win-x64,node14-linux-x64 --out-path bin .')
  // execSync('pkg -t node14-win-x64,node14-mac-x64,node14-linux-x64 --out-path bin .')

  // download Chrome and zip together
  for (const os of builds) {
    console.log('Building ' + os.fsZipName + ' artifact')

    // download chrome
    console.log('Downloading Chrome...')
    const f = puppeteer.createBrowserFetcher({
      platform: os.platform,
      path: path.join(__dirname, 'bin', 'puppeteer')
    })
    await f.download(os.version)

    // compress in zip
    console.log('Compressing zip file...')
    const zipName = 'bin/flaresolverr-' + version + '-' + os.fsZipName + '.zip'

    // Wrap the stream events in a promise so the script waits for the zip to
    // be completely written and so stream errors reject instead of being
    // thrown from inside an event handler.
    await new Promise((resolve, reject) => {
      const output = fs.createWriteStream(zipName)
      const archive = archiver('zip')

      output.on('close', function () {
        console.log('File ' + zipName + ' created. Size: ' + archive.pointer() + ' bytes')
        resolve()
      })
      output.on('error', reject)
      archive.on('error', reject)

      archive.pipe(output)

      archive.file('bin/' + os.fsExec, { name: 'flaresolverr/' + os.fsZipExec })
      archive.directory('bin/puppeteer/' + os.platform + '-' + os.version + '/' + os.chromeFolder, 'flaresolverr/chrome')

      // finalize() may itself return a promise; route its rejection to ours
      Promise.resolve(archive.finalize()).catch(reject)
    })
  }
})().catch((err) => {
  // Report the failure and exit non-zero explicitly instead of depending on
  // the runtime's handling of unhandled promise rejections.
  console.error(err)
  process.exit(1)
})
|
||||
791
package-lock.json
generated
791
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
15
package.json
15
package.json
@@ -1,11 +1,12 @@
|
||||
{
|
||||
"name": "flaresolverr",
|
||||
"version": "1.2.1",
|
||||
"version": "1.2.2",
|
||||
"description": "Proxy server to bypass Cloudflare protection.",
|
||||
"scripts": {
|
||||
"start": "node ./dist/index.js",
|
||||
"build": "tsc",
|
||||
"dev": "nodemon -e ts --exec ts-node src/index.ts"
|
||||
"dev": "nodemon -e ts --exec ts-node src/index.ts",
|
||||
"package": "node build-binaries.js"
|
||||
},
|
||||
"author": "Diego Heras (ngosang)",
|
||||
"contributors": [
|
||||
@@ -19,6 +20,14 @@
|
||||
"type": "git",
|
||||
"url": "https://github.com/ngosang/FlareSolverr"
|
||||
},
|
||||
"pkg": {
|
||||
"assets": [
|
||||
"node_modules/puppeteer-extra-plugin-stealth/**/*.*"
|
||||
]
|
||||
},
|
||||
"bin": {
|
||||
"flaresolverr": "dist/index.js"
|
||||
},
|
||||
"dependencies": {
|
||||
"await-timeout": "^1.1.1",
|
||||
"console-log-level": "^1.4.1",
|
||||
@@ -34,6 +43,7 @@
|
||||
"@types/node": "^14.0.23",
|
||||
"@types/puppeteer": "^3.0.1",
|
||||
"@types/uuid": "^8.0.0",
|
||||
"archiver": "^5.2.0",
|
||||
"eslint": "^7.5.0",
|
||||
"eslint-config-airbnb-base": "^14.2.0",
|
||||
"eslint-config-standard": "^14.1.1",
|
||||
@@ -42,6 +52,7 @@
|
||||
"eslint-plugin-promise": "^4.2.1",
|
||||
"eslint-plugin-standard": "^4.0.1",
|
||||
"nodemon": "^2.0.4",
|
||||
"pkg": "^4.4.9",
|
||||
"ts-node": "^8.10.2",
|
||||
"typescript": "^3.9.7"
|
||||
}
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import log from "../log";
|
||||
|
||||
export enum CaptchaType {
|
||||
re = 'reCaptcha',
|
||||
h = 'hCaptcha'
|
||||
@@ -31,5 +33,7 @@ export default (): Solver => {
|
||||
}
|
||||
}
|
||||
|
||||
log.info(`Using '${method} to solve the captcha.`);
|
||||
|
||||
return captchaSolvers[method]
|
||||
}
|
||||
@@ -110,5 +110,6 @@ createServer((req: IncomingMessage, res: ServerResponse) => {
|
||||
})
|
||||
})
|
||||
}).listen(serverPort, serverHost, () => {
|
||||
log.info(`FlareSolverr ${version} listening on http://${serverHost}:${serverPort}`)
|
||||
log.info(`FlareSolverr ${version} listening on http://${serverHost}:${serverPort}`);
|
||||
log.debug('Debug log enabled');
|
||||
})
|
||||
|
||||
164
src/providers/cloudflare.ts
Normal file
164
src/providers/cloudflare.ts
Normal file
@@ -0,0 +1,164 @@
|
||||
import {Response} from 'puppeteer'
|
||||
import {Page} from "puppeteer-extra/dist/puppeteer";
|
||||
|
||||
import log from "../log";
|
||||
import getCaptchaSolver, {CaptchaType} from "../captcha";
|
||||
|
||||
/**
|
||||
* This class contains the logic to solve protections provided by CloudFlare
|
||||
**/
|
||||
|
||||
const CHALLENGE_SELECTORS = ['#trk_jschal_js', '.ray_id', '.attack-box'];
|
||||
const TOKEN_INPUT_NAMES = ['g-recaptcha-response', 'h-captcha-response'];
|
||||
|
||||
/**
 * Detects Cloudflare protection on an already loaded page and tries to get past it.
 *
 * Flow:
 *  1. If the `server` response header does not start with `cloudflare` (or is
 *     missing), the response is returned untouched.
 *  2. If a `.cf-error-code` element is present the request is considered blocked.
 *  3. For status codes above 400, the first matching selector of
 *     `CHALLENGE_SELECTORS` is polled (wait, navigation, reload) until it disappears.
 *  4. If a `cf_captcha_kind` input is present, the configured captcha solver is
 *     asked for a token, the token is written into the response textarea(s) and
 *     the challenge form is submitted.
 *
 * @param url URL that was requested; forwarded to the captcha solver.
 * @param page Page that has already navigated to `url`.
 * @param response Response of that navigation.
 * @returns The response of the last navigation or reload performed here, or the
 *          given `response` when nothing had to be done.
 * @throws Error when the request is blocked, no challenge selector is found on an
 *         error page, the captcha type / sitekey / response field / challenge
 *         form cannot be found, no solver is configured, the solver returns no
 *         token, or the captcha is still shown after submitting.
 */
export default async function resolveChallenge(url: string, page: Page, response: Response): Promise<Response> {

  // look for challenge and return fast if not detected
  // (the 'server' header may be absent, so guard before calling startsWith)
  const serverHeader = response.headers().server
  if (!serverHeader || !serverHeader.startsWith('cloudflare')) {
    log.info('Cloudflare not detected');
    return response;
  }
  log.info('Cloudflare detected');

  if (await page.$('.cf-error-code')) {
    throw new Error('Cloudflare has blocked this request (Code 1020 Detected).')
  }

  let selectorFoundCount = 0;
  if (response.status() > 400) {
    // detect cloudflare wait 5s
    for (const selector of CHALLENGE_SELECTORS) {
      const cfChallengeElem = await page.$(selector)
      if (cfChallengeElem) {
        selectorFoundCount++
        log.debug(`Javascript challenge element '${selector}' detected.`)
        log.debug('Waiting for Cloudflare challenge...')

        // This loop has no timeout of its own: it only ends once the
        // challenge element is no longer found on the page.
        while (true) {
          await page.waitFor(1000)
          try {
            // catch exception timeout in waitForNavigation
            response = await page.waitForNavigation({ waitUntil: 'domcontentloaded', timeout: 5000 })
          } catch (error) { }

          try {
            // catch Execution context was destroyed
            const challengeElemAgain = await page.$(selector)
            if (!challengeElemAgain) { break }
            log.debug('Found challenge element again...')
          } catch (error)
          { }

          response = await page.reload({ waitUntil: 'domcontentloaded' })
          log.debug('Page reloaded.')
          log.html(await page.content())
        }

        log.debug('Validating HTML code...')
        // only the first selector that matches is waited on
        break
      } else {
        log.debug(`No '${selector}' challenge element detected.`)
      }
    }
    log.debug("Javascript challenge selectors found: " + selectorFoundCount + ", total selectors: " + CHALLENGE_SELECTORS.length)
  } else {
    // some sites use cloudflare but there is no challenge
    log.debug(`Javascript challenge not detected. Status code: ${response.status()}`);
    // mark as "found" so the missing-selector error below is not raised
    selectorFoundCount = 1;
  }

  // it seems some captcha pages return 200 sometimes
  if (await page.$('input[name="cf_captcha_kind"]')) {
    log.info('Captcha challenge detected.');
    const captchaSolver = getCaptchaSolver()
    if (captchaSolver) {
      const captchaStartTimestamp = Date.now()
      const challengeForm = await page.$('#challenge-form')
      if (challengeForm) {
        const captchaTypeElm = await page.$('input[name="cf_captcha_kind"]')
        const cfCaptchaType: string = await captchaTypeElm.evaluate((e: any) => e.value)
        const captchaType: CaptchaType = (CaptchaType as any)[cfCaptchaType]
        if (!captchaType) {
          throw new Error('Unknown captcha type!');
        }

        let sitekey = null
        if (captchaType != 'hCaptcha' && process.env.CAPTCHA_SOLVER != 'hcaptcha-solver') {
          const sitekeyElem = await page.$('*[data-sitekey]')
          if (!sitekeyElem) {
            throw new Error('Could not find sitekey!');
          }
          sitekey = await sitekeyElem.evaluate((e) => e.getAttribute('data-sitekey'))
        }

        log.info('Waiting to receive captcha token to bypass challenge...')
        const token = await captchaSolver({
          url,
          sitekey,
          type: captchaType
        })
        log.debug(`Token received: ${token}`);
        if (!token) {
          throw new Error('Token solver failed to return a token.')
        }

        // write the token into every known response field present in the page
        let responseFieldsFoundCount = 0;
        for (const name of TOKEN_INPUT_NAMES) {
          const input = await page.$(`textarea[name="${name}"]`)
          if (input) {
            responseFieldsFoundCount ++;
            log.debug(`Challenge response field '${name}' found in challenge form.`);
            await input.evaluate((e: HTMLTextAreaElement, token) => { e.value = token }, token);
          }
        }
        if (responseFieldsFoundCount == 0) {
          throw new Error('Challenge response field not found in challenge form.');
        }

        // ignore preset event listeners on the form
        await page.evaluate(() => {
          // use the listener's own argument instead of the implicit global `event`
          window.addEventListener('submit', (e) => { e.stopPropagation() }, true)
        })

        // it seems some sites obfuscate their challenge forms
        // TODO: look into how they do it and come up with a more solid solution
        try {
          // this element is added with js and we want to wait for all the js to load before submitting
          await page.waitForSelector('#challenge-form', { timeout: 10000 })
        } catch (err) {
          throw new Error("No '#challenge-form' element detected.");
        }

        // calculates the time it took to solve the captcha
        const captchaSolveTotalTime = Date.now() - captchaStartTimestamp

        // generates a random wait time (10 to 19 seconds, in milliseconds)
        const randomWaitTime = (Math.floor(Math.random() * 10) + 10) * 1000

        // waits, if any, time remaining to appear human but stay as fast as possible
        const timeLeft = randomWaitTime - captchaSolveTotalTime
        if (timeLeft > 0) {
          log.debug(`Waiting for '${timeLeft}' milliseconds.`);
          await page.waitFor(timeLeft);
        }

        // submit captcha response
        // (the submit call is not awaited; the navigation it triggers is awaited on the next line)
        challengeForm.evaluate((e: HTMLFormElement) => e.submit())
        response = await page.waitForNavigation({ waitUntil: 'domcontentloaded' })

        // the captcha being shown again means the submitted token was not accepted
        if (await page.$('input[name="cf_captcha_kind"]')) {
          throw new Error('Captcha service failed to solve the challenge.');
        }
      }
    } else {
      throw new Error('Captcha detected but no automatic solver is configured.');
    }
  } else {
    if (selectorFoundCount == 0)
    {
      throw new Error('No challenge selectors found, unable to proceed')
    }
  }

  return response;
}
|
||||
254
src/routes.ts
254
src/routes.ts
@@ -1,12 +1,12 @@
|
||||
import { v1 as UUIDv1 } from 'uuid'
|
||||
import { SetCookie, Request, Response, Headers, HttpMethod, Overrides } from 'puppeteer'
|
||||
import { Page, Browser } from "puppeteer-extra/dist/puppeteer";
|
||||
const Timeout = require('await-timeout');
|
||||
|
||||
import log from './log'
|
||||
import sessions, { SessionsCacheItem } from './session'
|
||||
import { RequestContext } from './types'
|
||||
import log from './log'
|
||||
import { SetCookie, Request, Headers, HttpMethod, Overrides, Cookie } from 'puppeteer'
|
||||
import { TimeoutError } from 'puppeteer/Errors'
|
||||
import getCaptchaSolver, { CaptchaType } from './captcha'
|
||||
import * as Puppeteer from "puppeteer-extra/dist/puppeteer";
|
||||
const Timeout = require('await-timeout');
|
||||
import cloudflareProvider from './providers/cloudflare';
|
||||
|
||||
export interface BaseAPICall {
|
||||
cmd: string
|
||||
@@ -69,58 +69,10 @@ type OverridesProps =
|
||||
'postData' |
|
||||
'headers'
|
||||
|
||||
// We always set a Windows User-Agent because ARM builds are detected by CloudFlare
|
||||
// We always set a Windows User-Agent because ARM builds are detected by Cloudflare
|
||||
const DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
|
||||
const CHALLENGE_SELECTORS = ['#trk_jschal_js', '.ray_id', '.attack-box']
|
||||
const TOKEN_INPUT_NAMES = ['g-recaptcha-response', 'h-captcha-response']
|
||||
|
||||
async function interceptResponse(page: Puppeteer.Page, callback: (payload: ChallengeResolutionT) => any) {
|
||||
const client = await page.target().createCDPSession();
|
||||
await client.send('Fetch.enable', {
|
||||
patterns: [
|
||||
{
|
||||
urlPattern: '*',
|
||||
resourceType: 'Document',
|
||||
requestStage: 'Response',
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
client.on('Fetch.requestPaused', async (e) => {
|
||||
log.debug('Fetch.requestPaused. Checking if the response has valid cookies')
|
||||
let headers = e.responseHeaders || []
|
||||
|
||||
let cookies = await page.cookies();
|
||||
log.debug(cookies)
|
||||
|
||||
if (cookies.filter((c: Cookie) => c.name === 'cf_clearance').length > 0) {
|
||||
log.debug('Aborting request and return cookies. valid cookies found')
|
||||
await client.send('Fetch.failRequest', {requestId: e.requestId, errorReason: 'Aborted'})
|
||||
|
||||
let status = 'ok'
|
||||
let message = ''
|
||||
const payload: ChallengeResolutionT = {
|
||||
status,
|
||||
message,
|
||||
result: {
|
||||
url: page.url(),
|
||||
status: e.status,
|
||||
headers: headers.reduce((a: any, x: { name: any; value: any }) => ({ ...a, [x.name]: x.value }), {}),
|
||||
response: null,
|
||||
cookies: cookies,
|
||||
userAgent: ''
|
||||
}
|
||||
}
|
||||
|
||||
callback(payload);
|
||||
} else {
|
||||
log.debug('Continuing request. no valid cookies found')
|
||||
await client.send('Fetch.continueRequest', {requestId: e.requestId})
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
async function resolveChallengeWithTimeout(ctx: RequestContext, params: BaseRequestAPICall, page: Puppeteer.Page) {
|
||||
async function resolveChallengeWithTimeout(ctx: RequestContext, params: BaseRequestAPICall, page: Page) {
|
||||
const maxTimeout = params.maxTimeout || 60000
|
||||
const timer = new Timeout();
|
||||
try {
|
||||
@@ -134,7 +86,7 @@ async function resolveChallengeWithTimeout(ctx: RequestContext, params: BaseRequ
|
||||
}
|
||||
}
|
||||
|
||||
async function resolveChallenge(ctx: RequestContext, { url, proxy, download, returnOnlyCookies }: BaseRequestAPICall, page: Puppeteer.Page): Promise<ChallengeResolutionT | void> {
|
||||
async function resolveChallenge(ctx: RequestContext, { url, proxy, download, returnOnlyCookies }: BaseRequestAPICall, page: Page): Promise<ChallengeResolutionT | void> {
|
||||
|
||||
let status = 'ok'
|
||||
let message = ''
|
||||
@@ -146,160 +98,15 @@ async function resolveChallenge(ctx: RequestContext, { url, proxy, download, ret
|
||||
}
|
||||
|
||||
log.debug(`Navigating to... ${url}`)
|
||||
let response = await page.goto(url, { waitUntil: 'domcontentloaded' })
|
||||
|
||||
let response: Response = await page.goto(url, { waitUntil: 'domcontentloaded' })
|
||||
log.html(await page.content())
|
||||
|
||||
// look for challenge
|
||||
if (response.headers().server.startsWith('cloudflare')) {
|
||||
log.info('Cloudflare detected')
|
||||
|
||||
if (await page.$('.cf-error-code')) {
|
||||
await page.close()
|
||||
return ctx.errorResponse('Cloudflare has blocked this request (Code 1020 Detected).')
|
||||
}
|
||||
|
||||
if (response.status() > 400) {
|
||||
// detect cloudflare wait 5s
|
||||
let selectorFoundCount = 0
|
||||
for (const selector of CHALLENGE_SELECTORS) {
|
||||
const cfChallengeElem = await page.$(selector)
|
||||
if (cfChallengeElem) {
|
||||
selectorFoundCount++
|
||||
log.debug(`'${selector}' challenge element detected.`)
|
||||
log.debug('Waiting for Cloudflare challenge...')
|
||||
|
||||
let interceptingResult: ChallengeResolutionT;
|
||||
if (returnOnlyCookies) { //If we just want to get the cookies, intercept the response before we get the content/body (just cookies and headers)
|
||||
await interceptResponse(page, async function(payload){
|
||||
interceptingResult = payload;
|
||||
});
|
||||
}
|
||||
|
||||
while (true) {
|
||||
await page.waitFor(1000)
|
||||
try {
|
||||
// catch exception timeout in waitForNavigation
|
||||
response = await page.waitForNavigation({ waitUntil: 'domcontentloaded', timeout: 5000 })
|
||||
} catch (error) { }
|
||||
|
||||
if (returnOnlyCookies && interceptingResult) {
|
||||
await page.close();
|
||||
return interceptingResult;
|
||||
}
|
||||
|
||||
try {
|
||||
// catch Execution context was destroyed
|
||||
const cfChallengeElem = await page.$(selector)
|
||||
if (!cfChallengeElem) { break }
|
||||
log.debug('Found challenge element again...')
|
||||
} catch (error)
|
||||
{ }
|
||||
|
||||
response = await page.reload({ waitUntil: 'domcontentloaded' })
|
||||
log.debug('Reloaded page...')
|
||||
log.html(await page.content())
|
||||
}
|
||||
|
||||
log.debug('Validating HTML code...')
|
||||
break
|
||||
} else {
|
||||
log.debug(`No '${selector}' challenge element detected.`)
|
||||
}
|
||||
}
|
||||
log.debug("Number of selector found: " + selectorFoundCount + ", total selector: " + CHALLENGE_SELECTORS.length)
|
||||
if (selectorFoundCount == 0)
|
||||
{
|
||||
await page.close()
|
||||
return ctx.errorResponse('No challenge selectors found, unable to proceed')
|
||||
}
|
||||
}
|
||||
|
||||
// it seems some captcha pages return 200 sometimes
|
||||
if (await page.$('input[name="cf_captcha_kind"]')) {
|
||||
const captchaSolver = getCaptchaSolver()
|
||||
if (captchaSolver) {
|
||||
const captchaStartTimestamp = Date.now()
|
||||
const challengeForm = await page.$('#challenge-form')
|
||||
if (challengeForm) {
|
||||
const captchaTypeElm = await page.$('input[name="cf_captcha_kind"]')
|
||||
const cfCaptchaType: string = await captchaTypeElm.evaluate((e: any) => e.value)
|
||||
const captchaType: CaptchaType = (CaptchaType as any)[cfCaptchaType]
|
||||
if (!captchaType) { return ctx.errorResponse('Unknown captcha type!') }
|
||||
|
||||
let sitekey = null
|
||||
if (captchaType != 'hCaptcha' && process.env.CAPTCHA_SOLVER != 'hcaptcha-solver') {
|
||||
const sitekeyElem = await page.$('*[data-sitekey]')
|
||||
if (!sitekeyElem) { return ctx.errorResponse('Could not find sitekey!') }
|
||||
sitekey = await sitekeyElem.evaluate((e) => e.getAttribute('data-sitekey'))
|
||||
}
|
||||
|
||||
log.info('Waiting to receive captcha token to bypass challenge...')
|
||||
const token = await captchaSolver({
|
||||
url,
|
||||
sitekey,
|
||||
type: captchaType
|
||||
})
|
||||
|
||||
if (!token) {
|
||||
await page.close()
|
||||
return ctx.errorResponse('Token solver failed to return a token.')
|
||||
}
|
||||
|
||||
for (const name of TOKEN_INPUT_NAMES) {
|
||||
const input = await page.$(`textarea[name="${name}"]`)
|
||||
if (input) { await input.evaluate((e: HTMLTextAreaElement, token) => { e.value = token }, token) }
|
||||
}
|
||||
|
||||
// ignore preset event listeners on the form
|
||||
await page.evaluate(() => {
|
||||
window.addEventListener('submit', (e) => { event.stopPropagation() }, true)
|
||||
})
|
||||
|
||||
// it seems some sites obfuscate their challenge forms
|
||||
// TODO: look into how they do it and come up with a more solid solution
|
||||
try {
|
||||
// this element is added with js and we want to wait for all the js to load before submitting
|
||||
await page.waitForSelector('#challenge-form [type=submit]', { timeout: 5000 })
|
||||
} catch (err) {
|
||||
if (err instanceof TimeoutError) {
|
||||
log.debug(`No '#challenge-form [type=submit]' element detected.`)
|
||||
}
|
||||
}
|
||||
|
||||
// calculates the time it took to solve the captcha
|
||||
const captchaSolveTotalTime = Date.now() - captchaStartTimestamp
|
||||
|
||||
// generates a random wait time
|
||||
const randomWaitTime = (Math.floor(Math.random() * 20) + 10) * 1000
|
||||
|
||||
// waits, if any, time remaining to appear human but stay as fast as possible
|
||||
const timeLeft = randomWaitTime - captchaSolveTotalTime
|
||||
if (timeLeft > 0) { await page.waitFor(timeLeft) }
|
||||
|
||||
let interceptingResult: ChallengeResolutionT;
|
||||
if (returnOnlyCookies) { //If we just want to get the cookies, intercept the response before we get the content/body (just cookies and headers)
|
||||
await interceptResponse(page, async function(payload){
|
||||
interceptingResult = payload;
|
||||
});
|
||||
}
|
||||
|
||||
// submit captcha response
|
||||
challengeForm.evaluate((e: HTMLFormElement) => e.submit())
|
||||
response = await page.waitForNavigation({ waitUntil: 'domcontentloaded' })
|
||||
|
||||
if (returnOnlyCookies && interceptingResult) {
|
||||
await page.close();
|
||||
return interceptingResult;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
status = 'warning'
|
||||
message = 'Captcha detected but no automatic solver is configured.'
|
||||
}
|
||||
}
|
||||
|
||||
log.debug("Response is: " + response.status())
|
||||
// Detect protection services and solve challenges
|
||||
try {
|
||||
response = await cloudflareProvider(url, page, response);
|
||||
} catch (e) {
|
||||
status = "error";
|
||||
message = "Cloudflare " + e.toString();
|
||||
}
|
||||
|
||||
const payload: ChallengeResolutionT = {
|
||||
@@ -315,14 +122,19 @@ async function resolveChallenge(ctx: RequestContext, { url, proxy, download, ret
|
||||
}
|
||||
}
|
||||
|
||||
if (download) {
|
||||
// for some reason we get an error unless we reload the page
|
||||
// has something to do with a stale buffer and this is the quickest
|
||||
// fix since I am short on time
|
||||
response = await page.goto(url, { waitUntil: 'domcontentloaded' })
|
||||
payload.result.response = (await response.buffer()).toString('base64')
|
||||
if (returnOnlyCookies) {
|
||||
payload.result.headers = null;
|
||||
payload.result.userAgent = null;
|
||||
} else {
|
||||
payload.result.response = await page.content()
|
||||
if (download) {
|
||||
// for some reason we get an error unless we reload the page
|
||||
// has something to do with a stale buffer and this is the quickest
|
||||
// fix since I am short on time
|
||||
response = await page.goto(url, { waitUntil: 'domcontentloaded' })
|
||||
payload.result.response = (await response.buffer()).toString('base64')
|
||||
} else {
|
||||
payload.result.response = await page.content()
|
||||
}
|
||||
}
|
||||
|
||||
// make sure the page is closed because if it isn't and error will be thrown
|
||||
@@ -342,7 +154,7 @@ function mergeSessionWithParams({ defaults }: SessionsCacheItem, params: BaseReq
|
||||
return copy
|
||||
}
|
||||
|
||||
async function setupPage(ctx: RequestContext, params: BaseRequestAPICall, browser: Puppeteer.Browser): Promise<Puppeteer.Page> {
|
||||
async function setupPage(ctx: RequestContext, params: BaseRequestAPICall, browser: Browser): Promise<Page> {
|
||||
const page = await browser.newPage()
|
||||
|
||||
// merge session defaults with params
|
||||
@@ -474,14 +286,6 @@ export const routes: Routes = {
|
||||
|
||||
await browserRequest(ctx, params)
|
||||
},
|
||||
'request.cookies': async (ctx, params: BaseRequestAPICall) => {
|
||||
params.returnOnlyCookies = true
|
||||
params.method = 'GET'
|
||||
if (params.postData) {
|
||||
return ctx.errorResponse('Cannot use "postBody" when sending a GET request.')
|
||||
}
|
||||
await browserRequest(ctx, params)
|
||||
},
|
||||
}
|
||||
|
||||
export default async function Router(ctx: RequestContext, params: BaseAPICall): Promise<void> {
|
||||
|
||||
@@ -56,7 +56,11 @@ function prepareBrowserProfile(id: string): string {
|
||||
|
||||
export default {
|
||||
create: async (id: string, { cookies, oneTimeSession, userAgent, headers, maxTimeout, proxy }: SessionCreateOptions): Promise<SessionsCacheItem> => {
|
||||
let args = ['--no-sandbox', '--disable-setuid-sandbox'];
|
||||
let args = [
|
||||
'--no-sandbox',
|
||||
'--disable-setuid-sandbox',
|
||||
'--disable-dev-shm-usage' // issue #45
|
||||
];
|
||||
if (proxy && proxy.url) {
|
||||
args.push(`--proxy-server=${proxy.url}`);
|
||||
}
|
||||
@@ -72,6 +76,12 @@ export default {
|
||||
puppeteerOptions.userDataDir = prepareBrowserProfile(id)
|
||||
}
|
||||
|
||||
// if we are running inside executable binary, change chrome path
|
||||
if (typeof (process as any).pkg !== 'undefined') {
|
||||
const exe = process.platform === "win32" ? 'chrome.exe' : 'chrome';
|
||||
puppeteerOptions.executablePath = path.join(path.dirname(process.execPath), 'chrome', exe)
|
||||
}
|
||||
|
||||
log.debug('Launching headless browser...')
|
||||
|
||||
// TODO: maybe access env variable?
|
||||
|
||||
Reference in New Issue
Block a user