Remove outdated chaptcha solvers

This commit is contained in:
ngosang
2021-10-17 18:25:44 +02:00
parent 744de4d158
commit a0e897067a
7 changed files with 81 additions and 853 deletions

View File

@@ -1,31 +0,0 @@
import got from 'got'
import { sleep } from '../services/utils'
/*
This method uses the captcha-harvester project:
https://github.com/NoahCardoza/CaptchaHarvester
While the function must take url/sitekey/type args,
they aren't used because the harvester server must
be preconfigured.
ENV:
HARVESTER_ENDPOINT: This must be the full path
to the /token endpoint of the harvester.
E.G. "https://127.0.0.1:5000/token"
*/
export default async function solve(): Promise<string> {
const endpoint = process.env.HARVESTER_ENDPOINT
if (!endpoint) { throw Error('ENV variable `HARVESTER_ENDPOINT` must be set.') }
while (true) {
try {
return (await got.get(process.env.HARVESTER_ENDPOINT, {
https: { rejectUnauthorized: false }
})).body
} catch (e) {
if (e.response.statusCode !== 418) { throw e }
}
await sleep(3000)
}
}

View File

@@ -1,25 +0,0 @@
const solveCaptcha = require('hcaptcha-solver');
import { SolverOptions } from '.'
/*
This method uses the hcaptcha-solver project:
https://github.com/JimmyLaurent/hcaptcha-solver
TODO: allow user pass custom options to the solver.
ENV:
There are no other variables that must be set to get this to work
*/
export default async function solve({ url }: SolverOptions): Promise<string> {
throw new Error("hcaptcha-solver is not able to solve the new hCaptcha challenge. This issue is already reported #31.");
/*
try {
return await solveCaptcha(url)
} catch (e) {
console.error(e)
return null
}
*/
}

View File

@@ -1,14 +1,14 @@
import {Page, Response} from 'puppeteer'
import log from "../services/log";
import getCaptchaSolver, {CaptchaType} from "../captcha";
/**
* This class contains the logic to solve protections provided by CloudFlare
**/
const BAN_SELECTORS = ['span[data-translate="error"]'];
const CHALLENGE_SELECTORS = ['#trk_jschal_js', '.ray_id', '.attack-box', '#cf-please-wait'];
const TOKEN_INPUT_NAMES = ['g-recaptcha-response', 'h-captcha-response'];
const CAPTCHA_SELECTORS = ['input[name="cf_captcha_kind"]'];
export default async function resolveChallenge(url: string, page: Page, response: Response): Promise<Response> {
@@ -19,7 +19,7 @@ export default async function resolveChallenge(url: string, page: Page, response
}
log.info('Cloudflare detected');
if (await page.$('span[data-translate="error"]')) {
if (await findAnySelector(page, BAN_SELECTORS)) {
throw new Error('Cloudflare has blocked this request. Probably your IP is banned for this site, check in your web browser.')
}
@@ -95,96 +95,30 @@ export default async function resolveChallenge(url: string, page: Page, response
selectorFound = true;
}
// it seems some captcha pages return 200 sometimes
if (await page.$('input[name="cf_captcha_kind"]')) {
log.info('Captcha challenge detected.');
const captchaSolver = getCaptchaSolver()
if (captchaSolver) {
const captchaStartTimestamp = Date.now()
const challengeForm = await page.$('#challenge-form')
if (challengeForm) {
const captchaTypeElm = await page.$('input[name="cf_captcha_kind"]')
const cfCaptchaType: string = await captchaTypeElm.evaluate((e: any) => e.value)
const captchaType: CaptchaType = (CaptchaType as any)[cfCaptchaType]
if (!captchaType) {
throw new Error('Unknown captcha type!');
}
// check for CAPTCHA challenge
if (await findAnySelector(page, CAPTCHA_SELECTORS)) {
log.info('CAPTCHA challenge detected.');
throw new Error('FlareSolverr can not resolve CAPTCHA challenges. Since the captcha doesn\'t always appear, you may have better luck with the next request.');
let sitekey = null
if (captchaType != 'hCaptcha' && process.env.CAPTCHA_SOLVER != 'hcaptcha-solver') {
const sitekeyElem = await page.$('*[data-sitekey]')
if (!sitekeyElem) {
throw new Error('Could not find sitekey!');
}
sitekey = await sitekeyElem.evaluate((e) => e.getAttribute('data-sitekey'))
}
log.info('Waiting to receive captcha token to bypass challenge...')
const token = await captchaSolver({
url,
sitekey,
type: captchaType
})
log.debug(`Token received: ${token}`);
if (!token) {
throw new Error('Token solver failed to return a token.')
}
let responseFieldsFoundCount = 0;
for (const name of TOKEN_INPUT_NAMES) {
const input = await page.$(`textarea[name="${name}"]`)
if (input) {
responseFieldsFoundCount ++;
log.debug(`Challenge response field '${name}' found in challenge form.`);
await input.evaluate((e: HTMLTextAreaElement, token) => { e.value = token }, token);
}
}
if (responseFieldsFoundCount == 0) {
throw new Error('Challenge response field not found in challenge form.');
}
// ignore preset event listeners on the form
await page.evaluate(() => {
window.addEventListener('submit', (e) => { e.stopPropagation() }, true)
})
// it seems some sites obfuscate their challenge forms
// TODO: look into how they do it and come up with a more solid solution
try {
// this element is added with js and we want to wait for all the js to load before submitting
await page.waitForSelector('#challenge-form', { timeout: 10000 })
} catch (err) {
throw new Error("No '#challenge-form' element detected.");
}
// calculates the time it took to solve the captcha
const captchaSolveTotalTime = Date.now() - captchaStartTimestamp
// generates a random wait time
const randomWaitTime = (Math.floor(Math.random() * 10) + 10) * 1000
// waits, if any, time remaining to appear human but stay as fast as possible
const timeLeft = randomWaitTime - captchaSolveTotalTime
if (timeLeft > 0) {
log.debug(`Waiting for '${timeLeft}' milliseconds.`);
await page.waitFor(timeLeft);
}
// submit captcha response
await challengeForm.evaluate((e: HTMLFormElement) => e.submit())
response = await page.waitForNavigation({ waitUntil: 'domcontentloaded' })
if (await page.$('input[name="cf_captcha_kind"]')) {
throw new Error('Captcha service failed to solve the challenge.');
}
}
} else {
throw new Error('Captcha detected but no automatic solver is configured.');
}
// const captchaSolver = getCaptchaSolver()
// if (captchaSolver) {
// // todo: get the params
// log.info('Waiting to receive captcha token to bypass challenge...')
// const token = await captchaSolver({
// url,
// sitekey,
// type: captchaType
// })
// log.debug(`Token received: ${token}`);
// // todo: send the token
// }
// } else {
// throw new Error('Captcha detected but no automatic solver is configured.');
// }
} else {
if (!selectorFound)
{
throw new Error('No challenge selectors found, unable to proceed')
throw new Error('No challenge selectors found, unable to proceed.')
} else {
// reload the page to make sure we get the real response
// do not use page.reload() to avoid #162 #143

View File

@@ -9,6 +9,7 @@ const app = require("../app");
const version: string = require('../../package.json').version
const googleUrl = "https://www.google.com";
const cfUrl = "https://pirateiro.com/torrents/?search=s";
const cfCaptchaUrl = "https://idope.se"
describe("Test '/' path", () => {
test("GET method should return OK ", async () => {
@@ -88,7 +89,7 @@ describe("Test '/v1' path", () => {
expect(solution.userAgent).toContain("Firefox/")
});
test("Cmd 'request.get' should return OK with Cloudflare", async () => {
test("Cmd 'request.get' should return OK with Cloudflare JS", async () => {
const payload = {
"cmd": "request.get",
"url": cfUrl
@@ -117,6 +118,24 @@ describe("Test '/v1' path", () => {
expect(cfCookie.length).toBeGreaterThan(30)
});
test("Cmd 'request.get' should return fail with Cloudflare CAPTCHA", async () => {
const payload = {
"cmd": "request.get",
"url": cfCaptchaUrl
}
const response: Response = await request(app).post("/v1").send(payload);
expect(response.statusCode).toBe(200);
const apiResponse: V1ResponseSolution = response.body;
expect(apiResponse.status).toBe("error");
expect(apiResponse.message).toBe("Cloudflare Error: FlareSolverr can not resolve CAPTCHA challenges. Since the captcha doesn't always appear, you may have better luck with the next request.");
expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
expect(apiResponse.version).toBe(version);
// solution is filled but not useful
expect(apiResponse.solution.url).toContain(cfCaptchaUrl)
});
test("Cmd 'request.get' should return timeout", async () => {
const payload = {
"cmd": "request.get",