mirror of
https://github.com/FlareSolverr/FlareSolverr.git
synced 2025-12-16 06:21:34 +01:00
Backport changes from Cloudproxy (#11)
This commit is contained in:
31
src/captcha/harvester.ts
Normal file
31
src/captcha/harvester.ts
Normal file
@@ -0,0 +1,31 @@
|
||||
import got from 'got'
|
||||
import { sleep } from '../utils'
|
||||
|
||||
/*
|
||||
This method uses the captcha-harvester project:
|
||||
https://github.com/NoahCardoza/CaptchaHarvester
|
||||
|
||||
While the function must take url/sitekey/type args,
|
||||
they aren't used because the harvester server must
|
||||
be preconfigured.
|
||||
|
||||
ENV:
|
||||
HARVESTER_ENDPOINT: This must be the full path
|
||||
to the /token endpoint of the harvester.
|
||||
E.G. "https://127.0.0.1:5000/token"
|
||||
*/
|
||||
|
||||
export default async function solve(): Promise<string> {
|
||||
const endpoint = process.env.HARVESTER_ENDPOINT
|
||||
if (!endpoint) { throw Error('ENV variable `HARVESTER_ENDPOINT` must be set.') }
|
||||
while (true) {
|
||||
try {
|
||||
return (await got.get(process.env.HARVESTER_ENDPOINT, {
|
||||
https: { rejectUnauthorized: false }
|
||||
})).body
|
||||
} catch (e) {
|
||||
if (e.response.statusCode !== 418) { throw e }
|
||||
}
|
||||
await sleep(3000)
|
||||
}
|
||||
}
|
||||
21
src/captcha/hcaptcha-solver.ts
Normal file
21
src/captcha/hcaptcha-solver.ts
Normal file
@@ -0,0 +1,21 @@
|
||||
const solveCaptcha = require('hcaptcha-solver');
|
||||
import { SolverOptions } from '.'
|
||||
/*
|
||||
This method uses the hcaptcha-solver project:
|
||||
https://github.com/JimmyLaurent/hcaptcha-solver
|
||||
|
||||
TODO: allow user pass custom options to the solver.
|
||||
|
||||
ENV:
|
||||
There are no other variables that must be set to get this to work
|
||||
*/
|
||||
|
||||
export default async function solve({ url }: SolverOptions): Promise<string> {
|
||||
try {
|
||||
const token = await solveCaptcha(url)
|
||||
return token
|
||||
} catch (e) {
|
||||
console.error(e)
|
||||
return null
|
||||
}
|
||||
}
|
||||
35
src/captcha/index.ts
Normal file
35
src/captcha/index.ts
Normal file
@@ -0,0 +1,35 @@
|
||||
export enum CaptchaType {
|
||||
re = 'reCaptcha',
|
||||
h = 'hCaptcha'
|
||||
}
|
||||
|
||||
export interface SolverOptions {
|
||||
url: string
|
||||
sitekey: string
|
||||
type: CaptchaType
|
||||
}
|
||||
|
||||
export type Solver = (options: SolverOptions) => Promise<string>
|
||||
|
||||
const captchaSolvers: { [key: string]: Solver } = {}
|
||||
|
||||
export default (): Solver => {
|
||||
const method = process.env.CAPTCHA_SOLVER
|
||||
|
||||
if (!method) { return null }
|
||||
|
||||
if (!(method in captchaSolvers)) {
|
||||
try {
|
||||
captchaSolvers[method] = require('./' + method).default as Solver
|
||||
} catch (e) {
|
||||
if (e.code === 'MODULE_NOT_FOUND') {
|
||||
throw Error(`The solver '${method}' is not a valid captcha solving method.`)
|
||||
} else {
|
||||
console.error(e)
|
||||
throw Error(`An error occured loading the solver '${method}'.`)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return captchaSolvers[method]
|
||||
}
|
||||
106
src/index.ts
Normal file
106
src/index.ts
Normal file
@@ -0,0 +1,106 @@
|
||||
import log from './log'
|
||||
import { createServer, IncomingMessage, ServerResponse } from 'http';
|
||||
import { RequestContext } from './types'
|
||||
import Router, { BaseAPICall } from './routes'
|
||||
|
||||
const version: string = require('../package.json').version
|
||||
const serverPort: number = Number(process.env.PORT) || 8191
|
||||
const serverHost: string = process.env.HOST || '0.0.0.0'
|
||||
|
||||
|
||||
function errorResponse(errorMsg: string, res: ServerResponse, startTimestamp: number) {
|
||||
log.error(errorMsg)
|
||||
const response = {
|
||||
status: 'error',
|
||||
message: errorMsg,
|
||||
startTimestamp,
|
||||
endTimestamp: Date.now(),
|
||||
version
|
||||
}
|
||||
res.writeHead(500, {
|
||||
'Content-Type': 'application/json'
|
||||
})
|
||||
res.write(JSON.stringify(response))
|
||||
res.end()
|
||||
}
|
||||
|
||||
function successResponse(successMsg: string, extendedProperties: object, res: ServerResponse, startTimestamp: number) {
|
||||
const endTimestamp = Date.now()
|
||||
log.info(`Successful response in ${(endTimestamp - startTimestamp) / 1000} s`)
|
||||
if (successMsg) { log.info(successMsg) }
|
||||
|
||||
const response = Object.assign({
|
||||
status: 'ok',
|
||||
message: successMsg || '',
|
||||
startTimestamp,
|
||||
endTimestamp,
|
||||
version
|
||||
}, extendedProperties || {})
|
||||
res.writeHead(200, {
|
||||
'Content-Type': 'application/json'
|
||||
})
|
||||
res.write(JSON.stringify(response))
|
||||
res.end()
|
||||
}
|
||||
|
||||
function validateIncomingRequest(ctx: RequestContext, params: BaseAPICall) {
|
||||
log.info(`Params: ${JSON.stringify(params)}`)
|
||||
|
||||
if (ctx.req.method !== 'POST') {
|
||||
ctx.errorResponse('Only the POST method is allowed')
|
||||
return false
|
||||
}
|
||||
|
||||
if (ctx.req.url !== '/v1') {
|
||||
ctx.errorResponse('Only /v1 endpoint is allowed')
|
||||
return false
|
||||
}
|
||||
|
||||
if (!params.cmd) {
|
||||
ctx.errorResponse("Parameter 'cmd' is mandatory")
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
createServer((req: IncomingMessage, res: ServerResponse) => {
|
||||
// count the request for the log prefix
|
||||
log.incRequests()
|
||||
|
||||
const startTimestamp = Date.now()
|
||||
log.info(`Incoming request: ${req.method} ${req.url}`)
|
||||
const bodyParts: any[] = []
|
||||
req.on('data', chunk => {
|
||||
bodyParts.push(chunk)
|
||||
}).on('end', () => {
|
||||
// parse params
|
||||
const body = Buffer.concat(bodyParts).toString()
|
||||
let params: BaseAPICall = null
|
||||
try {
|
||||
params = JSON.parse(body)
|
||||
} catch (err) {
|
||||
errorResponse('Body must be in JSON format', res, startTimestamp)
|
||||
return
|
||||
}
|
||||
|
||||
const ctx: RequestContext = {
|
||||
req,
|
||||
res,
|
||||
startTimestamp,
|
||||
errorResponse: (msg) => errorResponse(msg, res, startTimestamp),
|
||||
successResponse: (msg, extendedProperties) => successResponse(msg, extendedProperties, res, startTimestamp)
|
||||
}
|
||||
|
||||
// validate params
|
||||
if (!validateIncomingRequest(ctx, params)) { return }
|
||||
|
||||
// process request
|
||||
Router(ctx, params).catch(e => {
|
||||
console.error(e)
|
||||
ctx.errorResponse(e.message)
|
||||
})
|
||||
})
|
||||
}).listen(serverPort, serverHost, () => {
|
||||
log.info(`FlareSolverr v${version} listening on http://${serverHost}:${serverPort}`)
|
||||
})
|
||||
19
src/log.ts
Normal file
19
src/log.ts
Normal file
@@ -0,0 +1,19 @@
|
||||
let requests = 0
|
||||
|
||||
const LOG_HTML: boolean = Boolean(process.env.LOG_HTML) || false
|
||||
|
||||
export default {
|
||||
incRequests: () => { requests++ },
|
||||
html(html: string) {
|
||||
if (LOG_HTML)
|
||||
this.debug(html)
|
||||
},
|
||||
...require('console-log-level')(
|
||||
{
|
||||
level: process.env.LOG_LEVEL || 'debug',
|
||||
prefix(level: string) {
|
||||
return `${new Date().toISOString()} ${level.toUpperCase()} REQ-${requests}`
|
||||
}
|
||||
}
|
||||
)
|
||||
}
|
||||
466
src/routes.ts
Normal file
466
src/routes.ts
Normal file
@@ -0,0 +1,466 @@
|
||||
import { v1 as UUIDv1 } from 'uuid'
|
||||
import sessions, { SessionsCacheItem } from './session'
|
||||
import { RequestContext } from './types'
|
||||
import log from './log'
|
||||
import { Browser, SetCookie, Request, Page, Headers, HttpMethod, Overrides, Cookie } from 'puppeteer'
|
||||
import { TimeoutError } from 'puppeteer/Errors'
|
||||
import getCaptchaSolver, { CaptchaType } from './captcha'
|
||||
|
||||
export interface BaseAPICall {
|
||||
cmd: string
|
||||
}
|
||||
|
||||
interface BaseSessionsAPICall extends BaseAPICall {
|
||||
session?: string
|
||||
}
|
||||
|
||||
interface SessionsCreateAPICall extends BaseSessionsAPICall {
|
||||
userAgent?: string,
|
||||
cookies?: SetCookie[],
|
||||
headers?: Headers
|
||||
maxTimeout?: number
|
||||
proxy?: any
|
||||
}
|
||||
|
||||
interface BaseRequestAPICall extends BaseAPICall {
|
||||
url: string
|
||||
method?: HttpMethod
|
||||
postData?: string
|
||||
session?: string
|
||||
userAgent?: string
|
||||
maxTimeout?: number
|
||||
cookies?: SetCookie[],
|
||||
headers?: Headers
|
||||
proxy?: any, // TODO: use interface not any
|
||||
download?: boolean
|
||||
returnOnlyCookies?: boolean
|
||||
}
|
||||
|
||||
|
||||
interface Routes {
|
||||
[key: string]: (ctx: RequestContext, params: BaseAPICall) => void | Promise<void>
|
||||
}
|
||||
|
||||
interface ChallengeResolutionResultT {
|
||||
url: string
|
||||
status: number,
|
||||
headers?: Headers,
|
||||
response: string,
|
||||
cookies: object[]
|
||||
userAgent: string
|
||||
}
|
||||
|
||||
interface ChallengeResolutionT {
|
||||
status?: string
|
||||
message: string
|
||||
result: ChallengeResolutionResultT
|
||||
}
|
||||
|
||||
interface OverrideResolvers {
|
||||
method?: (request: Request) => HttpMethod,
|
||||
postData?: (request: Request) => string,
|
||||
headers?: (request: Request) => Headers
|
||||
}
|
||||
|
||||
type OverridesProps =
|
||||
'method' |
|
||||
'postData' |
|
||||
'headers'
|
||||
|
||||
const CHALLENGE_SELECTORS = ['#trk_jschal_js', '.ray_id', '.attack-box']
|
||||
const TOKEN_INPUT_NAMES = ['g-recaptcha-response', 'h-captcha-response']
|
||||
|
||||
async function interceptResponse(page: Page, callback: (payload: ChallengeResolutionT) => any) {
|
||||
const client = await page.target().createCDPSession();
|
||||
await client.send('Fetch.enable', {
|
||||
patterns: [
|
||||
{
|
||||
urlPattern: '*',
|
||||
resourceType: 'Document',
|
||||
requestStage: 'Response',
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
client.on('Fetch.requestPaused', async (e) => {
|
||||
log.debug('Fetch.requestPaused. Checking if the response has valid cookies')
|
||||
let headers = e.responseHeaders || []
|
||||
|
||||
let cookies = await page.cookies();
|
||||
log.debug(cookies)
|
||||
|
||||
if (cookies.filter((c: Cookie) => c.name === 'cf_clearance').length > 0) {
|
||||
log.debug('Aborting request and return cookies. valid cookies found')
|
||||
await client.send('Fetch.failRequest', {requestId: e.requestId, errorReason: 'Aborted'})
|
||||
|
||||
let status = 'ok'
|
||||
let message = ''
|
||||
const payload: ChallengeResolutionT = {
|
||||
status,
|
||||
message,
|
||||
result: {
|
||||
url: page.url(),
|
||||
status: e.status,
|
||||
headers: headers.reduce((a: any, x: { name: any; value: any }) => ({ ...a, [x.name]: x.value }), {}),
|
||||
response: null,
|
||||
cookies: cookies,
|
||||
userAgent: ''
|
||||
}
|
||||
}
|
||||
|
||||
callback(payload);
|
||||
} else {
|
||||
log.debug('Continuing request. no valid cookies found')
|
||||
await client.send('Fetch.continueRequest', {requestId: e.requestId})
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
async function resolveChallenge(ctx: RequestContext, { url, maxTimeout, proxy, download, returnOnlyCookies }: BaseRequestAPICall, page: Page): Promise<ChallengeResolutionT | void> {
|
||||
|
||||
maxTimeout = maxTimeout || 60000
|
||||
let status = 'ok'
|
||||
let message = ''
|
||||
|
||||
if (proxy) {
|
||||
log.debug("Apply proxy");
|
||||
if (proxy.username)
|
||||
await page.authenticate({ username: proxy.username, password: proxy.password });
|
||||
}
|
||||
|
||||
log.debug(`Navigating to... ${url}`)
|
||||
let response = await page.goto(url, { waitUntil: 'domcontentloaded' })
|
||||
|
||||
// look for challenge
|
||||
if (response.headers().server.startsWith('cloudflare')) {
|
||||
log.info('Cloudflare detected')
|
||||
|
||||
if (await page.$('.cf-error-code')) {
|
||||
await page.close()
|
||||
return ctx.errorResponse('Cloudflare has blocked this request (Code 1020 Detected).')
|
||||
}
|
||||
|
||||
if (response.status() > 400) {
|
||||
// detect cloudflare wait 5s
|
||||
for (const selector of CHALLENGE_SELECTORS) {
|
||||
const cfChallengeElem = await page.$(selector)
|
||||
if (cfChallengeElem) {
|
||||
log.html(await page.content())
|
||||
log.debug('Waiting for Cloudflare challenge...')
|
||||
|
||||
let interceptingResult: ChallengeResolutionT;
|
||||
if (returnOnlyCookies) { //If we just want to get the cookies, intercept the response before we get the content/body (just cookies and headers)
|
||||
await interceptResponse(page, async function(payload){
|
||||
interceptingResult = payload;
|
||||
});
|
||||
}
|
||||
|
||||
// TODO: find out why these pages hang sometimes
|
||||
while (Date.now() - ctx.startTimestamp < maxTimeout) {
|
||||
await page.waitFor(1000)
|
||||
try {
|
||||
// catch exception timeout in waitForNavigation
|
||||
response = await page.waitForNavigation({ waitUntil: 'domcontentloaded', timeout: 5000 })
|
||||
} catch (error) { }
|
||||
|
||||
if (returnOnlyCookies && interceptingResult) {
|
||||
await page.close();
|
||||
return interceptingResult;
|
||||
}
|
||||
|
||||
try {
|
||||
// catch Execution context was destroyed
|
||||
const cfChallengeElem = await page.$(selector)
|
||||
if (!cfChallengeElem) { break }
|
||||
log.debug('Found challenge element again...')
|
||||
} catch (error)
|
||||
{ }
|
||||
|
||||
response = await page.reload({ waitUntil: 'domcontentloaded' })
|
||||
log.debug('Reloaded page...')
|
||||
}
|
||||
|
||||
if (Date.now() - ctx.startTimestamp >= maxTimeout) {
|
||||
ctx.errorResponse(`Maximum timeout reached. maxTimeout=${maxTimeout} (ms)`)
|
||||
return
|
||||
}
|
||||
|
||||
log.debug('Validating HTML code...')
|
||||
break
|
||||
} else {
|
||||
log.debug(`No '${selector}' challenge element detected.`)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// it seems some captcha pages return 200 sometimes
|
||||
if (await page.$('input[name="cf_captcha_kind"]')) {
|
||||
const captchaSolver = getCaptchaSolver()
|
||||
if (captchaSolver) {
|
||||
const captchaStartTimestamp = Date.now()
|
||||
const challengeForm = await page.$('#challenge-form')
|
||||
if (challengeForm) {
|
||||
log.html(await page.content())
|
||||
const captchaTypeElm = await page.$('input[name="cf_captcha_kind"]')
|
||||
const cfCaptchaType: string = await captchaTypeElm.evaluate((e: any) => e.value)
|
||||
const captchaType: CaptchaType = (CaptchaType as any)[cfCaptchaType]
|
||||
if (!captchaType) { return ctx.errorResponse('Unknown captcha type!') }
|
||||
|
||||
let sitekey = null
|
||||
if (captchaType != 'hCaptcha' && process.env.CAPTCHA_SOLVER != 'hcaptcha-solver') {
|
||||
const sitekeyElem = await page.$('*[data-sitekey]')
|
||||
if (!sitekeyElem) { return ctx.errorResponse('Could not find sitekey!') }
|
||||
sitekey = await sitekeyElem.evaluate((e) => e.getAttribute('data-sitekey'))
|
||||
}
|
||||
|
||||
log.info('Waiting to receive captcha token to bypass challenge...')
|
||||
const token = await captchaSolver({
|
||||
url,
|
||||
sitekey,
|
||||
type: captchaType
|
||||
})
|
||||
|
||||
if (!token) {
|
||||
await page.close()
|
||||
return ctx.errorResponse('Token solver failed to return a token.')
|
||||
}
|
||||
|
||||
for (const name of TOKEN_INPUT_NAMES) {
|
||||
const input = await page.$(`textarea[name="${name}"]`)
|
||||
if (input) { await input.evaluate((e: HTMLTextAreaElement, token) => { e.value = token }, token) }
|
||||
}
|
||||
|
||||
// ignore preset event listeners on the form
|
||||
await page.evaluate(() => {
|
||||
window.addEventListener('submit', (e) => { event.stopPropagation() }, true)
|
||||
})
|
||||
|
||||
// it seems some sites obfuscate their challenge forms
|
||||
// TODO: look into how they do it and come up with a more solid solution
|
||||
try {
|
||||
// this element is added with js and we want to wait for all the js to load before submitting
|
||||
await page.waitForSelector('#challenge-form [type=submit]', { timeout: 5000 })
|
||||
} catch (err) {
|
||||
if (err instanceof TimeoutError) {
|
||||
log.debug(`No '#challenge-form [type=submit]' element detected.`)
|
||||
}
|
||||
}
|
||||
|
||||
// calculates the time it took to solve the captcha
|
||||
const captchaSolveTotalTime = Date.now() - captchaStartTimestamp
|
||||
|
||||
// generates a random wait time
|
||||
const randomWaitTime = (Math.floor(Math.random() * 20) + 10) * 1000
|
||||
|
||||
// waits, if any, time remaining to appear human but stay as fast as possible
|
||||
const timeLeft = randomWaitTime - captchaSolveTotalTime
|
||||
if (timeLeft > 0) { await page.waitFor(timeLeft) }
|
||||
|
||||
let interceptingResult: ChallengeResolutionT;
|
||||
if (returnOnlyCookies) { //If we just want to get the cookies, intercept the response before we get the content/body (just cookies and headers)
|
||||
await interceptResponse(page, async function(payload){
|
||||
interceptingResult = payload;
|
||||
});
|
||||
}
|
||||
|
||||
// submit captcha response
|
||||
challengeForm.evaluate((e: HTMLFormElement) => e.submit())
|
||||
response = await page.waitForNavigation({ waitUntil: 'domcontentloaded' })
|
||||
|
||||
if (returnOnlyCookies && interceptingResult) {
|
||||
await page.close();
|
||||
return interceptingResult;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
status = 'warning'
|
||||
message = 'Captcha detected but no automatic solver is configured.'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const payload: ChallengeResolutionT = {
|
||||
status,
|
||||
message,
|
||||
result: {
|
||||
url: page.url(),
|
||||
status: response.status(),
|
||||
headers: response.headers(),
|
||||
response: null,
|
||||
cookies: await page.cookies(),
|
||||
userAgent: await page.evaluate(() => navigator.userAgent)
|
||||
}
|
||||
}
|
||||
|
||||
if (download) {
|
||||
// for some reason we get an error unless we reload the page
|
||||
// has something to do with a stale buffer and this is the quickest
|
||||
// fix since I am short on time
|
||||
response = await page.goto(url, { waitUntil: 'domcontentloaded' })
|
||||
payload.result.response = (await response.buffer()).toString('base64')
|
||||
} else {
|
||||
payload.result.response = await page.content()
|
||||
}
|
||||
|
||||
// make sure the page is closed because if it isn't and error will be thrown
|
||||
// when a user uses a temporary session, the browser make be quit before
|
||||
// the page is properly closed.
|
||||
await page.close()
|
||||
|
||||
return payload
|
||||
}
|
||||
|
||||
function mergeSessionWithParams({ defaults }: SessionsCacheItem, params: BaseRequestAPICall): BaseRequestAPICall {
|
||||
const copy = { ...defaults, ...params }
|
||||
|
||||
// custom merging logic
|
||||
copy.headers = { ...defaults.headers || {}, ...params.headers || {} } || null
|
||||
|
||||
return copy
|
||||
}
|
||||
|
||||
async function setupPage(ctx: RequestContext, params: BaseRequestAPICall, browser: Browser): Promise<Page> {
|
||||
const page = await browser.newPage()
|
||||
|
||||
// merge session defaults with params
|
||||
const { method, postData, userAgent, headers, cookies } = params
|
||||
|
||||
let overrideResolvers: OverrideResolvers = {}
|
||||
|
||||
if (method !== 'GET') {
|
||||
log.debug(`Setting method to ${method}`)
|
||||
overrideResolvers.method = request => method
|
||||
}
|
||||
|
||||
if (postData) {
|
||||
log.debug(`Setting body data to ${postData}`)
|
||||
overrideResolvers.postData = request => postData
|
||||
}
|
||||
|
||||
if (userAgent) {
|
||||
log.debug(`Using custom UA: ${userAgent}`)
|
||||
await page.setUserAgent(userAgent)
|
||||
}
|
||||
|
||||
if (headers) {
|
||||
log.debug(`Adding custom headers: ${JSON.stringify(headers, null, 2)}`,)
|
||||
overrideResolvers.headers = request => Object.assign(request.headers(), headers)
|
||||
}
|
||||
|
||||
if (cookies) {
|
||||
log.debug(`Setting custom cookies: ${JSON.stringify(cookies, null, 2)}`,)
|
||||
await page.setCookie(...cookies)
|
||||
}
|
||||
|
||||
// if any keys have been set on the object
|
||||
if (Object.keys(overrideResolvers).length > 0) {
|
||||
log.debug(overrideResolvers)
|
||||
let callbackRunOnce = false
|
||||
const callback = (request: Request) => {
|
||||
|
||||
if (callbackRunOnce || !request.isNavigationRequest()) {
|
||||
request.continue()
|
||||
return
|
||||
}
|
||||
|
||||
callbackRunOnce = true
|
||||
const overrides: Overrides = {}
|
||||
|
||||
Object.keys(overrideResolvers).forEach((key: OverridesProps) => {
|
||||
// @ts-ignore
|
||||
overrides[key] = overrideResolvers[key](request)
|
||||
});
|
||||
|
||||
log.debug(overrides)
|
||||
|
||||
request.continue(overrides)
|
||||
}
|
||||
|
||||
await page.setRequestInterception(true)
|
||||
page.on('request', callback)
|
||||
}
|
||||
|
||||
return page
|
||||
}
|
||||
|
||||
const browserRequest = async (ctx: RequestContext, params: BaseRequestAPICall) => {
|
||||
const oneTimeSession = params.session === undefined
|
||||
const sessionId = params.session || UUIDv1()
|
||||
const session = oneTimeSession
|
||||
? await sessions.create(sessionId, {
|
||||
userAgent: params.userAgent,
|
||||
oneTimeSession
|
||||
})
|
||||
: sessions.get(sessionId)
|
||||
|
||||
if (session === false) {
|
||||
return ctx.errorResponse('This session does not exist. Use \'list_sessions\' to see all the existing sessions.')
|
||||
}
|
||||
|
||||
params = mergeSessionWithParams(session, params)
|
||||
|
||||
try {
|
||||
const page = await setupPage(ctx, params, session.browser)
|
||||
const data = await resolveChallenge(ctx, params, page)
|
||||
|
||||
if (data) {
|
||||
const { status } = data
|
||||
delete data.status
|
||||
ctx.successResponse(data.message, {
|
||||
...(oneTimeSession ? {} : { session: sessionId }),
|
||||
...(status ? { status } : {}),
|
||||
solution: data.result
|
||||
})
|
||||
}
|
||||
} catch (error) {
|
||||
log.error(error)
|
||||
return ctx.errorResponse("Unable to process browser request")
|
||||
} finally {
|
||||
if (oneTimeSession) { sessions.destroy(sessionId) }
|
||||
}
|
||||
}
|
||||
|
||||
export const routes: Routes = {
|
||||
'sessions.create': async (ctx, { session, ...options }: SessionsCreateAPICall) => {
|
||||
session = session || UUIDv1()
|
||||
const { browser } = await sessions.create(session, options)
|
||||
if (browser) { ctx.successResponse('Session created successfully.', { session }) }
|
||||
},
|
||||
'sessions.list': (ctx) => {
|
||||
ctx.successResponse(null, { sessions: sessions.list() })
|
||||
},
|
||||
'sessions.destroy': async (ctx, { session }: BaseSessionsAPICall) => {
|
||||
if (await sessions.destroy(session)) { return ctx.successResponse('The session has been removed.') }
|
||||
ctx.errorResponse('This session does not exist.')
|
||||
},
|
||||
'request.get': async (ctx, params: BaseRequestAPICall) => {
|
||||
params.method = 'GET'
|
||||
if (params.postData) {
|
||||
return ctx.errorResponse('Cannot use "postBody" when sending a GET request.')
|
||||
}
|
||||
await browserRequest(ctx, params)
|
||||
},
|
||||
'request.post': async (ctx, params: BaseRequestAPICall) => {
|
||||
params.method = 'POST'
|
||||
|
||||
if (!params.postData) {
|
||||
return ctx.errorResponse('Must send param "postBody" when sending a POST request.')
|
||||
}
|
||||
|
||||
await browserRequest(ctx, params)
|
||||
},
|
||||
'request.cookies': async (ctx, params: BaseRequestAPICall) => {
|
||||
params.returnOnlyCookies = true
|
||||
params.method = 'GET'
|
||||
if (params.postData) {
|
||||
return ctx.errorResponse('Cannot use "postBody" when sending a GET request.')
|
||||
}
|
||||
await browserRequest(ctx, params)
|
||||
},
|
||||
}
|
||||
|
||||
export default async function Router(ctx: RequestContext, params: BaseAPICall): Promise<void> {
|
||||
const route = routes[params.cmd]
|
||||
if (route) { return await route(ctx, params) }
|
||||
return ctx.errorResponse(`The command '${params.cmd}' is invalid.`)
|
||||
}
|
||||
139
src/session.ts
Normal file
139
src/session.ts
Normal file
@@ -0,0 +1,139 @@
|
||||
import * as os from 'os'
|
||||
import * as path from 'path'
|
||||
import * as fs from 'fs'
|
||||
|
||||
import puppeteer from 'puppeteer-extra'
|
||||
import { LaunchOptions, Browser, Headers, SetCookie } from 'puppeteer'
|
||||
|
||||
import log from './log'
|
||||
import { deleteFolderRecursive, sleep, removeEmptyFields } from './utils'
|
||||
|
||||
interface SessionPageDefaults {
|
||||
headers?: Headers
|
||||
userAgent?: string
|
||||
}
|
||||
|
||||
export interface SessionsCacheItem {
|
||||
browser: Browser
|
||||
userDataDir?: string
|
||||
defaults: SessionPageDefaults
|
||||
}
|
||||
|
||||
interface SessionsCache {
|
||||
[key: string]: SessionsCacheItem
|
||||
}
|
||||
|
||||
interface SessionCreateOptions {
|
||||
oneTimeSession?: boolean
|
||||
userAgent?: string
|
||||
cookies?: SetCookie[]
|
||||
headers?: Headers,
|
||||
maxTimeout?: number
|
||||
proxy?: any
|
||||
}
|
||||
|
||||
const sessionCache: SessionsCache = {}
|
||||
|
||||
// setting "user-agent-override" evasion is not working for us because it can't be changed
|
||||
// in each request. we set the user-agent in the browser args instead
|
||||
puppeteer.use(require('puppeteer-extra-plugin-stealth')())
|
||||
|
||||
function userDataDirFromId(id: string): string {
|
||||
return path.join(os.tmpdir(), `/puppeteer_chrome_profile_${id}`)
|
||||
}
|
||||
|
||||
function prepareBrowserProfile(id: string): string {
|
||||
// TODO: maybe pass SessionCreateOptions for loading later?
|
||||
const userDataDir = userDataDirFromId(id)
|
||||
|
||||
if (!fs.existsSync(userDataDir)) {
|
||||
fs.mkdirSync(userDataDir, { recursive: true })
|
||||
}
|
||||
|
||||
return userDataDir
|
||||
}
|
||||
|
||||
export default {
|
||||
create: async (id: string, { cookies, oneTimeSession, userAgent, headers, maxTimeout, proxy }: SessionCreateOptions): Promise<SessionsCacheItem> => {
|
||||
let args = ['--no-sandbox', '--disable-setuid-sandbox'];
|
||||
if (proxy && proxy.url) {
|
||||
args.push(`--proxy-server=${proxy.url}`);
|
||||
}
|
||||
|
||||
const puppeteerOptions: LaunchOptions = {
|
||||
product: 'chrome',
|
||||
headless: process.env.HEADLESS !== 'false',
|
||||
args
|
||||
}
|
||||
|
||||
if (!oneTimeSession) {
|
||||
log.debug('Creating userDataDir for session.')
|
||||
puppeteerOptions.userDataDir = prepareBrowserProfile(id)
|
||||
}
|
||||
|
||||
log.debug('Launching headless browser...')
|
||||
|
||||
// TODO: maybe access env variable?
|
||||
// TODO: sometimes browser instances are created and not connected to correctly.
|
||||
// how do we handle/quit those instances inside Docker?
|
||||
let launchTries = 3
|
||||
let browser;
|
||||
|
||||
while (0 <= launchTries--) {
|
||||
try {
|
||||
browser = await puppeteer.launch(puppeteerOptions)
|
||||
break
|
||||
} catch (e) {
|
||||
if (e.message !== 'Failed to launch the browser process!')
|
||||
throw e
|
||||
log.warn('Failed to open browser, trying again...')
|
||||
}
|
||||
}
|
||||
|
||||
if (!browser) { throw Error(`Failed to lanch browser 3 times in a row.`) }
|
||||
|
||||
if (cookies) {
|
||||
const page = await browser.newPage()
|
||||
await page.setCookie(...cookies)
|
||||
}
|
||||
|
||||
sessionCache[id] = {
|
||||
browser,
|
||||
userDataDir: puppeteerOptions.userDataDir,
|
||||
defaults: removeEmptyFields({
|
||||
userAgent,
|
||||
headers,
|
||||
maxTimeout
|
||||
})
|
||||
}
|
||||
|
||||
return sessionCache[id]
|
||||
},
|
||||
|
||||
list: (): string[] => Object.keys(sessionCache),
|
||||
|
||||
// TODO: create a sessions.close that doesn't rm the userDataDir
|
||||
|
||||
destroy: async (id: string): Promise<boolean> => {
|
||||
const { browser, userDataDir } = sessionCache[id]
|
||||
if (browser) {
|
||||
await browser.close()
|
||||
delete sessionCache[id]
|
||||
if (userDataDir) {
|
||||
const userDataDirPath = userDataDirFromId(id)
|
||||
try {
|
||||
// for some reason this keeps an error from being thrown in Windows, figures
|
||||
await sleep(5000)
|
||||
deleteFolderRecursive(userDataDirPath)
|
||||
} catch (e) {
|
||||
console.error(e)
|
||||
throw Error(`Error deleting browser session folder. ${e.message}`)
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
return false
|
||||
},
|
||||
|
||||
get: (id: string): SessionsCacheItem | false => sessionCache[id] && sessionCache[id] || false
|
||||
}
|
||||
9
src/types.ts
Normal file
9
src/types.ts
Normal file
@@ -0,0 +1,9 @@
|
||||
import { IncomingMessage, ServerResponse } from 'http';
|
||||
|
||||
export interface RequestContext {
|
||||
req: IncomingMessage
|
||||
res: ServerResponse
|
||||
startTimestamp: number
|
||||
errorResponse: (msg: string) => void,
|
||||
successResponse: (msg: string, extendedProperties?: object) => void
|
||||
}
|
||||
31
src/utils.ts
Normal file
31
src/utils.ts
Normal file
@@ -0,0 +1,31 @@
|
||||
import * as fs from 'fs'
|
||||
import * as Path from 'path'
|
||||
import { promisify } from 'util'
|
||||
|
||||
export const sleep = promisify(setTimeout)
|
||||
|
||||
// recursive fs.rmdir needs node version 12:
|
||||
// https://github.com/ngosang/FlareSolverr/issues/5#issuecomment-655572712
|
||||
export function deleteFolderRecursive(path: string) {
|
||||
if (fs.existsSync(path)) {
|
||||
fs.readdirSync(path).forEach((file) => {
|
||||
const curPath = Path.join(path, file)
|
||||
if (fs.lstatSync(curPath).isDirectory()) { // recurse
|
||||
deleteFolderRecursive(curPath)
|
||||
} else { // delete file
|
||||
fs.unlinkSync(curPath)
|
||||
}
|
||||
})
|
||||
fs.rmdirSync(path)
|
||||
}
|
||||
}
|
||||
|
||||
export const removeEmptyFields = (o: Record<string, any>): typeof o => {
|
||||
const r: typeof o = {}
|
||||
for (const k in o) {
|
||||
if (o[k] !== undefined) {
|
||||
r[k] = o[k]
|
||||
}
|
||||
}
|
||||
return r
|
||||
}
|
||||
Reference in New Issue
Block a user