Code clean up, remove returnRawHtml, download, headers params

This commit is contained in:
ngosang
2021-10-17 20:43:36 +02:00
parent a0e897067a
commit a5b3e08e1f
6 changed files with 173 additions and 208 deletions

View File

@@ -15,11 +15,11 @@ interface V1Routes {
/**
 * Fields shared by every v1 API request body.
 */
export interface V1RequestBase {
  /** Command name; mandatory, and used as the key to look up the handler in `routes`. */
  cmd: string
  /** Cookies supplied by the caller. */
  cookies?: SetCookie[]
  /** Maximum time to wait for a solution, in milliseconds (60000 is used when omitted). */
  maxTimeout?: number
  /** Proxy settings; `username` / `password` are read from it for page authentication. */
  proxy?: any// TODO: use interface not any
  /** Session identifier; when it is undefined the request runs in a one-time session. */
  session: string
  /** Only triggers a warning that the parameter was removed in FlareSolverr v2. */
  headers?: Headers // deprecated v2, not used
  /** Only triggers a warning that the parameter was removed in FlareSolverr v2. */
  userAgent?: string // deprecated v2, not used
}
interface V1RequestSession extends V1RequestBase {
@@ -29,9 +29,9 @@ export interface V1Request extends V1RequestBase {
url: string
method?: HttpMethod
postData?: string
download?: boolean
returnOnlyCookies?: boolean
returnRawHtml?: boolean
download?: boolean // deprecated v2, not used
returnRawHtml?: boolean // deprecated v2, not used
}
export interface V1ResponseBase {
@@ -59,7 +59,6 @@ export const routes: V1Routes = {
const options: SessionCreateOptions = {
oneTimeSession: false,
cookies: params.cookies,
headers: params.headers,
maxTimeout: params.maxTimeout,
proxy: params.proxy
}
@@ -87,12 +86,15 @@ export const routes: V1Routes = {
},
'request.get': async (params: V1Request, response: V1ResponseSolution): Promise<void> => {
params.method = 'GET'
if (params.userAgent) {
log.warn('Request parameter "userAgent" was removed in FlareSolverr v2.')
}
if (params.postData) {
throw Error('Cannot use "postBody" when sending a GET request.')
}
if (params.returnRawHtml) {
log.warn("Request parameter 'returnRawHtml' was removed in FlareSolverr v2.")
}
if (params.download) {
log.warn("Request parameter 'download' was removed in FlareSolverr v2.")
}
const result: ChallengeResolutionT = await browserRequest(params)
response.status = result.status;
@@ -104,12 +106,15 @@ export const routes: V1Routes = {
},
'request.post': async (params: V1Request, response: V1ResponseSolution): Promise<void> => {
params.method = 'POST'
if (params.userAgent) {
log.warn('Request parameter "userAgent" was removed in FlareSolverr v2.')
}
if (!params.postData) {
throw Error('Must send param "postBody" when sending a POST request.')
}
if (params.returnRawHtml) {
log.warn("Request parameter 'returnRawHtml' was removed in FlareSolverr v2.")
}
if (params.download) {
log.warn("Request parameter 'download' was removed in FlareSolverr v2.")
}
const result: ChallengeResolutionT = await browserRequest(params)
response.status = result.status;
@@ -133,8 +138,15 @@ export async function controllerV1(req: Request, res: Response): Promise<void> {
try {
const params: V1RequestBase = req.body
if (!params.cmd) {
throw Error("Parameter 'cmd' is mandatory.")
throw Error("Request parameter 'cmd' is mandatory.")
}
if (params.headers) {
log.warn("Request parameter 'headers' was removed in FlareSolverr v2.")
}
if (params.userAgent) {
log.warn("Request parameter 'userAgent' was removed in FlareSolverr v2.")
}
const route = routes[params.cmd]
if (route) {
await route(params, response)

View File

@@ -2,22 +2,17 @@ import {v1 as UUIDv1} from 'uuid'
import * as os from 'os'
import * as path from 'path'
import * as fs from 'fs'
import {LaunchOptions, Headers, SetCookie, Browser} from 'puppeteer'
import {LaunchOptions, SetCookie, Browser} from 'puppeteer'
import log from './log'
import {deleteFolderRecursive, sleep, removeEmptyFields} from './utils'
import {deleteFolderRecursive, sleep} from './utils'
const puppeteer = require('puppeteer');
// Defaults kept with a cached session (referenced by SessionsCacheItem.defaults).
interface SessionPageDefaults {
// Optional default headers for the session; merged with the request headers
// by mergeSessionWithParams, where the request values take precedence.
headers?: Headers
}
// One entry of the session cache: a launched browser plus the data kept with it.
export interface SessionsCacheItem {
// Identifier under which this entry is stored in the session cache.
sessionId: string
// Browser instance backing the session.
browser: Browser
// Copied from the puppeteer launch options when the session is created; may be absent.
userDataDir?: string
// Per-session page defaults (see SessionPageDefaults).
defaults: SessionPageDefaults
}
interface SessionsCache {
@@ -27,7 +22,6 @@ interface SessionsCache {
// Options accepted by `create` when opening a new browser session.
export interface SessionCreateOptions {
// Whether the session is meant for a single request; browserRequest sets it
// to true when the incoming request carries no `session` value.
oneTimeSession: boolean
// Cookies taken from the request parameters.
cookies?: SetCookie[],
// Headers taken from the request parameters.
headers?: Headers
// Taken from the request's maxTimeout — presumably milliseconds; confirm against the consumer.
maxTimeout?: number
// Proxy settings taken from the request parameters; still untyped.
proxy?: any// TODO: use interface not any
}
@@ -79,6 +73,8 @@ export async function testWebBrowserInstallation(): Promise<void> {
export async function create(session: string, options: SessionCreateOptions): Promise<SessionsCacheItem> {
const sessionId = session || UUIDv1()
// todo: cookies can't be set in the session, you need to open the page first
// todo: these args are only supported in chrome
let args = [
'--no-sandbox',
@@ -126,18 +122,14 @@ export async function create(session: string, options: SessionCreateOptions): Pr
}
}
if (!browser) { throw Error(`Failed to launch browser 3 times in a row.`) }
if (options.cookies) {
const page = await browser.newPage()
await page.setCookie(...options.cookies)
if (!browser) {
throw Error(`Failed to launch browser 3 times in a row.`)
}
sessionCache[sessionId] = {
sessionId: sessionId,
browser: browser,
userDataDir: puppeteerOptions.userDataDir,
defaults: removeEmptyFields(options) // todo: review
userDataDir: puppeteerOptions.userDataDir
}
return sessionCache[sessionId]

View File

@@ -1,8 +1,8 @@
import {Response, Headers, Page, Browser} from 'puppeteer'
import {Response, Headers, Page} from 'puppeteer'
const Timeout = require('await-timeout');
import log from './log'
import {SessionsCacheItem} from "./sessions";
import {SessionCreateOptions, SessionsCacheItem} from "./sessions";
import {V1Request} from "../controllers/v1";
import cloudflareProvider from '../providers/cloudflare';
@@ -23,22 +23,11 @@ export interface ChallengeResolutionT {
result: ChallengeResolutionResultT
}
// interface OverrideResolvers {
// method?: (request: Request) => HttpMethod,
// postData?: (request: Request) => string,
// headers?: (request: Request) => Headers
// }
//
// type OverridesProps =
// 'method' |
// 'postData' |
// 'headers'
async function resolveChallengeWithTimeout(params: V1Request, page: Page) {
async function resolveChallengeWithTimeout(params: V1Request, session: SessionsCacheItem) {
const maxTimeout = params.maxTimeout || 60000
const timer = new Timeout();
try {
const promise = resolveChallenge(params, page);
const promise = resolveChallenge(params, session);
return await Promise.race([
promise,
timer.set(maxTimeout, `Maximum timeout reached. maxTimeout=${maxTimeout} (ms)`)
@@ -48,167 +37,107 @@ async function resolveChallengeWithTimeout(params: V1Request, page: Page) {
}
}
async function resolveChallenge({ url, proxy, download, returnOnlyCookies, returnRawHtml }: V1Request,
page: Page): Promise<ChallengeResolutionT | void> {
let status = 'ok'
let message = ''
if (proxy) {
log.debug("Apply proxy");
if (proxy.username)
await page.authenticate({ username: proxy.username, password: proxy.password });
}
log.debug(`Navigating to... ${url}`)
let response: Response = await page.goto(url, { waitUntil: 'domcontentloaded' })
log.html(await page.content())
// Detect protection services and solve challenges
async function resolveChallenge(params: V1Request, session: SessionsCacheItem): Promise<ChallengeResolutionT | void> {
try {
response = await cloudflareProvider(url, page, response);
} catch (e) {
status = "error";
message = "Cloudflare " + e.toString();
}
let status = 'ok'
let message = ''
const payload: ChallengeResolutionT = {
status,
message,
result: {
url: page.url(),
status: response.status(),
headers: response.headers(),
response: null,
cookies: await page.cookies(),
userAgent: await page.evaluate(() => navigator.userAgent)
const page: Page = await session.browser.newPage()
// the user-agent is changed just for linux arm build
await page.setUserAgent(sessions.getUserAgent())
// todo: review
if (params.proxy) {
log.debug("Apply proxy");
if (params.proxy.username) {
await page.authenticate({
username: params.proxy.username,
password: params.proxy.password
});
}
}
}
if (returnOnlyCookies) {
payload.result.headers = null;
payload.result.userAgent = null;
} else {
if (download) {
// for some reason we get an error unless we reload the page
// has something to do with a stale buffer and this is the quickest
// fix since I am short on time
response = await page.goto(url, { waitUntil: 'domcontentloaded' })
payload.result.response = (await response.buffer()).toString('base64')
log.debug(`Navigating to... ${params.url}`)
let response: Response = await page.goto(params.url, { waitUntil: 'domcontentloaded' })
// todo: review this functionality
// } else if (returnRawHtml) {
// payload.result.response = await response.text()
// set cookies
if (params.cookies) {
for (const cookie of params.cookies) {
// the other fields in the cookie can cause issues
await page.setCookie({
"name": cookie.name,
"value": cookie.value
})
}
// reload the page
response = await page.goto(params.url, { waitUntil: 'domcontentloaded' })
}
// log html in debug mode
log.html(await page.content())
// Detect protection services and solve challenges
try {
response = await cloudflareProvider(params.url, page, response);
} catch (e) {
status = "error";
message = "Cloudflare " + e.toString();
}
const payload: ChallengeResolutionT = {
status,
message,
result: {
url: page.url(),
status: response.status(),
headers: response.headers(),
response: null,
cookies: await page.cookies(),
userAgent: sessions.getUserAgent()
}
}
if (params.returnOnlyCookies) {
payload.result.headers = null;
payload.result.userAgent = null;
} else {
payload.result.response = await page.content()
}
// make sure the page is closed, because if it isn't an error will be thrown:
// when a user uses a temporary session, the browser may be quit before
// the page is properly closed.
await page.close()
return payload
} catch (e) {
log.error("Unexpected error: " + e);
throw e;
}
// Add final url in result
payload.result.url = page.url();
// make sure the page is closed, because if it isn't an error will be thrown:
// when a user uses a temporary session, the browser may be quit before
// the page is properly closed.
await page.close()
return payload
}
/**
 * Build the effective request parameters for a session.
 *
 * Session defaults are used as the base and every request parameter overrides
 * the default of the same name. `headers` are the exception: the two header
 * maps are combined key by key, with the request's values taking precedence.
 *
 * @param session cached session item; only its `defaults` are read
 * @param params  parameters of the incoming request
 * @returns a new object; neither input is modified
 */
function mergeSessionWithParams({ defaults }: SessionsCacheItem, params: V1Request): V1Request {
  const merged = { ...defaults, ...params }

  // custom merging logic: combine the header maps instead of replacing one with the other
  const sessionHeaders = defaults.headers || {}
  const requestHeaders = params.headers || {}
  merged.headers = { ...sessionHeaders, ...requestHeaders }

  return merged
}
/**
 * Open a new page in the given browser and apply the session user-agent.
 *
 * Custom method / postData / headers / cookies handling is currently disabled:
 * the previous implementation is kept below, commented out, until it is redone.
 *
 * @param params  request parameters (not read while the overrides are disabled)
 * @param browser browser in which the page is opened
 * @returns the newly created page
 */
async function setupPage(params: V1Request, browser: Browser): Promise<Page> {
  const page = await browser.newPage()

  // the user-agent is changed just for linux arm build
  await page.setUserAgent(sessions.getUserAgent())

  // todo: redo all functionality
  // const { method, postData, headers, cookies } = params
  //
  // let overrideResolvers: OverrideResolvers = {}
  //
  // if (method !== 'GET') {
  //   log.debug(`Setting method to ${method}`)
  //   overrideResolvers.method = request => method
  // }
  //
  // if (postData) {
  //   log.debug(`Setting body data to ${postData}`)
  //   overrideResolvers.postData = request => postData
  // }
  //
  // if (headers) {
  //   log.debug(`Adding custom headers: ${JSON.stringify(headers)}`)
  //   overrideResolvers.headers = request => Object.assign(request.headers(), headers)
  // }
  //
  // if (cookies) {
  //   log.debug(`Setting custom cookies: ${JSON.stringify(cookies)}`)
  //   await page.setCookie(...cookies)
  // }
  //
  // // if any keys have been set on the object
  // if (Object.keys(overrideResolvers).length > 0) {
  //   let callbackRunOnce = false
  //   const callback = (request: Request) => {
  //
  //     // avoid loading resources to speed up page load
  //     if(request.resourceType() == 'stylesheet' || request.resourceType() == 'font' || request.resourceType() == 'image') {
  //       request.abort()
  //       return
  //     }
  //
  //     if (callbackRunOnce || !request.isNavigationRequest()) {
  //       request.continue()
  //       return
  //     }
  //
  //     callbackRunOnce = true
  //     const overrides: Overrides = {}
  //
  //     Object.keys(overrideResolvers).forEach((key: OverridesProps) => {
  //       // @ts-ignore
  //       overrides[key] = overrideResolvers[key](request)
  //     });
  //
  //     log.debug(`Overrides: ${JSON.stringify(overrides)}`)
  //     request.continue(overrides)
  //   }
  //
  //   await page.setRequestInterception(true)
  //   page.on('request', callback)
  // }

  return page
}
export async function browserRequest(params: V1Request): Promise<ChallengeResolutionT> {
const oneTimeSession = params.session === undefined;
const options: SessionCreateOptions = {
oneTimeSession: oneTimeSession,
cookies: params.cookies,
maxTimeout: params.maxTimeout,
proxy: params.proxy
}
const session: SessionsCacheItem = oneTimeSession
? await sessions.create(null, {
oneTimeSession: true
})
? await sessions.create(null, options)
: sessions.get(params.session)
if (!session) {
throw Error('This session does not exist. Use \'list_sessions\' to see all the existing sessions.')
}
params = mergeSessionWithParams(session, params)
try {
const page = await setupPage(params, session.browser)
return await resolveChallengeWithTimeout(params, page)
// const page = await setupPage(params, session.browser)
return await resolveChallengeWithTimeout(params, session)
} catch (error) {
throw Error("Unable to process browser request. Error: " + error)
} finally {

View File

@@ -19,13 +19,3 @@ export function deleteFolderRecursive(path: string) {
fs.rmdirSync(path)
}
}
/**
 * Return a shallow copy of `o` without the keys whose value is `undefined`.
 *
 * Only `undefined` counts as empty: `null`, `0`, `''` and `false` are kept.
 * Keys are enumerated with `for..in`, so inherited enumerable keys are copied
 * as well. The input object is not modified.
 */
export const removeEmptyFields = (o: Record<string, any>): typeof o => {
  const kept: typeof o = {}
  for (const key in o) {
    const value = o[key]
    if (value === undefined) {
      continue
    }
    kept[key] = value
  }
  return kept
}

View File

@@ -7,6 +7,7 @@ import {testWebBrowserInstallation} from "../services/sessions";
const request = require("supertest");
const app = require("../app");
const version: string = require('../../package.json').version
const googleUrl = "https://www.google.com";
const cfUrl = "https://pirateiro.com/torrents/?search=s";
const cfCaptchaUrl = "https://idope.se"
@@ -136,6 +137,60 @@ describe("Test '/v1' path", () => {
expect(apiResponse.solution.url).toContain(cfCaptchaUrl)
});
// Cookies sent in the request payload must come back in the solution's cookie list.
test("Cmd 'request.get' should return OK with 'cookies' param", async () => {
  const payload = {
    "cmd": "request.get",
    "url": googleUrl,
    "cookies": [
      { "name": "testcookie1", "value": "testvalue1" },
      { "name": "testcookie2", "value": "testvalue2" }
    ]
  }

  const response: Response = await request(app).post("/v1").send(payload);
  expect(response.statusCode).toBe(200);

  const apiResponse: V1ResponseSolution = response.body;
  expect(apiResponse.status).toBe("ok");

  const solution = apiResponse.solution;
  expect(solution.url).toContain(googleUrl)
  expect(Object.keys(solution.cookies).length).toBeGreaterThan(1)

  // look up a returned cookie by name and read its value
  const returnedCookies = solution.cookies as any[]
  const valueOf = (wanted: string): string =>
    returnedCookies.filter((cookie) => cookie.name == wanted)[0].value

  expect(valueOf("testcookie1")).toBe("testvalue1")
  expect(valueOf("testcookie2")).toBe("testvalue2")
});
// With 'returnOnlyCookies' the solution keeps url, status and cookies,
// while headers, response body and user-agent are returned as null.
test("Cmd 'request.get' should return OK with 'returnOnlyCookies' param", async () => {
  const response: Response = await request(app).post("/v1").send({
    "cmd": "request.get",
    "url": googleUrl,
    "returnOnlyCookies": true
  });
  expect(response.statusCode).toBe(200);

  const { solution } = response.body as V1ResponseSolution;
  expect(solution.url).toContain(googleUrl)
  expect(solution.status).toBe(200);
  expect(solution.headers).toBe(null)
  expect(solution.response).toBe(null)
  expect(Object.keys(solution.cookies).length).toBeGreaterThan(0)
  expect(solution.userAgent).toBe(null)
});
test("Cmd 'request.get' should return timeout", async () => {
const payload = {
"cmd": "request.get",
@@ -306,4 +361,5 @@ describe("Test '/v1' path", () => {
expect(cfCookie2.length).toBeGreaterThan(30)
expect(cfCookie2).toBe(cfCookie)
});
});