mirror of
https://github.com/FlareSolverr/FlareSolverr.git
synced 2026-04-28 18:45:36 +02:00
Refactor the app to use Express server and Jest for tests
This commit is contained in:
38
src/services/log.ts
Normal file
38
src/services/log.ts
Normal file
@@ -0,0 +1,38 @@
|
||||
// Counter bumped by `incRequests()`; once it is above zero the log prefix carries a ` REQ-<n>` tag.
let requests = 0

// True only when the LOG_HTML environment variable is exactly the string 'true';
// gates the `html()` helper of the exported logger.
const LOG_HTML = process.env.LOG_HTML === 'true';
|
||||
|
||||
/**
 * Formats a date as `YYYY-MM-DDTHH:mm:ss±HH:MM` using the local time zone.
 *
 * Unlike `Date.prototype.toISOString()`, which always reports UTC, the output carries the
 * local wall-clock time together with its UTC offset. Milliseconds are not included.
 *
 * @param date The instant to format.
 * @returns The formatted timestamp; the year is zero-padded to at least four digits.
 */
function toIsoString(date: Date): string {
  // getTimezoneOffset() is "minutes behind UTC", so negate it to get the signed offset east of UTC.
  const offsetMinutes = -date.getTimezoneOffset()
  const offsetSign = offsetMinutes >= 0 ? '+' : '-'

  // Zero-pads the integer part of |num|; the sign is handled by the callers.
  const pad = (num: number, width = 2): string =>
    String(Math.floor(Math.abs(num))).padStart(width, '0')

  const year = date.getFullYear()

  return (year < 0 ? '-' : '') + pad(year, 4) +
    '-' + pad(date.getMonth() + 1) +
    '-' + pad(date.getDate()) +
    'T' + pad(date.getHours()) +
    ':' + pad(date.getMinutes()) +
    ':' + pad(date.getSeconds()) +
    offsetSign + pad(offsetMinutes / 60) +
    ':' + pad(offsetMinutes % 60)
}
|
||||
|
||||
// Level-filtered console logger. LOG_LEVEL picks the threshold ('info' when unset) and every
// line is prefixed with a local timestamp, the upper-cased level and, once requests have been
// counted, a ` REQ-<n>` tag.
const levelLogger = require('console-log-level')({
  level: process.env.LOG_LEVEL || 'info',
  prefix(level: string) {
    let req = '';
    if (requests > 0) {
      req = ` REQ-${requests}`;
    }
    return `${toIsoString(new Date())} ${level.toUpperCase()}${req}`
  }
})

/**
 * Application logger: the console-log-level methods plus two helpers.
 *
 * - `incRequests()` bumps the request counter shown in the prefix.
 * - `html(html)` writes the markup at debug level, but only when LOG_HTML is enabled. It relies
 *   on `this`, so it must be invoked as a method of this object.
 */
export default {
  incRequests: () => {
    requests += 1
  },
  html(html: string) {
    if (!LOG_HTML) {
      return
    }
    this.debug(html)
  },
  ...levelLogger
}
|
||||
177
src/services/sessions.ts
Normal file
177
src/services/sessions.ts
Normal file
@@ -0,0 +1,177 @@
|
||||
import {v1 as UUIDv1} from 'uuid'
|
||||
import * as os from 'os'
|
||||
import * as path from 'path'
|
||||
import * as fs from 'fs'
|
||||
import {LaunchOptions, Headers, SetCookie, Browser} from 'puppeteer'
|
||||
|
||||
import log from './log'
|
||||
import {deleteFolderRecursive, sleep, removeEmptyFields} from './utils'
|
||||
|
||||
const puppeteer = require('puppeteer');
|
||||
|
||||
/** Values remembered for a session and stored in its cache entry as `defaults`. */
interface SessionPageDefaults {
  /** Headers kept as session-level defaults. */
  headers?: Headers
}
|
||||
|
||||
/** One entry of the in-memory session cache. */
export interface SessionsCacheItem {
  /** Id the entry is stored under. */
  sessionId: string
  /** Browser instance launched for this session. */
  browser: Browser
  /** Profile directory passed to the browser; only set when one was prepared for the session. */
  userDataDir?: string
  /** Defaults captured from the creation options. */
  defaults: SessionPageDefaults
}
|
||||
|
||||
/** Dictionary of cached sessions, keyed by session id. */
interface SessionsCache {
  [key: string]: SessionsCacheItem
}
|
||||
|
||||
/** Options accepted when creating a session. */
export interface SessionCreateOptions {
  /** When true no profile directory (userDataDir) is prepared for the browser. */
  oneTimeSession: boolean
  /** Cookies set on a freshly opened page right after the browser is launched. */
  cookies?: SetCookie[],
  /** Headers to remember for the session. */
  headers?: Headers
  /** NOTE(review): presumably a timeout in milliseconds — not read by the session code itself. */
  maxTimeout?: number
  /** Proxy settings; `proxy.url` is forwarded to the browser as `--proxy-server`. */
  proxy?: any// TODO: use interface not any
}
|
||||
|
||||
// Live sessions, keyed by session id; entries are added by create() and removed by destroy().
const sessionCache: SessionsCache = {}

// User-Agent detected by testWebBrowserInstallation(); stays unset until that function has run.
let webBrowserUserAgent: string;
|
||||
|
||||
|
||||
/**
 * Maps a session id to its browser profile directory inside the OS temp dir.
 *
 * The id is percent-encoded before it is used as a file name, so ids containing path
 * separators (for example `x/../../..`) always stay a single path segment below the temp
 * directory. The returned folder is later deleted recursively, so it must never resolve
 * somewhere else. Ids made of letters, digits, `-`, `_` and `.` (such as UUIDs) are unchanged.
 *
 * @param id Session id.
 * @returns Absolute path `<tmpdir>/puppeteer_profile_<encoded id>`.
 */
function userDataDirFromId(id: string): string {
  return path.join(os.tmpdir(), `puppeteer_profile_${encodeURIComponent(id)}`)
}
|
||||
|
||||
/**
 * Ensures the profile directory for a session exists and returns its path.
 *
 * @param id Session id.
 * @returns Path of the (possibly just created) profile directory.
 */
function prepareBrowserProfile(id: string): string {
  // TODO: maybe pass SessionCreateOptions for loading later?
  const userDataDir = userDataDirFromId(id)

  // A recursive mkdir succeeds when the directory already exists, so no separate
  // existence check is needed (and none can race with another process).
  fs.mkdirSync(userDataDir, { recursive: true })

  return userDataDir
}
|
||||
|
||||
/**
 * Returns the User-Agent string recorded by testWebBrowserInstallation().
 * The value is undefined until that function has completed.
 */
export function getUserAgent() {
  const userAgent = webBrowserUserAgent
  return userAgent
}
|
||||
|
||||
/**
 * Smoke-tests the browser installation and records the browser's User-Agent.
 *
 * Launches a one-time session, loads https://www.google.com, stores `navigator.userAgent`
 * (rewriting a "Linux arm…" platform token to "Linux x86_64"), and destroys the session
 * again. The session is destroyed even when the navigation or evaluation fails.
 *
 * @throws Whatever session creation, navigation, evaluation or session destruction throws.
 */
export async function testWebBrowserInstallation(): Promise<void> {
  log.info("Testing web browser installation...")
  const session = await create(null, {
    oneTimeSession: true
  })

  try {
    const page = await session.browser.newPage()
    await page.goto("https://www.google.com")
    const detectedUserAgent: string = await page.evaluate(() => navigator.userAgent)

    // replace Linux ARM user-agent because it's detected
    // (the case-insensitive regex is a no-op when the token is absent)
    webBrowserUserAgent = detectedUserAgent.replace(/linux arm[^;]+;/i, 'Linux x86_64;')

    log.info("FlareSolverr User-Agent: " + webBrowserUserAgent)
    await page.close()
  } finally {
    // always release the browser, otherwise a failed check leaves it running
    await destroy(session.sessionId)
  }

  log.info("Test successful")
}
|
||||
|
||||
/**
 * Launches a browser for a session and registers it in the session cache.
 *
 * @param session Session id to register under; when falsy a UUID v1 is generated.
 * @param options Creation options: `proxy.url` becomes `--proxy-server`, `oneTimeSession`
 *                decides whether a profile directory is prepared, `cookies` are set on a
 *                new page after launch. The options (minus undefined fields) are stored as
 *                the session defaults.
 * @returns The cache entry stored for the session.
 * @throws Error when the browser process failed to launch on every attempt. Launch errors
 *         other than 'Failed to launch the browser process!' and cookie errors are rethrown.
 */
export async function create(session: string, options: SessionCreateOptions): Promise<SessionsCacheItem> {
  const sessionId = session || UUIDv1()
  // NOTE(review): an existing cache entry with the same id is overwritten without closing its browser.

  // todo: these args are only supported in chrome
  const args = [
    '--no-sandbox',
    '--disable-setuid-sandbox',
    '--disable-dev-shm-usage' // issue #45
  ];
  if (options.proxy && options.proxy.url) {
    args.push(`--proxy-server=${options.proxy.url}`);
  }

  const puppeteerOptions: LaunchOptions = {
    product: 'firefox',
    headless: process.env.HEADLESS !== 'false',
    args
  }

  if (!options.oneTimeSession) {
    log.debug('Creating userDataDir for session.')
    puppeteerOptions.userDataDir = prepareBrowserProfile(sessionId)
  }

  // todo: fix native package with firefox
  // if we are running inside executable binary, change browser path
  if (typeof (process as any).pkg !== 'undefined') {
    const exe = process.platform === "win32" ? 'chrome.exe' : 'chrome';
    puppeteerOptions.executablePath = path.join(path.dirname(process.execPath), 'chrome', exe)
  }

  log.debug('Launching web browser...')

  // TODO: maybe access env variable?
  // TODO: sometimes browser instances are created and not connected to correctly.
  //       how do we handle/quit those instances inside Docker?
  const maxLaunchTries = 3
  let browser: Browser;

  // exactly maxLaunchTries attempts, matching the error message below
  for (let attempt = 1; attempt <= maxLaunchTries; attempt++) {
    try {
      browser = await puppeteer.launch(puppeteerOptions)
      break
    } catch (e) {
      // only the generic launch failure is worth retrying
      if (e.message !== 'Failed to launch the browser process!')
        throw e
      log.warn('Failed to open browser, trying again...')
    }
  }

  if (!browser) { throw Error(`Failed to launch browser ${maxLaunchTries} times in a row.`) }

  if (options.cookies) {
    try {
      const page = await browser.newPage()
      await page.setCookie(...options.cookies)
    } catch (e) {
      // the session is not registered yet, so nothing else would ever close this browser
      await browser.close().catch((): void => undefined)
      throw e
    }
  }

  sessionCache[sessionId] = {
    sessionId: sessionId,
    browser: browser,
    userDataDir: puppeteerOptions.userDataDir,
    defaults: removeEmptyFields(options) // todo: review
  }

  return sessionCache[sessionId]
}
|
||||
|
||||
/** Returns the ids of all sessions currently held in the cache. */
export function list(): string[] {
  const sessionIds = Object.keys(sessionCache)
  return sessionIds
}
|
||||
|
||||
// todo: create a sessions.close that doesn't rm the userDataDir
|
||||
|
||||
/**
 * Closes a session's browser, removes the session from the cache and deletes its profile
 * directory when it has one.
 *
 * @param id Session id.
 * @returns true when the session existed and was destroyed; false when the id is empty,
 *          unknown, or the entry has no browser.
 * @throws Error when the profile directory cannot be deleted; errors from closing the
 *         browser propagate unchanged.
 */
export async function destroy(id: string): Promise<boolean>{
  // go through Object.prototype so the check cannot be shadowed by a session id
  if (!id || !Object.prototype.hasOwnProperty.call(sessionCache, id)) {
    return false
  }

  const { browser, userDataDir } = sessionCache[id]
  if (!browser) {
    return false
  }

  await browser.close()
  delete sessionCache[id]

  if (userDataDir) {
    try {
      // for some reason this keeps an error from being thrown in Windows, figures
      await sleep(5000)
      // delete exactly the directory the browser was launched with
      deleteFolderRecursive(userDataDir)
    } catch (e) {
      log.error(e)
      throw Error(`Error deleting browser session folder. ${e.message}`)
    }
  }

  return true
}
|
||||
|
||||
/**
 * Looks up a cached session.
 *
 * @param id Session id.
 * @returns The cache entry, or undefined when no session is registered under that id.
 *          Only own entries count, so ids like `constructor` or `toString` do not
 *          resolve to members inherited from Object.prototype.
 */
export function get(id: string): SessionsCacheItem {
  return Object.prototype.hasOwnProperty.call(sessionCache, id) ? sessionCache[id] : undefined
}
|
||||
219
src/services/solver.ts
Normal file
219
src/services/solver.ts
Normal file
@@ -0,0 +1,219 @@
|
||||
import {Response, Headers, Page, Browser} from 'puppeteer'
|
||||
const Timeout = require('await-timeout');
|
||||
|
||||
import log from './log'
|
||||
import {SessionsCacheItem} from "./sessions";
|
||||
import {V1Request} from "../controllers/v1";
|
||||
import cloudflareProvider from '../providers/cloudflare';
|
||||
|
||||
const sessions = require('./sessions')
|
||||
|
||||
/** Data collected from the page after the challenge handling has run. */
export interface ChallengeResolutionResultT {
  /** Final URL of the page. */
  url: string
  /** Status code of the last navigation response. */
  status: number,
  /** Headers of the last navigation response; cleared when only cookies were requested. */
  headers?: Headers,
  /** Page HTML, or the base64-encoded response body for downloads; null when only cookies were requested. */
  response: string,
  /** Cookies of the page. */
  cookies: object[]
  /** `navigator.userAgent` of the page; cleared when only cookies were requested. */
  userAgent: string
}
|
||||
|
||||
/** Outcome of a browser request. */
export interface ChallengeResolutionT {
  /** 'ok', or 'error' when the Cloudflare provider threw. */
  status?: string
  /** Empty on success, otherwise a description of the provider error. */
  message: string
  /** Data gathered from the page. */
  result: ChallengeResolutionResultT
}
|
||||
|
||||
// interface OverrideResolvers {
|
||||
// method?: (request: Request) => HttpMethod,
|
||||
// postData?: (request: Request) => string,
|
||||
// headers?: (request: Request) => Headers
|
||||
// }
|
||||
//
|
||||
// type OverridesProps =
|
||||
// 'method' |
|
||||
// 'postData' |
|
||||
// 'headers'
|
||||
|
||||
/**
 * Runs resolveChallenge() under a deadline.
 *
 * The deadline is `params.maxTimeout`, or 60000 ms when that is missing or 0. Whichever of
 * the challenge and the timer settles first decides the outcome; the timer is always cleared.
 *
 * @param params Request parameters.
 * @param page Page the request is executed in.
 * @returns Whatever resolveChallenge() resolves with, if it wins the race.
 */
async function resolveChallengeWithTimeout(params: V1Request, page: Page) {
  const timeoutMs = params.maxTimeout || 60000
  const watchdog = new Timeout();

  try {
    const challenge = resolveChallenge(params, page);
    const deadline = watchdog.set(timeoutMs, `Maximum timeout reached. maxTimeout=${timeoutMs} (ms)`)
    return await Promise.race([challenge, deadline]);
  } finally {
    watchdog.clear();
  }
}
|
||||
|
||||
/**
 * Navigates the page to `url`, lets the Cloudflare provider handle any challenge and
 * collects the result.
 *
 * A provider failure does not abort the request: it is reported through `status: 'error'`
 * and `message`, and the page data is still returned. The page is closed before returning,
 * and also (best effort) when any other step throws, so failed requests do not leave tabs
 * behind in long-lived sessions.
 *
 * @param request Request parameters; `proxy.username`/`proxy.password` are used for page
 *                authentication, `download` returns the body base64-encoded, and
 *                `returnOnlyCookies` clears headers, user agent and response.
 * @param page Page to run the request in; it is closed by this function.
 * @returns The resolution payload.
 */
async function resolveChallenge({ url, proxy, download, returnOnlyCookies }: V1Request,
                                page: Page): Promise<ChallengeResolutionT | void> {

  let payload: ChallengeResolutionT

  try {
    let status = 'ok'
    let message = ''

    if (proxy) {
      log.debug("Apply proxy");
      if (proxy.username)
        await page.authenticate({ username: proxy.username, password: proxy.password });
    }

    log.debug(`Navigating to... ${url}`)
    let response: Response = await page.goto(url, { waitUntil: 'domcontentloaded' })
    log.html(await page.content())

    // Detect protection services and solve challenges
    try {
      response = await cloudflareProvider(url, page, response);
    } catch (e) {
      status = "error";
      message = "Cloudflare " + e.toString();
    }

    payload = {
      status,
      message,
      result: {
        url: page.url(),
        status: response.status(),
        headers: response.headers(),
        response: null,
        cookies: await page.cookies(),
        userAgent: await page.evaluate(() => navigator.userAgent)
      }
    }

    if (returnOnlyCookies) {
      payload.result.headers = null;
      payload.result.userAgent = null;
    } else {
      if (download) {
        // for some reason we get an error unless we reload the page
        // has something to do with a stale buffer and this is the quickest
        // fix since I am short on time
        response = await page.goto(url, { waitUntil: 'domcontentloaded' })
        payload.result.response = (await response.buffer()).toString('base64')

        // todo: review this functionality (needs `returnRawHtml` from the request again)
        // } else if (returnRawHtml) {
        //   payload.result.response = await response.text()
      } else {
        payload.result.response = await page.content()
      }
    }

    // Add final url in result
    payload.result.url = page.url();
  } catch (e) {
    // best-effort cleanup: do not let a close failure hide the original error
    try {
      await page.close()
    } catch (closeError) {
      log.debug('Unable to close the page after a failed request: ' + closeError)
    }
    throw e
  }

  // make sure the page is closed, because if it isn't an error will be thrown:
  // when a user uses a temporary session, the browser may be quit before
  // the page is properly closed.
  await page.close()

  return payload
}
|
||||
|
||||
/**
 * Combines a session's stored defaults with the parameters of one request.
 *
 * Request values win over session defaults. Headers are merged key by key (request
 * headers win) and the result always carries a `headers` object, empty when neither
 * side defines any.
 *
 * @param session Cached session; only its `defaults` are read.
 * @param params Parameters of the current request.
 * @returns A new parameters object; neither input is modified.
 */
function mergeSessionWithParams({ defaults }: SessionsCacheItem, params: V1Request): V1Request {
  const copy = { ...defaults, ...params }

  // custom merging logic: spreading undefined/null contributes nothing
  copy.headers = { ...defaults.headers, ...params.headers }

  return copy
}
|
||||
|
||||
/**
 * Opens a new page in the browser and applies the detected User-Agent to it.
 *
 * Request overrides (method, post data, headers, cookies) are currently disabled; the
 * former implementation is kept below as commented-out code until it is redone.
 *
 * @param params Request parameters; not read while the overrides are disabled.
 * @param browser Browser to open the page in.
 * @returns The new page.
 */
async function setupPage(params: V1Request, browser: Browser): Promise<Page> {
  const page = await browser.newPage()

  // the user-agent is changed just for linux arm build
  // (skipped when no user-agent has been detected yet)
  const userAgent = sessions.getUserAgent()
  if (userAgent) {
    await page.setUserAgent(userAgent)
  }

  // todo: redo all functionality
  // (the code below expects: const { method, postData, headers, cookies } = params)

  // let overrideResolvers: OverrideResolvers = {}
  //
  // if (method !== 'GET') {
  //   log.debug(`Setting method to ${method}`)
  //   overrideResolvers.method = request => method
  // }
  //
  // if (postData) {
  //   log.debug(`Setting body data to ${postData}`)
  //   overrideResolvers.postData = request => postData
  // }
  //
  // if (headers) {
  //   log.debug(`Adding custom headers: ${JSON.stringify(headers)}`)
  //   overrideResolvers.headers = request => Object.assign(request.headers(), headers)
  // }
  //
  // if (cookies) {
  //   log.debug(`Setting custom cookies: ${JSON.stringify(cookies)}`)
  //   await page.setCookie(...cookies)
  // }
  //
  // // if any keys have been set on the object
  // if (Object.keys(overrideResolvers).length > 0) {
  //   let callbackRunOnce = false
  //   const callback = (request: Request) => {
  //
  //     // avoid loading resources to speed up page load
  //     if(request.resourceType() == 'stylesheet' || request.resourceType() == 'font' || request.resourceType() == 'image') {
  //       request.abort()
  //       return
  //     }
  //
  //     if (callbackRunOnce || !request.isNavigationRequest()) {
  //       request.continue()
  //       return
  //     }
  //
  //     callbackRunOnce = true
  //     const overrides: Overrides = {}
  //
  //     Object.keys(overrideResolvers).forEach((key: OverridesProps) => {
  //       // @ts-ignore
  //       overrides[key] = overrideResolvers[key](request)
  //     });
  //
  //     log.debug(`Overrides: ${JSON.stringify(overrides)}`)
  //     request.continue(overrides)
  //   }
  //
  //   await page.setRequestInterception(true)
  //   page.on('request', callback)
  // }

  return page
}
|
||||
|
||||
/**
 * Executes a request in a browser page and returns the challenge resolution.
 *
 * When `params.session` is undefined a one-time session is created for the request and
 * destroyed afterwards; otherwise the named session is looked up and left running.
 *
 * @param params Request parameters; session defaults are merged underneath them.
 * @returns The challenge resolution for the request.
 * @throws Error when the named session does not exist, or (wrapped as "Unable to process
 *         browser request. …") when page setup or the challenge resolution fails.
 */
export async function browserRequest(params: V1Request): Promise<ChallengeResolutionT> {
  const oneTimeSession = params.session === undefined;

  let session: SessionsCacheItem
  if (oneTimeSession) {
    session = await sessions.create(null, { oneTimeSession: true })
  } else {
    session = sessions.get(params.session)
  }

  if (!session) {
    throw Error("This session does not exist. Use 'list_sessions' to see all the existing sessions.")
  }

  params = mergeSessionWithParams(session, params)

  try {
    const page = await setupPage(params, session.browser)
    const resolution = await resolveChallengeWithTimeout(params, page)
    return resolution
  } catch (error) {
    throw Error("Unable to process browser request. Error: " + error)
  } finally {
    // temporary sessions live only for the duration of one request
    if (oneTimeSession) {
      await sessions.destroy(session.sessionId)
    }
  }
}
|
||||
31
src/services/utils.ts
Normal file
31
src/services/utils.ts
Normal file
@@ -0,0 +1,31 @@
|
||||
import * as fs from 'fs'
|
||||
import * as Path from 'path'
|
||||
import { promisify } from 'util'
|
||||
|
||||
/** Promise-based `setTimeout`: resolves after the given delay in milliseconds, e.g. `await sleep(5000)`. */
export const sleep = promisify(setTimeout)
|
||||
|
||||
// recursive fs.rmdir needs node version 12:
|
||||
// https://github.com/ngosang/FlareSolverr/issues/5#issuecomment-655572712
|
||||
/**
 * Synchronously deletes a directory together with everything inside it.
 *
 * Does nothing when the path does not exist. Sub-directories are removed depth-first;
 * every other entry (files, and symbolic links themselves rather than their targets)
 * is unlinked.
 *
 * @param path Directory to delete.
 * @throws The underlying fs error when an entry cannot be read or removed.
 */
export function deleteFolderRecursive(path: string) {
  if (!fs.existsSync(path)) {
    return
  }

  // withFileTypes returns the entry type with the listing, saving one lstat call per entry
  for (const entry of fs.readdirSync(path, { withFileTypes: true })) {
    const curPath = Path.join(path, entry.name)
    if (entry.isDirectory()) { // recurse
      deleteFolderRecursive(curPath)
    } else { // delete file
      fs.unlinkSync(curPath)
    }
  }

  fs.rmdirSync(path)
}
|
||||
|
||||
/**
 * Returns a shallow copy of an object without the properties whose value is `undefined`.
 *
 * Only the object's own enumerable properties are copied; `null`, `0`, `''` and `false`
 * are kept. The input is not modified. A null or undefined input yields an empty object.
 *
 * @param o Object to copy.
 * @returns New object holding every own property of `o` that is not `undefined`.
 */
export const removeEmptyFields = (o: Record<string, any>): typeof o => {
  const r: typeof o = {}
  for (const [key, value] of Object.entries(o || {})) {
    if (value !== undefined) {
      r[key] = value
    }
  }
  return r
}
|
||||
Reference in New Issue
Block a user