Refactor the app to use Express server and Jest for tests

This commit is contained in:
ngosang
2021-10-17 18:00:19 +02:00
parent 0459f2642d
commit 744de4d158
20 changed files with 9338 additions and 663 deletions

38
src/services/log.ts Normal file
View File

@@ -0,0 +1,38 @@
// Number of requests counted so far through incRequests(); the log prefix shows it once it is above zero.
let requests = 0;
// HTML dumps are logged only when the LOG_HTML environment variable is exactly the string 'true'.
const LOG_HTML = process.env.LOG_HTML === 'true';
/**
 * Formats a date in local time with an explicit UTC offset,
 * e.g. `2021-10-17T18:00:19+02:00`. Date.toISOString() always reports UTC,
 * which is why this helper exists. Milliseconds are not part of the output.
 */
function toIsoString(date: Date) {
  // getTimezoneOffset() is positive west of UTC, so flip the sign
  const offsetMinutes = -date.getTimezoneOffset();
  const sign = offsetMinutes >= 0 ? '+' : '-';

  // absolute value, truncated, left-padded to two digits
  const twoDigits = (value: number): string => {
    const whole = Math.floor(Math.abs(value));
    return whole < 10 ? '0' + whole : '' + whole;
  };

  const datePart = [
    date.getFullYear(),
    twoDigits(date.getMonth() + 1),
    twoDigits(date.getDate()),
  ].join('-');
  const timePart = [date.getHours(), date.getMinutes(), date.getSeconds()]
    .map((part) => twoDigits(part))
    .join(':');
  const offsetPart = `${twoDigits(offsetMinutes / 60)}:${twoDigits(offsetMinutes % 60)}`;

  return `${datePart}T${timePart}${sign}${offsetPart}`;
}
// Underlying console-log-level logger. The minimum level comes from LOG_LEVEL ('info' when unset);
// every line is prefixed with a local timestamp, the upper-cased level and, once at least one
// request has been counted, a ` REQ-<n>` tag.
const consoleLogger = require('console-log-level')({
  level: process.env.LOG_LEVEL || 'info',
  prefix: (level: string) => {
    const requestTag = requests > 0 ? ` REQ-${requests}` : '';
    return `${toIsoString(new Date())} ${level.toUpperCase()}${requestTag}`;
  },
});

/**
 * Application logger: the console-log-level methods plus two helpers.
 */
export default {
  /** Increments the request counter shown in the log prefix. */
  incRequests(): void {
    requests++;
  },
  /** Logs an HTML document at debug level, but only when LOG_HTML is enabled. */
  html(html: string) {
    if (LOG_HTML) {
      this.debug(html);
    }
  },
  ...consoleLogger,
};

177
src/services/sessions.ts Normal file
View File

@@ -0,0 +1,177 @@
import {v1 as UUIDv1} from 'uuid'
import * as os from 'os'
import * as path from 'path'
import * as fs from 'fs'
import {LaunchOptions, Headers, SetCookie, Browser} from 'puppeteer'
import log from './log'
import {deleteFolderRecursive, sleep, removeEmptyFields} from './utils'
const puppeteer = require('puppeteer');
// Defaults remembered for a session. Only `headers` is declared here.
interface SessionPageDefaults {
// Headers kept as session defaults.
headers?: Headers
}
// One entry of the in-memory session cache, as built by create().
export interface SessionsCacheItem {
// Id under which the entry is stored in the cache.
sessionId: string
// Browser instance launched for this session.
browser: Browser
// Profile directory; create() only sets it when the session is not a one-time session.
userDataDir?: string
// Filled by create() with removeEmptyFields(options).
defaults: SessionPageDefaults
}
// Map from session id to cache entry.
interface SessionsCache {
[key: string]: SessionsCacheItem
}
// Options accepted by create().
export interface SessionCreateOptions {
// When true, create() does not prepare a userDataDir for the browser.
oneTimeSession: boolean
// When present, create() passes these to page.setCookie() on a new page right after launch.
cookies?: SetCookie[],
// Not read directly by create(); stored with the other options in the entry's `defaults`.
headers?: Headers
// Not read directly by create(); stored with the other options in the entry's `defaults`.
maxTimeout?: number
// create() reads `proxy.url` and turns it into a `--proxy-server=` launch argument.
proxy?: any// TODO: use interface not any
}
// All live sessions of this process, keyed by session id.
const sessionCache: SessionsCache = {}
// Assigned by testWebBrowserInstallation() and returned by getUserAgent().
let webBrowserUserAgent: string;
/**
 * Returns the browser profile directory for a session id:
 * `<os.tmpdir()>/puppeteer_profile_<id>`.
 *
 * The id becomes part of a path that is created on session start and recursively
 * deleted on session destroy, so ids containing a path separator are rejected.
 * Without this check an id such as `../../x` would resolve to a directory outside
 * the temp folder.
 *
 * @param id session id
 * @returns path of the profile directory
 * @throws Error when the id contains `/` or `\`
 */
function userDataDirFromId(id: string): string {
  if (/[\\/]/.test(id)) {
    throw Error(`Invalid session id '${id}'. Path separators are not allowed.`)
  }
  return path.join(os.tmpdir(), `/puppeteer_profile_${id}`)
}
/**
 * Makes sure the profile directory of the given session exists and returns its path.
 */
function prepareBrowserProfile(id: string): string {
  // TODO: maybe pass SessionCreateOptions for loading later?
  const profileDir = userDataDirFromId(id)
  const alreadyExists = fs.existsSync(profileDir)
  if (alreadyExists) {
    return profileDir
  }
  fs.mkdirSync(profileDir, { recursive: true })
  return profileDir
}
/**
 * Returns the user-agent stored by testWebBrowserInstallation().
 * The value is undefined until that function has assigned it.
 */
export function getUserAgent(): string {
  const userAgent = webBrowserUserAgent
  return userAgent
}
/**
 * Smoke test for the browser installation: launches a one-time session, opens
 * https://www.google.com, and stores the browser's user-agent for getUserAgent().
 *
 * The temporary session is destroyed even when navigation or evaluation fails,
 * so a failed test does not leave a browser process behind.
 *
 * @throws whatever create(), the page operations or destroy() throw
 */
export async function testWebBrowserInstallation(): Promise<void> {
  log.info("Testing web browser installation...")
  const session = await create(null, {
    oneTimeSession: true
  })
  try {
    const page = await session.browser.newPage()
    await page.goto("https://www.google.com")
    const detectedUserAgent: string = await page.evaluate(() => navigator.userAgent)
    // replace Linux ARM user-agent because it's detected
    // (case-insensitive regex; a no-op when the user-agent is not Linux ARM)
    webBrowserUserAgent = detectedUserAgent.replace(/linux arm[^;]+;/i, 'Linux x86_64;')
    log.info("FlareSolverr User-Agent: " + webBrowserUserAgent)
    await page.close()
  } finally {
    await destroy(session.sessionId)
  }
  log.info("Test successful")
}
/**
 * Launches a browser and registers it in the session cache.
 *
 * - The launch is attempted at most 3 times; only errors whose message starts with
 *   'Failed to launch the browser process!' are retried, anything else is rethrown at once.
 *   (A prefix match is used because Puppeteer appends details to that message.)
 * - Unless `options.oneTimeSession` is set, a profile directory is prepared and used as userDataDir.
 * - `options.proxy.url`, when present, is passed as `--proxy-server=`.
 * - `options.cookies`, when present, are set through a temporary page which is closed afterwards.
 *   If that step fails the browser is closed before the error is rethrown.
 *
 * @param session id to register the session under; a UUID v1 is generated when falsy
 * @param options session options, also stored (without undefined fields) as the session defaults
 * @returns the cache entry of the new session
 * @throws Error when the browser cannot be launched or the cookies cannot be set
 */
export async function create(session: string, options: SessionCreateOptions): Promise<SessionsCacheItem> {
  const sessionId = session || UUIDv1()

  // todo: these args are only supported in chrome
  const args = [
    '--no-sandbox',
    '--disable-setuid-sandbox',
    '--disable-dev-shm-usage' // issue #45
  ]
  if (options.proxy && options.proxy.url) {
    args.push(`--proxy-server=${options.proxy.url}`)
  }

  const puppeteerOptions: LaunchOptions = {
    product: 'firefox',
    headless: process.env.HEADLESS !== 'false',
    args
  }

  if (!options.oneTimeSession) {
    log.debug('Creating userDataDir for session.')
    puppeteerOptions.userDataDir = prepareBrowserProfile(sessionId)
  }

  // todo: fix native package with firefox
  // if we are running inside executable binary, change browser path
  if (typeof (process as any).pkg !== 'undefined') {
    const exe = process.platform === "win32" ? 'chrome.exe' : 'chrome'
    puppeteerOptions.executablePath = path.join(path.dirname(process.execPath), 'chrome', exe)
  }

  log.debug('Launching web browser...')

  // TODO: maybe access env variable?
  // TODO: sometimes browser instances are created and not connected to correctly.
  //       how do we handle/quit those instances inside Docker?
  const maxLaunchTries = 3
  let launched: Browser | undefined
  for (let attempt = 1; attempt <= maxLaunchTries; attempt++) {
    try {
      launched = await puppeteer.launch(puppeteerOptions)
      break
    } catch (e) {
      const retryable = e instanceof Error && e.message.startsWith('Failed to launch the browser process!')
      if (!retryable)
        throw e
      log.warn('Failed to open browser, trying again...')
    }
  }
  if (!launched) { throw Error(`Failed to launch browser ${maxLaunchTries} times in a row.`) }
  const browser: Browser = launched

  if (options.cookies) {
    try {
      const page = await browser.newPage()
      try {
        await page.setCookie(...options.cookies)
      } finally {
        // the page is only needed to set the cookies
        await page.close()
      }
    } catch (e) {
      // the browser is not in the cache yet, so nobody else could close it
      await browser.close().catch(() => undefined)
      throw e
    }
  }

  sessionCache[sessionId] = {
    sessionId: sessionId,
    browser: browser,
    userDataDir: puppeteerOptions.userDataDir,
    defaults: removeEmptyFields(options) // todo: review
  }

  return sessionCache[sessionId]
}
/** Returns the ids of all sessions currently stored in the session cache. */
export function list(): string[] {
  const sessionIds: string[] = []
  for (const sessionId of Object.keys(sessionCache)) {
    sessionIds.push(sessionId)
  }
  return sessionIds
}
// todo: create a sessions.close that doesn't rm the userDataDir
/**
 * Closes the browser of a session, removes the session from the cache and, when the
 * session has a profile directory, deletes that directory after a 5 second pause.
 *
 * @param id id of the session to destroy
 * @returns true when a session was found and destroyed, false when the id is empty,
 *          unknown, or the cached entry has no browser
 * @throws Error when the profile directory cannot be deleted, or when closing the browser fails
 */
export async function destroy(id: string): Promise<boolean> {
  // hasOwnProperty is called through Object.prototype so that a session that happens to be
  // named 'hasOwnProperty' cannot shadow the method
  if (!id || !Object.prototype.hasOwnProperty.call(sessionCache, id)) {
    return false
  }

  const { browser, userDataDir } = sessionCache[id]
  if (!browser) {
    return false
  }

  await browser.close()
  delete sessionCache[id]

  if (userDataDir) {
    try {
      // for some reason this keeps an error from being thrown in Windows, figures
      await sleep(5000)
      // use the directory the browser was actually launched with
      deleteFolderRecursive(userDataDir)
    } catch (e) {
      log.error(e)
      const reason = e instanceof Error ? e.message : String(e)
      throw Error(`Error deleting browser session folder. ${reason}`)
    }
  }
  return true
}
/**
 * Looks up a session by id.
 *
 * Only entries stored in the cache itself count: names inherited from Object.prototype
 * (for example 'constructor' or 'toString') are not returned as if they were sessions.
 *
 * @param id session id
 * @returns the cached session, or undefined when no session with that id exists
 */
export function get(id: string): SessionsCacheItem | undefined {
  return Object.prototype.hasOwnProperty.call(sessionCache, id) ? sessionCache[id] : undefined
}

219
src/services/solver.ts Normal file
View File

@@ -0,0 +1,219 @@
import {Response, Headers, Page, Browser} from 'puppeteer'
const Timeout = require('await-timeout');
import log from './log'
import {SessionsCacheItem} from "./sessions";
import {V1Request} from "../controllers/v1";
import cloudflareProvider from '../providers/cloudflare';
const sessions = require('./sessions')
// Data collected from the page by resolveChallenge().
export interface ChallengeResolutionResultT {
// page.url() read just before the page is closed.
url: string
// response.status() of the response kept after the challenge step.
status: number,
// response.headers(); set to null when returnOnlyCookies is requested.
headers?: Headers,
// page.content(), or the base64-encoded response body when `download` is set;
// stays null when returnOnlyCookies is requested.
response: string,
// Result of page.cookies().
cookies: object[]
// navigator.userAgent evaluated in the page; set to null when returnOnlyCookies is requested.
userAgent: string
}
// Envelope returned by resolveChallenge().
export interface ChallengeResolutionT {
// 'ok', or "error" when the Cloudflare provider threw.
status?: string
// Empty string, or "Cloudflare " followed by the provider error.
message: string
result: ChallengeResolutionResultT
}
// interface OverrideResolvers {
// method?: (request: Request) => HttpMethod,
// postData?: (request: Request) => string,
// headers?: (request: Request) => Headers
// }
//
// type OverridesProps =
// 'method' |
// 'postData' |
// 'headers'
/**
 * Runs resolveChallenge() with an upper time limit of `params.maxTimeout` milliseconds
 * (60000 when it is unset or 0).
 *
 * resolveChallenge() closes the page only on its success path. When the timeout wins the race
 * or resolveChallenge() throws, the page would stay open, which leaks pages in long-lived
 * sessions, so it is closed here. The close is not awaited so that an unresponsive browser
 * cannot delay the timeout error.
 *
 * @param params request parameters (already merged with the session defaults by the caller)
 * @param page page to resolve the challenge in
 * @returns the result of resolveChallenge()
 * @throws the error raised by resolveChallenge(), or the timer's rejection when the limit is hit
 *         (presumably an Error carrying the "Maximum timeout reached" message — confirm against await-timeout)
 */
async function resolveChallengeWithTimeout(params: V1Request, page: Page) {
  const maxTimeout = params.maxTimeout || 60000
  const timer = new Timeout();
  try {
    const promise = resolveChallenge(params, page);
    return await Promise.race([
      promise,
      timer.set(maxTimeout, `Maximum timeout reached. maxTimeout=${maxTimeout} (ms)`)
    ]);
  } finally {
    timer.clear();
    if (!page.isClosed()) {
      // best effort: the browser may already be going away
      void page.close().catch((e: unknown) => log.warn(`Unable to close the page. ${e}`))
    }
  }
}
/**
 * Navigates the page to `url`, lets the Cloudflare provider deal with any challenge and
 * collects the outcome (final url, status, headers, cookies, user-agent and body).
 *
 * - With `returnOnlyCookies` the headers and user-agent are nulled and no body is returned.
 * - With `download` the url is loaded a second time and the raw body is returned base64-encoded.
 * - Otherwise the body is the current page content.
 *
 * A failure of the Cloudflare provider does not throw; it is reported through
 * `status: "error"` and `message`. The page is closed before returning.
 */
async function resolveChallenge({ url, proxy, download, returnOnlyCookies, returnRawHtml }: V1Request,
                                page: Page): Promise<ChallengeResolutionT | void> {
  let status = 'ok'
  let message = ''

  if (proxy) {
    log.debug("Apply proxy");
    if (proxy.username) {
      const credentials = { username: proxy.username, password: proxy.password }
      await page.authenticate(credentials);
    }
  }

  log.debug(`Navigating to... ${url}`)
  let response: Response = await page.goto(url, { waitUntil: 'domcontentloaded' })
  const initialHtml = await page.content()
  log.html(initialHtml)

  // Detect protection services and solve challenges
  try {
    response = await cloudflareProvider(url, page, response);
  } catch (e) {
    status = "error";
    message = "Cloudflare " + e.toString();
  }

  const result: ChallengeResolutionResultT = {
    url: page.url(),
    status: response.status(),
    headers: response.headers(),
    response: null,
    cookies: await page.cookies(),
    userAgent: await page.evaluate(() => navigator.userAgent)
  }
  const payload: ChallengeResolutionT = { status, message, result }

  if (returnOnlyCookies) {
    result.headers = null;
    result.userAgent = null;
  } else if (download) {
    // for some reason we get an error unless we reload the page
    // has something to do with a stale buffer and this is the quickest
    // fix since I am short on time
    response = await page.goto(url, { waitUntil: 'domcontentloaded' })
    const body = await response.buffer()
    result.response = body.toString('base64')
    // todo: review this functionality
    // } else if (returnRawHtml) {
    //   payload.result.response = await response.text()
  } else {
    result.response = await page.content()
  }

  // Add final url in result
  result.url = page.url();

  // make sure the page is closed because if it isn't an error will be thrown
  // when a user uses a temporary session, the browser may be quit before
  // the page is properly closed.
  await page.close()

  return payload
}
/**
 * Combines the session defaults with the request parameters; request values win.
 * Headers are merged key by key (request headers override session headers), so the
 * result always carries a `headers` object, possibly empty.
 */
function mergeSessionWithParams({ defaults }: SessionsCacheItem, params: V1Request): V1Request {
  const sessionHeaders = defaults.headers || {}
  const requestHeaders = params.headers || {}

  const merged = { ...defaults, ...params }
  // custom merging logic
  merged.headers = { ...sessionHeaders, ...requestHeaders }
  return merged
}
// Opens a new page in the given browser, applies the user-agent returned by
// sessions.getUserAgent() and returns the page.
// NOTE: `method`, `postData`, `headers` and `cookies` are read from params but are not
// applied to the page; the code that used them is the commented-out section below.
async function setupPage(params: V1Request, browser: Browser): Promise<Page> {
const page = await browser.newPage()
// merge session defaults with params
const { method, postData, headers, cookies } = params
// the user-agent is changed just for linux arm build
await page.setUserAgent(sessions.getUserAgent())
// todo: redo all functionality
// let overrideResolvers: OverrideResolvers = {}
//
// if (method !== 'GET') {
// log.debug(`Setting method to ${method}`)
// overrideResolvers.method = request => method
// }
//
// if (postData) {
// log.debug(`Setting body data to ${postData}`)
// overrideResolvers.postData = request => postData
// }
//
// if (headers) {
// log.debug(`Adding custom headers: ${JSON.stringify(headers)}`)
// overrideResolvers.headers = request => Object.assign(request.headers(), headers)
// }
//
// if (cookies) {
// log.debug(`Setting custom cookies: ${JSON.stringify(cookies)}`)
// await page.setCookie(...cookies)
// }
//
// // if any keys have been set on the object
// if (Object.keys(overrideResolvers).length > 0) {
// let callbackRunOnce = false
// const callback = (request: Request) => {
//
// // avoid loading resources to speed up page load
// if(request.resourceType() == 'stylesheet' || request.resourceType() == 'font' || request.resourceType() == 'image') {
// request.abort()
// return
// }
//
// if (callbackRunOnce || !request.isNavigationRequest()) {
// request.continue()
// return
// }
//
// callbackRunOnce = true
// const overrides: Overrides = {}
//
// Object.keys(overrideResolvers).forEach((key: OverridesProps) => {
// // @ts-ignore
// overrides[key] = overrideResolvers[key](request)
// });
//
// log.debug(`Overrides: ${JSON.stringify(overrides)}`)
// request.continue(overrides)
// }
//
// await page.setRequestInterception(true)
// page.on('request', callback)
// }
return page
}
/**
 * Handles one browser request.
 *
 * When `params.session` is undefined a one-time session is created for the request and
 * destroyed afterwards; otherwise the existing session with that id is used.
 *
 * @throws Error when the named session does not exist, or when the request fails
 *         (the original error is appended to the message)
 */
export async function browserRequest(params: V1Request): Promise<ChallengeResolutionT> {
  const oneTimeSession = params.session === undefined;

  let session: SessionsCacheItem
  if (oneTimeSession) {
    session = await sessions.create(null, { oneTimeSession: true })
  } else {
    session = sessions.get(params.session)
  }

  if (!session) {
    throw Error("This session does not exist. Use 'list_sessions' to see all the existing sessions.")
  }

  const mergedParams = mergeSessionWithParams(session, params)

  try {
    const page = await setupPage(mergedParams, session.browser)
    return await resolveChallengeWithTimeout(mergedParams, page)
  } catch (error) {
    throw Error("Unable to process browser request. Error: " + error)
  } finally {
    // one-time sessions live only for the duration of this request
    if (oneTimeSession) {
      await sessions.destroy(session.sessionId)
    }
  }
}

31
src/services/utils.ts Normal file
View File

@@ -0,0 +1,31 @@
import * as fs from 'fs'
import * as Path from 'path'
import { promisify } from 'util'
/** Promise-returning form of setTimeout, built with util.promisify; used as `await sleep(ms)`. */
export const sleep = promisify(setTimeout)
// recursive fs.rmdir needs node version 12:
// https://github.com/ngosang/FlareSolverr/issues/5#issuecomment-655572712
/**
 * Synchronously deletes a folder together with everything inside it.
 * Does nothing when the path does not exist. Entries that are not directories
 * (judged with lstat) are unlinked rather than descended into.
 */
export function deleteFolderRecursive(path: string) {
  if (!fs.existsSync(path)) {
    return
  }
  for (const entryName of fs.readdirSync(path)) {
    const entryPath = Path.join(path, entryName)
    const isFolder = fs.lstatSync(entryPath).isDirectory()
    if (isFolder) {
      deleteFolderRecursive(entryPath) // recurse
    } else {
      fs.unlinkSync(entryPath) // delete file
    }
  }
  // the folder is empty now
  fs.rmdirSync(path)
}
/**
 * Returns a shallow copy of `o` without the keys whose value is `undefined`.
 * Other falsy values (null, 0, '', false) are kept. The input is not modified.
 * Keys are enumerated with `for..in`, so inherited enumerable keys are copied too.
 */
export const removeEmptyFields = (o: Record<string, any>): typeof o => {
  const cleaned: typeof o = {}
  for (const key in o) {
    if (o[key] === undefined) {
      continue
    }
    cleaned[key] = o[key]
  }
  return cleaned
}