Skip to content

Commit

Permalink
feat(puppeteer): enable new headless mode (#1910)
Browse files Browse the repository at this point in the history
https://developer.chrome.com/articles/new-headless/
 
To opt out and keep using the old headless mode, add `headless: 'old'`
to your Puppeteer crawler options.

---------

Co-authored-by: Jindřich Bär <[email protected]>
  • Loading branch information
B4nan and barjin authored Nov 15, 2023
1 parent 3143586 commit 7fc999c
Show file tree
Hide file tree
Showing 8 changed files with 36 additions and 50 deletions.
4 changes: 2 additions & 2 deletions packages/browser-crawler/src/internals/browser-crawler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ export interface BrowserCrawlerOptions<
* Whether to run browser in headless mode. Defaults to `true`.
* Can be also set via {@apilink Configuration}.
*/
headless?: boolean;
headless?: boolean | 'new' | 'old'; // `new`/`old` are for puppeteer only
}

/**
Expand Down Expand Up @@ -326,7 +326,7 @@ export abstract class BrowserCrawler<
postNavigationHooks: ow.optional.array,

launchContext: ow.optional.object,
headless: ow.optional.boolean,
headless: ow.optional.any(ow.boolean, ow.string),
browserPoolOptions: ow.object,
sessionPoolOptions: ow.optional.object,
persistCookiesPerSession: ow.optional.boolean,
Expand Down
3 changes: 1 addition & 2 deletions packages/browser-pool/src/puppeteer/puppeteer-plugin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,7 @@ export class PuppeteerPlugin extends BrowserPlugin<
}

if (launchOptions!.headless === true) {
// to disable deprecation warnings, we should switch to `new` after more testing
launchOptions!.headless = 'old' as unknown as boolean;
launchOptions!.headless = 'new';
}

let browser: PuppeteerTypes.Browser;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ export class PlaywrightCrawler extends BrowserCrawler<{ browserPlugins: [Playwri

if (headless != null) {
launchContext.launchOptions ??= {} as LaunchOptions;
launchContext.launchOptions.headless = headless;
launchContext.launchOptions.headless = headless as boolean;
}

const playwrightLauncher = new PlaywrightLauncher(launchContext, config);
Expand Down
6 changes: 3 additions & 3 deletions packages/puppeteer-crawler/src/internals/puppeteer-crawler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -160,15 +160,15 @@ export class PuppeteerCrawler extends BrowserCrawler<{ browserPlugins: [Puppetee
+ 'Use PuppeteerCrawlerOptions.proxyConfiguration');
}

// `browserPlugins` is working when it's not overriden by `launchContext`,
// which for crawlers it is always overriden. Hence the error to use the other option.
// `browserPlugins` is working when it's not overridden by `launchContext`,
// which for crawlers it is always overridden. Hence the error to use the other option.
if (browserPoolOptions.browserPlugins) {
throw new Error('browserPoolOptions.browserPlugins is disallowed. Use launchContext.launcher instead.');
}

if (headless != null) {
launchContext.launchOptions ??= {} as LaunchOptions;
launchContext.launchOptions.headless = headless;
launchContext.launchOptions.headless = headless as boolean;
}

const puppeteerLauncher = new PuppeteerLauncher(launchContext, config);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,11 @@ export class PuppeteerLauncher extends BrowserLauncher<PuppeteerPlugin, unknown>

this.Plugin = PuppeteerPlugin;
}

/**
 * Resolves the default `headless` launch option for Puppeteer.
 *
 * Delegates to the parent launcher first. A falsy inherited default is
 * returned unchanged; a truthy one is replaced with the string `'new'`
 * so that Puppeteer starts in its new headless mode.
 *
 * NOTE(review): the declared return type is still `boolean`, so the
 * `'new'` literal has to be cast. Widening the base signature to
 * `boolean | 'new' | 'old'` would remove the need for the cast.
 */
protected override _getDefaultHeadlessOption(): boolean {
    const inheritedDefault = super._getDefaultHeadlessOption();

    if (!inheritedDefault) {
        return inheritedDefault;
    }

    return 'new' as any;
}
}

/**
Expand Down
23 changes: 5 additions & 18 deletions test/core/puppeteer_request_interception.test.ts
Original file line number Diff line number Diff line change
@@ -1,30 +1,20 @@
import type { Server } from 'http';
import type { AddressInfo } from 'net';

import { sleep } from '@crawlee/utils';
import { launchPuppeteer, utils } from 'crawlee';
import express from 'express';
import type { HTTPRequest } from 'puppeteer';

import { runExampleComServer } from '../shared/_helper';

const { addInterceptRequestHandler, removeInterceptRequestHandler } = utils.puppeteer;

// Simple page with image, script and stylesheet links.
let HTML_PAGE = '';

let serverAddress = 'http://localhost:';
let port: number;
let server: Server;

beforeAll(async () => {
[server, port] = await runExampleComServer();
serverAddress += port;
HTML_PAGE = `<html><body>
<link rel="stylesheet" type="text/css" href="${serverAddress}/style.css">
<img src="${serverAddress}/image.png" />
<script src="${serverAddress}/script.js" defer="defer">></script>
</body></html>`;
});

afterAll(() => {
Expand Down Expand Up @@ -61,7 +51,7 @@ describe('utils.puppeteer.addInterceptRequestHandler|removeInterceptRequestHandl
// Save all loaded URLs.
page.on('response', (response) => loadedUrls.push(response.url()));

await page.setContent(HTML_PAGE, { waitUntil: 'networkidle0' });
await page.goto(`${serverAddress}/special/resources`, { waitUntil: 'networkidle0' });
} finally {
await browser.close();
}
Expand Down Expand Up @@ -105,7 +95,7 @@ describe('utils.puppeteer.addInterceptRequestHandler|removeInterceptRequestHandl
propagatedUrls.push(request.url());
return request.continue();
});
await page.setContent(HTML_PAGE, { waitUntil: 'networkidle0' });
await page.goto(`${serverAddress}/special/resources`, { waitUntil: 'networkidle0' });
} finally {
await browser.close();
}
Expand Down Expand Up @@ -242,15 +232,13 @@ describe('utils.puppeteer.removeInterceptRequestHandler()', () => {
});

// Load with scripts and images disabled.
await page.setContent('<html><body></body></html>');
await page.setContent(HTML_PAGE, { waitUntil: 'networkidle0' });
await page.goto(`${serverAddress}/special/resources`, { waitUntil: 'networkidle0' });
expect(loadedUrls).toEqual(expect.arrayContaining([
`${serverAddress}/style.css`,
]));

// Try it once again.
await page.setContent('<html><body></body></html>');
await page.setContent(HTML_PAGE, { waitUntil: 'networkidle0' });
await page.goto(`${serverAddress}/special/resources`, { waitUntil: 'networkidle0' });
expect(loadedUrls).toEqual(expect.arrayContaining([
`${serverAddress}/style.css`,
`${serverAddress}/style.css`,
Expand All @@ -260,8 +248,7 @@ describe('utils.puppeteer.removeInterceptRequestHandler()', () => {
await removeInterceptRequestHandler(page, abortImagesHandler);

// Try to load once again if images appear there.
await page.setContent('<html><body></body></html>');
await page.setContent(HTML_PAGE, { waitUntil: 'networkidle0' });
await page.goto(`${serverAddress}/special/resources`, { waitUntil: 'networkidle0' });
expect(loadedUrls).toEqual(expect.arrayContaining([
`${serverAddress}/style.css`,
`${serverAddress}/style.css`,
Expand Down
33 changes: 9 additions & 24 deletions test/core/puppeteer_utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import path from 'path';
import log from '@apify/log';
import { KeyValueStore, launchPuppeteer, puppeteerUtils, Request } from '@crawlee/puppeteer';
import type { Dictionary } from '@crawlee/utils';
import express from 'express';
import type { Browser, Page, ResponseForRequest } from 'puppeteer';
import { runExampleComServer } from 'test/shared/_helper';
import { MemoryStorageEmulator } from 'test/shared/MemoryStorageEmulator';
Expand Down Expand Up @@ -174,13 +173,11 @@ describe('puppeteerUtils', () => {
const page = await browser.newPage();
await puppeteerUtils.blockRequests(page);
page.on('response', (response) => loadedUrls.push(response.url()));
await page.setContent(`<html><body>
<link rel="stylesheet" type="text/css" href="${serverAddress}/style.css">
<img src="${serverAddress}/image.png">
<img src="${serverAddress}/image.gif">
<script src="${serverAddress}/script.js" defer="defer">></script>
</body></html>`, { waitUntil: 'load' });
expect(loadedUrls).toEqual([`${serverAddress}/script.js`]);
await page.goto(`${serverAddress}/special/resources`, { waitUntil: 'load' });
expect(loadedUrls).toEqual([
`${serverAddress}/special/resources`,
`${serverAddress}/script.js`,
]);
});

test('works with overridden values', async () => {
Expand All @@ -191,12 +188,8 @@ describe('puppeteerUtils', () => {
urlPatterns: ['.css'],
});
page.on('response', (response) => loadedUrls.push(response.url()));
await page.setContent(`<html><body>
<link rel="stylesheet" type="text/css" href="${serverAddress}/style.css">
<img src="${serverAddress}/image.png">
<img src="${serverAddress}/image.gif">
<script src="${serverAddress}/script.js" defer="defer">></script>
</body></html>`, { waitUntil: 'load' });
await page.goto(`${serverAddress}/special/resources`, { waitUntil: 'load' });

expect(loadedUrls).toEqual(expect.arrayContaining([
`${serverAddress}/image.png`,
`${serverAddress}/script.js`,
Expand All @@ -210,11 +203,7 @@ describe('puppeteerUtils', () => {
const page = await browser.newPage();
await puppeteerUtils.blockResources(page);
page.on('response', (response) => loadedUrls.push(response.url()));
await page.setContent(`<html><body>
<link rel="stylesheet" type="text/css" href="${serverAddress}/style.css">
<img src="${serverAddress}/image.png" />
<script src="${serverAddress}/script.js" defer="defer">></script>
</body></html>`, { waitUntil: 'load' });
await page.goto(`${serverAddress}/special/resources`, { waitUntil: 'load' });

expect(loadedUrls).toEqual(expect.arrayContaining([
`${serverAddress}/script.js`,
Expand All @@ -227,11 +216,7 @@ describe('puppeteerUtils', () => {
const page = await browser.newPage();
await puppeteerUtils.blockResources(page, ['script']);
page.on('response', (response) => loadedUrls.push(response.url()));
await page.setContent(`<html><body>
<link rel="stylesheet" type="text/css" href="${serverAddress}/style.css">
<img src="${serverAddress}/image.png" />
<script src="${serverAddress}/script.js" defer="defer">></script>
</body></html>`, { waitUntil: 'load' });
await page.goto(`${serverAddress}/special/resources`, { waitUntil: 'load' });

expect(loadedUrls).toEqual(expect.arrayContaining([
`${serverAddress}/style.css`,
Expand Down
10 changes: 10 additions & 0 deletions test/shared/_helper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,13 @@ export const responseSamples = {
</div>
</body>
</html>`,
resources: `
<html><body>
<link rel="stylesheet" type="text/css" href="/style.css">
<img src="/image.png">
<img src="/image.gif">
<script src="/script.js" defer="defer"></script>
</body></html>`,
cacheable: {
html: `
<!doctype html>
Expand Down Expand Up @@ -177,6 +184,9 @@ export async function runExampleComServer(): Promise<[Server, number]> {
await setTimeout(32000);
res.type('html').send('<div>TEST</div>');
});
special.get('/resources', async (_req, res) => {
res.type('html').send(responseSamples.resources);
});
})();

// "cacheable" site with one page, scripts and stylesheets
Expand Down

0 comments on commit 7fc999c

Please sign in to comment.