From 118515d2ba534b99be2f23436f6abe41d66a8e07 Mon Sep 17 00:00:00 2001 From: Gustavo Rudiger Date: Thu, 2 Nov 2023 10:39:31 +0000 Subject: [PATCH] fix: add `skipNavigation` option to `enqueueLinks` (#2153) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Martin Adámek --- .../core/src/enqueue_links/enqueue_links.ts | 7 +++++++ packages/core/src/enqueue_links/shared.ts | 6 +++++- .../internals/enqueue-links/click-elements.ts | 7 +++++++ .../internals/enqueue-links/click-elements.ts | 7 +++++++ test/core/autoscaling/autoscaled_pool.test.ts | 2 -- test/core/enqueue_links/enqueue_links.test.ts | 20 +++++++++++++++++++ 6 files changed, 46 insertions(+), 3 deletions(-) diff --git a/packages/core/src/enqueue_links/enqueue_links.ts b/packages/core/src/enqueue_links/enqueue_links.ts index a8b4b9669b1a..bd6e710600c2 100644 --- a/packages/core/src/enqueue_links/enqueue_links.ts +++ b/packages/core/src/enqueue_links/enqueue_links.ts @@ -35,6 +35,12 @@ export interface EnqueueLinksOptions extends RequestQueueOperationOptions { /** Sets {@apilink Request.label} for newly enqueued requests. */ label?: string; + /** + * If set to `true`, tells the crawler to skip navigation and process the request directly. + * @default false + */ + skipNavigation?: boolean; + /** * A base URL that will be used to resolve relative URLs when using Cheerio. Ignored when using Puppeteer, * since the relative URL resolution is done inside the browser automatically. @@ -239,6 +245,7 @@ export async function enqueueLinks(options: SetRequired)[], - options: Pick = {}, + options: Pick = {}, ): RequestOptions[] { return sources .map((src) => (typeof src === 'string' ? { url: src } : src as unknown as RequestOptions)) @@ -230,6 +230,10 @@ export function createRequestOptions( }; } + if (options.skipNavigation) { + requestOptions.skipNavigation = true; + } + return requestOptions; }); } diff --git a/packages/playwright-crawler/src/internals/enqueue-links/click-elements.ts b/packages/playwright-crawler/src/internals/enqueue-links/click-elements.ts index 72cc22297fda..3b4a472475ac 100644 --- a/packages/playwright-crawler/src/internals/enqueue-links/click-elements.ts +++ b/packages/playwright-crawler/src/internals/enqueue-links/click-elements.ts @@ -166,6 +166,12 @@ export interface EnqueueLinksByClickingElementsOptions { * @default false */ forefront?: boolean; + + /** + * If set to `true`, tells the crawler to skip navigation and process the request directly. + * @default false + */ + skipNavigation?: boolean; } /** @@ -233,6 +239,7 @@ export async function enqueueLinksByClickingElements(options: EnqueueLinksByClic maxWaitForPageIdleSecs: ow.optional.number, label: ow.optional.string, forefront: ow.optional.boolean, + skipNavigation: ow.optional.boolean, })); const { diff --git a/packages/puppeteer-crawler/src/internals/enqueue-links/click-elements.ts b/packages/puppeteer-crawler/src/internals/enqueue-links/click-elements.ts index 3a0d47923bca..b659f16ac906 100644 --- a/packages/puppeteer-crawler/src/internals/enqueue-links/click-elements.ts +++ b/packages/puppeteer-crawler/src/internals/enqueue-links/click-elements.ts @@ -167,6 +167,12 @@ export interface EnqueueLinksByClickingElementsOptions { * @default false */ forefront?: boolean; + + /** + * If set to `true`, tells the crawler to skip navigation and process the request directly. + * @default false + */ + skipNavigation?: boolean; } /** @@ -234,6 +240,7 @@ export async function enqueueLinksByClickingElements(options: EnqueueLinksByClic maxWaitForPageIdleSecs: ow.optional.number, label: ow.optional.string, forefront: ow.optional.boolean, + skipNavigation: ow.optional.boolean, })); const { diff --git a/test/core/autoscaling/autoscaled_pool.test.ts b/test/core/autoscaling/autoscaled_pool.test.ts index c0960e0037db..fca7589a0351 100644 --- a/test/core/autoscaling/autoscaled_pool.test.ts +++ b/test/core/autoscaling/autoscaled_pool.test.ts @@ -541,5 +541,3 @@ describe('AutoscaledPool', () => { expect(Date.now() - now).toBeGreaterThanOrEqual(1e3); }, 10e3); }); - -/* eslint-enable no-underscore-dangle */ diff --git a/test/core/enqueue_links/enqueue_links.test.ts b/test/core/enqueue_links/enqueue_links.test.ts index 04274d15deec..aa91f47111a4 100644 --- a/test/core/enqueue_links/enqueue_links.test.ts +++ b/test/core/enqueue_links/enqueue_links.test.ts @@ -189,6 +189,26 @@ describe('enqueueLinks()', () => { expect(enqueued[2].userData).toEqual({ label: 'COOL' }); }); + test('works with skipNavigation', async () => { + const { enqueued, requestQueue } = createRequestQueueMock(); + + await browserCrawlerEnqueueLinks({ + options: { + selector: '.click', + skipNavigation: true, + }, + page, + requestQueue, + originalRequestUrl: 'https://example.com', + }); + + expect(enqueued).toHaveLength(2); + + for (const request of enqueued) { + expect(request.skipNavigation).toBe(true); + } + }); + test('works with exclude glob', async () => { const { enqueued, requestQueue } = createRequestQueueMock(); const globs = [