Skip to content

Commit

Permalink
fix: add skipNavigation option to enqueueLinks (#2153)
Browse files Browse the repository at this point in the history
Co-authored-by: Martin Adámek <[email protected]>
  • Loading branch information
gustavotr and B4nan authored Nov 2, 2023
1 parent f188ebe commit 118515d
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 3 deletions.
7 changes: 7 additions & 0 deletions packages/core/src/enqueue_links/enqueue_links.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ export interface EnqueueLinksOptions extends RequestQueueOperationOptions {
/** Sets {@apilink Request.label} for newly enqueued requests. */
label?: string;

/**
* If set to `true`, tells the crawler to skip navigation and process the request directly.
* @default false
*/
skipNavigation?: boolean;

/**
* A base URL that will be used to resolve relative URLs when using Cheerio. Ignored when using Puppeteer,
* since the relative URL resolution is done inside the browser automatically.
Expand Down Expand Up @@ -239,6 +245,7 @@ export async function enqueueLinks(options: SetRequired<EnqueueLinksOptions, 're
urls: ow.array.ofType(ow.string),
requestQueue: ow.object.hasKeys('fetchNextRequest', 'addRequest'),
forefront: ow.optional.boolean,
skipNavigation: ow.optional.boolean,
limit: ow.optional.number,
selector: ow.optional.string,
baseUrl: ow.optional.string,
Expand Down
6 changes: 5 additions & 1 deletion packages/core/src/enqueue_links/shared.ts
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ export function filterRequestsByPatterns(requests: Request[], patterns?: UrlPatt
*/
export function createRequestOptions(
sources: (string | Record<string, unknown>)[],
options: Pick<EnqueueLinksOptions, 'label' | 'userData' | 'baseUrl'> = {},
options: Pick<EnqueueLinksOptions, 'label' | 'userData' | 'baseUrl' | 'skipNavigation'> = {},
): RequestOptions[] {
return sources
.map((src) => (typeof src === 'string' ? { url: src } : src as unknown as RequestOptions))
Expand All @@ -230,6 +230,10 @@ export function createRequestOptions(
};
}

if (options.skipNavigation) {
requestOptions.skipNavigation = true;
}

return requestOptions;
});
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,12 @@ export interface EnqueueLinksByClickingElementsOptions {
* @default false
*/
forefront?: boolean;

/**
* If set to `true`, tells the crawler to skip navigation and process the request directly.
* @default false
*/
skipNavigation?: boolean;
}

/**
Expand Down Expand Up @@ -233,6 +239,7 @@ export async function enqueueLinksByClickingElements(options: EnqueueLinksByClic
maxWaitForPageIdleSecs: ow.optional.number,
label: ow.optional.string,
forefront: ow.optional.boolean,
skipNavigation: ow.optional.boolean,
}));

const {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,12 @@ export interface EnqueueLinksByClickingElementsOptions {
* @default false
*/
forefront?: boolean;

/**
* If set to `true`, tells the crawler to skip navigation and process the request directly.
* @default false
*/
skipNavigation?: boolean;
}

/**
Expand Down Expand Up @@ -234,6 +240,7 @@ export async function enqueueLinksByClickingElements(options: EnqueueLinksByClic
maxWaitForPageIdleSecs: ow.optional.number,
label: ow.optional.string,
forefront: ow.optional.boolean,
skipNavigation: ow.optional.boolean,
}));

const {
Expand Down
2 changes: 0 additions & 2 deletions test/core/autoscaling/autoscaled_pool.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -541,5 +541,3 @@ describe('AutoscaledPool', () => {
expect(Date.now() - now).toBeGreaterThanOrEqual(1e3);
}, 10e3);
});

/* eslint-enable no-underscore-dangle */
20 changes: 20 additions & 0 deletions test/core/enqueue_links/enqueue_links.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,26 @@ describe('enqueueLinks()', () => {
expect(enqueued[2].userData).toEqual({ label: 'COOL' });
});

// Checks that `skipNavigation: true` given in the enqueueLinks options
// is present on every request that reaches the (mocked) request queue.
test('works with skipNavigation', async () => {
    const { enqueued: queuedRequests, requestQueue: queue } = createRequestQueueMock();

    const enqueueOptions = {
        selector: '.click',
        skipNavigation: true,
    };

    await browserCrawlerEnqueueLinks({
        options: enqueueOptions,
        page,
        requestQueue: queue,
        originalRequestUrl: 'https://example.com',
    });

    // Exactly two requests are expected for the `.click` selector.
    expect(queuedRequests).toHaveLength(2);

    // Each of them must carry the flag.
    for (let idx = 0; idx < queuedRequests.length; idx += 1) {
        expect(queuedRequests[idx].skipNavigation).toBe(true);
    }
});

test('works with exclude glob', async () => {
const { enqueued, requestQueue } = createRequestQueueMock();
const globs = [
Expand Down

0 comments on commit 118515d

Please sign in to comment.