From 7a91577c5f86d99078e4f6b44271bc7d2e14b67e Mon Sep 17 00:00:00 2001 From: Kevin Novak Date: Thu, 5 Jan 2023 13:57:26 -0500 Subject: [PATCH] Scrape release date --- .vscode/settings.json | 8 ++++++- package-lock.json | 44 +++++++++++++++++++++++++++++++++++ package.json | 3 +++ public/scripts/index.js | 8 ++++--- src/models/internal-models.ts | 2 ++ src/services/steam-scraper.ts | 36 ++++++++++++++++++++++++++++ src/utils/regex-utils.ts | 8 +++++++ 7 files changed, 105 insertions(+), 4 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 314339d..477b7cd 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -12,6 +12,12 @@ "editor.formatOnSave": true }, "cSpell.enabled": true, - "cSpell.words": ["Novak", "parens", "vrgamedeals"], + "cSpell.words": [ + "Chrono", + "Luxon", + "Novak", + "parens", + "vrgamedeals" + ], "typescript.preferences.importModuleSpecifierEnding": "js" } diff --git a/package-lock.json b/package-lock.json index 8a05e9c..65f43a1 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,13 +11,16 @@ "license": "MIT", "dependencies": { "cheerio": "1.0.0-rc.12", + "chrono-node": "^2.5.0", "express": "4.18.2", "express-promise-router": "4.1.1", + "luxon": "3.2.1", "node-fetch": "3.3.0", "pm2": "^5.2.2" }, "devDependencies": { "@types/express": "4.17.14", + "@types/luxon": "3.2.0", "@types/node": "18.11.9", "@types/node-fetch": "2.6.2", "@typescript-eslint/eslint-plugin": "^5.42.0", @@ -474,6 +477,12 @@ "integrity": "sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ==", "dev": true }, + "node_modules/@types/luxon": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/@types/luxon/-/luxon-3.2.0.tgz", + "integrity": "sha512-lGmaGFoaXHuOLXFvuju2bfvZRqxAqkHPx9Y9IQdQABrinJJshJwfNCKV+u7rR3kJbiqfTF/NhOkcxxAFrObyaA==", + "dev": true + }, "node_modules/@types/mime": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/@types/mime/-/mime-3.0.1.tgz", @@ -1182,6 +1191,14 @@ "node": ">= 6" } }, + "node_modules/chrono-node": { + "version": "2.5.0", + "resolved": "https://registry.npmjs.org/chrono-node/-/chrono-node-2.5.0.tgz", + "integrity": "sha512-GasdFCw4tsb8UKlwyJW1S+3bdN06vsyGR2cEDMlhEGI7ic4SQRnLyl/hbItwSum6pPkkUTrzFcaR3C2tZnnO5Q==", + "dependencies": { + "dayjs": "^1.10.0" + } + }, "node_modules/ci-info": { "version": "3.6.2", "resolved": "https://registry.npmjs.org/ci-info/-/ci-info-3.6.2.tgz", @@ -3258,6 +3275,14 @@ "yallist": "^3.0.2" } }, + "node_modules/luxon": { + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/luxon/-/luxon-3.2.1.tgz", + "integrity": "sha512-QrwPArQCNLAKGO/C+ZIilgIuDnEnKx5QYODdDtbFaxzsbZcc/a7WFq7MhsVYgRlwawLtvOUESTlfJ+hc/USqPg==", + "engines": { + "node": ">=12" + } + }, "node_modules/media-typer": { "version": "0.3.0", "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", @@ -5523,6 +5548,12 @@ "integrity": "sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ==", "dev": true }, + "@types/luxon": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/@types/luxon/-/luxon-3.2.0.tgz", + "integrity": "sha512-lGmaGFoaXHuOLXFvuju2bfvZRqxAqkHPx9Y9IQdQABrinJJshJwfNCKV+u7rR3kJbiqfTF/NhOkcxxAFrObyaA==", + "dev": true + }, "@types/mime": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/@types/mime/-/mime-3.0.1.tgz", @@ -6030,6 +6061,14 @@ } } }, + "chrono-node": { + "version": "2.5.0", + "resolved": "https://registry.npmjs.org/chrono-node/-/chrono-node-2.5.0.tgz", + "integrity": "sha512-GasdFCw4tsb8UKlwyJW1S+3bdN06vsyGR2cEDMlhEGI7ic4SQRnLyl/hbItwSum6pPkkUTrzFcaR3C2tZnnO5Q==", + "requires": { + "dayjs": "^1.10.0" + } + }, "ci-info": { "version": "3.6.2", "resolved": "https://registry.npmjs.org/ci-info/-/ci-info-3.6.2.tgz", @@ -7585,6 +7624,11 @@ "yallist": "^3.0.2" } }, + "luxon": { + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/luxon/-/luxon-3.2.1.tgz", + "integrity": "sha512-QrwPArQCNLAKGO/C+ZIilgIuDnEnKx5QYODdDtbFaxzsbZcc/a7WFq7MhsVYgRlwawLtvOUESTlfJ+hc/USqPg==" + }, "media-typer": { "version": "0.3.0", "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", diff --git a/package.json b/package.json index d93fafc..0367dc4 100644 --- a/package.json +++ b/package.json @@ -29,13 +29,16 @@ }, "dependencies": { "cheerio": "1.0.0-rc.12", + "chrono-node": "^2.5.0", "express": "4.18.2", "express-promise-router": "4.1.1", + "luxon": "3.2.1", "node-fetch": "3.3.0", "pm2": "^5.2.2" }, "devDependencies": { "@types/express": "4.17.14", + "@types/luxon": "3.2.0", "@types/node": "18.11.9", "@types/node-fetch": "2.6.2", "@typescript-eslint/eslint-plugin": "^5.42.0", diff --git a/public/scripts/index.js b/public/scripts/index.js index 29d793b..cd90db3 100644 --- a/public/scripts/index.js +++ b/public/scripts/index.js @@ -199,6 +199,7 @@ function formatAppData(app) { countdownTime: 0, reviews: '', reviewsCount: '', + releaseDate: '', }; formattedData.type = app.type; @@ -216,6 +217,7 @@ function formatAppData(app) { formattedData.countdownTime = app.countdown.time; formattedData.reviews = extractNumberFromPercent(app.reviewsPercent) || app.reviewsPercent; formattedData.reviewsCount = app.reviewsCount; + formattedData.releaseDate = app.releaseDate; return formattedData; } @@ -231,8 +233,8 @@ async function retrieveSearchPageData(steamSearchUrl, pageNumber) { } function createMarkdownTable(formattedSearchData) { - let header = '| Title | Price (USD) | Discount (%) | Rating (%) | Review Count |'; - let divider = '| :- | -: | -: | -: | -: |'; + let header = '| Title | Price (USD) | Discount (%) | Rating (%) | Total Reviews | Release Date |'; + let divider = '| :- | -: | -: | -: | -: | :- |'; let result = header + NEW_LINE + divider + NEW_LINE; for (let app of formattedSearchData) { @@ -243,7 +245,7 @@ function createMarkdownTable(formattedSearchData) { } function convertToRow(app) { - return `| ${app.titleLink} | ${app.price} | ${app.percentOff} | ${app.reviews} | ${app.reviewsCount} |`; + return `| ${app.titleLink} | ${app.price} | ${app.percentOff} | ${app.reviews} | ${app.reviewsCount} | ${app.releaseDate} |`; } async function post(url, content, maxAttempts) { diff --git a/src/models/internal-models.ts b/src/models/internal-models.ts index ec00940..f85df86 100644 --- a/src/models/internal-models.ts +++ b/src/models/internal-models.ts @@ -5,6 +5,7 @@ export interface AppPageData { percentOff: string; reviewsPercent: string; reviewsCount: string; + releaseDate: string; countdown: CountdownData; vrSupport: string; link: string; @@ -24,6 +25,7 @@ export interface GameData { percentOff: string; reviewsPercent: string; reviewsCount: string; + releaseDate: string; } export interface GameElementData { diff --git a/src/services/steam-scraper.ts b/src/services/steam-scraper.ts index a026244..29e13c7 100644 --- a/src/services/steam-scraper.ts +++ b/src/services/steam-scraper.ts @@ -1,5 +1,7 @@ import * as cheerio from 'cheerio'; import { Element } from 'cheerio'; +import chrono from 'chrono-node'; +import { DateTime } from 'luxon'; import { AppPageData, @@ -23,11 +25,13 @@ export class SteamScraper { let countdown = this.getCountdownFromGameElement(firstGame); let vrSupport = this.getVrSupportFromGameElement(firstGame); let reviews = this.getReviews(appPageHtml); + let releaseDate = this.getReleaseDate(appPageHtml); return { title, ...gameData, ...reviews, + releaseDate, countdown, vrSupport, link: undefined, @@ -92,6 +96,29 @@ export class SteamScraper { return title; } + private getReleaseDate(appPageHtml: string): string { + let $ = cheerio.load(appPageHtml); + + let releaseDate = ''; + + let gameDetailsElement = $( + '.game_details .details_block:contains("Release Date:")' + ).first(); + + if (gameDetailsElement) { + let gameDetailsContent = gameDetailsElement.html().trim(); + let releaseDateText = RegexUtils.extractReleaseDate(gameDetailsContent); + if (releaseDateText) { + let releaseJsDate = chrono.parseDate(releaseDateText); + if (releaseJsDate) { + releaseDate = DateTime.fromJSDate(releaseJsDate).toFormat('yyyy-MM-dd'); + } + } + } + + return releaseDate; + } + private getReviews(appPageHtml: string): ReviewData { let $ = cheerio.load(appPageHtml); @@ -124,6 +151,7 @@ export class SteamScraper { percentOff: '', reviewsPercent: '', reviewsCount: '', + releaseDate: '', }; let title = $('div.search_name > span.title').text().trim(); @@ -170,6 +198,14 @@ export class SteamScraper { } } + let releaseDateText = $('div.search_released').text().trim(); + if (releaseDateText) { + let releaseJsDate = chrono.parseDate(releaseDateText); + if (releaseJsDate) { + gameData.releaseDate = DateTime.fromJSDate(releaseJsDate).toFormat('yyyy-MM-dd'); + } + } + return gameData; } diff --git a/src/utils/regex-utils.ts b/src/utils/regex-utils.ts index 8b60684..f4427f4 100644 --- a/src/utils/regex-utils.ts +++ b/src/utils/regex-utils.ts @@ -1,4 +1,5 @@ const TITLE_REGEX = /Title:<\/b>(.*)
/; +const RELEASE_DATE_REGEX = /Release Date:<\/b>(.*)
/; const PERCENT_REGEX = /(\d+%)/; const REVIEWS_COUNT_REGEX = /([\d,]+) user review/; const DISCOUNT_COUNTDOWN_REGEX = /DiscountCountdown,[ ]*([\d]{7,})/; @@ -11,6 +12,13 @@ export abstract class RegexUtils { } } + public static extractReleaseDate(input: string): string { + let match = RELEASE_DATE_REGEX.exec(input); + if (match) { + return match[1].trim(); + } + } + public static extractPercent(input: string): string { let match = PERCENT_REGEX.exec(input); if (match) {