# Patch summary. This is explanatory text only: patch tools skip lines that
# come before the first "diff --git" header.
#
# .coafile
#   * Adds the setting `link_ignore_regex = (github\.com)` to the [all.links]
#     section, directly below `bears = InvalidLinkBear`.
#     NOTE(review): presumably this makes the link check skip github.com
#     URLs -- confirm against the InvalidLinkBear documentation.
#
# lib/scrape.js
#   * Adds the constant GH_ORG_BASE ('https://github.com/orgs').
#   * Renames GITHUB_OPTIONS to GH_API_OPTIONS and updates the two call sites
#     visible here (searchGitHubOrgs and getGitHubUser).
#   * Adds GH_WEB_OPTIONS: a fixed set of request headers plus
#     `compress: false`; it is passed to fetch() by findGitHubUserInOrg.
#   * findGitHubUser no longer reassigns its destructured `display_name`
#     parameter; the space-stripped name is kept in `shortName` instead.
#     It now first calls findGitHubUserInOrg(display_name, org) with the
#     unstripped name and returns that result when it is truthy; otherwise it
#     continues with the pre-existing lookup, now using `shortName`.
#   * Adds findGitHubUserInOrg(user, org): fetches
#     `${GH_ORG_BASE}/${org}/people?query=${user}` with GH_WEB_OPTIONS, reads
#     the response body as text, runs `pattern` against it and returns
#     capture group 1, or null when there is no match.
#
#   NOTE(review): in this copy of the patch the RegExp source is the empty
#   string. An empty pattern matches any input and has no capture group, so
#   match[1] would be undefined and the new lookup could never yield a
#   username. The original pattern was probably lost when the patch text was
#   extracted -- confirm against the repository before relying on it.
#   NOTE(review): `user` is interpolated into the query string without
#   encodeURIComponent(); a display name containing '&' or '#' would change
#   the request URL -- confirm whether that is acceptable.
#   NOTE(review): 'utf8' is a charset name rather than an HTTP content-coding;
#   presumably the intent (cf. `compress: false`) is an uncompressed body,
#   for which the standard token is 'identity' -- confirm.
diff --git a/.coafile b/.coafile
index 3303a9b..c0902d9 100644
--- a/.coafile
+++ b/.coafile
@@ -18,6 +18,7 @@ bears = LineLengthBear
 
 [all.links]
 bears = InvalidLinkBear
+link_ignore_regex = (github\.com)
 
 [css]
 files = static/css/*.css
diff --git a/lib/scrape.js b/lib/scrape.js
index 584b272..a5cd036 100644
--- a/lib/scrape.js
+++ b/lib/scrape.js
@@ -4,6 +4,7 @@ const fs = require('fs')
 const json2yaml = require('json2yaml')
 
 const GH_USER_BASE = 'https://github.com/users'
+const GH_ORG_BASE = 'https://github.com/orgs'
 const GH_API_BASE = 'https://api.github.com'
 const GCI_API_BASE = 'https://codein.withgoogle.com/api'
 
@@ -19,12 +20,23 @@ const CHAT_IMAGES = {
   OTHER: 'static/images/chat.png',
 }
 
-const GITHUB_OPTIONS = {
+const GH_API_OPTIONS = {
   headers: process.env.GITHUB_TOKEN
     ? { Authorization: `token ${process.env.GITHUB_TOKEN}` }
     : {},
 }
 
+const GH_WEB_OPTIONS = {
+  headers: {
+    Accept: 'text/html',
+    'Accept-Encoding': 'utf8',
+    'Accept-Language': 'en-US,en;q=0.9',
+    'Upgrade-Insecure-Requests': '1',
+    'User-Agent': 'Mozilla/5.0',
+  },
+  compress: false,
+}
+
 async function fetchProgram() {
   const res = await fetch(`${GCI_API_BASE}/program/2017/`)
   return await res.json()
@@ -45,7 +57,7 @@ async function fetchLeaders(id) {
 async function searchGitHubOrgs(query) {
   const res = await fetch(
     `${GH_API_BASE}/search/users?q=${query}%20type:org`,
-    GITHUB_OPTIONS
+    GH_API_OPTIONS
   )
   const { items } = await res.json()
   return items || []
@@ -97,7 +109,7 @@ function findMatches(input, pattern) {
 }
 
 async function getGitHubUser(user) {
-  const res = await fetch(`${GH_API_BASE}/users/${user}`, GITHUB_OPTIONS)
+  const res = await fetch(`${GH_API_BASE}/users/${user}`, GH_API_OPTIONS)
   let response = await res.json()
   if (response && response.message) {
     response = undefined
@@ -155,14 +167,17 @@ async function findOrganization({
 async function findGitHubUser({ display_name }, org) {
   if (!org) return
 
-  display_name = display_name.replace(/ /g, '')
+  const shortName = display_name.replace(/ /g, '')
+
+  const username = await findGitHubUserInOrg(display_name, org)
+  if (username) return username
 
   const displayNamePattern = /^[a-zA-Z0-9-]{1,39}$/
-  const displayNameMatches = displayNamePattern.exec(display_name)
+  const displayNameMatches = displayNamePattern.exec(shortName)
 
   if (!displayNameMatches) return
 
-  const user = await getGitHubUser(display_name)
+  const user = await getGitHubUser(shortName)
   if (!user) return
 
   const login = user.login
@@ -183,6 +198,19 @@ async function findGitHubUser({ display_name }, org) {
   }
 }
 
+async function findGitHubUserInOrg(user, org) {
+  const pattern = new RegExp(
+    ''
+  )
+  const res = await fetch(
+    `${GH_ORG_BASE}/${org}/people?query=${user}`,
+    GH_WEB_OPTIONS
+  )
+  const body = await res.text()
+  const match = pattern.exec(body)
+  return match ? match[1] : null
+}
+
 async function fetchOrgsWithData() {
   const orgs = await fetchOrgs()
   const fetchingLeaders = orgs.map(org => fetchLeaders(org.id))