Skip to content

Commit

Permalink
chore(generative-ai): move accuracy tests to generative-ai package, p…
Browse files Browse the repository at this point in the history
…arallelize tests (#5008)
  • Loading branch information
Anemy authored Oct 23, 2023
1 parent 61a4415 commit 78b6386
Show file tree
Hide file tree
Showing 9 changed files with 275 additions and 87 deletions.
266 changes: 207 additions & 59 deletions package-lock.json

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions packages/compass-generative-ai/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
"depcheck": "compass-scripts check-peer-deps && depcheck",
"check": "npm run typecheck && npm run lint && npm run depcheck",
"check-ci": "npm run check",
"ai-accuracy-tests": "node ./scripts/ai-accuracy-tests.js",
"test": "mocha",
"test-electron": "xvfb-maybe electron-mocha --no-sandbox",
"test-cov": "nyc --compact=false --produce-source-map=false -x \"**/*.spec.*\" --reporter=lcov --reporter=text --reporter=html npm run test",
Expand Down Expand Up @@ -77,11 +78,20 @@
"@types/react": "^17.0.5",
"@types/react-dom": "^17.0.10",
"@types/sinon-chai": "^3.2.5",
"bson": "^6.0.0",
"chai": "^4.3.6",
"decomment": "^0.9.5",
"depcheck": "^1.4.1",
"digest-fetch": "^2.0.3",
"ejson-shell-parser": "^1.2.4",
"eslint": "^7.25.0",
"mocha": "^10.2.0",
"mongodb": "^6.1.0",
"mongodb-runner": "^5.4.4",
"mongodb-schema": "^11.2.2",
"node-fetch": "^2.7.0",
"nyc": "^15.1.0",
"p-queue": "^7.4.1",
"prettier": "^2.7.1",
"react": "^17.0.2",
"react-dom": "^17.0.2",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
'use strict';

/* eslint-disable no-console */

// To run these tests against cloud-dev:
// > ATLAS_PUBLIC_KEY="..." \
// ATLAS_PRIVATE_KEY="..." \
Expand Down Expand Up @@ -33,6 +36,18 @@ const DEFAULT_MIN_ACCURACY = 0.8;

const MAX_TIMEOUTS_PER_TEST = 10;

// There is a limited amount of resources available on both the Atlas
// and the AI service side of things, so we want to limit how many
// requests can be in flight at a time.
const TESTS_TO_RUN_CONCURRENTLY = 3;

// To avoid being rate limited we also add a delay between test attempts
// when the previous attempt returned a result quickly.
const ADD_TIMEOUT_BETWEEN_TESTS_THRESHOLD_MS = 5000;
const TIMEOUT_BETWEEN_TESTS_MS = 2000;

let PQueue;

// Number of attempts made for each test. Defaults to
// DEFAULT_ATTEMPTS_PER_TEST and can be overridden with the
// AI_TESTS_ATTEMPTS_PER_TEST environment variable.
const ATTEMPTS_PER_TEST = (() => {
  const rawAttempts = process.env.AI_TESTS_ATTEMPTS_PER_TEST;

  // An unset or empty variable falls back to the default.
  if (!rawAttempts) {
    return DEFAULT_ATTEMPTS_PER_TEST;
  }

  const parsedAttempts = Number(rawAttempts);

  // The attempt count is used as the divisor when computing accuracy, so a
  // zero, negative, fractional or non-numeric value would produce a
  // meaningless (NaN or skewed) accuracy instead of a real result. Fail
  // fast rather than report misleading numbers.
  if (!Number.isInteger(parsedAttempts) || parsedAttempts <= 0) {
    throw new Error(
      `AI_TESTS_ATTEMPTS_PER_TEST must be a positive integer, received "${rawAttempts}"`
    );
  }

  return parsedAttempts;
})();
Expand Down Expand Up @@ -251,11 +266,22 @@ const runTest = async (testOptions) => {
const attempts = ATTEMPTS_PER_TEST;
let fails = 0;
let timeouts = 0;
let lastTestTimeMS = 0;

for (let i = 0; i < attempts; i++) {
if (timeouts >= MAX_TIMEOUTS_PER_TEST) {
throw new Error('Too many timeouts');
}
let startTime = Date.now();

if (
attempts > 0 &&
lastTestTimeMS < ADD_TIMEOUT_BETWEEN_TESTS_THRESHOLD_MS
) {
await new Promise((resolve) =>
setTimeout(resolve, TIMEOUT_BETWEEN_TESTS_MS)
);
}

try {
console.info('---------------------------------------------------');
Expand All @@ -276,6 +302,7 @@ const runTest = async (testOptions) => {
fails++;
}
}
lastTestTimeMS = Date.now() - startTime;
}

const accuracy = (attempts - fails) / attempts;
Expand All @@ -286,6 +313,9 @@ const runTest = async (testOptions) => {
const fixtures = {};

async function setup() {
// p-queue is an ESM-only package, so it has to be loaded with a dynamic import.
PQueue = (await import('p-queue')).default;

cluster = await MongoCluster.start({
tmpDir: os.tmpdir(),
topology: 'standalone',
Expand Down Expand Up @@ -487,34 +517,41 @@ const tests = [
]),
},
];

async function main() {
try {
await setup();
const table = [];

let anyFailed = false;

for (const test of tests) {
const {
accuracy,
// usageStats
} = await runTest(test);
const minAccuracy = test.minAccuracy ?? DEFAULT_MIN_ACCURACY;
const failed = accuracy < minAccuracy;

table.push({
Type: test.type.slice(0, 1).toUpperCase(),
'User Input': test.userInput.slice(0, 50),
Namespace: `${test.databaseName}.${test.collectionName}`,
Accuracy: accuracy,
// 'Prompt Tokens': usageStats[0]?.promptTokens,
// 'Completion Tokens': usageStats[0]?.completionTokens,
Pass: failed ? '✗' : '✓',
});

anyFailed = anyFailed || failed;
}
const testPromiseQueue = new PQueue({
concurrency: TESTS_TO_RUN_CONCURRENTLY,
});

tests.map((test) =>
testPromiseQueue.add(async () => {
const {
accuracy,
// usageStats
} = await runTest(test);
const minAccuracy = test.minAccuracy ?? DEFAULT_MIN_ACCURACY;
const failed = accuracy < minAccuracy;

table.push({
Type: test.type.slice(0, 1).toUpperCase(),
'User Input': test.userInput.slice(0, 50),
Namespace: `${test.databaseName}.${test.collectionName}`,
Accuracy: accuracy,
// 'Prompt Tokens': usageStats[0]?.promptTokens,
// 'Completion Tokens': usageStats[0]?.completionTokens,
Pass: failed ? '✗' : '✓',
});

anyFailed = anyFailed || failed;
})
);

await testPromiseQueue.onIdle();

console.table(table, [
'Type',
Expand Down
7 changes: 0 additions & 7 deletions scripts/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,19 +42,12 @@
"@mongodb-js/monorepo-tools": "^1.1.1",
"@mongodb-js/webpack-config-compass": "^1.2.3",
"commander": "^11.0.0",
"decomment": "^0.9.5",
"digest-fetch": "^2.0.3",
"ejson-shell-parser": "^1.2.4",
"electron": "^25.8.4",
"glob": "^10.2.5",
"jsdom": "^21.1.0",
"keytar": "^7.9.0",
"make-fetch-happen": "^8.0.14",
"mongodb": "^6.1.0",
"mongodb-connection-string-url": "^2.6.0",
"mongodb-runner": "^5.4.4",
"mongodb-schema": "^11.2.2",
"node-fetch": "^2.7.0",
"pacote": "^11.3.5",
"pkg-up": "^3.1.0",
"prompts": "^2.4.1",
Expand Down

0 comments on commit 78b6386

Please sign in to comment.