Skip to content

Commit

Permalink
Merge pull request #16 from sitegeist/assistantsV2
Browse files Browse the repository at this point in the history
Adjust to assistants v2
  • Loading branch information
mficzel authored Jul 15, 2024
2 parents 8aa83e4 + 2aa3513 commit 6556e74
Show file tree
Hide file tree
Showing 8 changed files with 131 additions and 43 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
strategy:
fail-fast: false
matrix:
php-versions: ["8.1"]
php-versions: ["8.2"]
neos-versions: ["8.3"]

runs-on: ubuntu-latest
Expand Down
45 changes: 21 additions & 24 deletions Classes/Domain/AssistantDepartment.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
use Sitegeist\Chatterbox\Domain\Instruction\InstructionCollection;
use Sitegeist\Chatterbox\Domain\Instruction\InstructionContract;
use Sitegeist\Chatterbox\Domain\Instruction\Manual;
use Sitegeist\Chatterbox\Domain\Knowledge\KnowledgeFilename;
use Sitegeist\Chatterbox\Domain\Knowledge\KnowledgeSourceDiscriminator;
use Sitegeist\Chatterbox\Domain\Knowledge\KnowledgeSourceName;
use Sitegeist\Chatterbox\Domain\Knowledge\Library;
use Sitegeist\Chatterbox\Domain\Knowledge\VectorStoreName;
use Sitegeist\Chatterbox\Domain\Tools\Toolbox;
use Sitegeist\Chatterbox\Domain\Tools\ToolCollection;
use Sitegeist\Chatterbox\Domain\Tools\ToolContract;
Expand Down Expand Up @@ -113,7 +113,7 @@ public function updateAssistant(AssistantRecord $assistantRecord): void
'description' => $assistantRecord->description ?: '',
'instructions' => $assistantRecord->instructions ?: '',
'tools' => $this->createToolConfiguration($assistantRecord),
'file_ids' => $this->createFileIdConfiguration($assistantRecord),
'tool_resources' => $this->createToolResourcesConfiguration($assistantRecord),
'metadata' => $this->createMetadataConfiguration($assistantRecord),
]
);
Expand All @@ -140,7 +140,7 @@ private function createToolConfiguration(AssistantRecord $assistantRecord): arra
$tools = [];
if (!empty($assistantRecord->selectedSourcesOfKnowledge)) {
$tools[] = [
'type' => 'retrieval'
'type' => 'file_search'
];
}
foreach ($assistantRecord->selectedTools as $toolId) {
Expand All @@ -164,38 +164,35 @@ private function createToolConfiguration(AssistantRecord $assistantRecord): arra
}

/**
* @return string[]
* @return array<string, array<string, array<int, string>>>
*/
private function createFileIdConfiguration(AssistantRecord $assistantRecord): array
private function createToolResourcesConfiguration(AssistantRecord $assistantRecord): array
{
$fileListResponse = $this->client->files()->list();
$fileIds = [];
$vectorStoreListResponse = $this->client->vectorStores()->list();
$vectorStoreIds = [];
foreach ($assistantRecord->selectedSourcesOfKnowledge as $knowledgeSourceName) {
$knowledgeSourceNameObject = new KnowledgeSourceName($knowledgeSourceName);
$knowledgeSourceDiscriminator = new KnowledgeSourceDiscriminator(
$this->organizationDiscriminator,
$knowledgeSourceNameObject
new KnowledgeSourceName($knowledgeSourceName)
);
$latestFilename = null;
foreach ($fileListResponse->data as $fileResponse) {
$knowledgeFilename = KnowledgeFilename::tryFromSystemFileName($fileResponse->filename);
if ($knowledgeFilename === null) {
$latestVectorStoreName = null;
foreach ($vectorStoreListResponse->data as $vectorStoreResponse) {
$vectorStoreName = VectorStoreName::tryFromNullableString($vectorStoreResponse->name);
if (!$vectorStoreName?->knowledgeSourceDiscriminator->equals($knowledgeSourceDiscriminator)) {
continue;
}

if ($latestFilename instanceof KnowledgeFilename) {
if ($knowledgeFilename->takesPrecedenceOver($latestFilename)) {
$latestFilename = $knowledgeFilename;
$fileIds[$knowledgeSourceName] = $fileResponse->id;
}
} else {
if ($knowledgeFilename->knowledgeSourceDiscriminator->equals($knowledgeSourceDiscriminator)) {
$latestFilename = $knowledgeFilename;
$fileIds[$knowledgeSourceName] = $fileResponse->id;
}
if (!$latestVectorStoreName || $vectorStoreName->takesPrecedenceOver($latestVectorStoreName)) {
$latestVectorStoreName = $vectorStoreName;
$vectorStoreIds[$knowledgeSourceName] = $vectorStoreResponse->id;
}
}
}
return array_values($fileIds);

return [
'file_search' => [
'vector_store_ids' => array_values($vectorStoreIds)
],
];
}
}
10 changes: 5 additions & 5 deletions Classes/Domain/AssistantRecord.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
use OpenAI\Responses\Assistants\AssistantResponse;
use Neos\Flow\Annotations as Flow;
use OpenAI\Responses\Assistants\AssistantResponseToolCodeInterpreter;
use OpenAI\Responses\Assistants\AssistantResponseToolFileSearch;
use OpenAI\Responses\Assistants\AssistantResponseToolFunction;
use OpenAI\Responses\Assistants\AssistantResponseToolRetrieval;

#[Flow\Proxy(false)]
final class AssistantRecord
Expand All @@ -19,7 +19,7 @@ final class AssistantRecord
* @param string[] $selectedTools
* @param string[] $selectedSourcesOfKnowledge
* @param string[] $selectedInstructions
* @param string[] $fileIds
* @param mixed[] $toolResources
*/
public function __construct(
public readonly string $id,
Expand All @@ -32,7 +32,7 @@ public function __construct(
public readonly array $selectedTools = [],
public readonly array $selectedSourcesOfKnowledge = [],
public readonly array $selectedInstructions = [],
public readonly array $fileIds = [],
public readonly array $toolResources = [],
) {
}

Expand All @@ -48,12 +48,12 @@ public static function fromAssistantResponse(AssistantResponse $response): self
$response->name,
$response->description,
$response->instructions,
array_map(fn(AssistantResponseToolCodeInterpreter|AssistantResponseToolRetrieval|AssistantResponseToolFunction $item) => $item->toArray(), $response->tools),
array_map(fn(AssistantResponseToolCodeInterpreter|AssistantResponseToolFileSearch|AssistantResponseToolFunction $item) => $item->toArray(), $response->tools),
$response->metadata,
$selectedTools,
$selectedSourcesOfKnowledge,
$selectedInstructions,
$response->fileIds,
$response->toolResources?->toArray() ?: [],
);
}
}
32 changes: 30 additions & 2 deletions Classes/Domain/Knowledge/Library.php
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ public function updateSourceOfKnowledge(SourceOfKnowledgeContract $sourceOfKnowl
$sourceOfKnowledge->getName()
);
$filename = KnowledgeFilename::forKnowledgeSource($knowledgeSourceDiscriminator);
$vectorStoreName = VectorStoreName::forKnowledgeSource($knowledgeSourceDiscriminator);
$this->databaseConnection->transactional(function () use ($content, $knowledgeSourceDiscriminator) {
$this->databaseConnection->delete(
self::TABLE_NAME,
Expand All @@ -89,20 +90,34 @@ public function updateSourceOfKnowledge(SourceOfKnowledgeContract $sourceOfKnowl
$path = $this->environment->getPathToTemporaryDirectory() . '/' . $filename->toSystemFilename();
\file_put_contents($path, (string)$content);

$this->client->files()->upload([
$createFileResponse = $this->client->files()->upload([
'file' => fopen($path, 'r'),
'purpose' => 'assistants'
]);
\unlink($path);

$this->client->vectorStores()->create([
'file_ids' => [
$createFileResponse->id
],
'name' => $vectorStoreName->toString(),
]);
}

public function cleanKnowledgePool(AssistantDepartment $assistantDepartment): void
{
$filesListResponse = $this->client->files()->list();
$vectorStoreListResponse = $this->client->vectorStores()->list();

$usedVectorStoreIds = [];
$usedFileIds = [];
foreach ($assistantDepartment->findAllRecords() as $assistant) {
$usedFileIds = array_merge($usedFileIds, $assistant->fileIds);
foreach ($assistant->toolResources['file_search']['vector_store_ids'] ?? [] as $vectorStoreId) {
$usedVectorStoreIds[] = $vectorStoreId;
foreach ($this->client->vectorStores()->files()->list($vectorStoreId)->data as $vectorStoreFileResponse) {
$usedFileIds[] = $vectorStoreFileResponse->id;
}
}
}

foreach ($filesListResponse->data as $fileResponse) {
Expand All @@ -117,6 +132,19 @@ public function cleanKnowledgePool(AssistantDepartment $assistantDepartment): vo
$this->client->files()->delete($fileResponse->id);
}
}

foreach ($vectorStoreListResponse->data as $vectorStoreResponse) {
$vectorStoreName = VectorStoreName::tryFromNullableString($vectorStoreResponse->name);
if (
$vectorStoreName === null
|| $this->organizationDiscriminator->equals($vectorStoreName->knowledgeSourceDiscriminator->organizationDiscriminator) === false
) {
continue;
}
if (!in_array($vectorStoreResponse->id, $usedVectorStoreIds)) {
$this->client->vectorStores()->delete($vectorStoreResponse->id);
}
}
}

private function instantiateSourceOfKnowledge(string $name): SourceOfKnowledgeContract
Expand Down
12 changes: 5 additions & 7 deletions Classes/Domain/Knowledge/QuoteString.php
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,12 @@ private function __construct(
) {
}

public static function fromFileCitationObject(
public static function tryFromFileCitationObject(
ThreadMessageResponseContentTextAnnotationFileCitationObject $fileCitationObject
): self {
): ?self {
if ($fileCitationObject->fileCitation->quote === null || $fileCitationObject->fileCitation->quote === '') {
return null;
}
return new self($fileCitationObject->fileCitation->quote);
}

Expand All @@ -40,9 +43,4 @@ public function wtf8Encode(): string

return '%' . $value . '%';
}

public function isEmpty(): bool
{
return $this->value === '';
}
}
6 changes: 3 additions & 3 deletions Classes/Domain/Knowledge/SourceOfKnowledgeCollection.php
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ public function resolveQuotations(
$unresolvedQuotations = [];
foreach ($annotations as $annotation) {
if ($annotation instanceof ThreadMessageResponseContentTextAnnotationFileCitationObject) {
$quoteString = QuoteString::fromFileCitationObject($annotation);
if ($quoteString->isEmpty()) {
$quoteString = QuoteString::tryFromFileCitationObject($annotation);
if (!$quoteString) {
$unresolvedQuotations[] = new UnresolvedQuotation($annotation->text);
continue;
}
Expand Down Expand Up @@ -92,7 +92,7 @@ public function resolveQuotations(
$unresolvedQuotations[] = new UnresolvedQuotation($annotation->text);
continue;
}
$quotation = $sourceOfKnowledge->tryCreateQuotation($annotation->text, $annotation->fileCitation->quote, $databaseRecord['id']);
$quotation = $sourceOfKnowledge->tryCreateQuotation($annotation->text, $annotation->fileCitation->quote ?: '', $databaseRecord['id']);
if ($quotation) {
$resolvedQuotations[] = $quotation;
}
Expand Down
64 changes: 64 additions & 0 deletions Classes/Domain/Knowledge/VectorStoreName.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
<?php

declare(strict_types=1);

namespace Sitegeist\Chatterbox\Domain\Knowledge;

use Neos\Flow\Annotations as Flow;
use Sitegeist\Chatterbox\Domain\OrganizationDiscriminator;

/**
 * Value object for the name given to a vector store that is created for a knowledge source.
 *
 * The string form is the knowledge source discriminator's string form followed by "-"
 * and a unix timestamp (see toString()). The timestamp is what allows picking the most
 * recent store among several stores of the same knowledge source.
 */
#[Flow\Proxy(false)]
final readonly class VectorStoreName
{
    /**
     * @param KnowledgeSourceDiscriminator $knowledgeSourceDiscriminator the knowledge source this name belongs to
     * @param int $timestamp unix timestamp used to order names of the same knowledge source
     */
    public function __construct(
        public KnowledgeSourceDiscriminator $knowledgeSourceDiscriminator,
        public int $timestamp,
    ) {
    }

    /**
     * Creates a fresh name for the given knowledge source, stamped with the current time.
     */
    public static function forKnowledgeSource(KnowledgeSourceDiscriminator $knowledgeSourceDiscriminator): self
    {
        return new self(
            $knowledgeSourceDiscriminator,
            time()
        );
    }

    /**
     * Parses a name of the form "<organization>-<source name>-<timestamp>".
     *
     * @return self|null null if the value is null, does not consist of exactly three
     *                   dash separated segments, or its last segment is not a plain
     *                   sequence of decimal digits
     */
    public static function tryFromNullableString(?string $value): ?self
    {
        if ($value === null) {
            return null;
        }

        $segments = explode('-', $value);
        if (count($segments) !== 3) {
            return null;
        }
        [$discriminator, $sourceName, $timestamp] = $segments;

        // Digits only: is_numeric() would also accept values like "1e5", "1.5" or " 12",
        // which the integer cast below silently turns into a different timestamp, so
        // names that toString() can never produce would be treated as valid.
        if (preg_match('/\A[0-9]+\z/', $timestamp) !== 1) {
            return null;
        }

        return new self(
            new KnowledgeSourceDiscriminator(
                new OrganizationDiscriminator($discriminator),
                new KnowledgeSourceName($sourceName)
            ),
            (int)$timestamp
        );
    }

    /**
     * Tells whether this name should be preferred over the other one.
     *
     * @return bool true only if both names belong to the same knowledge source and this
     *              name carries the strictly greater timestamp
     */
    public function takesPrecedenceOver(self $other): bool
    {
        if ($this->knowledgeSourceDiscriminator->equals($other->knowledgeSourceDiscriminator) === false) {
            return false;
        }

        return $this->timestamp > $other->timestamp;
    }

    /**
     * Renders the name as the discriminator's string form, a dash and the timestamp.
     *
     * NOTE(review): tryFromNullableString() can only read this back if the discriminator's
     * string form contains exactly one dash - confirm against KnowledgeSourceDiscriminator.
     */
    public function toString(): string
    {
        return $this->knowledgeSourceDiscriminator->toString() . '-' . $this->timestamp;
    }
}
3 changes: 2 additions & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
"name": "sitegeist/chatterbox",
"license": "GPL-3.0-or-later",
"require": {
"php": "^8.2",
"neos/neos": "*",
"sitegeist/flow-openaiclientfactory": "dev-main",
"sitegeist/flow-openaiclientfactory": "~0.1.0",
"symfony/http-client": "^6.4",
"league/commonmark": "~2.4",
"league/html-to-markdown": "^5.0"
Expand Down

0 comments on commit 6556e74

Please sign in to comment.