Skip to content

Commit

Permalink
Merge branch 'master' of github.com:Mintplex-Labs/anything-llm into r…
Browse files Browse the repository at this point in the history
…ender
  • Loading branch information
timothycarambat committed Aug 12, 2024
2 parents 0938850 + 2695956 commit 8fc547e
Show file tree
Hide file tree
Showing 61 changed files with 3,066 additions and 602 deletions.
6 changes: 3 additions & 3 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@
},
"updateContentCommand": "cd server && yarn && cd ../collector && PUPPETEER_DOWNLOAD_BASE_URL=https://storage.googleapis.com/chrome-for-testing-public yarn && cd ../frontend && yarn && cd .. && yarn setup:envs && yarn prisma:setup && echo \"Please run yarn dev:server, yarn dev:collector, and yarn dev:frontend in separate terminal tabs.\"",
// Use 'postCreateCommand' to run commands after the container is created.
// This configures VITE for github codespaces
"postCreateCommand": "if [ \"${CODESPACES}\" = \"true\" ]; then echo 'VITE_API_BASE=\"https://$CODESPACE_NAME-3001.$GITHUB_CODESPACES_PORT_FORWARDING_DOMAIN/api\"' > ./frontend/.env; fi",
// This configures VITE for github codespaces and installs gh cli
"postCreateCommand": "if [ \"${CODESPACES}\" = \"true\" ]; then echo 'VITE_API_BASE=\"https://$CODESPACE_NAME-3001.$GITHUB_CODESPACES_PORT_FORWARDING_DOMAIN/api\"' > ./frontend/.env && (type -p wget >/dev/null || (sudo apt update && sudo apt-get install wget -y)) && sudo mkdir -p -m 755 /etc/apt/keyrings && wget -qO- https://cli.github.com/packages/githubcli-archive-keyring.gpg | sudo tee /etc/apt/keyrings/githubcli-archive-keyring.gpg > /dev/null && sudo chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg && echo \"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main\" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null && sudo apt update && sudo apt install gh -y; fi",
"portsAttributes": {
"3001": {
"label": "Backend",
Expand Down Expand Up @@ -208,4 +208,4 @@
}
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
// "remoteUser": "root"
}
}
115 changes: 115 additions & 0 deletions .github/workflows/build-and-push-image-semver.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
name: Publish AnythingLLM Docker image on Release (amd64 & arm64)

concurrency:
group: build-${{ github.ref }}
cancel-in-progress: true

on:
release:
types: [published]

jobs:
push_multi_platform_to_registries:
name: Push Docker multi-platform image to multiple registries
runs-on: ubuntu-latest
permissions:
packages: write
contents: read
steps:
- name: Check out the repo
uses: actions/checkout@v4

- name: Check if DockerHub build needed
shell: bash
run: |
# Check if the secret for USERNAME is set (don't even check for the password)
if [[ -z "${{ secrets.DOCKER_USERNAME }}" ]]; then
echo "DockerHub build not needed"
echo "enabled=false" >> $GITHUB_OUTPUT
else
echo "DockerHub build needed"
echo "enabled=true" >> $GITHUB_OUTPUT
fi
id: dockerhub

- name: Set up QEMU
uses: docker/setup-qemu-action@v3

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

- name: Log in to Docker Hub
uses: docker/login-action@f4ef78c080cd8ba55a85445d5b36e214a81df20a
# Only log in to Docker Hub when the DOCKER_USERNAME secret is set (see the "Check if DockerHub build needed" step), so forks without Docker Hub credentials can still build on GHCR
if: steps.dockerhub.outputs.enabled == 'true'
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}

- name: Log in to the Container registry
uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
with:
images: |
${{ steps.dockerhub.outputs.enabled == 'true' && 'mintplexlabs/anythingllm' || '' }}
ghcr.io/${{ github.repository }}
tags: |
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
- name: Build and push multi-platform Docker image
uses: docker/build-push-action@v6
with:
context: .
file: ./docker/Dockerfile
push: true
sbom: true
provenance: mode=max
platforms: linux/amd64,linux/arm64
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
cache-to: type=gha,mode=max

# For Docker Scout there are some intermediary reported CVEs which exist outside
# of the execution context or are unreachable by an attacker but exist in the image.
# We create VEX files for these so they don't show in the Scout summary.
- name: Collect known and verified CVE exceptions
id: cve-list
run: |
# Collect CVEs from filenames in vex folder
CVE_NAMES=""
for file in ./docker/vex/*.vex.json; do
[ -e "$file" ] || continue
filename=$(basename "$file")
stripped_filename=${filename%.vex.json}
CVE_NAMES+=" $stripped_filename"
done
echo "CVE_EXCEPTIONS=$CVE_NAMES" >> $GITHUB_OUTPUT
shell: bash

# About VEX attestations https://docs.docker.com/scout/explore/exceptions/
# Justifications https://github.com/openvex/spec/blob/main/OPENVEX-SPEC.md#status-justifications
- name: Add VEX attestations
env:
CVE_EXCEPTIONS: ${{ steps.cve-list.outputs.CVE_EXCEPTIONS }}
run: |
echo $CVE_EXCEPTIONS
curl -sSfL https://raw.githubusercontent.com/docker/scout-cli/main/install.sh | sh -s --
for cve in $CVE_EXCEPTIONS; do
for tag in "${{ join(fromJSON(steps.meta.outputs.json).tags, ' ') }}"; do
echo "Attaching VEX exception $cve to $tag"
docker scout attestation add \
--file "./docker/vex/$cve.vex.json" \
--predicate-type https://openvex.dev/ns/v0.2.0 \
$tag
done
done
shell: bash
2 changes: 1 addition & 1 deletion .github/workflows/dev-build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ concurrency:

on:
push:
branches: ['558-multi-modal-support'] # put your current branch to create a build. Core team only.
branches: ['pipertts-support'] # put your current branch to create a build. Core team only.
paths-ignore:
- '**.md'
- 'cloud-deployments/*'
Expand Down
2 changes: 1 addition & 1 deletion .vscode/tasks.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
}
}
},
"command": "cd ${workspaceFolder}/server/ && yarn dev",
"command": "if [ \"${CODESPACES}\" = \"true\" ]; then while ! gh codespace ports -c $CODESPACE_NAME | grep 3001; do sleep 1; done; gh codespace ports visibility 3001:public -c $CODESPACE_NAME; fi & cd ${workspaceFolder}/server/ && yarn dev",
"runOptions": {
"instanceLimit": 1,
"reevaluateOnRerun": true
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ AnythingLLM divides your documents into objects called `workspaces`. A Workspace
**TTS (text-to-speech) support:**

- Native Browser Built-in (default)
- [PiperTTSLocal - runs in browser](https://github.com/rhasspy/piper)
- [OpenAI TTS](https://platform.openai.com/docs/guides/text-to-speech/voice-options)
- [ElevenLabs](https://elevenlabs.io/)

Expand Down
6 changes: 4 additions & 2 deletions cloud-deployments/digitalocean/terraform/user_data.tp1
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@ sudo systemctl enable docker
sudo systemctl start docker

mkdir -p /home/anythingllm
touch /home/anythingllm/.env

cat <<EOF >/home/anythingllm/.env
${env_content}
EOF

sudo docker pull mintplexlabs/anythingllm
sudo docker run -d -p 3001:3001 --cap-add SYS_ADMIN -v /home/anythingllm:/app/server/storage -v /home/anythingllm/.env:/app/server/.env -e STORAGE_DIR="/app/server/storage" mintplexlabs/anythingllm
echo "Container ID: $(sudo docker ps --latest --quiet)"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -223,10 +223,6 @@ class GitLabRepoLoader {
const objects = Array.isArray(data)
? data.filter((item) => item.type === "blob")
: []; // only get files, not paths or submodules
if (objects.length === 0) {
fetching = false;
break;
}

// Apply ignore path rules to found objects. If any rules match it is an invalid file path.
console.log(
Expand Down
50 changes: 26 additions & 24 deletions collector/utils/extensions/WebsiteDepth/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,34 +9,36 @@ const { tokenizeString } = require("../../tokenizer");
const path = require("path");
const fs = require("fs");

async function discoverLinks(startUrl, depth = 1, maxLinks = 20) {
async function discoverLinks(startUrl, maxDepth = 1, maxLinks = 20) {
const baseUrl = new URL(startUrl);
const discoveredLinks = new Set();
const pendingLinks = [startUrl];
let currentLevel = 0;
depth = depth < 1 ? 1 : depth;
maxLinks = maxLinks < 1 ? 1 : maxLinks;

// Check depth and if there are any links left to scrape
while (currentLevel < depth && pendingLinks.length > 0) {
const newLinks = await getPageLinks(pendingLinks[0], baseUrl);
pendingLinks.shift();

for (const link of newLinks) {
if (!discoveredLinks.has(link)) {
discoveredLinks.add(link);
pendingLinks.push(link);
}

// Exit out if we reach maxLinks
if (discoveredLinks.size >= maxLinks) {
return Array.from(discoveredLinks).slice(0, maxLinks);
const discoveredLinks = new Set([startUrl]);
let queue = [[startUrl, 0]]; // [url, currentDepth]
const scrapedUrls = new Set();

for (let currentDepth = 0; currentDepth < maxDepth; currentDepth++) {
const levelSize = queue.length;
const nextQueue = [];

for (let i = 0; i < levelSize && discoveredLinks.size < maxLinks; i++) {
const [currentUrl, urlDepth] = queue[i];

if (!scrapedUrls.has(currentUrl)) {
scrapedUrls.add(currentUrl);
const newLinks = await getPageLinks(currentUrl, baseUrl);

for (const link of newLinks) {
if (!discoveredLinks.has(link) && discoveredLinks.size < maxLinks) {
discoveredLinks.add(link);
if (urlDepth + 1 < maxDepth) {
nextQueue.push([link, urlDepth + 1]);
}
}
}
}
}

if (pendingLinks.length === 0) {
currentLevel++;
}
queue = nextQueue;
if (queue.length === 0 || discoveredLinks.size >= maxLinks) break;
}

return Array.from(discoveredLinks);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,12 @@ class YoutubeTranscript {
let transcript = "";
const chunks = transcriptXML.getElementsByTagName("text");
for (const chunk of chunks) {
transcript += chunk.textContent;
// Add space after each text chunk
transcript += chunk.textContent + " ";
}

return transcript;
// Trim extra whitespace
return transcript.trim().replace(/\s+/g, " ");
} catch (e) {
throw new YoutubeTranscriptError(e);
}
Expand Down
1 change: 1 addition & 0 deletions collector/utils/files/mime.js
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class MimeDetector {
"lua",
"pas",
"r",
"go",
],
},
true
Expand Down
3 changes: 3 additions & 0 deletions frontend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
"dependencies": {
"@metamask/jazzicon": "^2.0.0",
"@microsoft/fetch-event-source": "^2.0.1",
"@mintplex-labs/piper-tts-web": "^1.0.4",
"@phosphor-icons/react": "^2.1.7",
"@tremor/react": "^3.15.1",
"dompurify": "^3.0.8",
Expand All @@ -24,7 +25,9 @@
"js-levenshtein": "^1.1.6",
"lodash.debounce": "^4.0.8",
"markdown-it": "^13.0.1",
"markdown-it-katex": "^2.0.3",
"moment": "^2.30.1",
"onnxruntime-web": "^1.18.0",
"pluralize": "^8.0.0",
"react": "^18.2.0",
"react-device-detect": "^2.2.2",
Expand Down
Loading

0 comments on commit 8fc547e

Please sign in to comment.