-
Notifications
You must be signed in to change notification settings - Fork 48
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Amith Koujalgi
committed
Nov 10, 2023
0 parents
commit 6045b1a
Showing
12 changed files
with
525 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Builds the Docker image on every push / pull request against main.
# The image is only pushed to Docker Hub for push events: pull-request runs
# (especially from forks) have no access to secrets and must not overwrite
# the published tag.
name: CI - Build and Push Docker Image

on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]

jobs:
  push_to_registry:
    name: Push Docker image to Docker Hub
    runs-on: ubuntu-latest
    env:
      CONTEXT_SUBDIR: .
    steps:
      - name: Check out the repo
        uses: actions/checkout@v4

      - name: Log in to Docker Hub
        if: github.event_name != 'pull_request'
        env:
          DOCKERHUB_ACCESS_TOKEN: ${{ secrets.DOCKERHUB_ACCESS_TOKEN }}
        # Feed the token through stdin instead of `-p` so it never appears
        # in the process argument list.
        run: echo "$DOCKERHUB_ACCESS_TOKEN" | docker login -u amithkoujalgi --password-stdin

      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: Dockerfile
          # Build-only on pull requests; push on pushes to main.
          push: ${{ github.event_name != 'pull_request' }}
          tags: amithkoujalgi/pdf-bot:1.0.0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,171 @@ | ||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
*$py.class | ||
|
||
# C extensions | ||
*.so | ||
|
||
# Distribution / packaging | ||
.Python | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
downloads/ | ||
eggs/ | ||
.eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
wheels/ | ||
share/python-wheels/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
MANIFEST | ||
|
||
# PyInstaller | ||
# Usually these files are written by a python script from a template | ||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | ||
*.manifest | ||
*.spec | ||
|
||
# Installer logs | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
|
||
# Unit test / coverage reports | ||
htmlcov/ | ||
.tox/ | ||
.nox/ | ||
.coverage | ||
.coverage.* | ||
.cache | ||
nosetests.xml | ||
coverage.xml | ||
*.cover | ||
*.py,cover | ||
.hypothesis/ | ||
.pytest_cache/ | ||
cover/ | ||
|
||
# Translations | ||
*.mo | ||
*.pot | ||
|
||
# Django stuff: | ||
*.log | ||
local_settings.py | ||
db.sqlite3 | ||
db.sqlite3-journal | ||
|
||
# Flask stuff: | ||
instance/ | ||
.webassets-cache | ||
|
||
# Scrapy stuff: | ||
.scrapy | ||
|
||
# Sphinx documentation | ||
docs/_build/ | ||
|
||
# PyBuilder | ||
.pybuilder/ | ||
target/ | ||
|
||
# Jupyter Notebook | ||
.ipynb_checkpoints | ||
|
||
# IPython | ||
profile_default/ | ||
ipython_config.py | ||
|
||
# pyenv | ||
# For a library or package, you might want to ignore these files since the code is | ||
# intended to run in multiple environments; otherwise, check them in: | ||
# .python-version | ||
|
||
# pipenv | ||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. | ||
# However, in case of collaboration, if having platform-specific dependencies or dependencies | ||
# having no cross-platform support, pipenv may install dependencies that don't work, or not | ||
# install all needed dependencies. | ||
#Pipfile.lock | ||
|
||
# poetry | ||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. | ||
# This is especially recommended for binary packages to ensure reproducibility, and is more | ||
# commonly ignored for libraries. | ||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control | ||
#poetry.lock | ||
|
||
# pdm | ||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. | ||
#pdm.lock | ||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it | ||
# in version control. | ||
# https://pdm.fming.dev/#use-with-ide | ||
.pdm.toml | ||
|
||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm | ||
__pypackages__/ | ||
|
||
# Celery stuff | ||
celerybeat-schedule | ||
celerybeat.pid | ||
|
||
# SageMath parsed files | ||
*.sage.py | ||
|
||
# Environments | ||
.env | ||
.venv | ||
env/ | ||
venv/ | ||
ENV/ | ||
env.bak/ | ||
venv.bak/ | ||
|
||
# Spyder project settings | ||
.spyderproject | ||
.spyproject | ||
|
||
# Rope project settings | ||
.ropeproject | ||
|
||
# mkdocs documentation | ||
/site | ||
|
||
# mypy | ||
.mypy_cache/ | ||
.dmypy.json | ||
dmypy.json | ||
|
||
# Pyre type checker | ||
.pyre/ | ||
|
||
# pytype static type analyzer | ||
.pytype/ | ||
|
||
# Cython debug symbols | ||
cython_debug/ | ||
|
||
# PyCharm | ||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can | ||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore | ||
# and can be added to the global gitignore or merged into this file. For a more nuclear | ||
# option (not recommended) you can uncomment the following to ignore the entire idea folder. | ||
#.idea/ | ||
|
||
|
||
.idea/ | ||
.idea/** | ||
.DS_Store | ||
*.pyc | ||
*.egg-info/** | ||
|
||
|
||
.vscode/ | ||
.npm/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
FROM python:3.8.18 | ||
|
||
WORKDIR /app | ||
|
||
COPY ./requirements.txt /app/requirements.txt | ||
RUN pip install -r /app/requirements.txt | ||
|
||
COPY ./pdf_bot /app/pdf_bot | ||
|
||
CMD ["streamlit", "run", "/app/pdf_bot/app.py"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# ollama-pdf-bot |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
#!/bin/bash | ||
docker build --rm --file Dockerfile -t amithkoujalgi/pdf-bot:1.0.0 . |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Runs an Ollama server and the PDF bot UI on a shared network.
services:

  ollama:
    image: ollama/ollama
    ports:
      - 11434:11434
    volumes:
      - ~/ollama:/root/.ollama
    networks:
      - net

  app:
    image: amithkoujalgi/pdf-bot:1.0.0
    ports:
      - 8501:8501
    environment:
      - OLLAMA_API_BASE_URL=http://ollama:11434
      # No quotes here: in list form everything after '=' is the value, so
      # MODEL="orca-mini" would pass the quote characters to the app.
      - MODEL=orca-mini
    networks:
      - net

networks:
  net:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
#!/bin/bash | ||
pip install -r requirements.txt |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
import os | ||
from pathlib import Path | ||
from typing import Optional | ||
|
||
import requests | ||
import streamlit as st | ||
|
||
from pdf_helper import PDFHelper | ||
|
||
|
||
def pull_model(model_name_):
    """Ask the Ollama server to pull a model and print the server's reply.

    Sends a POST request to ``{ollama_api_base_url}/api/pull`` with the JSON
    body ``{"name": <model_name_>}`` and prints the raw response text.

    Args:
        model_name_: Name of the model to pull, e.g. ``"orca-mini"``.
    """
    print(f"pulling model '{model_name_}'...")
    url = f"{ollama_api_base_url}/api/pull"
    # Let requests serialise the payload. The previous hand-written f-string
    # (f'{"name": "{model_name_}"}') was parsed as a replacement field with a
    # format spec rather than as literal JSON, so it raised ValueError before
    # any request was sent. `json=` also sets the JSON Content-Type header.
    _response = requests.post(url, json={"name": model_name_})
    print(_response.text)
|
||
|
||
# Page title; used for the browser tab and the sidebar heading.
title = "PDF Bot"

# Runtime settings, overridable through environment variables.
model_name = os.getenv('MODEL', "orca-mini")
ollama_api_base_url = os.getenv('OLLAMA_API_BASE_URL', "http://localhost:11434")

# Uploaded PDFs are stored under the current user's home directory.
pdfs_directory = str(Path.home() / 'langchain-store' / 'uploads' / 'pdfs')
os.makedirs(pdfs_directory, exist_ok=True)

print(f"Using model: {model_name}")
print(f"Using Ollama base URL: {ollama_api_base_url}")
print(f"Using PDFs upload directory: {pdfs_directory}")

# Make sure the configured model is requested from the Ollama server.
pull_model(model_name_=model_name)

st.set_page_config(page_title=title)
|
||
|
||
def on_upload_change():
    """Callback for the file uploader: reset the chat when the upload changes."""
    clear_chat_history()
|
||
|
||
def set_uploaded_file(_uploaded_file: str):
    """Remember the path of the uploaded PDF in the Streamlit session state.

    Args:
        _uploaded_file: Path of the stored PDF file.
    """
    st.session_state.uploaded_file = _uploaded_file
|
||
|
||
def get_uploaded_file() -> Optional[str]:
    """Return the uploaded PDF path kept in the session state, if any.

    Returns:
        The value stored under ``'uploaded_file'``, or ``None`` when nothing
        has been stored in this session.
    """
    if 'uploaded_file' not in st.session_state:
        return None
    return st.session_state['uploaded_file']
|
||
|
||
# Sidebar: title, short description and the PDF uploader.
with st.sidebar:
    st.title(title)
    st.write('This chatbot accepts a PDF file and lets you ask questions on it.')
    uploaded_file = st.file_uploader(
        label='Upload a PDF', type=['pdf', 'PDF'],
        accept_multiple_files=False,
        key='file-uploader',
        help=None,
        on_change=on_upload_change,
        args=None,
        kwargs=None,
        disabled=False,
        label_visibility="visible"
    )

    if uploaded_file is not None:
        bytes_data = uploaded_file.getvalue()
        # The file name comes from the client; keep only its last path
        # component so a crafted name cannot write outside pdfs_directory.
        safe_name = os.path.basename(uploaded_file.name)
        target_file = os.path.join(pdfs_directory, safe_name)
        with open(target_file, 'wb') as f:
            f.write(bytes_data)
        # Record the path only after the file has actually been written.
        set_uploaded_file(target_file)
|
||
# Seed the conversation with a greeting when the session has no messages yet.
if "messages" not in st.session_state:
    st.session_state["messages"] = [{"role": "assistant", "content": "Hello, I'm your PDF assistant."}]

# Replay the conversation so far.
for message in st.session_state["messages"]:
    with st.chat_message(message["role"]):
        st.write(message["content"])
|
||
|
||
def clear_chat_history():
    """Reset the stored conversation to the initial assistant greeting."""
    greeting = {"role": "assistant", "content": "Hello, I'm your PDF assistant."}
    st.session_state["messages"] = [greeting]
|
||
|
||
# Sidebar button that wipes the conversation.
st.sidebar.button('Reset', on_click=clear_chat_history)

# User-provided prompt: store it and echo it in the chat.
prompt = st.chat_input(placeholder="What do you want to know from the uploaded PDF?", disabled=False)
if prompt:
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.write(prompt)
|
||
# Produce an assistant reply whenever the last message is not from the assistant.
if st.session_state.messages[-1]["role"] != "assistant":
    source_file = get_uploaded_file()
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            if source_file is None:
                # No PDF available yet: answer with a fixed hint.
                placeholder = st.empty()
                full_response = 'PDF file needs to be uploaded before you can ask questions on it 😟. Please upload a file.'
                placeholder.markdown(full_response)
            else:
                # Forward the latest message to the PDF helper.
                last_message = dict(st.session_state.messages[-1])
                question = last_message.get('content')
                pdf_helper = PDFHelper(
                    ollama_api_base_url=ollama_api_base_url,
                    model_name=model_name
                )
                response = pdf_helper.ask(
                    pdf_file_path=source_file,
                    question=question
                )
                placeholder = st.empty()
                full_response = ''
                # Re-render the placeholder after each piece so the answer grows in place.
                for item in response:
                    full_response += item
                    placeholder.markdown(full_response)
                placeholder.markdown(full_response)
    # Persist the reply so it is replayed on the next rerun.
    message = {"role": "assistant", "content": full_response}
    st.session_state.messages.append(message)
Oops, something went wrong.