Commit: init

Amith Koujalgi committed Nov 10, 2023
0 parents commit 6045b1a
Showing 12 changed files with 525 additions and 0 deletions.
28 changes: 28 additions & 0 deletions .github/workflows/push-docker-image.yml
@@ -0,0 +1,28 @@
name: CI - Build and Push Docker Image

on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]

jobs:
  push_to_registry:
    name: Push Docker image to Docker Hub
    runs-on: ubuntu-latest
    env:
      CONTEXT_SUBDIR: .
    steps:
      - name: Check out the repo
        uses: actions/checkout@v4

      - name: Log in to Docker Hub
        run: docker login -u amithkoujalgi -p ${{ secrets.DOCKERHUB_ACCESS_TOKEN }}

      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: Dockerfile
          push: true
          tags: amithkoujalgi/pdf-bot:1.0.0
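
Note: the login step assumes a repository secret named DOCKERHUB_ACCESS_TOKEN has already been created. A minimal sketch of adding it with the GitHub CLI (assuming gh is installed and authenticated; the token value below is a placeholder):

# Store a Docker Hub access token as a repository secret (replace the placeholder with a real token).
gh secret set DOCKERHUB_ACCESS_TOKEN --body "<docker-hub-access-token>"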
171 changes: 171 additions & 0 deletions .gitignore
@@ -0,0 +1,171 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/


.idea/
.idea/**
.DS_Store
*.pyc
*.egg-info/**


.vscode/
.npm/
10 changes: 10 additions & 0 deletions Dockerfile
@@ -0,0 +1,10 @@
FROM python:3.8.18

WORKDIR /app

COPY ./requirements.txt /app/requirements.txt
RUN pip install -r /app/requirements.txt

COPY ./pdf_bot /app/pdf_bot

CMD ["streamlit", "run", "/app/pdf_bot/app.py"]
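
For a quick standalone test of this image (a sketch, not part of this commit), the container can be run directly against an Ollama server on the host. Using host.docker.internal is an assumption that holds on Docker Desktop; on Linux, the Compose setup below is the simpler route:

# Build the image, then run it with the two environment variables the app reads.
./docker-build.sh
docker run --rm -p 8501:8501 \
  -e OLLAMA_API_BASE_URL=http://host.docker.internal:11434 \
  -e MODEL=orca-mini \
  amithkoujalgi/pdf-bot:1.0.0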
1 change: 1 addition & 0 deletions README.md
@@ -0,0 +1 @@
# ollama-pdf-bot
2 changes: 2 additions & 0 deletions docker-build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/bin/bash
docker build --rm --file Dockerfile -t amithkoujalgi/pdf-bot:1.0.0 .
23 changes: 23 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
services:

  ollama:
    image: ollama/ollama
    ports:
      - 11434:11434
    volumes:
      - ~/ollama:/root/.ollama
    networks:
      - net

  app:
    image: amithkoujalgi/pdf-bot:1.0.0
    ports:
      - 8501:8501
    environment:
      - OLLAMA_API_BASE_URL=http://ollama:11434
      - MODEL=orca-mini
    networks:
      - net

networks:
  net:
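
A minimal way to bring the stack up (assuming Docker Compose v2 is available); Ollama listens on port 11434 and the Streamlit UI on port 8501:

# Start Ollama and the PDF bot in the background, then open the UI.
docker compose up -d
# Browse to http://localhost:8501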
2 changes: 2 additions & 0 deletions install.sh
@@ -0,0 +1,2 @@
#!/bin/bash
pip install -r requirements.txt
Empty file added pdf_bot/__init__.py
Empty file.
124 changes: 124 additions & 0 deletions pdf_bot/app.py
@@ -0,0 +1,124 @@
import os
from pathlib import Path
from typing import Optional

import requests
import streamlit as st

from pdf_helper import PDFHelper


def pull_model(model_name_):
    print(f"pulling model '{model_name_}'...")
    url = f"{ollama_api_base_url}/api/pull"
    data = f'{{"name": "{model_name_}"}}'
    headers = {'Content-Type': 'application/json'}
    _response = requests.post(url, data=data, headers=headers)
    print(_response.text)


title = "PDF Bot"

model_name = os.environ.get('MODEL', "orca-mini")

ollama_api_base_url = os.environ.get('OLLAMA_API_BASE_URL', "http://localhost:11434")
pdfs_directory = os.path.join(str(Path.home()), 'langchain-store', 'uploads', 'pdfs')
os.makedirs(pdfs_directory, exist_ok=True)

print(f"Using model: {model_name}")
print(f"Using Ollama base URL: {ollama_api_base_url}")
print(f"Using PDFs upload directory: {pdfs_directory}")
pull_model(model_name_=model_name)

st.set_page_config(page_title=title)


def on_upload_change():
    clear_chat_history()


def set_uploaded_file(_uploaded_file: str):
    st.session_state['uploaded_file'] = _uploaded_file


def get_uploaded_file() -> Optional[str]:
    if 'uploaded_file' in st.session_state:
        return st.session_state['uploaded_file']
    return None


with st.sidebar:
    st.title(title)
    st.write('This chatbot accepts a PDF file and lets you ask questions on it.')
    uploaded_file = st.file_uploader(
        label='Upload a PDF', type=['pdf', 'PDF'],
        accept_multiple_files=False,
        key='file-uploader',
        help=None,
        on_change=on_upload_change,
        args=None,
        kwargs=None,
        disabled=False,
        label_visibility="visible"
    )

    if uploaded_file is not None:
        bytes_data = uploaded_file.getvalue()
        target_file = os.path.join(pdfs_directory, uploaded_file.name)
        # print(uploaded_file)
        set_uploaded_file(target_file)
        with open(target_file, 'wb') as f:
            f.write(bytes_data)

# Store LLM generated responses
if "messages" not in st.session_state.keys():
    st.session_state.messages = [{"role": "assistant", "content": "Hello, I'm your PDF assistant."}]

# Display or clear chat messages
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.write(message["content"])


def clear_chat_history():
    st.session_state.messages = [{"role": "assistant", "content": "Hello, I'm your PDF assistant."}]


st.sidebar.button('Reset', on_click=clear_chat_history)

# User-provided prompt
if prompt := st.chat_input(disabled=False, placeholder="What do you want to know from the uploaded PDF?"):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.write(prompt)

if st.session_state.messages[-1]["role"] != "assistant":
    source_file = get_uploaded_file()
    if source_file is None:
        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                placeholder = st.empty()
                full_response = 'PDF file needs to be uploaded before you can ask questions on it 😟. Please upload a file.'
                placeholder.markdown(full_response)
        message = {"role": "assistant", "content": full_response}
        st.session_state.messages.append(message)
    else:
        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                question = dict(st.session_state.messages[-1]).get('content')
                pdf_helper = PDFHelper(
                    ollama_api_base_url=ollama_api_base_url,
                    model_name=model_name
                )
                response = pdf_helper.ask(
                    pdf_file_path=source_file,
                    question=question
                )
                placeholder = st.empty()
                full_response = ''
                for item in response:
                    full_response += item
                    placeholder.markdown(full_response)
                placeholder.markdown(full_response)
        message = {"role": "assistant", "content": full_response}
        st.session_state.messages.append(message)
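
To run the app without Docker (a sketch, assuming Python 3.8+, the dependencies installed via install.sh, and a local Ollama server already listening on port 11434):

# These are the same environment variables read by pdf_bot/app.py.
export OLLAMA_API_BASE_URL=http://localhost:11434
export MODEL=orca-mini
streamlit run pdf_bot/app.py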