-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDockerfile
28 lines (21 loc) · 907 Bytes
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# syntax=docker/dockerfile:1

# Base image: slim Python keeps the image small.
# NOTE(review): Python 3.8 is past end-of-life — consider bumping to 3.11/3.12,
# but verify the cu117 torch wheels exist for the new version first.
FROM python:3.8-slim

# Set the working directory up front (created automatically if missing).
WORKDIR /app

# Install Python dependencies in a single layer.
# --no-cache-dir keeps pip's download cache out of the image (hadolint DL3042).
# NOTE(review): versions are unpinned (DL3013) — pin exact versions, or move
# these into a requirements.txt copied before the source for better caching
# and reproducible builds.
RUN pip install --no-cache-dir --upgrade pip \
 && pip install --no-cache-dir \
        torch torchvision torchaudio \
        --extra-index-url https://download.pytorch.org/whl/cu117 \
 && pip install --no-cache-dir \
        accelerate \
        grpcio \
        grpcio-tools \
        protobuf \
        transformers

# Copy the application code from the build context.
# (Add a .dockerignore so .git, caches, and local secrets stay out.)
COPY . /app

# Download the model at build time (disabled: baking multi-GB weights into
# the image bloats it badly; prefer downloading at startup or mounting a
# model volume instead).
# RUN python3 -c "from transformers import AutoModelForCausalLM, AutoTokenizer; \
#     model_name = 'google/gemma-2-2b-it'; \
#     model = AutoModelForCausalLM.from_pretrained(model_name); \
#     tokenizer = AutoTokenizer.from_pretrained(model_name);"

# Generate protobuf message classes and gRPC stubs from chat.proto.
RUN python -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. chat.proto

# Drop root: create an unprivileged user that owns the app directory.
# Port 50051 is above 1024, so no extra capabilities are needed to bind.
RUN useradd --system --create-home appuser \
 && chown -R appuser /app
USER appuser

# Document the gRPC server port (EXPOSE does not publish it by itself).
EXPOSE 50051

# Start the gRPC server. Exec form keeps server.py as PID 1 so it receives
# SIGTERM directly from `docker stop`.
CMD ["python3", "server.py"]