Ver código fonte

Merge branch 'main' into da3dsoul-main

oobabooga 2 anos atrás
pai
commit
7ac7f1bc9a
5 arquivos alterados com 138 adições e 1 exclusões
  1. 10 0
      .dockerignore
  2. 25 0
      .env.example
  3. 61 0
      Dockerfile
  4. 10 1
      README.md
  5. 32 0
      docker-compose.yml

+ 10 - 0
.dockerignore

@@ -0,0 +1,10 @@
+.env
+Dockerfile
+/characters
+/extensions
+/loras
+/models
+/presets
+/prompts
+/softprompts
+/training

+ 25 - 0
.env.example

@@ -0,0 +1,25 @@
+# by default the Dockerfile specifies these versions: 3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX
+# however, for it to work I had to specify the exact version for my card (an RTX 2060), which was 7.5
+# https://developer.nvidia.com/cuda-gpus you can find the version for your card here
+TORCH_CUDA_ARCH_LIST=7.5
+
+# these commands worked for me with roughly 4.5GB of VRAM
+CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices
+
+# the following examples have been tested with the files linked in docs/README_docker.md:
+# example running 13b with 4bit/128 groupsize        : CLI_ARGS=--model llama-13b-4bit-128g --wbits 4 --listen --groupsize 128 --pre_layer 25
+# example with loading api extension and public share: CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices --no-stream --extensions api --share
+# example running 7b with 8bit quantization          : CLI_ARGS=--model llama-7b --load-in-8bit --listen --auto-devices
+
+# the port the webui binds to on the host
+HOST_PORT=7860
+# the port the webui binds to inside the container
+CONTAINER_PORT=7860
+
+# the port the api binds to on the host
+HOST_API_PORT=5000
+# the port the api binds to inside the container
+CONTAINER_API_PORT=5000
+
+# the version used to install text-generation-webui from
+WEBUI_VERSION=HEAD

+ 61 - 0
Dockerfile

@@ -0,0 +1,61 @@
+FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as builder
+
+RUN apt-get update && \
+    apt-get install --no-install-recommends -y git vim build-essential python3-dev python3-venv && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN git clone https://github.com/oobabooga/GPTQ-for-LLaMa /build
+
+WORKDIR /build
+
+RUN python3 -m venv /build/venv
+RUN . /build/venv/bin/activate && \
+    pip3 install --upgrade pip setuptools && \
+    pip3 install torch torchvision torchaudio && \
+    pip3 install -r requirements.txt
+
+# https://developer.nvidia.com/cuda-gpus
+# for an RTX 2060: ARG TORCH_CUDA_ARCH_LIST="7.5"
+ARG TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
+RUN . /build/venv/bin/activate && \
+    python3 setup_cuda.py bdist_wheel -d .
+
+FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04
+
+LABEL maintainer="Your Name <your.email@example.com>"
+LABEL description="Docker image for GPTQ-for-LLaMa and Text Generation WebUI"
+
+RUN apt-get update && \
+    apt-get install --no-install-recommends -y git python3 python3-pip && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN --mount=type=cache,target=/root/.cache/pip pip3 install virtualenv
+
+COPY . /app/
+
+WORKDIR /app
+
+ARG WEBUI_VERSION
+RUN test -n "${WEBUI_VERSION}" && git reset --hard ${WEBUI_VERSION} || echo "Using provided webui source"
+
+RUN virtualenv /app/venv
+RUN . /app/venv/bin/activate && \
+    pip3 install --upgrade pip setuptools && \
+    pip3 install torch torchvision torchaudio && \
+    pip3 install -r requirements.txt
+
+COPY --from=builder /build /app/repositories/GPTQ-for-LLaMa
+RUN . /app/venv/bin/activate && \
+    pip3 install /app/repositories/GPTQ-for-LLaMa/*.whl
+
+ENV CLI_ARGS=""
+
+RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/api && pip3 install -r requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/elevenlabs_tts && pip3 install -r requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/google_translate && pip3 install -r requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/silero_tts && pip3 install -r requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/whisper_stt && pip3 install -r requirements.txt
+
+RUN cp /app/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda118.so /app/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so
+
+CMD . /app/venv/bin/activate && python3 server.py ${CLI_ARGS}

+ 10 - 1
README.md

@@ -117,7 +117,16 @@ As an alternative to the recommended WSL method, you can install the web UI nati
 
 ### Alternative: Docker
 
-https://github.com/oobabooga/text-generation-webui/issues/174, https://github.com/oobabooga/text-generation-webui/issues/87
+```
+cp .env.example .env
+docker-compose up --build
+```
+
+Make sure to edit `.env` (your copy of `.env.example`) and set the appropriate CUDA architecture version for your GPU.
+
+You need to have docker compose v2.17 or higher installed in your system. For installation instructions, see [Docker compose installation](https://github.com/oobabooga/text-generation-webui/wiki/Docker-compose-installation).
+
+Contributed by [@loeken](https://github.com/loeken) in [#633](https://github.com/oobabooga/text-generation-webui/pull/633)
 
 ### Updating the requirements
 

+ 32 - 0
docker-compose.yml

@@ -0,0 +1,32 @@
+version: "3.3"
+services:
+  text-generation-webui:
+    build:
+      context: .
+      args:
+        # specify which cuda version your card supports: https://developer.nvidia.com/cuda-gpus
+        TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST}
+        GPTQ_VERSION: ${GPTQ_VERSION}
+        WEBUI_VERSION: ${WEBUI_VERSION}
+    env_file: .env
+    ports:
+      - "${HOST_PORT}:${CONTAINER_PORT}"
+      - "${HOST_API_PORT}:${CONTAINER_API_PORT}"
+    stdin_open: true
+    tty: true
+    volumes:
+      - ./characters:/app/characters
+      - ./extensions:/app/extensions
+      - ./loras:/app/loras
+      - ./models:/app/models
+      - ./presets:/app/presets
+      - ./prompts:/app/prompts
+      - ./softprompts:/app/softprompts
+      - ./training:/app/training
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              device_ids: ['0']
+              capabilities: [gpu]