diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..9c0b2b4
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,6 @@
+.dockerignore
+.gitignore
+*.md
+Dockerfile
+compose.yml
+data/
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..0f8bb0a
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,30 @@
+FROM nvidia/cuda:12.9.1-cudnn-devel-ubuntu24.04
+
+# Install Python 3.13
+RUN apt-get update && apt-get install -y wget software-properties-common build-essential && apt-get clean && rm -rf /var/lib/apt/lists/*
+RUN add-apt-repository ppa:deadsnakes/ppa
+RUN apt-get update && apt-get install -y python3.13 python3.13-venv python3.13-dev ninja-build && apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Install Sharp and dependencies
+RUN mkdir /app
+COPY pyproject.toml requirements.txt requirements.in /app/
+COPY src/ /app/src/
+WORKDIR /app
+RUN python3.13 -m venv .venv
+ENV TORCH_CUDA_ARCH_LIST="8.0;8.6;8.7;8.9;9.0+PTX"
+ENV FORCE_CUDA="1"
+RUN .venv/bin/pip install ninja
+RUN .venv/bin/pip install -r requirements.txt
+RUN .venv/bin/pip install gradio
+RUN ln -s /app/.venv/bin/sharp /usr/local/bin/sharp
+
+# Test run to download model and check if it works
+RUN wget https://apple.github.io/ml-sharp/thumbnails/Unsplash_-5wkyNA2BPc_0000-0001.jpg -O /tmp/test.jpg
+RUN sharp predict -i /tmp/test.jpg -o /tmp/test
+RUN rm /tmp/test.jpg /tmp/test -rf
+
+# Copy other files
+COPY . /app
+
+# Start Gradio web server
+CMD [".venv/bin/python3.13", "-u", "/app/gradio_web.py"]
diff --git a/README.md b/README.md
index b105c65..5e6c45d 100644
--- a/README.md
+++ b/README.md
@@ -66,6 +66,17 @@ sharp predict -i /path/to/input/images -o /path/to/output/gaussians --render
 sharp render -i /path/to/output/gaussians -o /path/to/output/renderings
 ```
 
+## Using Docker
+
+We provide a Docker image to run the code. You can start the Gradio app using
+
+```
+docker compose up --build --remove-orphans
+```
+
+The app will be available at `http://localhost:7860`.
+You need Docker with CUDA (NVIDIA GPU) support installed to use the Docker image.
+
 ## Evaluation
 
 Please refer to the paper for both quantitative and qualitative evaluations.
diff --git a/compose.yml b/compose.yml
new file mode 100644
index 0000000..2d8f589
--- /dev/null
+++ b/compose.yml
@@ -0,0 +1,16 @@
+services:
+  sharp:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    volumes:
+      - ./data:/app/data
+    ports:
+      - "7860:7860"
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
\ No newline at end of file
diff --git a/gradio_web.py b/gradio_web.py
new file mode 100644
index 0000000..4258731
--- /dev/null
+++ b/gradio_web.py
@@ -0,0 +1,80 @@
+"""Gradio web front end for the `sharp` command line tool."""
+
+import gradio as gr
+import subprocess
+import os
+import shutil
+import time
+import glob
+
+DATA_DIR = "/app/data"
+OUTPUT_DIR = os.path.join(DATA_DIR, "output")
+
+
+def predict(image):
+    """Run `sharp predict --render` on an uploaded image.
+
+    Args:
+        image: Path of the uploaded image (Gradio `type="filepath"`), or
+            None when the form is submitted without an image.
+
+    Returns:
+        A `(rgb_video, depth_video)` tuple of file paths. An entry is None
+        when that video is not available; both are None when there is no
+        input image or the `sharp` command fails.
+    """
+    # The interface declares two outputs, so every path returns a 2-tuple.
+    if not image:
+        return None, None
+
+    os.makedirs(DATA_DIR, exist_ok=True)
+
+    # Gradio hands over a temporary path; copy it into the data directory.
+    input_path = os.path.join(DATA_DIR, "input.jpg")
+    shutil.copy(image, input_path)
+
+    rgb_video = os.path.join(OUTPUT_DIR, "input.mp4")
+    depth_video = os.path.join(OUTPUT_DIR, "input.depth.mp4")
+
+    # Remove videos left over from an earlier request so that a run which
+    # writes nothing cannot hand back stale results.
+    for stale in (rgb_video, depth_video):
+        if os.path.exists(stale):
+            os.remove(stale)
+
+    # sharp predict -i /app/data/input.jpg -o /app/data/output --render
+    cmd = [
+        "sharp", "predict",
+        "-i", input_path,
+        "-o", OUTPUT_DIR,
+        "--render",
+    ]
+
+    try:
+        t = time.time()
+        print("Sharp started")
+        subprocess.run(cmd, check=True, capture_output=True)
+        print(f"Sharp command took {round(time.time() - t, 3)} seconds")
+    except subprocess.CalledProcessError as e:
+        print(f"Error running sharp: {e}")
+        print(f"Stdout: {e.stdout.decode(errors='replace')}")
+        print(f"Stderr: {e.stderr.decode(errors='replace')}")
+        return None, None
+
+    # The depth video is only reported together with the RGB video.
+    if not os.path.exists(rgb_video):
+        return None, None
+    return rgb_video, (depth_video if os.path.exists(depth_video) else None)
+
+
+demo = gr.Interface(
+    fn=predict,
+    inputs=gr.Image(type="filepath", label="Input Image"),
+    outputs=[gr.Video(label="RGB Video"), gr.Video(label="Depth Video")],
+    title="Sharp 3D View Synthesis",
+    description="Upload an image to generate a 3D view synthesis video."
+)
+
+if __name__ == "__main__":
+    print("Sharp Monocular View Synthesis in Less Than a Second (https://github.com/apple/ml-sharp)")
+    demo.launch(server_name="0.0.0.0", server_port=7860)