From aec04f6eb4e3d07fa524f9d56986f64d8c5d4753 Mon Sep 17 00:00:00 2001
From: Ray Dumasia <worldofray@googlemail.com>
Date: Tue, 21 Oct 2025 01:32:09 +0100
Subject: [PATCH] Initial commit

---
 .gitignore                                   | 102 ++++++
 README.md                                    | 138 ++++++++
 backend/Dockerfile                           |  20 ++
 backend/main.py                              | 329 +++++++++++++++++++
 backend/requirements.txt                     |  12 +
 docker-compose.yml                           |  35 ++
 frontend/.gitignore                          |  24 ++
 frontend/Dockerfile                          |  29 ++
 frontend/README.md                           |  73 ++++
 frontend/eslint.config.js                    |  23 ++
 frontend/index.html                          |  16 +
 frontend/nginx.conf                          |  40 +++
 frontend/package.json                        |  29 ++
 frontend/postcss.config.js                   |   6 +
 frontend/public/vite.svg                     |   1 +
 frontend/src/App.css                         |  42 +++
 frontend/src/App.jsx                         | 251 ++++++++++++++
 frontend/src/App.tsx                         |  35 ++
 frontend/src/assets/react.svg                |   1 +
 frontend/src/components/AdvancedSettings.jsx |  83 +++++
 frontend/src/components/ImageUpload.jsx      |  99 ++++++
 frontend/src/components/ModeSelector.jsx     | 105 ++++++
 frontend/src/components/ResultPanel.jsx      | 302 +++++++++++++++++
 frontend/src/index.css                       |  50 +++
 frontend/src/main.jsx                        |  10 +
 frontend/src/main.tsx                        |  10 +
 frontend/tailwind.config.js                  |  48 +++
 frontend/tsconfig.app.json                   |  28 ++
 frontend/tsconfig.json                       |   7 +
 frontend/tsconfig.node.json                  |  26 ++
 frontend/vite.config.js                      |  20 ++
 frontend/vite.config.ts                      |   7 +
 32 files changed, 2001 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 backend/Dockerfile
 create mode 100644 backend/main.py
 create mode 100644 backend/requirements.txt
 create mode 100644 docker-compose.yml
 create mode 100644 frontend/.gitignore
 create mode 100644 frontend/Dockerfile
 create mode 100644 frontend/README.md
 create mode 100644 frontend/eslint.config.js
 create mode 100644 frontend/index.html
 create mode 100644 frontend/nginx.conf
 create mode 100644 frontend/package.json
 create mode 100644 frontend/postcss.config.js
 create mode 100644 frontend/public/vite.svg
 create mode 100644 frontend/src/App.css
 create mode 100644 frontend/src/App.jsx
 create mode 100644 frontend/src/App.tsx
 create mode 100644 frontend/src/assets/react.svg
 create mode 100644 frontend/src/components/AdvancedSettings.jsx
 create mode 100644 frontend/src/components/ImageUpload.jsx
 create mode 100644 frontend/src/components/ModeSelector.jsx
 create mode 100644 frontend/src/components/ResultPanel.jsx
 create mode 100644 frontend/src/index.css
 create mode 100644 frontend/src/main.jsx
 create mode 100644 frontend/src/main.tsx
 create mode 100644 frontend/tailwind.config.js
 create mode 100644 frontend/tsconfig.app.json
 create mode 100644 frontend/tsconfig.json
 create mode 100644 frontend/tsconfig.node.json
 create mode 100644 frontend/vite.config.js
 create mode 100644 frontend/vite.config.ts

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..705ff1b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,102 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+*.pyc
+.pytest_cache/
+.coverage
+htmlcov/
+.tox/
+.hypothesis/
+venv/
+env/
+ENV/
+.venv
+
+# Node
+node_modules/
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+pnpm-debug.log*
+.pnpm-store/
+dist/
+dist-ssr/
+*.local
+package-lock.json
+yarn.lock
+pnpm-lock.yaml
+
+# Environment
+.env
+.env.local
+.env.development.local
+.env.test.local
+.env.production.local
+
+# IDEs
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+.DS_Store
+*.sublime-project
+*.sublime-workspace
+.project
+.classpath
+.settings/
+
+# Logs
+logs/
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+pnpm-debug.log*
+lerna-debug.log*
+
+# Docker
+.dockerignore
+
+# Models (keep structure, ignore downloads)
+models/hub/models--*/blobs/*
+models/hub/models--*/snapshots/*/pytorch_model*.bin
+models/hub/models--*/snapshots/*/model*.safetensors
+models/hub/models--*/snapshots/*/*.msgpack
+*.bin
+*.safetensors
+*.msgpack
+*.h5
+*.onnx
+
+# Temporary files
+tmp/
+temp/
+*.tmp
+*.bak
+*.swp
+*~
+
+# OS
+.DS_Store
+Thumbs.db
+Desktop.ini
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..093560c
--- /dev/null
+++ b/README.md
@@ -0,0 +1,138 @@
+# 🚀 DeepSeek OCR - React + FastAPI
+
+Modern OCR web application powered by DeepSeek-OCR with a stunning React frontend and FastAPI backend.
+
+> **Note**: This was a quickly vibe-coded project to test out DeepSeek-OCR! It works quite nicely on an RTX 5090. The "Find" mode grounding boxes aren't quite working yet - probably my fault for not interpreting the dimensions correctly, but the core OCR functionality is pretty nice so far.
+
+## Quick Start
+
+```bash
+docker compose up --build
+```
+
+Then open:
+- **Frontend**: http://localhost:3000
+- **Backend API**: http://localhost:8000
+- **API Docs**: http://localhost:8000/docs
+
+## Features
+
+### 4 OCR Modes
+- **Plain OCR** - Raw text extraction
+- **Describe** - Generate image descriptions
+- **Find** - Locate specific terms (grounding boxes WIP)
+- **Freeform** - Custom prompts for anything
+
+### UI Features
+- 🎨 Glass morphism design with animated gradients
+- 🎯 Drag & drop file upload
+- 📦 Grounding box visualization (WIP - dimensions need fixing)
+- ✨ Smooth animations (Framer Motion)
+- 📋 Copy/Download results
+- 🎛️ Advanced settings dropdown
+- 📝 Markdown rendering for formatted output
+
+## Tech Stack
+
+- **Frontend**: React 18 + Vite 5 + TailwindCSS 3 + Framer Motion 11
+- **Backend**: FastAPI + PyTorch + Transformers 4.46 + DeepSeek-OCR
+- **Server**: Nginx (reverse proxy)
+- **Container**: Docker + Docker Compose with multi-stage builds
+- **GPU**: NVIDIA CUDA support (tested on RTX 3090)
+
+## Project Structure
+
+```
+deepseek-ocr/
+├── backend/           # FastAPI backend
+│   ├── main.py
+│   ├── requirements.txt
+│   └── Dockerfile
+├── frontend/          # React frontend
+│   ├── src/
+│   │   ├── components/
+│   │   ├── App.jsx
+│   │   └── main.jsx
+│   ├── package.json
+│   ├── nginx.conf
+│   └── Dockerfile
+├── models/            # Model cache
+└── docker-compose.yml
+```
+
+## Development
+
+### Backend
+```bash
+cd backend
+pip install -r requirements.txt
+uvicorn main:app --reload --host 0.0.0.0 --port 8000
+```
+
+### Frontend
+```bash
+cd frontend
+npm install
+npm run dev
+```
+
+## Requirements
+
+- Docker & Docker Compose
+- NVIDIA GPU with CUDA support (tested on RTX 3090)
+- nvidia-docker runtime
+- ~8-12GB VRAM for model
+
+## Known Issues
+
+- 📦 **Find mode grounding boxes**: Not rendering correctly - likely dimension scaling issue in the canvas overlay logic. Boxes are detected and returned by the backend, but the frontend visualization needs work.
+
+## API Usage
+
+### POST /api/ocr
+
+**Parameters:**
+- `image` (file, required)
+- `mode` (string): plain_ocr | describe | find_ref | freeform
+- `prompt` (string): Custom prompt for freeform mode
+- `grounding` (bool): Enable bounding boxes (auto-enabled for find_ref)
+- `find_term` (string): Term to locate in find_ref mode
+- `base_size` (int): Base processing size (default: 1024)
+- `image_size` (int): Image size (default: 640)
+- `crop_mode` (bool): Enable crop mode (default: true)
+
+**Response:**
+```json
+{
+  "success": true,
+  "text": "Extracted text...",
+  "boxes": [{"label": "field", "box": [x1, y1, x2, y2]}],
+  "image_dims": {"w": 1920, "h": 1080},
+  "metadata": {...}
+}
+```
+
+## Troubleshooting
+
+### GPU not detected
+```bash
+nvidia-smi
+docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi
+```
+
+### Port conflicts
+```bash
+sudo lsof -i :3000
+sudo lsof -i :8000
+```
+
+### Frontend build issues
+```bash
+cd frontend
+rm -rf node_modules package-lock.json
+docker compose build frontend
+```
+
+## License
+
+This project uses the DeepSeek-OCR model. Refer to the model's license terms.
diff --git a/backend/Dockerfile b/backend/Dockerfile
new file mode 100644
index 0000000..7b95536
--- /dev/null
+++ b/backend/Dockerfile
@@ -0,0 +1,20 @@
+# Backend Dockerfile - FastAPI + DeepSeek-OCR
+FROM nvcr.io/nvidia/pytorch:25.09-py3
+
+ENV PYTHONUNBUFFERED=1 \
+    PIP_NO_CACHE_DIR=1 \
+    HF_HOME=/models
+
+WORKDIR /app
+
+# Install dependencies
+COPY requirements.txt .
+RUN pip install --upgrade pip && pip install -r requirements.txt
+
+# Copy backend code
+COPY main.py .
+
+EXPOSE 8000
+
+# Use uvicorn with reasonable workers for GPU workload
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "1"]
diff --git a/backend/main.py b/backend/main.py
new file mode 100644
index 0000000..2b69bdb
--- /dev/null
+++ b/backend/main.py
@@ -0,0 +1,329 @@
+import os
+import re
+import tempfile
+import shutil
+from typing import List, Dict, Any, Optional
+from contextlib import asynccontextmanager
+
+from fastapi import FastAPI, File, UploadFile, Form, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse
+import torch
+from transformers import AutoModel, AutoTokenizer
+from PIL import Image
+import uvicorn
+
+# -----------------------------
+# Lifespan context for model loading
+# -----------------------------
+model = None
+tokenizer = None
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Load the tokenizer and model into the module globals at startup; log on shutdown"""
+    global model, tokenizer
+    
+    # Drop TRANSFORMERS_CACHE, read MODEL_NAME/HF_HOME, and make sure the cache dir exists
+    os.environ.pop("TRANSFORMERS_CACHE", None)
+    MODEL_NAME = os.environ.get("MODEL_NAME", "deepseek-ai/DeepSeek-OCR")
+    HF_HOME = os.environ.get("HF_HOME", "/models")
+    os.makedirs(HF_HOME, exist_ok=True)
+    
+    # Load tokenizer and model in bfloat16; the model is moved to the "cuda" device
+    print(f"🚀 Loading {MODEL_NAME}...")
+    torch_dtype = torch.bfloat16
+    
+    tokenizer = AutoTokenizer.from_pretrained(
+        MODEL_NAME,
+        trust_remote_code=True,
+    )
+    
+    model = AutoModel.from_pretrained(
+        MODEL_NAME,
+        trust_remote_code=True,
+        use_safetensors=True,
+        attn_implementation="eager",
+        torch_dtype=torch_dtype,
+    ).eval().to("cuda")
+    
+    # Best-effort pad token setup: reuse EOS as pad and mirror it into the model config
+    try:
+        if getattr(tokenizer, "pad_token_id", None) is None and getattr(tokenizer, "eos_token_id", None) is not None:
+            tokenizer.pad_token = tokenizer.eos_token
+        if getattr(model.config, "pad_token_id", None) is None and getattr(tokenizer, "pad_token_id", None) is not None:
+            model.config.pad_token_id = tokenizer.pad_token_id
+    except Exception as exc:
+        print(f"⚠️ Pad token setup skipped: {type(exc).__name__}: {exc}")
+    
+    print("✅ Model loaded and ready!")
+    
+    yield
+    
+    # Shutdown hook: only logs; no explicit model teardown is performed here
+    print("🛑 Shutting down...")
+
+# -----------------------------
+# FastAPI app
+# -----------------------------
+app = FastAPI(
+    title="DeepSeek-OCR API",
+    description="Blazing fast OCR with DeepSeek-OCR model 🔥",
+    version="2.0.0",
+    lifespan=lifespan
+)
+
+# CORS for the React frontend: any origin may call, so credentials (cookies/auth) stay disabled
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=False,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# -----------------------------
+# Prompt builder
+# -----------------------------
+def build_prompt(
+    mode: str,
+    user_prompt: str,
+    grounding: bool,
+    find_term: Optional[str],
+    schema: Optional[str],
+    include_caption: bool,
+) -> str:
+    """Build the newline-joined model prompt for a mode; blank prompt/schema/term use defaults"""
+    parts: List[str] = ["<image>"]
+    mode_requires_grounding = mode in {"find_ref", "layout_map", "pii_redact"}
+    if grounding or mode_requires_grounding:
+        parts.append("<|grounding|>")
+
+    instruction = ""
+    if mode == "plain_ocr":
+        instruction = "Free OCR. Only output the raw text."
+    elif mode == "markdown":
+        instruction = "Convert the document to markdown."
+    elif mode == "tables_csv":
+        instruction = (
+            "Extract every table and output CSV only. "
+            "Use commas, minimal quoting. If multiple tables, separate with a line containing '---'."
+        )
+    elif mode == "tables_md":
+        instruction = "Extract every table as GitHub-flavored Markdown tables. Output only the tables."
+    elif mode == "kv_json":
+        schema_text = (schema or "").strip() or "{}"
+        instruction = (
+            "Extract key fields and return strict JSON only. "
+            f"Use this schema (fill the values): {schema_text}"
+        )
+    elif mode == "figure_chart":
+        instruction = (
+            "Parse the figure. First extract any numeric series as a two-column table (x,y). "
+            "Then summarize the chart in 2 sentences. Output the table, then a line '---', then the summary."
+        )
+    elif mode == "find_ref":
+        key = (find_term or "").strip() or "Total"
+        instruction = f"Locate <|ref|>{key}<|/ref|> in the image."
+    elif mode == "layout_map":
+        instruction = (
+            'Return a JSON array of blocks with fields {"type":["title","paragraph","table","figure"],'
+            '"box":[x1,y1,x2,y2]}. Do not include any text content.'
+        )
+    elif mode == "pii_redact":
+        instruction = (
+            'Find all occurrences of emails, phone numbers, postal addresses, and IBANs. '
+            'Return a JSON array of objects {label, text, box:[x1,y1,x2,y2]}.'
+        )
+    elif mode == "multilingual":
+        instruction = "Free OCR. Detect the language automatically and output in the same script."
+    elif mode == "describe":
+        instruction = "Describe this image concisely in 2-3 sentences. Focus on visible key elements."
+    elif mode == "freeform":
+        instruction = (user_prompt or "").strip() or "OCR this image."
+    else:
+        instruction = "OCR this image."
+
+    if include_caption and mode not in {"describe"}:
+        instruction = instruction + "\nThen add a one-paragraph description of the image."
+
+    parts.append(instruction)
+    return "\n".join(parts)
+
+# -----------------------------
+# Grounding parser
+# -----------------------------
+DET_BLOCK = re.compile(
+    r"<\|ref\|>(?P<label>.*?)<\|/ref\|>\s*<\|det\|>\s*\[\s*\[\s*(?P<coords>[^\]]+?)\s*\]\s*\]\s*<\|/det\|>",
+    re.DOTALL,
+)
+
+def clean_grounding_text(text: str) -> str:
+    """Strip grounding markup from model output, leaving the referenced labels in place"""
+    # Each <|ref|>label<|/ref|><|det|>[[...]]<|/det|> pair collapses to "label";
+    # DOTALL lets a label span several lines
+    ref_det_pair = re.compile(
+        r"<\|ref\|>(.*?)<\|/ref\|>\s*<\|det\|>\s*\[\s*\[[^\]]+\]\s*\]\s*<\|/det\|>",
+        re.DOTALL,
+    )
+    labels_only = ref_det_pair.sub(r"\1", text)
+    # Leftover <|grounding|> markers carry no text, so drop them outright
+    unmarked = labels_only.replace("<|grounding|>", "")
+    return unmarked.strip()
+
+def parse_detections(text: str) -> List[Dict[str, Any]]:
+    """Collect each <|ref|>/<|det|> pair in text as a {"label", "box"} dict with four floats"""
+    found: List[Dict[str, Any]] = []
+    for match in DET_BLOCK.finditer(text or ""):
+        raw = match.group("coords").split(",")[:4]
+        try:
+            box = [float(piece.strip()) for piece in raw]
+        except Exception:
+            continue
+        if len(box) != 4:
+            continue
+        found.append({"label": match.group("label").strip(), "box": box})
+    return found
+
+# -----------------------------
+# Routes
+# -----------------------------
+@app.get("/")
+async def root():
+    return {"message": "DeepSeek-OCR API is running! 🚀", "docs": "/docs"}
+
+@app.get("/health")
+async def health():
+    return {"status": "healthy", "model_loaded": model is not None and tokenizer is not None}
+
+@app.post("/api/ocr")
+async def ocr_inference(
+    image: UploadFile = File(...),
+    mode: str = Form("plain_ocr"),
+    prompt: str = Form(""),
+    grounding: bool = Form(False),
+    include_caption: bool = Form(False),
+    find_term: Optional[str] = Form(None),
+    schema: Optional[str] = Form(None),
+    base_size: int = Form(1024),
+    image_size: int = Form(640),
+    crop_mode: bool = Form(True),
+    test_compress: bool = Form(False),
+):
+    """
+    Perform OCR inference on uploaded image
+    
+    - **image**: Image file to process
+    - **mode**: OCR mode (plain_ocr, markdown, tables_csv, etc.)
+    - **prompt**: Custom prompt for freeform mode
+    - **grounding**: Enable grounding boxes
+    - **include_caption**: Add image description
+    - **find_term**: Term to find (for find_ref mode)
+    - **schema**: JSON schema (for kv_json mode)
+    - **base_size**: Base processing size
+    - **image_size**: Image size parameter
+    - **crop_mode**: Enable crop mode
+    - **test_compress**: Test compression
+    """
+    if model is None or tokenizer is None:
+        raise HTTPException(status_code=503, detail="Model not loaded yet")
+    
+    # Build the prompt text for the requested mode and options
+    prompt_text = build_prompt(
+        mode=mode,
+        user_prompt=prompt,
+        grounding=grounding,
+        find_term=find_term,
+        schema=schema,
+        include_caption=include_caption,
+    )
+    
+    tmp_img = None
+    out_dir = None
+    try:
+        # Save the upload to disk; record the path first so cleanup runs even if the copy fails
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
+            tmp_img = tmp.name
+            content = await image.read()
+            tmp.write(content)
+        
+        # Get original dimensions (left as None when PIL cannot open the upload)
+        try:
+            with Image.open(tmp_img) as im:
+                orig_w, orig_h = im.size
+        except Exception:
+            orig_w = orig_h = None
+        
+        out_dir = tempfile.mkdtemp(prefix="dsocr_")
+        
+        # Run inference (synchronous call; nothing else is awaited while it runs)
+        res = model.infer(
+            tokenizer,
+            prompt=prompt_text,
+            image_file=tmp_img,
+            output_path=out_dir,
+            base_size=base_size,
+            image_size=image_size,
+            crop_mode=crop_mode,
+            save_results=False,
+            test_compress=test_compress,
+            eval_mode=True,
+        )
+        
+        # Normalize response: a str, a dict with "text", or a list/tuple becomes one string
+        if isinstance(res, str):
+            text = res.strip()
+        elif isinstance(res, dict) and "text" in res:
+            text = str(res["text"]).strip()
+        elif isinstance(res, (list, tuple)):
+            text = "\n".join(map(str, res)).strip()
+        else:
+            text = ""
+        
+        # Fallback: read result.mmd from the output directory when nothing was returned
+        if not text:
+            mmd = os.path.join(out_dir, "result.mmd")
+            if os.path.exists(mmd):
+                with open(mmd, "r", encoding="utf-8") as fh:
+                    text = fh.read().strip()
+        if not text:
+            text = "No text returned by model."
+        
+        # Parse grounding boxes
+        boxes = parse_detections(text) if ("<|det|>" in text or "<|ref|>" in text) else []
+        
+        # Clean grounding tags from display text, but keep the labels
+        display_text = clean_grounding_text(text) if ("<|ref|>" in text or "<|grounding|>" in text) else text
+        
+        # If display text is empty after cleaning but we have boxes, show the labels
+        if not display_text and boxes:
+            display_text = ", ".join([b["label"] for b in boxes])
+        
+        return JSONResponse({
+            "success": True,
+            "text": display_text,
+            "boxes": boxes,
+            "image_dims": {"w": orig_w, "h": orig_h},
+            "metadata": {
+                "mode": mode,
+                "grounding": grounding or (mode in {"find_ref","layout_map","pii_redact"}),
+                "base_size": base_size,
+                "image_size": image_size,
+                "crop_mode": crop_mode
+            }
+        })
+    
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"{type(e).__name__}: {str(e)}")
+    
+    finally:
+        if tmp_img:
+            try:
+                os.remove(tmp_img)
+            except Exception:
+                pass
+        if out_dir:
+            shutil.rmtree(out_dir, ignore_errors=True)
+
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=8000)
diff --git a/backend/requirements.txt b/backend/requirements.txt
new file mode 100644
index 0000000..1dfebeb
--- /dev/null
+++ b/backend/requirements.txt
@@ -0,0 +1,12 @@
+fastapi>=0.104.0
+uvicorn[standard]>=0.24.0
+python-multipart>=0.0.6
+transformers==4.46.3
+tokenizers==0.20.3
+accelerate>=0.34.2
+einops
+addict
+easydict
+pillow
+safetensors
+torch
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..99bd123
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,35 @@
+services:
+  backend:
+    build: ./backend
+    container_name: deepseek-ocr-backend
+    environment:
+      MODEL_NAME: deepseek-ai/DeepSeek-OCR
+      HF_HOME: /models
+    volumes:
+      - ./models:/models
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+    shm_size: "4g"
+    ports:
+      - "8000:8000"
+    networks:
+      - ocr-network
+
+  frontend:
+    build: ./frontend
+    container_name: deepseek-ocr-frontend
+    ports:
+      - "3000:80"
+    depends_on:
+      - backend
+    networks:
+      - ocr-network
+
+networks:
+  ocr-network:
+    driver: bridge
diff --git a/frontend/.gitignore b/frontend/.gitignore
new file mode 100644
index 0000000..a547bf3
--- /dev/null
+++ b/frontend/.gitignore
@@ -0,0 +1,24 @@
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+pnpm-debug.log*
+lerna-debug.log*
+
+node_modules
+dist
+dist-ssr
+*.local
+
+# Editor directories and files
+.vscode/*
+!.vscode/extensions.json
+.idea
+.DS_Store
+*.suo
+*.ntvs*
+*.njsproj
+*.sln
+*.sw?
diff --git a/frontend/Dockerfile b/frontend/Dockerfile
new file mode 100644
index 0000000..c575814
--- /dev/null
+++ b/frontend/Dockerfile
@@ -0,0 +1,29 @@
+# Frontend Dockerfile - React + Vite
+FROM node:18-alpine as build
+
+WORKDIR /app
+
+# Copy package files first for better caching
+COPY package*.json ./
+
+# Install dependencies
+RUN npm install --legacy-peer-deps
+
+# Copy all source files
+COPY . .
+
+# Build the app
+RUN npm run build
+
+# Production stage with nginx
+FROM nginx:alpine
+
+# Copy built files from build stage
+COPY --from=build /app/dist /usr/share/nginx/html
+
+# Copy nginx config
+COPY nginx.conf /etc/nginx/conf.d/default.conf
+
+EXPOSE 80
+
+CMD ["nginx", "-g", "daemon off;"]
diff --git a/frontend/README.md b/frontend/README.md
new file mode 100644
index 0000000..d2e7761
--- /dev/null
+++ b/frontend/README.md
@@ -0,0 +1,73 @@
+# React + TypeScript + Vite
+
+This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.
+
+Currently, two official plugins are available:
+
+- [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react) uses [Babel](https://babeljs.io/) (or [oxc](https://oxc.rs) when used in [rolldown-vite](https://vite.dev/guide/rolldown)) for Fast Refresh
+- [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh
+
+## React Compiler
+
+The React Compiler is not enabled on this template because of its impact on dev & build performances. To add it, see [this documentation](https://react.dev/learn/react-compiler/installation).
+
+## Expanding the ESLint configuration
+
+If you are developing a production application, we recommend updating the configuration to enable type-aware lint rules:
+
+```js
+export default defineConfig([
+  globalIgnores(['dist']),
+  {
+    files: ['**/*.{ts,tsx}'],
+    extends: [
+      // Other configs...
+
+      // Remove tseslint.configs.recommended and replace with this
+      tseslint.configs.recommendedTypeChecked,
+      // Alternatively, use this for stricter rules
+      tseslint.configs.strictTypeChecked,
+      // Optionally, add this for stylistic rules
+      tseslint.configs.stylisticTypeChecked,
+
+      // Other configs...
+    ],
+    languageOptions: {
+      parserOptions: {
+        project: ['./tsconfig.node.json', './tsconfig.app.json'],
+        tsconfigRootDir: import.meta.dirname,
+      },
+      // other options...
+    },
+  },
+])
+```
+
+You can also install [eslint-plugin-react-x](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-x) and [eslint-plugin-react-dom](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-dom) for React-specific lint rules:
+
+```js
+// eslint.config.js
+import reactX from 'eslint-plugin-react-x'
+import reactDom from 'eslint-plugin-react-dom'
+
+export default defineConfig([
+  globalIgnores(['dist']),
+  {
+    files: ['**/*.{ts,tsx}'],
+    extends: [
+      // Other configs...
+      // Enable lint rules for React
+      reactX.configs['recommended-typescript'],
+      // Enable lint rules for React DOM
+      reactDom.configs.recommended,
+    ],
+    languageOptions: {
+      parserOptions: {
+        project: ['./tsconfig.node.json', './tsconfig.app.json'],
+        tsconfigRootDir: import.meta.dirname,
+      },
+      // other options...
+    },
+  },
+])
+```
diff --git a/frontend/eslint.config.js b/frontend/eslint.config.js
new file mode 100644
index 0000000..b19330b
--- /dev/null
+++ b/frontend/eslint.config.js
@@ -0,0 +1,23 @@
+import js from '@eslint/js'
+import globals from 'globals'
+import reactHooks from 'eslint-plugin-react-hooks'
+import reactRefresh from 'eslint-plugin-react-refresh'
+import tseslint from 'typescript-eslint'
+import { defineConfig, globalIgnores } from 'eslint/config'
+
+export default defineConfig([
+  globalIgnores(['dist']),
+  {
+    files: ['**/*.{ts,tsx}'],
+    extends: [
+      js.configs.recommended,
+      tseslint.configs.recommended,
+      reactHooks.configs['recommended-latest'],
+      reactRefresh.configs.vite,
+    ],
+    languageOptions: {
+      ecmaVersion: 2020,
+      globals: globals.browser,
+    },
+  },
+])
diff --git a/frontend/index.html b/frontend/index.html
new file mode 100644
index 0000000..556dd76
--- /dev/null
+++ b/frontend/index.html
@@ -0,0 +1,16 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <link rel="icon" type="image/svg+xml" href="/vite.svg" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <link rel="preconnect" href="https://fonts.googleapis.com">
+    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
+    <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800;900&display=swap" rel="stylesheet">
+    <title>DeepSeek OCR - Next Gen Vision AI</title>
+  </head>
+  <body>
+    <div id="root"></div>
+    <script type="module" src="/src/main.jsx"></script>
+  </body>
+</html>
diff --git a/frontend/nginx.conf b/frontend/nginx.conf
new file mode 100644
index 0000000..b53360b
--- /dev/null
+++ b/frontend/nginx.conf
@@ -0,0 +1,40 @@
+server {
+    listen 80;
+    server_name _;
+    root /usr/share/nginx/html;
+    index index.html;
+
+    # Gzip compression
+    gzip on;
+    gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript;
+
+    # API proxy to backend (image uploads need a body limit above nginx's 1m default)
+    location /api/ {
+        proxy_pass http://backend:8000/api/;
+        proxy_http_version 1.1;
+        proxy_set_header Upgrade $http_upgrade;
+        proxy_set_header Connection 'upgrade';
+        proxy_set_header Host $host;
+        proxy_cache_bypass $http_upgrade;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+        client_max_body_size 50m;  # larger bodies are rejected with 413 before reaching the backend
+        # Increase timeouts for ML model processing
+        proxy_connect_timeout 600;
+        proxy_send_timeout 600;
+        proxy_read_timeout 600;
+        send_timeout 600;
+    }
+
+    # SPA fallback
+    location / {
+        try_files $uri $uri/ /index.html;
+    }
+
+    # Cache static assets
+    location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg|woff|woff2|ttf|eot)$ {
+        expires 1y;
+        add_header Cache-Control "public, immutable";
+    }
+}
diff --git a/frontend/package.json b/frontend/package.json
new file mode 100644
index 0000000..bdbd22d
--- /dev/null
+++ b/frontend/package.json
@@ -0,0 +1,29 @@
+{
+  "name": "deepseek-ocr-frontend",
+  "private": true,
+  "version": "2.0.0",
+  "type": "module",
+  "scripts": {
+    "dev": "vite --host",
+    "build": "vite build",
+    "preview": "vite preview"
+  },
+  "dependencies": {
+    "axios": "^1.6.5",
+    "framer-motion": "^11.0.0",
+    "lucide-react": "^0.344.0",
+    "react": "^18.3.1",
+    "react-dom": "^18.3.1",
+    "react-dropzone": "^14.2.3",
+    "react-markdown": "^10.1.0"
+  },
+  "devDependencies": {
+    "@types/react": "^18.3.12",
+    "@types/react-dom": "^18.3.1",
+    "@vitejs/plugin-react": "^4.3.4",
+    "autoprefixer": "^10.4.17",
+    "postcss": "^8.4.35",
+    "tailwindcss": "^3.4.1",
+    "vite": "^5.4.11"
+  }
+}
diff --git a/frontend/postcss.config.js b/frontend/postcss.config.js
new file mode 100644
index 0000000..2e7af2b
--- /dev/null
+++ b/frontend/postcss.config.js
@@ -0,0 +1,6 @@
+export default {
+  plugins: {
+    tailwindcss: {},
+    autoprefixer: {},
+  },
+}
diff --git a/frontend/public/vite.svg b/frontend/public/vite.svg
new file mode 100644
index 0000000..e7b8dfb
--- /dev/null
+++ b/frontend/public/vite.svg
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" class="iconify iconify--logos" width="31.88" height="32" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 257"><defs><linearGradient id="IconifyId1813088fe1fbc01fb466" x1="-.828%" x2="57.636%" y1="7.652%" y2="78.411%"><stop offset="0%" stop-color="#41D1FF"></stop><stop offset="100%" stop-color="#BD34FE"></stop></linearGradient><linearGradient id="IconifyId1813088fe1fbc01fb467" x1="43.376%" x2="50.316%" y1="2.242%" y2="89.03%"><stop offset="0%" stop-color="#FFEA83"></stop><stop offset="8.333%" stop-color="#FFDD35"></stop><stop offset="100%" stop-color="#FFA800"></stop></linearGradient></defs><path fill="url(#IconifyId1813088fe1fbc01fb466)" d="M255.153 37.938L134.897 252.976c-2.483 4.44-8.862 4.466-11.382.048L.875 37.958c-2.746-4.814 1.371-10.646 6.827-9.67l120.385 21.517a6.537 6.537 0 0 0 2.322-.004l117.867-21.483c5.438-.991 9.574 4.796 6.877 9.62Z"></path><path fill="url(#IconifyId1813088fe1fbc01fb467)" d="M185.432.063L96.44 17.501a3.268 3.268 0 0 0-2.634 3.014l-5.474 92.456a3.268 3.268 0 0 0 3.997 3.378l24.777-5.718c2.318-.535 4.413 1.507 3.936 3.838l-7.361 36.047c-.495 2.426 1.782 4.5 4.151 3.78l15.304-4.649c2.372-.72 4.652 1.36 4.15 3.788l-11.698 56.621c-.732 3.542 3.979 5.473 5.943 2.437l1.313-2.028l72.516-144.72c1.215-2.423-.88-5.186-3.54-4.672l-25.505 4.922c-2.396.462-4.435-1.77-3.759-4.114l16.646-57.705c.677-2.35-1.37-4.583-3.769-4.113Z"></path></svg>
\ No newline at end of file
diff --git a/frontend/src/App.css b/frontend/src/App.css
new file mode 100644
index 0000000..b9d355d
--- /dev/null
+++ b/frontend/src/App.css
@@ -0,0 +1,42 @@
+#root { /* page container: capped width, centred horizontally, centred text */
+  max-width: 1280px;
+  margin: 0 auto;
+  padding: 2rem;
+  text-align: center;
+}
+
+.logo { /* logo images; the filter transition animates the hover glows below */
+  height: 6em;
+  padding: 1.5em;
+  will-change: filter;
+  transition: filter 300ms;
+}
+.logo:hover { /* default hover glow */
+  filter: drop-shadow(0 0 2em #646cffaa);
+}
+.logo.react:hover { /* different glow colour for logos that also carry .react */
+  filter: drop-shadow(0 0 2em #61dafbaa);
+}
+
+@keyframes logo-spin { /* one full turn from 0deg to 360deg */
+  from {
+    transform: rotate(0deg);
+  }
+  to {
+    transform: rotate(360deg);
+  }
+}
+
+@media (prefers-reduced-motion: no-preference) { /* spin only when reduced motion has not been requested */
+  a:nth-of-type(2) .logo { /* .logo inside an <a> that is the second <a> among its siblings */
+    animation: logo-spin infinite 20s linear;
+  }
+}
+
+.card {
+  padding: 2em;
+}
+
+.read-the-docs {
+  color: #888;
+}
diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx
new file mode 100644
index 0000000..9a62c6a
--- /dev/null
+++ b/frontend/src/App.jsx
@@ -0,0 +1,251 @@
+import { useState, useCallback } from 'react'
+import { motion, AnimatePresence } from 'framer-motion'
+import { Sparkles, Zap, Loader2 } from 'lucide-react'
+import ImageUpload from './components/ImageUpload'
+import ModeSelector from './components/ModeSelector'
+import ResultPanel from './components/ResultPanel'
+import axios from 'axios'
+
+const API_BASE = import.meta.env.VITE_API_URL || '/api'
+
+function App() {
+  const [mode, setMode] = useState('plain_ocr')
+  const [image, setImage] = useState(null)
+  const [imagePreview, setImagePreview] = useState(null)
+  const [result, setResult] = useState(null)
+  const [loading, setLoading] = useState(false)
+  const [error, setError] = useState(null)
+  
+  // Form state
+  const [prompt, setPrompt] = useState('')
+  const [findTerm, setFindTerm] = useState('')
+  const [advancedSettings, setAdvancedSettings] = useState({
+    base_size: 1024,
+    image_size: 640,
+    crop_mode: true,
+    test_compress: false
+  })
+
+  const handleImageSelect = useCallback((file) => {
+    setImage(file)
+    setImagePreview(URL.createObjectURL(file))
+    setError(null)
+    setResult(null)
+  }, [])
+
+  const handleSubmit = async () => {
+    if (!image) {
+      setError('Please upload an image first')
+      return
+    }
+
+    setLoading(true)
+    setError(null)
+
+    try {
+      const formData = new FormData()
+      formData.append('image', image)
+      formData.append('mode', mode)
+      formData.append('prompt', prompt)
+      formData.append('grounding', mode === 'find_ref') // Auto-enable for find mode
+      formData.append('include_caption', false)
+      formData.append('find_term', findTerm)
+      formData.append('schema', '')
+      formData.append('base_size', advancedSettings.base_size)
+      formData.append('image_size', advancedSettings.image_size)
+      formData.append('crop_mode', advancedSettings.crop_mode)
+      formData.append('test_compress', advancedSettings.test_compress)
+
+      const response = await axios.post(`${API_BASE}/ocr`, formData, {
+        headers: {
+          'Content-Type': 'multipart/form-data',
+        },
+      })
+
+      setResult(response.data)
+    } catch (err) {
+      setError(err.response?.data?.detail || err.message || 'An error occurred')
+    } finally {
+      setLoading(false)
+    }
+  }
+
+  const handleCopy = useCallback(() => {
+    if (result?.text) {
+      navigator.clipboard.writeText(result.text)
+    }
+  }, [result])
+
+  const handleDownload = useCallback(() => {
+    if (!result?.text) return
+    
+    const extensions = {
+      plain_ocr: 'txt',
+      markdown: 'md',
+      tables_csv: 'csv',
+      tables_md: 'md',
+      kv_json: 'json',
+      layout_map: 'json',
+      pii_redact: 'json',
+    }
+    
+    const ext = extensions[mode] || 'txt'
+    const blob = new Blob([result.text], { type: 'text/plain' })
+    const url = URL.createObjectURL(blob)
+    const a = document.createElement('a')
+    a.href = url
+    a.download = `deepseek-ocr-result.${ext}`
+    a.click()
+    URL.revokeObjectURL(url)
+  }, [result, mode])
+
+  return (
+    <div className="min-h-screen relative overflow-hidden">
+      {/* Animated background */}
+      <div className="fixed inset-0 -z-10">
+        <div className="absolute inset-0 bg-gradient-to-br from-purple-900/20 via-pink-900/20 to-cyan-900/20" />
+        <div className="absolute inset-0 bg-[url('data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iNjAiIGhlaWdodD0iNjAiIHZpZXdCb3g9IjAgMCA2MCA2MCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48ZyBmaWxsPSJub25lIiBmaWxsLXJ1bGU9ImV2ZW5vZGQiPjxwYXRoIGQ9Ik0zNiAxOGMzLjMxIDAgNiAyLjY5IDYgNnMtMi42OSA2LTYgNi02LTIuNjktNi02IDIuNjktNiA2LTZ6TTI0IDZjMy4zMSAwIDYgMi42OSA2IDZzLTIuNjkgNi02IDYtNi0yLjY5LTYtNiAyLjY5LTYgNi02ek00OCAzNmMzLjMxIDAgNiAyLjY5IDYgNnMtMi42OSA2LTYgNi02LTIuNjktNi02IDIuNjktNiA2LTZ6IiBzdHJva2U9InJnYmEoMTQ3LCA1MSwgMjM0LCAwLjEpIiBzdHJva2Utd2lkdGg9IjIiLz48L2c+PC9zdmc+')] opacity-30" />
+        <motion.div
+          className="absolute top-20 left-20 w-96 h-96 bg-purple-500/10 rounded-full blur-3xl"
+          animate={{
+            scale: [1, 1.2, 1],
+            opacity: [0.3, 0.5, 0.3],
+          }}
+          transition={{
+            duration: 8,
+            repeat: Infinity,
+            ease: "easeInOut"
+          }}
+        />
+        <motion.div
+          className="absolute bottom-20 right-20 w-96 h-96 bg-cyan-500/10 rounded-full blur-3xl"
+          animate={{
+            scale: [1.2, 1, 1.2],
+            opacity: [0.5, 0.3, 0.5],
+          }}
+          transition={{
+            duration: 8,
+            repeat: Infinity,
+            ease: "easeInOut"
+          }}
+        />
+      </div>
+
+      {/* Header */}
+      <header className="sticky top-0 z-50 glass border-b border-white/10">
+        <div className="max-w-7xl mx-auto px-6 py-4">
+          <div className="flex items-center justify-between">
+            <motion.div 
+              className="flex items-center gap-3"
+              initial={{ opacity: 0, x: -20 }}
+              animate={{ opacity: 1, x: 0 }}
+            >
+              <div className="relative">
+                <div className="absolute inset-0 bg-gradient-to-r from-purple-500 to-cyan-500 rounded-xl blur-lg opacity-75" />
+                <div className="relative bg-gradient-to-br from-purple-600 to-cyan-500 p-2 rounded-xl">
+                  <Sparkles className="w-6 h-6" />
+                </div>
+              </div>
+              <div>
+                <h1 className="text-2xl font-bold gradient-text">DeepSeek OCR</h1>
+                <p className="text-xs text-gray-400">Next-Gen Vision AI</p>
+              </div>
+            </motion.div>
+          </div>
+        </div>
+      </header>
+
+      {/* Main Content */}
+      <main className="max-w-7xl mx-auto px-6 py-8">
+        <div className="grid lg:grid-cols-2 gap-6">
+          {/* Left Panel - Upload & Controls */}
+          <motion.div
+            initial={{ opacity: 0, y: 20 }}
+            animate={{ opacity: 1, y: 0 }}
+            transition={{ delay: 0.1 }}
+            className="space-y-6"
+          >
+            {/* Mode Selector with integrated inputs */}
+            <ModeSelector 
+              mode={mode} 
+              onModeChange={setMode}
+              prompt={prompt}
+              onPromptChange={setPrompt}
+              findTerm={findTerm}
+              onFindTermChange={setFindTerm}
+            />
+
+            {/* Image Upload */}
+            <ImageUpload 
+              onImageSelect={handleImageSelect}
+              preview={imagePreview}
+            />
+
+            {/* Action Button */}
+            <motion.button
+              onClick={handleSubmit}
+              disabled={!image || loading}
+              className={`w-full relative overflow-hidden rounded-2xl p-[2px] ${
+                !image || loading ? 'opacity-50 cursor-not-allowed' : ''
+              }`}
+              whileHover={!loading && image ? { scale: 1.02 } : {}}
+              whileTap={!loading && image ? { scale: 0.98 } : {}}
+            >
+              <div className="absolute inset-0 bg-gradient-to-r from-purple-600 via-pink-600 to-cyan-600 animate-gradient" />
+              <div className="relative bg-dark-100 px-8 py-4 rounded-2xl flex items-center justify-center gap-3">
+                {loading ? (
+                  <>
+                    <Loader2 className="w-5 h-5 animate-spin" />
+                    <span className="font-semibold">Processing Magic...</span>
+                  </>
+                ) : (
+                  <>
+                    <Zap className="w-5 h-5" />
+                    <span className="font-semibold">Analyze Image</span>
+                  </>
+                )}
+              </div>
+            </motion.button>
+
+            {error && (
+              <motion.div
+                initial={{ opacity: 0, y: -10 }}
+                animate={{ opacity: 1, y: 0 }}
+                className="glass p-4 rounded-2xl border-red-500/50 bg-red-500/10"
+              >
+                <p className="text-sm text-red-400">{error}</p>
+              </motion.div>
+            )}
+          </motion.div>
+
+          {/* Right Panel - Results */}
+          <motion.div
+            initial={{ opacity: 0, y: 20 }}
+            animate={{ opacity: 1, y: 0 }}
+            transition={{ delay: 0.2 }}
+          >
+            <ResultPanel 
+              result={result}
+              loading={loading}
+              imagePreview={imagePreview}
+              onCopy={handleCopy}
+              onDownload={handleDownload}
+            />
+          </motion.div>
+        </div>
+      </main>
+
+      {/* Footer */}
+      <footer className="mt-20 border-t border-white/10 glass">
+        <div className="max-w-7xl mx-auto px-6 py-8 text-center">
+          <p className="text-sm text-gray-400">
+            Powered by <span className="gradient-text font-semibold">DeepSeek-OCR</span> • 
+            Built with <span className="text-pink-400">♥</span> using React + FastAPI
+          </p>
+        </div>
+      </footer>
+    </div>
+  )
+}
+
+export default App
diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx
new file mode 100644
index 0000000..3d7ded3
--- /dev/null
+++ b/frontend/src/App.tsx
@@ -0,0 +1,35 @@
+import { useState } from 'react'
+import reactLogo from './assets/react.svg'
+import viteLogo from '/vite.svg'
+import './App.css'
+
+function App() { // Vite + React starter screen: two logo links and a click counter
+  const [count, setCount] = useState(0)
+  // rel="noreferrer" (which implies noopener) keeps the new tabs from getting window.opener or a Referer header
+  return (
+    <>
+      <div>
+        <a href="https://vite.dev" target="_blank" rel="noreferrer">
+          <img src={viteLogo} className="logo" alt="Vite logo" />
+        </a>
+        <a href="https://react.dev" target="_blank" rel="noreferrer">
+          <img src={reactLogo} className="logo react" alt="React logo" />
+        </a>
+      </div>
+      <h1>Vite + React</h1>
+      <div className="card">
+        <button onClick={() => setCount((count) => count + 1)}>
+          count is {count}
+        </button>
+        <p>
+          Edit <code>src/App.tsx</code> and save to test HMR
+        </p>
+      </div>
+      <p className="read-the-docs">
+        Click on the Vite and React logos to learn more
+      </p>
+    </>
+  )
+}
+
+export default App
diff --git a/frontend/src/assets/react.svg b/frontend/src/assets/react.svg
new file mode 100644
index 0000000..6c87de9
--- /dev/null
+++ b/frontend/src/assets/react.svg
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" class="iconify iconify--logos" width="35.93" height="32" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 228"><path fill="#00D8FF" d="M210.483 73.824a171.49 171.49 0 0 0-8.24-2.597c.465-1.9.893-3.777 1.273-5.621c6.238-30.281 2.16-54.676-11.769-62.708c-13.355-7.7-35.196.329-57.254 19.526a171.23 171.23 0 0 0-6.375 5.848a155.866 155.866 0 0 0-4.241-3.917C100.759 3.829 77.587-4.822 63.673 3.233C50.33 10.957 46.379 33.89 51.995 62.588a170.974 170.974 0 0 0 1.892 8.48c-3.28.932-6.445 1.924-9.474 2.98C17.309 83.498 0 98.307 0 113.668c0 15.865 18.582 31.778 46.812 41.427a145.52 145.52 0 0 0 6.921 2.165a167.467 167.467 0 0 0-2.01 9.138c-5.354 28.2-1.173 50.591 12.134 58.266c13.744 7.926 36.812-.22 59.273-19.855a145.567 145.567 0 0 0 5.342-4.923a168.064 168.064 0 0 0 6.92 6.314c21.758 18.722 43.246 26.282 56.54 18.586c13.731-7.949 18.194-32.003 12.4-61.268a145.016 145.016 0 0 0-1.535-6.842c1.62-.48 3.21-.974 4.76-1.488c29.348-9.723 48.443-25.443 48.443-41.52c0-15.417-17.868-30.326-45.517-39.844Zm-6.365 70.984c-1.4.463-2.836.91-4.3 1.345c-3.24-10.257-7.612-21.163-12.963-32.432c5.106-11 9.31-21.767 12.459-31.957c2.619.758 5.16 1.557 7.61 2.4c23.69 8.156 38.14 20.213 38.14 29.504c0 9.896-15.606 22.743-40.946 31.14Zm-10.514 20.834c2.562 12.94 2.927 24.64 1.23 33.787c-1.524 8.219-4.59 13.698-8.382 15.893c-8.067 4.67-25.32-1.4-43.927-17.412a156.726 156.726 0 0 1-6.437-5.87c7.214-7.889 14.423-17.06 21.459-27.246c12.376-1.098 24.068-2.894 34.671-5.345a134.17 134.17 0 0 1 1.386 6.193ZM87.276 214.515c-7.882 2.783-14.16 2.863-17.955.675c-8.075-4.657-11.432-22.636-6.853-46.752a156.923 156.923 0 0 1 1.869-8.499c10.486 2.32 22.093 3.988 34.498 4.994c7.084 9.967 14.501 19.128 21.976 27.15a134.668 134.668 0 0 1-4.877 4.492c-9.933 8.682-19.886 14.842-28.658 17.94ZM50.35 144.747c-12.483-4.267-22.792-9.812-29.858-15.863c-6.35-5.437-9.555-10.836-9.555-15.216c0-9.322 
13.897-21.212 37.076-29.293c2.813-.98 5.757-1.905 8.812-2.773c3.204 10.42 7.406 21.315 12.477 32.332c-5.137 11.18-9.399 22.249-12.634 32.792a134.718 134.718 0 0 1-6.318-1.979Zm12.378-84.26c-4.811-24.587-1.616-43.134 6.425-47.789c8.564-4.958 27.502 2.111 47.463 19.835a144.318 144.318 0 0 1 3.841 3.545c-7.438 7.987-14.787 17.08-21.808 26.988c-12.04 1.116-23.565 2.908-34.161 5.309a160.342 160.342 0 0 1-1.76-7.887Zm110.427 27.268a347.8 347.8 0 0 0-7.785-12.803c8.168 1.033 15.994 2.404 23.343 4.08c-2.206 7.072-4.956 14.465-8.193 22.045a381.151 381.151 0 0 0-7.365-13.322Zm-45.032-43.861c5.044 5.465 10.096 11.566 15.065 18.186a322.04 322.04 0 0 0-30.257-.006c4.974-6.559 10.069-12.652 15.192-18.18ZM82.802 87.83a323.167 323.167 0 0 0-7.227 13.238c-3.184-7.553-5.909-14.98-8.134-22.152c7.304-1.634 15.093-2.97 23.209-3.984a321.524 321.524 0 0 0-7.848 12.897Zm8.081 65.352c-8.385-.936-16.291-2.203-23.593-3.793c2.26-7.3 5.045-14.885 8.298-22.6a321.187 321.187 0 0 0 7.257 13.246c2.594 4.48 5.28 8.868 8.038 13.147Zm37.542 31.03c-5.184-5.592-10.354-11.779-15.403-18.433c4.902.192 9.899.29 14.978.29c5.218 0 10.376-.117 15.453-.343c-4.985 6.774-10.018 12.97-15.028 18.486Zm52.198-57.817c3.422 7.8 6.306 15.345 8.596 22.52c-7.422 1.694-15.436 3.058-23.88 4.071a382.417 382.417 0 0 0 7.859-13.026a347.403 347.403 0 0 0 7.425-13.565Zm-16.898 8.101a358.557 358.557 0 0 1-12.281 19.815a329.4 329.4 0 0 1-23.444.823c-7.967 0-15.716-.248-23.178-.732a310.202 310.202 0 0 1-12.513-19.846h.001a307.41 307.41 0 0 1-10.923-20.627a310.278 310.278 0 0 1 10.89-20.637l-.001.001a307.318 307.318 0 0 1 12.413-19.761c7.613-.576 15.42-.876 23.31-.876H128c7.926 0 15.743.303 23.354.883a329.357 329.357 0 0 1 12.335 19.695a358.489 358.489 0 0 1 11.036 20.54a329.472 329.472 0 0 1-11 20.722Zm22.56-122.124c8.572 4.944 11.906 24.881 6.52 51.026c-.344 1.668-.73 3.367-1.15 5.09c-10.622-2.452-22.155-4.275-34.23-5.408c-7.034-10.017-14.323-19.124-21.64-27.008a160.789 160.789 0 0 1 5.888-5.4c18.9-16.447 36.564-22.941 
44.612-18.3ZM128 90.808c12.625 0 22.86 10.235 22.86 22.86s-10.235 22.86-22.86 22.86s-22.86-10.235-22.86-22.86s10.235-22.86 22.86-22.86Z"></path></svg>
\ No newline at end of file
diff --git a/frontend/src/components/AdvancedSettings.jsx b/frontend/src/components/AdvancedSettings.jsx
new file mode 100644
index 0000000..87e1aeb
--- /dev/null
+++ b/frontend/src/components/AdvancedSettings.jsx
@@ -0,0 +1,83 @@
+import { motion } from 'framer-motion'
+import { Sliders } from 'lucide-react'
+
+export default function AdvancedSettings({ settings, onSettingsChange, includeCaption, onIncludeCaptionChange }) {
+  const handleChange = (key, value) => {
+    onSettingsChange({
+      ...settings,
+      [key]: value
+    })
+  }
+
+  return (
+    <motion.div
+      initial={{ opacity: 0, height: 0 }}
+      animate={{ opacity: 1, height: 'auto' }}
+      exit={{ opacity: 0, height: 0 }}
+      className="glass p-6 rounded-2xl space-y-4"
+    >
+      <div className="flex items-center gap-2">
+        <Sliders className="w-5 h-5 text-purple-400" />
+        <h3 className="font-semibold text-gray-200">Advanced Settings</h3>
+      </div>
+
+      <div className="grid grid-cols-2 gap-4">
+        <div className="space-y-2">
+          <label className="text-xs text-gray-400">Base Size</label>
+          <input
+            type="number"
+            value={settings.base_size}
+            onChange={(e) => handleChange('base_size', parseInt(e.target.value))}
+            className="w-full bg-white/5 border border-white/10 rounded-xl px-3 py-2 text-sm focus:outline-none focus:border-purple-500"
+          />
+        </div>
+
+        <div className="space-y-2">
+          <label className="text-xs text-gray-400">Image Size</label>
+          <input
+            type="number"
+            value={settings.image_size}
+            onChange={(e) => handleChange('image_size', parseInt(e.target.value))}
+            className="w-full bg-white/5 border border-white/10 rounded-xl px-3 py-2 text-sm focus:outline-none focus:border-purple-500"
+          />
+        </div>
+
+        <div className="space-y-2">
+          <label className="text-xs text-gray-400">Crop Mode</label>
+          <select
+            value={settings.crop_mode ? 'true' : 'false'}
+            onChange={(e) => handleChange('crop_mode', e.target.value === 'true')}
+            className="w-full bg-white/5 border border-white/10 rounded-xl px-3 py-2 text-sm focus:outline-none focus:border-purple-500"
+          >
+            <option value="true">Enabled</option>
+            <option value="false">Disabled</option>
+          </select>
+        </div>
+
+        <div className="space-y-2">
+          <label className="text-xs text-gray-400">Test Compress</label>
+          <select
+            value={settings.test_compress ? 'true' : 'false'}
+            onChange={(e) => handleChange('test_compress', e.target.value === 'true')}
+            className="w-full bg-white/5 border border-white/10 rounded-xl px-3 py-2 text-sm focus:outline-none focus:border-purple-500"
+          >
+            <option value="false">Disabled</option>
+            <option value="true">Enabled</option>
+          </select>
+        </div>
+      </div>
+
+      <div className="pt-2 border-t border-white/10">
+        <label className="flex items-center gap-2 cursor-pointer">
+          <input
+            type="checkbox"
+            checked={includeCaption}
+            onChange={(e) => onIncludeCaptionChange(e.target.checked)}
+            className="accent-purple-500"
+          />
+          <span className="text-sm text-gray-300">Include image caption</span>
+        </label>
+      </div>
+    </motion.div>
+  )
+}
diff --git a/frontend/src/components/ImageUpload.jsx b/frontend/src/components/ImageUpload.jsx
new file mode 100644
index 0000000..baedde8
--- /dev/null
+++ b/frontend/src/components/ImageUpload.jsx
@@ -0,0 +1,99 @@
+import { useCallback } from 'react'
+import { motion } from 'framer-motion'
+import { useDropzone } from 'react-dropzone'
+import { Upload, Image as ImageIcon, X } from 'lucide-react'
+
+export default function ImageUpload({ onImageSelect, preview }) {
+  const onDrop = useCallback((acceptedFiles) => {
+    if (acceptedFiles?.[0]) {
+      onImageSelect(acceptedFiles[0])
+    }
+  }, [onImageSelect])
+
+  const { getRootProps, getInputProps, isDragActive } = useDropzone({
+    onDrop,
+    accept: {
+      'image/*': ['.png', '.jpg', '.jpeg', '.webp', '.gif', '.bmp']
+    },
+    multiple: false
+  })
+
+  return (
+    <div className="glass p-6 rounded-2xl space-y-4">
+      <div className="flex items-center justify-between">
+        <h3 className="font-semibold text-gray-200">Upload Image</h3>
+        <ImageIcon className="w-5 h-5 text-purple-400" />
+      </div>
+
+      {!preview ? (
+        <motion.div
+          {...getRootProps()}
+          className={`
+            relative border-2 border-dashed rounded-2xl p-12 text-center cursor-pointer
+            transition-all duration-300
+            ${isDragActive 
+              ? 'border-purple-500 bg-purple-500/10' 
+              : 'border-white/20 bg-white/5 hover:border-white/40 hover:bg-white/10'
+            }
+          `}
+          whileHover={{ scale: 1.02 }}
+          whileTap={{ scale: 0.98 }}
+        >
+          <input {...getInputProps()} />
+          
+          <div className="space-y-4">
+            <motion.div
+              animate={{ 
+                y: isDragActive ? -10 : 0,
+                scale: isDragActive ? 1.1 : 1 
+              }}
+              className="flex justify-center"
+            >
+              <div className="relative">
+                <div className="absolute inset-0 bg-gradient-to-r from-purple-500 to-cyan-500 rounded-2xl blur-xl opacity-50" />
+                <div className="relative bg-gradient-to-br from-purple-600 to-cyan-500 p-4 rounded-2xl">
+                  <Upload className="w-8 h-8" />
+                </div>
+              </div>
+            </motion.div>
+            
+            <div>
+              <p className="text-lg font-medium text-gray-200">
+                {isDragActive ? 'Drop it like it\'s hot! 🔥' : 'Drag & drop your image'}
+              </p>
+              <p className="text-sm text-gray-400 mt-1">
+                or click to browse • PNG, JPG, WEBP up to 10MB
+              </p>
+            </div>
+          </div>
+        </motion.div>
+      ) : (
+        <motion.div
+          initial={{ opacity: 0, scale: 0.9 }}
+          animate={{ opacity: 1, scale: 1 }}
+          className="relative group"
+        >
+          <img 
+            src={preview} 
+            alt="Preview" 
+            className="w-full rounded-2xl border border-white/10"
+          />
+          <motion.button
+            onClick={() => onImageSelect(null)}
+            className="absolute top-3 right-3 bg-red-500/80 backdrop-blur-sm p-2 rounded-full opacity-0 group-hover:opacity-100 transition-opacity"
+            whileHover={{ scale: 1.1 }}
+            whileTap={{ scale: 0.9 }}
+          >
+            <X className="w-4 h-4" />
+          </motion.button>
+          
+          {/* Grounding overlay canvas */}
+          <canvas 
+            id="preview-canvas" 
+            className="absolute top-0 left-0 w-full h-full pointer-events-none"
+          />
+        </motion.div>
+      )}
+    </div>
+  )
+}
diff --git a/frontend/src/components/ModeSelector.jsx b/frontend/src/components/ModeSelector.jsx
new file mode 100644
index 0000000..3d6294f
--- /dev/null
+++ b/frontend/src/components/ModeSelector.jsx
@@ -0,0 +1,105 @@
+import { motion } from 'framer-motion'
+import { FileText, Eye, Search, Wand2 } from 'lucide-react'
+
+const modes = [ // one entry per mode button, in display order; needsInput is false or names the extra field ('findTerm' | 'prompt') shown while that mode is selected
+  { id: 'plain_ocr', name: 'Plain OCR', icon: FileText, color: 'from-blue-500 to-cyan-500', desc: 'Extract raw text', needsInput: false },
+  { id: 'describe', name: 'Describe', icon: Eye, color: 'from-violet-500 to-purple-500', desc: 'Image description', needsInput: false },
+  { id: 'find_ref', name: 'Find', icon: Search, color: 'from-yellow-500 to-orange-500', desc: 'Locate specific terms', needsInput: 'findTerm' },
+  { id: 'freeform', name: 'Freeform', icon: Wand2, color: 'from-fuchsia-500 to-pink-500', desc: 'Custom prompt', needsInput: 'prompt' },
+]
+
+export default function ModeSelector({ 
+  mode, 
+  onModeChange, 
+  prompt, 
+  onPromptChange,
+  findTerm,
+  onFindTermChange 
+}) {
+  const selectedMode = modes.find(m => m.id === mode)
+  const needsInput = selectedMode?.needsInput
+
+  return (
+    <div className="glass p-4 rounded-2xl space-y-3">
+      <h3 className="text-sm font-semibold text-gray-200">Mode</h3>
+
+      <div className="grid grid-cols-4 gap-2">
+        {modes.map((m) => {
+          const Icon = m.icon
+          const isSelected = mode === m.id
+          
+          return (
+            <motion.button
+              key={m.id}
+              onClick={() => onModeChange(m.id)}
+              className={`
+                relative p-2 rounded-xl text-center transition-all
+                ${isSelected 
+                  ? 'glass border-white/20 shadow-lg' 
+                  : 'bg-white/5 border border-white/10 hover:border-white/20'
+                }
+              `}
+              whileHover={{ scale: 1.05 }}
+              whileTap={{ scale: 0.95 }}
+            >
+              {isSelected && (
+                <motion.div
+                  layoutId="selected-mode"
+                  className={`absolute inset-0 bg-gradient-to-br ${m.color} opacity-10 rounded-xl`}
+                  transition={{ type: "spring", bounce: 0.2, duration: 0.6 }}
+                />
+              )}
+              
+              <div className="relative space-y-1">
+                <div className={`
+                  w-8 h-8 mx-auto rounded-lg flex items-center justify-center
+                  ${isSelected 
+                    ? `bg-gradient-to-br ${m.color}` 
+                    : 'bg-white/10'
+                  }
+                `}>
+                  <Icon className="w-4 h-4" />
+                </div>
+                <p className={`text-xs font-medium ${isSelected ? 'text-white' : 'text-gray-300'}`}>
+                  {m.name}
+                </p>
+              </div>
+            </motion.button>
+          )
+        })}
+      </div>
+
+      {needsInput === 'findTerm' && (
+        <motion.div
+          initial={{ opacity: 0, height: 0 }}
+          animate={{ opacity: 1, height: 'auto' }}
+          exit={{ opacity: 0, height: 0 }}
+        >
+          <input
+            type="text"
+            value={findTerm}
+            onChange={(e) => onFindTermChange(e.target.value)}
+            placeholder="Enter term to find (e.g., Total, Invoice #)"
+            className="w-full bg-white/5 border border-white/10 rounded-xl px-3 py-2 text-sm focus:outline-none focus:border-purple-500 transition-colors"
+          />
+        </motion.div>
+      )}
+
+      {needsInput === 'prompt' && (
+        <motion.div
+          initial={{ opacity: 0, height: 0 }}
+          animate={{ opacity: 1, height: 'auto' }}
+          exit={{ opacity: 0, height: 0 }}
+        >
+          <textarea
+            value={prompt}
+            onChange={(e) => onPromptChange(e.target.value)}
+            placeholder="Enter your custom prompt..."
+            className="w-full bg-white/5 border border-white/10 rounded-xl px-3 py-2 text-sm focus:outline-none focus:border-purple-500 transition-colors resize-none"
+            rows={2}
+          />
+        </motion.div>
+      )}
+    </div>
+  )
+}
diff --git a/frontend/src/components/ResultPanel.jsx b/frontend/src/components/ResultPanel.jsx
new file mode 100644
index 0000000..434217c
--- /dev/null
+++ b/frontend/src/components/ResultPanel.jsx
@@ -0,0 +1,302 @@
+import { useEffect, useRef, useState, useCallback } from 'react'
+import { motion, AnimatePresence } from 'framer-motion'
+import { Copy, Download, Sparkles, Loader2, CheckCircle2, ChevronDown } from 'lucide-react'
+import ReactMarkdown from 'react-markdown'
+
+export default function ResultPanel({ result, loading, imagePreview, onCopy, onDownload }) {
+  const canvasRef = useRef(null)
+  const imgRef = useRef(null)
+  const [showAdvanced, setShowAdvanced] = useState(false)
+  const [imageLoaded, setImageLoaded] = useState(false)
+
+  // Check if text looks like markdown
+  const isMarkdown = result?.text && (
+    result.text.includes('##') || 
+    result.text.includes('**') || 
+    result.text.includes('```') ||
+    result.text.includes('- ') ||
+    result.text.includes('|')
+  )
+
+  // Draw boxes function
+  const drawBoxes = useCallback(() => {
+    if (!result?.boxes?.length || !canvasRef.current || !imgRef.current) {
+      console.log('❌ Cannot draw - missing:', {
+        hasBoxes: !!result?.boxes?.length,
+        hasCanvas: !!canvasRef.current,
+        hasImgRef: !!imgRef.current
+      })
+      return
+    }
+
+    console.log('🎨 Drawing boxes:', result.boxes)
+
+    const img = imgRef.current
+    const canvas = canvasRef.current
+    const ctx = canvas.getContext('2d')
+    
+    console.log('📐 Image dimensions:', {
+      displayWidth: img.offsetWidth,
+      displayHeight: img.offsetHeight,
+      naturalWidth: img.naturalWidth,
+      naturalHeight: img.naturalHeight,
+      imageDims: result.image_dims
+    })
+
+    // Set canvas size to match displayed image
+    canvas.width = img.offsetWidth
+    canvas.height = img.offsetHeight
+    
+    ctx.clearRect(0, 0, canvas.width, canvas.height)
+    
+    // Calculate scale factors
+    const scaleX = img.offsetWidth / (result.image_dims?.w || img.naturalWidth)
+    const scaleY = img.offsetHeight / (result.image_dims?.h || img.naturalHeight)
+    
+    console.log('📏 Scale factors:', { scaleX, scaleY })
+    
+    // Draw boxes
+    result.boxes.forEach((box, idx) => {
+      const [x1, y1, x2, y2] = box.box
+      const colors = [
+        '#00ff00', '#00ffff', '#ff00ff', '#ffff00', '#ff0066'
+      ]
+      const color = colors[idx % colors.length]
+      
+      // Scale coordinates
+      const sx = x1 * scaleX
+      const sy = y1 * scaleY
+      const sw = (x2 - x1) * scaleX
+      const sh = (y2 - y1) * scaleY
+      
+      console.log(`📦 Box ${idx} (${box.label}):`, {
+        original: [x1, y1, x2, y2],
+        scaled: [sx, sy, sx + sw, sy + sh],
+        dimensions: { width: sw, height: sh }
+      })
+      
+      // Draw semi-transparent fill
+      ctx.fillStyle = color + '33'
+      ctx.fillRect(sx, sy, sw, sh)
+      
+      // Draw thick neon border
+      ctx.strokeStyle = color
+      ctx.lineWidth = 4
+      ctx.shadowColor = color
+      ctx.shadowBlur = 10
+      ctx.strokeRect(sx, sy, sw, sh)
+      ctx.shadowBlur = 0
+      
+      // Label background
+      if (box.label) {
+        ctx.font = 'bold 14px Inter'
+        const metrics = ctx.measureText(box.label)
+        const padding = 8
+        const labelHeight = 24
+        
+        ctx.fillStyle = color
+        ctx.fillRect(sx, sy - labelHeight, metrics.width + padding * 2, labelHeight)
+        
+        // Label text
+        ctx.fillStyle = '#000'
+        ctx.fillText(box.label, sx + padding, sy - 7)
+      }
+    })
+    
+    console.log('✅ Finished drawing', result.boxes.length, 'boxes')
+  }, [result])
+
+  // Trigger drawing when image loads
+  useEffect(() => {
+    if (imageLoaded && result?.boxes?.length) {
+      console.log('🚀 Image loaded, drawing boxes now')
+      drawBoxes()
+    }
+  }, [imageLoaded, result, drawBoxes])
+
+  // Reset imageLoaded when result changes
+  useEffect(() => {
+    setImageLoaded(false)
+  }, [result])
+
+  // Redraw on window resize
+  useEffect(() => {
+    if (!imageLoaded || !result?.boxes?.length) return
+    
+    const handleResize = () => {
+      console.log('📐 Window resized, redrawing')
+      drawBoxes()
+    }
+    
+    window.addEventListener('resize', handleResize)
+    return () => window.removeEventListener('resize', handleResize)
+  }, [imageLoaded, result, drawBoxes])
+
+  return (
+    <div className="glass p-6 rounded-2xl space-y-4 h-full">
+      <div className="flex items-center justify-between">
+        <div className="flex items-center gap-2">
+          <Sparkles className="w-5 h-5 text-purple-400" />
+          <h3 className="font-semibold text-gray-200">Results</h3>
+        </div>
+        
+        {result && (
+          <div className="flex gap-2">
+            <motion.button
+              onClick={onCopy}
+              className="glass glass-hover p-2 rounded-lg"
+              whileHover={{ scale: 1.05 }}
+              whileTap={{ scale: 0.95 }}
+              title="Copy to clipboard"
+            >
+              <Copy className="w-4 h-4" />
+            </motion.button>
+            <motion.button
+              onClick={onDownload}
+              className="glass glass-hover p-2 rounded-lg"
+              whileHover={{ scale: 1.05 }}
+              whileTap={{ scale: 0.95 }}
+              title="Download"
+            >
+              <Download className="w-4 h-4" />
+            </motion.button>
+          </div>
+        )}
+      </div>
+
+      <AnimatePresence mode="wait">
+        {loading ? (
+          <motion.div
+            key="loading"
+            initial={{ opacity: 0 }}
+            animate={{ opacity: 1 }}
+            exit={{ opacity: 0 }}
+            className="flex flex-col items-center justify-center py-20 space-y-4"
+          >
+            <div className="relative">
+              <motion.div
+                animate={{ rotate: 360 }}
+                transition={{ duration: 2, repeat: Infinity, ease: "linear" }}
+                className="w-16 h-16 border-4 border-purple-500/20 border-t-purple-500 rounded-full"
+              />
+              <Loader2 className="w-8 h-8 absolute top-1/2 left-1/2 transform -translate-x-1/2 -translate-y-1/2 text-purple-400" />
+            </div>
+            <p className="text-sm text-gray-400 animate-pulse">
+              Processing your image with AI magic...
+            </p>
+          </motion.div>
+        ) : result ? (
+          <motion.div
+            key="result"
+            initial={{ opacity: 0, y: 20 }}
+            animate={{ opacity: 1, y: 0 }}
+            exit={{ opacity: 0, y: -20 }}
+            className="space-y-4"
+          >
+            {/* Preview with boxes */}
+            {imagePreview && result.boxes && result.boxes.length > 0 && (
+              <div className="relative rounded-xl overflow-hidden border border-white/10 bg-black">
+                <img 
+                  ref={imgRef}
+                  src={imagePreview} 
+                  alt="Result" 
+                  className="w-full block" 
+                  onLoad={() => {
+                    console.log('🖼️ Image loaded, triggering draw')
+                    setImageLoaded(true)
+                  }}
+                />
+                <canvas 
+                  ref={canvasRef}
+                  className="absolute top-0 left-0 w-full h-full pointer-events-none"
+                  style={{ display: 'block' }}
+                />
+              </div>
+            )}
+
+            {/* Text result */}
+            <div className="bg-white/5 border border-white/10 rounded-xl p-4 max-h-96 overflow-y-auto">
+              {isMarkdown ? (
+                <div className="prose prose-invert prose-sm max-w-none">
+                  <ReactMarkdown>{result.text}</ReactMarkdown>
+                </div>
+              ) : (
+                <pre className="text-sm text-gray-200 whitespace-pre-wrap font-mono">
+                  {result.text}
+                </pre>
+              )}
+            </div>
+
+            {/* Advanced Settings Dropdown */}
+            <details className="glass rounded-xl overflow-hidden">
+              <summary className="px-4 py-3 cursor-pointer flex items-center justify-between hover:bg-white/5 transition-colors">
+                <span className="text-sm font-medium text-gray-300">Advanced Settings & Metadata</span>
+                <ChevronDown className="w-4 h-4 text-gray-400" />
+              </summary>
+              <div className="px-4 py-3 border-t border-white/10 space-y-3">
+                {result.metadata && (
+                  <div>
+                    <p className="text-xs text-gray-400 mb-2">Processing Metadata</p>
+                    <pre className="text-xs text-gray-500 whitespace-pre-wrap">
+                      {JSON.stringify(result.metadata, null, 2)}
+                    </pre>
+                  </div>
+                )}
+                {result.boxes?.length > 0 && (
+                  <div>
+                    <p className="text-xs text-gray-400 mb-2">Detected Regions ({result.boxes.length})</p>
+                    <div className="space-y-1">
+                      {result.boxes.map((box, idx) => (
+                        <div key={idx} className="text-xs text-gray-500">
+                          {box.label}: [{box.box.map(n => Math.round(n)).join(', ')}]
+                        </div>
+                      ))}
+                    </div>
+                  </div>
+                )}
+              </div>
+            </details>
+
+            {/* Success indicator */}
+            <motion.div
+              initial={{ scale: 0.9, opacity: 0 }}
+              animate={{ scale: 1, opacity: 1 }}
+              className="flex items-center justify-center gap-2 text-green-400"
+            >
+              <CheckCircle2 className="w-5 h-5" />
+              <span className="text-sm font-medium">Processing complete!</span>
+            </motion.div>
+          </motion.div>
+        ) : (
+          <motion.div
+            key="empty"
+            initial={{ opacity: 0 }}
+            animate={{ opacity: 1 }}
+            exit={{ opacity: 0 }}
+            className="flex flex-col items-center justify-center py-20 space-y-4"
+          >
+            <div className="relative">
+              <motion.div
+                animate={{ 
+                  scale: [1, 1.2, 1],
+                  opacity: [0.5, 0.8, 0.5]
+                }}
+                transition={{ duration: 3, repeat: Infinity }}
+                className="w-20 h-20 bg-purple-500/20 rounded-full blur-xl"
+              />
+              <Sparkles className="w-10 h-10 absolute top-1/2 left-1/2 transform -translate-x-1/2 -translate-y-1/2 text-purple-400" />
+            </div>
+            <div className="text-center">
+              <p className="text-lg font-medium text-gray-300">
+                Ready to process
+              </p>
+              <p className="text-sm text-gray-500 mt-1">
+                Upload an image and hit analyze to see the magic!
+              </p>
+            </div>
+          </motion.div>
+        )}
+      </AnimatePresence>
+    </div>
+  )
+}
diff --git a/frontend/src/index.css b/frontend/src/index.css
new file mode 100644
index 0000000..a2719dd
--- /dev/null
+++ b/frontend/src/index.css
@@ -0,0 +1,50 @@
+@tailwind base;
+@tailwind components;
+@tailwind utilities;
+
+@layer base {
+  body {
+    @apply bg-dark-200 text-white;
+    font-family: 'Inter', system-ui, -apple-system, sans-serif;
+  }
+}
+
+@layer components {
+  .glass {
+    @apply bg-white/5 backdrop-blur-xl border border-white/10;
+  }
+  
+  .glass-hover {
+    @apply transition-all hover:bg-white/10 hover:border-white/20;
+  }
+  
+  .gradient-text {
+    @apply bg-clip-text text-transparent bg-gradient-to-r from-cyan-400 via-purple-500 to-pink-500;
+  }
+  
+  .gradient-border {
+    @apply relative bg-gradient-to-r from-cyan-500 via-purple-500 to-pink-500 p-[2px] rounded-2xl;
+  }
+  
+  .gradient-bg {
+    @apply bg-gradient-to-br from-purple-900/20 via-pink-900/20 to-cyan-900/20;
+  }
+}
+
+/* Custom scrollbar (::-webkit-scrollbar is non-standard; it only affects WebKit/Blink browsers) */
+::-webkit-scrollbar {
+  width: 8px;
+  height: 8px;
+}
+
+::-webkit-scrollbar-track {
+  @apply bg-dark-100;
+}
+
+::-webkit-scrollbar-thumb {
+  @apply bg-purple-500/30 rounded-full;
+}
+
+::-webkit-scrollbar-thumb:hover {
+  @apply bg-purple-500/50;
+}
diff --git a/frontend/src/main.jsx b/frontend/src/main.jsx
new file mode 100644
index 0000000..54b39dd
--- /dev/null
+++ b/frontend/src/main.jsx
@@ -0,0 +1,10 @@
+import React from 'react'
+import ReactDOM from 'react-dom/client'
+import App from './App.jsx'
+import './index.css'
+
+ReactDOM.createRoot(document.getElementById('root')).render(
+  <React.StrictMode>
+    <App />
+  </React.StrictMode>,
+)
diff --git a/frontend/src/main.tsx b/frontend/src/main.tsx
new file mode 100644
index 0000000..bef5202
--- /dev/null
+++ b/frontend/src/main.tsx
@@ -0,0 +1,10 @@
+import { StrictMode } from 'react'
+import { createRoot } from 'react-dom/client'
+import './index.css'
+import App from './App.tsx'
+
+// Mount <App /> into #root. The `!` asserts the element exists; nothing here checks for null.
+const container = document.getElementById('root')!
+const root = createRoot(container)
+
+root.render(<StrictMode><App /></StrictMode>)
diff --git a/frontend/tailwind.config.js b/frontend/tailwind.config.js
new file mode 100644
index 0000000..2a01793
--- /dev/null
+++ b/frontend/tailwind.config.js
@@ -0,0 +1,48 @@
+/** @type {import('tailwindcss').Config} */
+export default {
+  content: [
+    "./index.html",
+    "./src/**/*.{js,ts,jsx,tsx}",
+  ],
+  theme: {
+    extend: {
+      colors: {
+        dark: {
+          50: '#18181b',
+          100: '#0f0f12',
+          200: '#09090b',
+        }
+      },
+      animation: {
+        'gradient': 'gradient 8s linear infinite',
+        'float': 'float 6s ease-in-out infinite',
+        'glow': 'glow 2s ease-in-out infinite alternate',
+      },
+      keyframes: {
+        gradient: {
+          '0%, 100%': {
+            'background-size': '200% 200%',
+            'background-position': 'left center'
+          },
+          '50%': {
+            'background-size': '200% 200%',
+            'background-position': 'right center'
+          },
+        },
+        float: {
+          '0%, 100%': { transform: 'translateY(0px)' },
+          '50%': { transform: 'translateY(-20px)' },
+        },
+        glow: {
+          'from': {
+            'text-shadow': '0 0 10px #fff, 0 0 20px #fff, 0 0 30px #e60073, 0 0 40px #e60073',
+          },
+          'to': {
+            'text-shadow': '0 0 20px #fff, 0 0 30px #ff4da6, 0 0 40px #ff4da6, 0 0 50px #ff4da6',
+          },
+        },
+      },
+    },
+  },
+  plugins: [],
+}
diff --git a/frontend/tsconfig.app.json b/frontend/tsconfig.app.json
new file mode 100644
index 0000000..a9b5a59
--- /dev/null
+++ b/frontend/tsconfig.app.json
@@ -0,0 +1,28 @@
+{
+  "compilerOptions": {
+    "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
+    "target": "ES2022",
+    "useDefineForClassFields": true,
+    "lib": ["ES2022", "DOM", "DOM.Iterable"],
+    "module": "ESNext",
+    "types": ["vite/client"],
+    "skipLibCheck": true,
+
+    /* Bundler mode */
+    "moduleResolution": "bundler",
+    "allowImportingTsExtensions": true,
+    "verbatimModuleSyntax": true,
+    "moduleDetection": "force",
+    "noEmit": true,
+    "jsx": "react-jsx",
+
+    /* Linting */
+    "strict": true,
+    "noUnusedLocals": true,
+    "noUnusedParameters": true,
+    "erasableSyntaxOnly": true,
+    "noFallthroughCasesInSwitch": true,
+    "noUncheckedSideEffectImports": true
+  },
+  "include": ["src"]
+}
diff --git a/frontend/tsconfig.json b/frontend/tsconfig.json
new file mode 100644
index 0000000..1ffef60
--- /dev/null
+++ b/frontend/tsconfig.json
@@ -0,0 +1,7 @@
+{
+  "files": [],
+  "references": [
+    { "path": "./tsconfig.app.json" },
+    { "path": "./tsconfig.node.json" }
+  ]
+}
diff --git a/frontend/tsconfig.node.json b/frontend/tsconfig.node.json
new file mode 100644
index 0000000..8a67f62
--- /dev/null
+++ b/frontend/tsconfig.node.json
@@ -0,0 +1,26 @@
+{
+  "compilerOptions": {
+    "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo",
+    "target": "ES2023",
+    "lib": ["ES2023"],
+    "module": "ESNext",
+    "types": ["node"],
+    "skipLibCheck": true,
+
+    /* Bundler mode */
+    "moduleResolution": "bundler",
+    "allowImportingTsExtensions": true,
+    "verbatimModuleSyntax": true,
+    "moduleDetection": "force",
+    "noEmit": true,
+
+    /* Linting */
+    "strict": true,
+    "noUnusedLocals": true,
+    "noUnusedParameters": true,
+    "erasableSyntaxOnly": true,
+    "noFallthroughCasesInSwitch": true,
+    "noUncheckedSideEffectImports": true
+  },
+  "include": ["vite.config.ts"]
+}
diff --git a/frontend/vite.config.js b/frontend/vite.config.js
new file mode 100644
index 0000000..d287719
--- /dev/null
+++ b/frontend/vite.config.js
@@ -0,0 +1,20 @@
+import { defineConfig } from 'vite'
+import react from '@vitejs/plugin-react'
+
+export default defineConfig({
+  plugins: [react()],
+  server: {
+    host: '0.0.0.0',
+    port: 3000,
+    proxy: {
+      '/api': {
+        target: 'http://backend:8000',
+        changeOrigin: true,
+      }
+    }
+  },
+  preview: {
+    host: '0.0.0.0',
+    port: 3000
+  }
+})
diff --git a/frontend/vite.config.ts b/frontend/vite.config.ts
new file mode 100644
index 0000000..8b0f57b
--- /dev/null
+++ b/frontend/vite.config.ts
@@ -0,0 +1,7 @@
+import { defineConfig } from 'vite'
+import react from '@vitejs/plugin-react'
+
+// https://vite.dev/config/
+// NOTE(review): a vite.config.js is added next to this file; confirm which one Vite loads.
+const plugins = [react()]
+export default defineConfig({ plugins })