Initial commit

This commit is contained in:
Ray Dumasia
2025-10-21 01:32:09 +01:00
commit aec04f6eb4
32 changed files with 2001 additions and 0 deletions

102
.gitignore vendored Normal file
View File

@@ -0,0 +1,102 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
*.pyc
.pytest_cache/
.coverage
htmlcov/
.tox/
.hypothesis/
venv/
env/
ENV/
.venv
# Node
node_modules/
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
.pnpm-store/
dist/
dist-ssr/
*.local
package-lock.json
yarn.lock
pnpm-lock.yaml
# Environment
.env
.env.local
.env.development.local
.env.test.local
.env.production.local
# IDEs
.vscode/
.idea/
*.swp
*.swo
*~
.DS_Store
*.sublime-project
*.sublime-workspace
.project
.classpath
.settings/
# Logs
logs/
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*
# Docker
.dockerignore
# Models (keep structure, ignore downloads)
models/hub/models--*/blobs/*
models/hub/models--*/snapshots/*/pytorch_model*.bin
models/hub/models--*/snapshots/*/model*.safetensors
models/hub/models--*/snapshots/*/*.msgpack
*.bin
*.safetensors
*.msgpack
*.h5
*.onnx
# Temporary files
tmp/
temp/
*.tmp
*.bak
*.swp
*~
# OS
.DS_Store
Thumbs.db
Desktop.ini

138
README.md Normal file
View File

@@ -0,0 +1,138 @@
# 🚀 DeepSeek OCR - React + FastAPI
Modern OCR web application powered by DeepSeek-OCR with a stunning React frontend and FastAPI backend.
> **Note**: This was a quickly vibe-coded project to test out DeepSeek-OCR! It works quite nicely on an RTX 5090. The "Find" mode grounding boxes aren't quite working yet - probably my fault for not interpreting the dimensions correctly - but the core OCR functionality is pretty nice so far.
## Quick Start
```bash
docker compose up --build
```
Then open:
- **Frontend**: http://localhost:3000
- **Backend API**: http://localhost:8000
- **API Docs**: http://localhost:8000/docs
## Features
### 4 OCR Modes
- **Plain OCR** - Raw text extraction
- **Describe** - Generate image descriptions
- **Find** - Locate specific terms (grounding boxes WIP)
- **Freeform** - Custom prompts for anything
### UI Features
- 🎨 Glass morphism design with animated gradients
- 🎯 Drag & drop file upload
- 📦 Grounding box visualization (WIP - dimensions need fixing)
- ✨ Smooth animations (Framer Motion)
- 📋 Copy/Download results
- 🎛️ Advanced settings dropdown
- 📝 Markdown rendering for formatted output
## Tech Stack
- **Frontend**: React 18 + Vite 5 + TailwindCSS 3 + Framer Motion 11
- **Backend**: FastAPI + PyTorch + Transformers 4.46 + DeepSeek-OCR
- **Server**: Nginx (reverse proxy)
- **Container**: Docker + Docker Compose with multi-stage builds
- **GPU**: NVIDIA CUDA support (tested on RTX 5090)
## Project Structure
```
deepseek-ocr/
├── backend/ # FastAPI backend
│ ├── main.py
│ ├── requirements.txt
│ └── Dockerfile
├── frontend/ # React frontend
│ ├── src/
│ │ ├── components/
│ │ ├── App.jsx
│ │ └── main.jsx
│ ├── package.json
│ ├── nginx.conf
│ └── Dockerfile
├── models/ # Model cache
└── docker-compose.yml
```
## Development
### Backend
```bash
cd backend
pip install -r requirements.txt
uvicorn main:app --reload --host 0.0.0.0 --port 8000
```
### Frontend
```bash
cd frontend
npm install
npm run dev
```
## Requirements
- Docker & Docker Compose
- NVIDIA GPU with CUDA support (tested on RTX 5090)
- nvidia-docker runtime
- ~8-12GB VRAM for model
## Known Issues
- 📦 **Find mode grounding boxes**: Not rendering correctly - likely dimension scaling issue in the canvas overlay logic. Boxes are detected and returned by the backend, but the frontend visualization needs work.
## API Usage
### POST /api/ocr
**Parameters:**
- `image` (file, required)
- `mode` (string): plain_ocr | describe | find_ref | freeform
- `prompt` (string): Custom prompt for freeform mode
- `grounding` (bool): Enable bounding boxes (auto-enabled for find_ref)
- `find_term` (string): Term to locate in find_ref mode
- `base_size` (int): Base processing size (default: 1024)
- `image_size` (int): Image size (default: 640)
- `crop_mode` (bool): Enable crop mode (default: true)
**Response:**
```json
{
"success": true,
"text": "Extracted text...",
"boxes": [{"label": "field", "box": [x1, y1, x2, y2]}],
"image_dims": {"w": 1920, "h": 1080},
"metadata": {...}
}
```
## Troubleshooting
### GPU not detected
```bash
nvidia-smi
docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi
```
### Port conflicts
```bash
sudo lsof -i :3000
sudo lsof -i :8000
```
### Frontend build issues
```bash
cd frontend
rm -rf node_modules package-lock.json
docker compose build frontend
```
## License
This project uses the DeepSeek-OCR model. Refer to the model's license terms.

20
backend/Dockerfile Normal file
View File

@@ -0,0 +1,20 @@
# Backend Dockerfile - FastAPI + DeepSeek-OCR
FROM nvcr.io/nvidia/pytorch:25.09-py3
ENV PYTHONUNBUFFERED=1 \
PIP_NO_CACHE_DIR=1 \
HF_HOME=/models
WORKDIR /app
# Install dependencies
COPY requirements.txt .
RUN pip install --upgrade pip && pip install -r requirements.txt
# Copy backend code
COPY main.py .
EXPOSE 8000
# Run a single uvicorn worker: main.py loads the model in its startup hook, so every extra worker process would load another copy onto the GPU
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "1"]

329
backend/main.py Normal file
View File

@@ -0,0 +1,329 @@
import os
import re
import tempfile
import shutil
from typing import List, Dict, Any, Optional
from contextlib import asynccontextmanager
from fastapi import FastAPI, File, UploadFile, Form, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
import torch
from transformers import AutoModel, AutoTokenizer
from PIL import Image
import uvicorn
# -----------------------------
# Lifespan context for model loading
# -----------------------------
# Populated by `lifespan` at startup; `None` while the model is not loaded.
model = None
tokenizer = None


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the OCR model on startup and release it on shutdown.

    Fills the module-level ``model`` and ``tokenizer`` globals before the
    application starts serving requests, and resets them to ``None`` (freeing
    cached CUDA memory) once the application stops.

    Environment variables read:
        MODEL_NAME: model identifier to load (default ``deepseek-ai/DeepSeek-OCR``).
        HF_HOME: cache directory, created if missing (default ``/models``).
    """
    global model, tokenizer

    # Environment setup
    # NOTE(review): TRANSFORMERS_CACHE is dropped presumably so that HF_HOME
    # alone decides where weights are cached - confirm.
    os.environ.pop("TRANSFORMERS_CACHE", None)
    MODEL_NAME = os.environ.get("MODEL_NAME", "deepseek-ai/DeepSeek-OCR")
    HF_HOME = os.environ.get("HF_HOME", "/models")
    os.makedirs(HF_HOME, exist_ok=True)

    # Load model
    print(f"🚀 Loading {MODEL_NAME}...")
    torch_dtype = torch.bfloat16

    # SECURITY: trust_remote_code=True executes Python code shipped with the
    # model repository; only point MODEL_NAME at sources you trust.
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_NAME,
        trust_remote_code=True,
    )
    model = AutoModel.from_pretrained(
        MODEL_NAME,
        trust_remote_code=True,
        use_safetensors=True,
        attn_implementation="eager",
        torch_dtype=torch_dtype,
    ).eval().to("cuda")

    # Pad token setup: fall back to the EOS token when no pad token is defined,
    # and mirror the tokenizer's pad id into the model config.
    try:
        if getattr(tokenizer, "pad_token_id", None) is None and getattr(tokenizer, "eos_token_id", None) is not None:
            tokenizer.pad_token = tokenizer.eos_token
        if getattr(model.config, "pad_token_id", None) is None and getattr(tokenizer, "pad_token_id", None) is not None:
            model.config.pad_token_id = tokenizer.pad_token_id
    except Exception as exc:
        # Still best-effort (startup continues), but no longer silent.
        print(f"⚠️ Could not configure pad token: {type(exc).__name__}: {exc}")

    print("✅ Model loaded and ready!")
    try:
        yield
    finally:
        # Cleanup: drop the references so the weights can be garbage collected
        # and hand cached GPU memory back to the driver.
        print("🛑 Shutting down...")
        model = None
        tokenizer = None
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
# -----------------------------
# FastAPI app
# -----------------------------
app = FastAPI(
    title="DeepSeek-OCR API",
    description="Blazing fast OCR with DeepSeek-OCR model 🔥",
    version="2.0.0",
    lifespan=lifespan,
)

# CORS middleware for React frontend.
# Credentials are disabled on purpose: wildcard origins must not be combined
# with allow_credentials=True, and this API uses neither cookies nor auth
# headers. To enable credentials, list the allowed origins explicitly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# -----------------------------
# Prompt builder
# -----------------------------
def build_prompt(
    mode: str,
    user_prompt: str,
    grounding: bool,
    find_term: Optional[str],
    schema: Optional[str],
    include_caption: bool,
) -> str:
    """Build the model prompt for the requested OCR mode.

    The prompt is a newline-joined sequence of: the ``<image>`` placeholder,
    an optional ``<|grounding|>`` tag, and the mode-specific instruction.

    Args:
        mode: OCR mode name; unknown modes fall back to a generic OCR instruction.
        user_prompt: Custom instruction, used only in ``freeform`` mode.
        grounding: Force the ``<|grounding|>`` tag (it is always added for
            ``find_ref``, ``layout_map`` and ``pii_redact``).
        find_term: Term to locate in ``find_ref`` mode (defaults to ``"Total"``).
        schema: JSON schema text for ``kv_json`` mode (defaults to ``"{}"``).
        include_caption: Append a request for an image description, except in
            ``describe`` mode where it would be redundant.

    Returns:
        The full prompt string.
    """
    parts: List[str] = ["<image>"]
    mode_requires_grounding = mode in {"find_ref", "layout_map", "pii_redact"}
    if grounding or mode_requires_grounding:
        parts.append("<|grounding|>")

    instruction = ""
    if mode == "plain_ocr":
        instruction = "Free OCR. Only output the raw text."
    elif mode == "markdown":
        instruction = "Convert the document to markdown."
    elif mode == "tables_csv":
        instruction = (
            "Extract every table and output CSV only. "
            "Use commas, minimal quoting. If multiple tables, separate with a line containing '---'."
        )
    elif mode == "tables_md":
        instruction = "Extract every table as GitHub-flavored Markdown tables. Output only the tables."
    elif mode == "kv_json":
        # Blank or whitespace-only schema falls back to an empty object.
        schema_text = (schema or "").strip() or "{}"
        instruction = (
            "Extract key fields and return strict JSON only. "
            f"Use this schema (fill the values): {schema_text}"
        )
    elif mode == "figure_chart":
        instruction = (
            "Parse the figure. First extract any numeric series as a two-column table (x,y). "
            "Then summarize the chart in 2 sentences. Output the table, then a line '---', then the summary."
        )
    elif mode == "find_ref":
        key = (find_term or "").strip() or "Total"
        instruction = f"Locate <|ref|>{key}<|/ref|> in the image."
    elif mode == "layout_map":
        instruction = (
            'Return a JSON array of blocks with fields {"type":["title","paragraph","table","figure"],'
            '"box":[x1,y1,x2,y2]}. Do not include any text content.'
        )
    elif mode == "pii_redact":
        instruction = (
            'Find all occurrences of emails, phone numbers, postal addresses, and IBANs. '
            'Return a JSON array of objects {label, text, box:[x1,y1,x2,y2]}.'
        )
    elif mode == "multilingual":
        instruction = "Free OCR. Detect the language automatically and output in the same script."
    elif mode == "describe":
        instruction = "Describe this image concisely in 2-3 sentences. Focus on visible key elements."
    elif mode == "freeform":
        # A missing or whitespace-only prompt must not yield an empty
        # instruction; fall back the same way find_ref does for its term.
        instruction = (user_prompt or "").strip() or "OCR this image."
    else:
        instruction = "OCR this image."

    if include_caption and mode not in {"describe"}:
        instruction = instruction + "\nThen add a one-paragraph description of the image."

    parts.append(instruction)
    return "\n".join(parts)
# -----------------------------
# Grounding parser
# -----------------------------
# One grounded reference: <|ref|>label<|/ref|><|det|>[[x1, y1, x2, y2], ...]<|/det|>.
# `coords` captures the whole bracketed payload so a reference may carry one
# or several boxes; `[^<]` keeps the payload from running into the next tag.
DET_BLOCK = re.compile(
    r"<\|ref\|>(?P<label>.*?)<\|/ref\|>\s*<\|det\|>\s*(?P<coords>\[[^<]*\])\s*<\|/det\|>",
    re.DOTALL,
)

# Innermost bracket group inside a `coords` payload, i.e. a single box.
_BOX = re.compile(r"\[\s*([^\[\]]+?)\s*\]")


def clean_grounding_text(text: str) -> str:
    """Remove grounding tags from text for display, keeping the labels.

    Each ``<|ref|>label<|/ref|><|det|>[[...]]<|/det|>`` block is replaced by
    its label (whether it holds one box or several), standalone
    ``<|grounding|>`` tags are removed, and the result is stripped.
    """
    cleaned = DET_BLOCK.sub(lambda m: m.group("label"), text)
    # Also remove any standalone grounding tags
    cleaned = re.sub(r"<\|grounding\|>", "", cleaned)
    return cleaned.strip()


def parse_detections(text: str) -> List[Dict[str, Any]]:
    """Parse grounding boxes from model output.

    Args:
        text: Raw model output; ``None`` or empty yields no boxes.

    Returns:
        A list of ``{"label": str, "box": [x1, y1, x2, y2]}`` dicts with float
        coordinates, in order of appearance. A reference with several boxes
        yields one entry per box, all sharing the label. Boxes with fewer than
        four values or non-numeric values are skipped; values beyond the
        fourth are ignored. Coordinates are returned exactly as the model
        emitted them (no scaling is applied here).
    """
    boxes: List[Dict[str, Any]] = []
    for m in DET_BLOCK.finditer(text or ""):
        label = m.group("label").strip()
        for box_match in _BOX.finditer(m.group("coords")):
            coords = [c.strip() for c in box_match.group(1).split(",")]
            try:
                nums = list(map(float, coords[:4]))
            except ValueError:
                continue
            if len(nums) == 4:
                boxes.append({"label": label, "box": nums})
    return boxes
# -----------------------------
# Routes
# -----------------------------
@app.get("/")
async def root():
    """Landing endpoint: report that the API is up and where the docs live."""
    banner = {"message": "DeepSeek-OCR API is running! 🚀", "docs": "/docs"}
    return banner
@app.get("/health")
async def health():
    """Health probe: always reports "healthy", plus whether the model global is set."""
    model_ready = model is not None
    return {"status": "healthy", "model_loaded": model_ready}
@app.post("/api/ocr")
async def ocr_inference(
    image: UploadFile = File(...),
    mode: str = Form("plain_ocr"),
    prompt: str = Form(""),
    grounding: bool = Form(False),
    include_caption: bool = Form(False),
    find_term: Optional[str] = Form(None),
    schema: Optional[str] = Form(None),
    base_size: int = Form(1024),
    image_size: int = Form(640),
    crop_mode: bool = Form(True),
    test_compress: bool = Form(False),
):
    """
    Perform OCR inference on uploaded image

    - **image**: Image file to process
    - **mode**: OCR mode (plain_ocr, markdown, tables_csv, etc.)
    - **prompt**: Custom prompt for freeform mode
    - **grounding**: Enable grounding boxes
    - **include_caption**: Add image description
    - **find_term**: Term to find (for find_ref mode)
    - **schema**: JSON schema (for kv_json mode)
    - **base_size**: Base processing size
    - **image_size**: Image size parameter
    - **crop_mode**: Enable crop mode
    - **test_compress**: Test compression

    Responds with `success`, the display `text` (grounding tags removed),
    the parsed `boxes`, the uploaded image's `image_dims` and request `metadata`.

    Errors: 503 while the model is not loaded, 400 for an empty upload,
    500 for any failure during inference.
    """
    if model is None or tokenizer is None:
        raise HTTPException(status_code=503, detail="Model not loaded yet")

    # Build prompt
    prompt_text = build_prompt(
        mode=mode,
        user_prompt=prompt,
        grounding=grounding,
        find_term=find_term,
        schema=schema,
        include_caption=include_caption,
    )

    tmp_img = None
    out_dir = None
    try:
        # Read the upload first so an empty body is rejected as a client error
        # before any temp file exists or the model is invoked.
        content = await image.read()
        if not content:
            raise HTTPException(status_code=400, detail="Uploaded image is empty")

        # Save uploaded file. The path is recorded before writing so the
        # `finally` block removes the file even if the write fails.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
            tmp_img = tmp.name
            tmp.write(content)

        # Get original dimensions (best effort: reported as null when PIL
        # cannot open the file).
        try:
            with Image.open(tmp_img) as im:
                orig_w, orig_h = im.size
        except Exception:
            orig_w = orig_h = None

        out_dir = tempfile.mkdtemp(prefix="dsocr_")

        # Run inference
        res = model.infer(
            tokenizer,
            prompt=prompt_text,
            image_file=tmp_img,
            output_path=out_dir,
            base_size=base_size,
            image_size=image_size,
            crop_mode=crop_mode,
            save_results=False,
            test_compress=test_compress,
            eval_mode=True,
        )

        # Normalize response: the result may be a string, a dict with a
        # "text" key, or a sequence; anything else counts as "no text".
        if isinstance(res, str):
            text = res.strip()
        elif isinstance(res, dict) and "text" in res:
            text = str(res["text"]).strip()
        elif isinstance(res, (list, tuple)):
            text = "\n".join(map(str, res)).strip()
        else:
            text = ""

        # Fallback: check output file
        if not text:
            mmd = os.path.join(out_dir, "result.mmd")
            if os.path.exists(mmd):
                with open(mmd, "r", encoding="utf-8") as fh:
                    text = fh.read().strip()
        if not text:
            text = "No text returned by model."

        # Parse grounding boxes
        # NOTE(review): boxes are returned exactly as the model emitted them.
        # The README lists wrong box rendering as a known issue; the
        # coordinates are possibly on a normalised grid rather than in pixels
        # - confirm, then scale here or in the frontend.
        boxes = parse_detections(text) if ("<|det|>" in text or "<|ref|>" in text) else []

        # Clean grounding tags from display text, but keep the labels
        display_text = clean_grounding_text(text) if ("<|ref|>" in text or "<|grounding|>" in text) else text

        # If display text is empty after cleaning but we have boxes, show the labels
        if not display_text and boxes:
            display_text = ", ".join([b["label"] for b in boxes])

        return JSONResponse({
            "success": True,
            "text": display_text,
            "boxes": boxes,
            "image_dims": {"w": orig_w, "h": orig_h},
            "metadata": {
                "mode": mode,
                "grounding": grounding or (mode in {"find_ref","layout_map","pii_redact"}),
                "base_size": base_size,
                "image_size": image_size,
                "crop_mode": crop_mode
            }
        })
    except HTTPException:
        # Deliberate HTTP errors (e.g. the 400 above) must not become 500s.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"{type(e).__name__}: {str(e)}")
    finally:
        if tmp_img:
            try:
                os.remove(tmp_img)
            except Exception:
                pass
        if out_dir:
            shutil.rmtree(out_dir, ignore_errors=True)
if __name__ == "__main__":
    # Direct execution (python main.py): serve the app on all interfaces, port 8000.
    uvicorn.run(app, port=8000, host="0.0.0.0")

12
backend/requirements.txt Normal file
View File

@@ -0,0 +1,12 @@
fastapi>=0.104.0
uvicorn[standard]>=0.24.0
python-multipart>=0.0.6
transformers==4.46.3
tokenizers==0.20.3
accelerate>=0.34.2
einops
addict
easydict
pillow
safetensors
torch

35
docker-compose.yml Normal file
View File

@@ -0,0 +1,35 @@
services:
backend:
build: ./backend
container_name: deepseek-ocr-backend
environment:
MODEL_NAME: deepseek-ai/DeepSeek-OCR
HF_HOME: /models
volumes:
- ./models:/models
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: all
capabilities: [gpu]
shm_size: "4g"
ports:
- "8000:8000"
networks:
- ocr-network
frontend:
build: ./frontend
container_name: deepseek-ocr-frontend
ports:
- "3000:80"
depends_on:
- backend
networks:
- ocr-network
networks:
ocr-network:
driver: bridge

24
frontend/.gitignore vendored Normal file
View File

@@ -0,0 +1,24 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*
node_modules
dist
dist-ssr
*.local
# Editor directories and files
.vscode/*
!.vscode/extensions.json
.idea
.DS_Store
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?

29
frontend/Dockerfile Normal file
View File

@@ -0,0 +1,29 @@
# Frontend Dockerfile - React + Vite
FROM node:18-alpine as build
WORKDIR /app
# Copy package files first for better caching
COPY package*.json ./
# Install dependencies
RUN npm install --legacy-peer-deps
# Copy all source files
COPY . .
# Build the app
RUN npm run build
# Production stage with nginx
FROM nginx:alpine
# Copy built files from build stage
COPY --from=build /app/dist /usr/share/nginx/html
# Copy nginx config
COPY nginx.conf /etc/nginx/conf.d/default.conf
EXPOSE 80
CMD ["nginx", "-g", "daemon off;"]

73
frontend/README.md Normal file
View File

@@ -0,0 +1,73 @@
# React + TypeScript + Vite
This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.
Currently, two official plugins are available:
- [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react) uses [Babel](https://babeljs.io/) (or [oxc](https://oxc.rs) when used in [rolldown-vite](https://vite.dev/guide/rolldown)) for Fast Refresh
- [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh
## React Compiler
The React Compiler is not enabled on this template because of its impact on dev & build performances. To add it, see [this documentation](https://react.dev/learn/react-compiler/installation).
## Expanding the ESLint configuration
If you are developing a production application, we recommend updating the configuration to enable type-aware lint rules:
```js
export default defineConfig([
globalIgnores(['dist']),
{
files: ['**/*.{ts,tsx}'],
extends: [
// Other configs...
// Remove tseslint.configs.recommended and replace with this
tseslint.configs.recommendedTypeChecked,
// Alternatively, use this for stricter rules
tseslint.configs.strictTypeChecked,
// Optionally, add this for stylistic rules
tseslint.configs.stylisticTypeChecked,
// Other configs...
],
languageOptions: {
parserOptions: {
project: ['./tsconfig.node.json', './tsconfig.app.json'],
tsconfigRootDir: import.meta.dirname,
},
// other options...
},
},
])
```
You can also install [eslint-plugin-react-x](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-x) and [eslint-plugin-react-dom](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-dom) for React-specific lint rules:
```js
// eslint.config.js
import reactX from 'eslint-plugin-react-x'
import reactDom from 'eslint-plugin-react-dom'
export default defineConfig([
globalIgnores(['dist']),
{
files: ['**/*.{ts,tsx}'],
extends: [
// Other configs...
// Enable lint rules for React
reactX.configs['recommended-typescript'],
// Enable lint rules for React DOM
reactDom.configs.recommended,
],
languageOptions: {
parserOptions: {
project: ['./tsconfig.node.json', './tsconfig.app.json'],
tsconfigRootDir: import.meta.dirname,
},
// other options...
},
},
])
```

23
frontend/eslint.config.js Normal file
View File

@@ -0,0 +1,23 @@
import js from '@eslint/js'
import globals from 'globals'
import reactHooks from 'eslint-plugin-react-hooks'
import reactRefresh from 'eslint-plugin-react-refresh'
import tseslint from 'typescript-eslint'
import { defineConfig, globalIgnores } from 'eslint/config'
// Flat ESLint configuration exported as the module default.
export default defineConfig([
  // Never lint the build output directory.
  globalIgnores(['dist']),
  {
    // Applies only to TypeScript sources.
    // NOTE(review): the application entry points visible in this commit are
    // .jsx files (src/main.jsx, src/App.jsx), which this glob does not match -
    // confirm whether .js/.jsx should be included.
    files: ['**/*.{ts,tsx}'],
    extends: [
      js.configs.recommended,
      tseslint.configs.recommended,
      reactHooks.configs['recommended-latest'],
      reactRefresh.configs.vite,
    ],
    languageOptions: {
      ecmaVersion: 2020,
      // Declare the browser globals to the linter.
      globals: globals.browser,
    },
  },
])

16
frontend/index.html Normal file
View File

@@ -0,0 +1,16 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<link rel="icon" type="image/svg+xml" href="/vite.svg" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800;900&display=swap" rel="stylesheet">
<title>DeepSeek OCR - Next Gen Vision AI</title>
</head>
<body>
<div id="root"></div>
<script type="module" src="/src/main.jsx"></script>
</body>
</html>

40
frontend/nginx.conf Normal file
View File

@@ -0,0 +1,40 @@
server {
    listen 80;
    server_name _;
    root /usr/share/nginx/html;
    index index.html;

    # Allow image uploads larger than nginx's default 1 MB request-body limit;
    # without this, bigger images are rejected with 413 before reaching the API.
    client_max_body_size 50m;

    # Gzip compression
    gzip on;
    gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript;

    # API proxy to backend.
    # `^~` stops nginx from evaluating the regex location below for /api/ paths,
    # so an API URL that happens to end in .js/.png/... is still proxied.
    location ^~ /api/ {
        proxy_pass http://backend:8000/api/;
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection 'upgrade';
        proxy_set_header Host $host;
        proxy_cache_bypass $http_upgrade;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Increase timeouts for ML model processing
        proxy_connect_timeout 600;
        proxy_send_timeout 600;
        proxy_read_timeout 600;
        send_timeout 600;
    }

    # SPA fallback
    location / {
        try_files $uri $uri/ /index.html;
    }

    # Cache static assets
    location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg|woff|woff2|ttf|eot)$ {
        expires 1y;
        add_header Cache-Control "public, immutable";
    }
}

29
frontend/package.json Normal file
View File

@@ -0,0 +1,29 @@
{
"name": "deepseek-ocr-frontend",
"private": true,
"version": "2.0.0",
"type": "module",
"scripts": {
"dev": "vite --host",
"build": "vite build",
"preview": "vite preview"
},
"dependencies": {
"axios": "^1.6.5",
"framer-motion": "^11.0.0",
"lucide-react": "^0.344.0",
"react": "^18.3.1",
"react-dom": "^18.3.1",
"react-dropzone": "^14.2.3",
"react-markdown": "^10.1.0"
},
"devDependencies": {
"@types/react": "^18.3.12",
"@types/react-dom": "^18.3.1",
"@vitejs/plugin-react": "^4.3.4",
"autoprefixer": "^10.4.17",
"postcss": "^8.4.35",
"tailwindcss": "^3.4.1",
"vite": "^5.4.11"
}
}

View File

@@ -0,0 +1,6 @@
// PostCSS configuration: enables the tailwindcss and autoprefixer plugins,
// each with its default (empty) options.
export default {
  plugins: {
    tailwindcss: {},
    autoprefixer: {},
  },
}

1
frontend/public/vite.svg Normal file
View File

@@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" class="iconify iconify--logos" width="31.88" height="32" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 257"><defs><linearGradient id="IconifyId1813088fe1fbc01fb466" x1="-.828%" x2="57.636%" y1="7.652%" y2="78.411%"><stop offset="0%" stop-color="#41D1FF"></stop><stop offset="100%" stop-color="#BD34FE"></stop></linearGradient><linearGradient id="IconifyId1813088fe1fbc01fb467" x1="43.376%" x2="50.316%" y1="2.242%" y2="89.03%"><stop offset="0%" stop-color="#FFEA83"></stop><stop offset="8.333%" stop-color="#FFDD35"></stop><stop offset="100%" stop-color="#FFA800"></stop></linearGradient></defs><path fill="url(#IconifyId1813088fe1fbc01fb466)" d="M255.153 37.938L134.897 252.976c-2.483 4.44-8.862 4.466-11.382.048L.875 37.958c-2.746-4.814 1.371-10.646 6.827-9.67l120.385 21.517a6.537 6.537 0 0 0 2.322-.004l117.867-21.483c5.438-.991 9.574 4.796 6.877 9.62Z"></path><path fill="url(#IconifyId1813088fe1fbc01fb467)" d="M185.432.063L96.44 17.501a3.268 3.268 0 0 0-2.634 3.014l-5.474 92.456a3.268 3.268 0 0 0 3.997 3.378l24.777-5.718c2.318-.535 4.413 1.507 3.936 3.838l-7.361 36.047c-.495 2.426 1.782 4.5 4.151 3.78l15.304-4.649c2.372-.72 4.652 1.36 4.15 3.788l-11.698 56.621c-.732 3.542 3.979 5.473 5.943 2.437l1.313-2.028l72.516-144.72c1.215-2.423-.88-5.186-3.54-4.672l-25.505 4.922c-2.396.462-4.435-1.77-3.759-4.114l16.646-57.705c.677-2.35-1.37-4.583-3.769-4.113Z"></path></svg>

After

Width:  |  Height:  |  Size: 1.5 KiB

42
frontend/src/App.css Normal file
View File

@@ -0,0 +1,42 @@
#root {
max-width: 1280px;
margin: 0 auto;
padding: 2rem;
text-align: center;
}
.logo {
height: 6em;
padding: 1.5em;
will-change: filter;
transition: filter 300ms;
}
.logo:hover {
filter: drop-shadow(0 0 2em #646cffaa);
}
.logo.react:hover {
filter: drop-shadow(0 0 2em #61dafbaa);
}
@keyframes logo-spin {
from {
transform: rotate(0deg);
}
to {
transform: rotate(360deg);
}
}
@media (prefers-reduced-motion: no-preference) {
a:nth-of-type(2) .logo {
animation: logo-spin infinite 20s linear;
}
}
.card {
padding: 2em;
}
.read-the-docs {
color: #888;
}

251
frontend/src/App.jsx Normal file
View File

@@ -0,0 +1,251 @@
import { useState, useCallback } from 'react'
import { motion, AnimatePresence } from 'framer-motion'
import { Sparkles, Zap, Loader2 } from 'lucide-react'
import ImageUpload from './components/ImageUpload'
import ModeSelector from './components/ModeSelector'
import ResultPanel from './components/ResultPanel'
import axios from 'axios'
const API_BASE = import.meta.env.VITE_API_URL || '/api'
function App() {
const [mode, setMode] = useState('plain_ocr')
const [image, setImage] = useState(null)
const [imagePreview, setImagePreview] = useState(null)
const [result, setResult] = useState(null)
const [loading, setLoading] = useState(false)
const [error, setError] = useState(null)
// Form state
const [prompt, setPrompt] = useState('')
const [findTerm, setFindTerm] = useState('')
const [advancedSettings, setAdvancedSettings] = useState({
base_size: 1024,
image_size: 640,
crop_mode: true,
test_compress: false
})
// Store the newly selected file and a preview URL for it, and clear any
// previous error or result.
const handleImageSelect = useCallback((file) => {
  // Object URLs stay alive until revoked; release the previous preview so
  // selecting image after image does not leak one blob URL per selection.
  if (imagePreview) {
    URL.revokeObjectURL(imagePreview)
  }
  setImage(file)
  setImagePreview(URL.createObjectURL(file))
  setError(null)
  setResult(null)
}, [imagePreview])
// Send the selected image and the current settings to the OCR endpoint and
// store either the response payload or a displayable error message.
const handleSubmit = async () => {
  if (!image) {
    setError('Please upload an image first')
    return
  }

  setLoading(true)
  setError(null)

  try {
    const formData = new FormData()
    formData.append('image', image)
    formData.append('mode', mode)
    formData.append('prompt', prompt)
    formData.append('grounding', mode === 'find_ref') // Auto-enable for find mode
    formData.append('include_caption', false)
    formData.append('find_term', findTerm)
    formData.append('schema', '')
    formData.append('base_size', advancedSettings.base_size)
    formData.append('image_size', advancedSettings.image_size)
    formData.append('crop_mode', advancedSettings.crop_mode)
    formData.append('test_compress', advancedSettings.test_compress)

    const response = await axios.post(`${API_BASE}/ocr`, formData, {
      headers: {
        'Content-Type': 'multipart/form-data',
      },
    })
    setResult(response.data)
  } catch (err) {
    // `detail` is a string for HTTPException responses, but FastAPI validation
    // errors (HTTP 422) send an array of objects. Rendering a non-string as a
    // React child throws, so serialise anything that is not already text.
    const detail = err.response?.data?.detail
    const detailText =
      typeof detail === 'string' || detail == null ? detail : JSON.stringify(detail)
    setError(detailText || err.message || 'An error occurred')
  } finally {
    setLoading(false)
  }
}
// Copy the result text to the system clipboard, surfacing failures in the
// error banner instead of throwing or leaving a promise rejection unhandled.
const handleCopy = useCallback(() => {
  if (!result?.text) return

  // The async Clipboard API only exists in secure contexts (HTTPS or
  // localhost); elsewhere `navigator.clipboard` is undefined.
  if (!navigator.clipboard?.writeText) {
    setError('Clipboard access is not available in this browser context')
    return
  }

  navigator.clipboard.writeText(result.text).catch((err) => {
    setError(err?.message || 'Failed to copy to clipboard')
  })
}, [result])
// Download the result text as a file whose extension depends on the OCR mode
// (falling back to .txt for modes without a dedicated extension).
const handleDownload = useCallback(() => {
  const text = result?.text
  if (!text) return

  const extensionByMode = {
    plain_ocr: 'txt',
    markdown: 'md',
    tables_csv: 'csv',
    tables_md: 'md',
    kv_json: 'json',
    layout_map: 'json',
    pii_redact: 'json',
  }
  const extension = extensionByMode[mode] || 'txt'

  // Wrap the text in a Blob and trigger the download through a temporary link.
  const objectUrl = URL.createObjectURL(new Blob([text], { type: 'text/plain' }))
  const link = document.createElement('a')
  link.href = objectUrl
  link.download = `deepseek-ocr-result.${extension}`
  link.click()
  URL.revokeObjectURL(objectUrl)
}, [result, mode])
return (
<div className="min-h-screen relative overflow-hidden">
{/* Animated background */}
<div className="fixed inset-0 -z-10">
<div className="absolute inset-0 bg-gradient-to-br from-purple-900/20 via-pink-900/20 to-cyan-900/20" />
<div className="absolute inset-0 bg-[url('data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iNjAiIGhlaWdodD0iNjAiIHZpZXdCb3g9IjAgMCA2MCA2MCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48ZyBmaWxsPSJub25lIiBmaWxsLXJ1bGU9ImV2ZW5vZGQiPjxwYXRoIGQ9Ik0zNiAxOGMzLjMxIDAgNiAyLjY5IDYgNnMtMi42OSA2LTYgNi02LTIuNjktNi02IDIuNjktNiA2LTZ6TTI0IDZjMy4zMSAwIDYgMi42OSA2IDZzLTIuNjkgNi02IDYtNi0yLjY5LTYtNiAyLjY5LTYgNi02ek00OCAzNmMzLjMxIDAgNiAyLjY5IDYgNnMtMi42OSA2LTYgNi02LTIuNjktNi02IDIuNjktNiA2LTZ6IiBzdHJva2U9InJnYmEoMTQ3LCA1MSwgMjM0LCAwLjEpIiBzdHJva2Utd2lkdGg9IjIiLz48L2c+PC9zdmc+')] opacity-30" />
<motion.div
className="absolute top-20 left-20 w-96 h-96 bg-purple-500/10 rounded-full blur-3xl"
animate={{
scale: [1, 1.2, 1],
opacity: [0.3, 0.5, 0.3],
}}
transition={{
duration: 8,
repeat: Infinity,
ease: "easeInOut"
}}
/>
<motion.div
className="absolute bottom-20 right-20 w-96 h-96 bg-cyan-500/10 rounded-full blur-3xl"
animate={{
scale: [1.2, 1, 1.2],
opacity: [0.5, 0.3, 0.5],
}}
transition={{
duration: 8,
repeat: Infinity,
ease: "easeInOut"
}}
/>
</div>
{/* Header */}
<header className="sticky top-0 z-50 glass border-b border-white/10">
<div className="max-w-7xl mx-auto px-6 py-4">
<div className="flex items-center justify-between">
<motion.div
className="flex items-center gap-3"
initial={{ opacity: 0, x: -20 }}
animate={{ opacity: 1, x: 0 }}
>
<div className="relative">
<div className="absolute inset-0 bg-gradient-to-r from-purple-500 to-cyan-500 rounded-xl blur-lg opacity-75" />
<div className="relative bg-gradient-to-br from-purple-600 to-cyan-500 p-2 rounded-xl">
<Sparkles className="w-6 h-6" />
</div>
</div>
<div>
<h1 className="text-2xl font-bold gradient-text">DeepSeek OCR</h1>
<p className="text-xs text-gray-400">Next-Gen Vision AI</p>
</div>
</motion.div>
</div>
</div>
</header>
{/* Main Content */}
<main className="max-w-7xl mx-auto px-6 py-8">
<div className="grid lg:grid-cols-2 gap-6">
{/* Left Panel - Upload & Controls */}
<motion.div
initial={{ opacity: 0, y: 20 }}
animate={{ opacity: 1, y: 0 }}
transition={{ delay: 0.1 }}
className="space-y-6"
>
{/* Mode Selector with integrated inputs */}
<ModeSelector
mode={mode}
onModeChange={setMode}
prompt={prompt}
onPromptChange={setPrompt}
findTerm={findTerm}
onFindTermChange={setFindTerm}
/>
{/* Image Upload */}
<ImageUpload
onImageSelect={handleImageSelect}
preview={imagePreview}
/>
{/* Action Button */}
<motion.button
onClick={handleSubmit}
disabled={!image || loading}
className={`w-full relative overflow-hidden rounded-2xl p-[2px] ${
!image || loading ? 'opacity-50 cursor-not-allowed' : ''
}`}
whileHover={!loading && image ? { scale: 1.02 } : {}}
whileTap={!loading && image ? { scale: 0.98 } : {}}
>
<div className="absolute inset-0 bg-gradient-to-r from-purple-600 via-pink-600 to-cyan-600 animate-gradient" />
<div className="relative bg-dark-100 px-8 py-4 rounded-2xl flex items-center justify-center gap-3">
{loading ? (
<>
<Loader2 className="w-5 h-5 animate-spin" />
<span className="font-semibold">Processing Magic...</span>
</>
) : (
<>
<Zap className="w-5 h-5" />
<span className="font-semibold">Analyze Image</span>
</>
)}
</div>
</motion.button>
{error && (
<motion.div
initial={{ opacity: 0, y: -10 }}
animate={{ opacity: 1, y: 0 }}
className="glass p-4 rounded-2xl border-red-500/50 bg-red-500/10"
>
<p className="text-sm text-red-400">{error}</p>
</motion.div>
)}
</motion.div>
{/* Right Panel - Results */}
<motion.div
initial={{ opacity: 0, y: 20 }}
animate={{ opacity: 1, y: 0 }}
transition={{ delay: 0.2 }}
>
<ResultPanel
result={result}
loading={loading}
imagePreview={imagePreview}
onCopy={handleCopy}
onDownload={handleDownload}
/>
</motion.div>
</div>
</main>
{/* Footer */}
<footer className="mt-20 border-t border-white/10 glass">
<div className="max-w-7xl mx-auto px-6 py-8 text-center">
<p className="text-sm text-gray-400">
Powered by <span className="gradient-text font-semibold">DeepSeek-OCR</span>
Built with <span className="text-pink-400"></span> using React + FastAPI
</p>
</div>
</footer>
</div>
)
}
export default App

35
frontend/src/App.tsx Normal file
View File

@@ -0,0 +1,35 @@
import { useState } from 'react'
import reactLogo from './assets/react.svg'
import viteLogo from '/vite.svg'
import './App.css'
// Vite starter component: two logo links, a click counter and a hint line.
function App() {
  const [clicks, setClicks] = useState(0)

  // Functional update so each click increments the latest stored value.
  const increment = () => setClicks((previous) => previous + 1)

  const logos = (
    <div>
      <a href="https://vite.dev" target="_blank">
        <img src={viteLogo} className="logo" alt="Vite logo" />
      </a>
      <a href="https://react.dev" target="_blank">
        <img src={reactLogo} className="logo react" alt="React logo" />
      </a>
    </div>
  )

  return (
    <>
      {logos}
      <h1>Vite + React</h1>
      <div className="card">
        <button onClick={increment}>
          count is {clicks}
        </button>
        <p>
          Edit <code>src/App.tsx</code> and save to test HMR
        </p>
      </div>
      <p className="read-the-docs">
        Click on the Vite and React logos to learn more
      </p>
    </>
  )
}
export default App

View File

@@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" class="iconify iconify--logos" width="35.93" height="32" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 228"><path fill="#00D8FF" d="M210.483 73.824a171.49 171.49 0 0 0-8.24-2.597c.465-1.9.893-3.777 1.273-5.621c6.238-30.281 2.16-54.676-11.769-62.708c-13.355-7.7-35.196.329-57.254 19.526a171.23 171.23 0 0 0-6.375 5.848a155.866 155.866 0 0 0-4.241-3.917C100.759 3.829 77.587-4.822 63.673 3.233C50.33 10.957 46.379 33.89 51.995 62.588a170.974 170.974 0 0 0 1.892 8.48c-3.28.932-6.445 1.924-9.474 2.98C17.309 83.498 0 98.307 0 113.668c0 15.865 18.582 31.778 46.812 41.427a145.52 145.52 0 0 0 6.921 2.165a167.467 167.467 0 0 0-2.01 9.138c-5.354 28.2-1.173 50.591 12.134 58.266c13.744 7.926 36.812-.22 59.273-19.855a145.567 145.567 0 0 0 5.342-4.923a168.064 168.064 0 0 0 6.92 6.314c21.758 18.722 43.246 26.282 56.54 18.586c13.731-7.949 18.194-32.003 12.4-61.268a145.016 145.016 0 0 0-1.535-6.842c1.62-.48 3.21-.974 4.76-1.488c29.348-9.723 48.443-25.443 48.443-41.52c0-15.417-17.868-30.326-45.517-39.844Zm-6.365 70.984c-1.4.463-2.836.91-4.3 1.345c-3.24-10.257-7.612-21.163-12.963-32.432c5.106-11 9.31-21.767 12.459-31.957c2.619.758 5.16 1.557 7.61 2.4c23.69 8.156 38.14 20.213 38.14 29.504c0 9.896-15.606 22.743-40.946 31.14Zm-10.514 20.834c2.562 12.94 2.927 24.64 1.23 33.787c-1.524 8.219-4.59 13.698-8.382 15.893c-8.067 4.67-25.32-1.4-43.927-17.412a156.726 156.726 0 0 1-6.437-5.87c7.214-7.889 14.423-17.06 21.459-27.246c12.376-1.098 24.068-2.894 34.671-5.345a134.17 134.17 0 0 1 1.386 6.193ZM87.276 214.515c-7.882 2.783-14.16 2.863-17.955.675c-8.075-4.657-11.432-22.636-6.853-46.752a156.923 156.923 0 0 1 1.869-8.499c10.486 2.32 22.093 3.988 34.498 4.994c7.084 9.967 14.501 19.128 21.976 27.15a134.668 134.668 0 0 1-4.877 4.492c-9.933 8.682-19.886 14.842-28.658 17.94ZM50.35 144.747c-12.483-4.267-22.792-9.812-29.858-15.863c-6.35-5.437-9.555-10.836-9.555-15.216c0-9.322 
13.897-21.212 37.076-29.293c2.813-.98 5.757-1.905 8.812-2.773c3.204 10.42 7.406 21.315 12.477 32.332c-5.137 11.18-9.399 22.249-12.634 32.792a134.718 134.718 0 0 1-6.318-1.979Zm12.378-84.26c-4.811-24.587-1.616-43.134 6.425-47.789c8.564-4.958 27.502 2.111 47.463 19.835a144.318 144.318 0 0 1 3.841 3.545c-7.438 7.987-14.787 17.08-21.808 26.988c-12.04 1.116-23.565 2.908-34.161 5.309a160.342 160.342 0 0 1-1.76-7.887Zm110.427 27.268a347.8 347.8 0 0 0-7.785-12.803c8.168 1.033 15.994 2.404 23.343 4.08c-2.206 7.072-4.956 14.465-8.193 22.045a381.151 381.151 0 0 0-7.365-13.322Zm-45.032-43.861c5.044 5.465 10.096 11.566 15.065 18.186a322.04 322.04 0 0 0-30.257-.006c4.974-6.559 10.069-12.652 15.192-18.18ZM82.802 87.83a323.167 323.167 0 0 0-7.227 13.238c-3.184-7.553-5.909-14.98-8.134-22.152c7.304-1.634 15.093-2.97 23.209-3.984a321.524 321.524 0 0 0-7.848 12.897Zm8.081 65.352c-8.385-.936-16.291-2.203-23.593-3.793c2.26-7.3 5.045-14.885 8.298-22.6a321.187 321.187 0 0 0 7.257 13.246c2.594 4.48 5.28 8.868 8.038 13.147Zm37.542 31.03c-5.184-5.592-10.354-11.779-15.403-18.433c4.902.192 9.899.29 14.978.29c5.218 0 10.376-.117 15.453-.343c-4.985 6.774-10.018 12.97-15.028 18.486Zm52.198-57.817c3.422 7.8 6.306 15.345 8.596 22.52c-7.422 1.694-15.436 3.058-23.88 4.071a382.417 382.417 0 0 0 7.859-13.026a347.403 347.403 0 0 0 7.425-13.565Zm-16.898 8.101a358.557 358.557 0 0 1-12.281 19.815a329.4 329.4 0 0 1-23.444.823c-7.967 0-15.716-.248-23.178-.732a310.202 310.202 0 0 1-12.513-19.846h.001a307.41 307.41 0 0 1-10.923-20.627a310.278 310.278 0 0 1 10.89-20.637l-.001.001a307.318 307.318 0 0 1 12.413-19.761c7.613-.576 15.42-.876 23.31-.876H128c7.926 0 15.743.303 23.354.883a329.357 329.357 0 0 1 12.335 19.695a358.489 358.489 0 0 1 11.036 20.54a329.472 329.472 0 0 1-11 20.722Zm22.56-122.124c8.572 4.944 11.906 24.881 6.52 51.026c-.344 1.668-.73 3.367-1.15 5.09c-10.622-2.452-22.155-4.275-34.23-5.408c-7.034-10.017-14.323-19.124-21.64-27.008a160.789 160.789 0 0 1 5.888-5.4c18.9-16.447 36.564-22.941 
44.612-18.3ZM128 90.808c12.625 0 22.86 10.235 22.86 22.86s-10.235 22.86-22.86 22.86s-22.86-10.235-22.86-22.86s10.235-22.86 22.86-22.86Z"></path></svg>

After

Width:  |  Height:  |  Size: 4.0 KiB

View File

@@ -0,0 +1,83 @@
import { motion } from 'framer-motion'
import { Sliders } from 'lucide-react'
export default function AdvancedSettings({ settings, onSettingsChange, includeCaption, onIncludeCaptionChange }) {
const handleChange = (key, value) => {
onSettingsChange({
...settings,
[key]: value
})
}
return (
<motion.div
initial={{ opacity: 0, height: 0 }}
animate={{ opacity: 1, height: 'auto' }}
exit={{ opacity: 0, height: 0 }}
className="glass p-6 rounded-2xl space-y-4"
>
<div className="flex items-center gap-2">
<Sliders className="w-5 h-5 text-purple-400" />
<h3 className="font-semibold text-gray-200">Advanced Settings</h3>
</div>
<div className="grid grid-cols-2 gap-4">
<div className="space-y-2">
<label className="text-xs text-gray-400">Base Size</label>
<input
type="number"
value={settings.base_size}
onChange={(e) => handleChange('base_size', parseInt(e.target.value))}
className="w-full bg-white/5 border border-white/10 rounded-xl px-3 py-2 text-sm focus:outline-none focus:border-purple-500"
/>
</div>
<div className="space-y-2">
<label className="text-xs text-gray-400">Image Size</label>
<input
type="number"
value={settings.image_size}
onChange={(e) => handleChange('image_size', parseInt(e.target.value))}
className="w-full bg-white/5 border border-white/10 rounded-xl px-3 py-2 text-sm focus:outline-none focus:border-purple-500"
/>
</div>
<div className="space-y-2">
<label className="text-xs text-gray-400">Crop Mode</label>
<select
value={settings.crop_mode ? 'true' : 'false'}
onChange={(e) => handleChange('crop_mode', e.target.value === 'true')}
className="w-full bg-white/5 border border-white/10 rounded-xl px-3 py-2 text-sm focus:outline-none focus:border-purple-500"
>
<option value="true">Enabled</option>
<option value="false">Disabled</option>
</select>
</div>
<div className="space-y-2">
<label className="text-xs text-gray-400">Test Compress</label>
<select
value={settings.test_compress ? 'true' : 'false'}
onChange={(e) => handleChange('test_compress', e.target.value === 'true')}
className="w-full bg-white/5 border border-white/10 rounded-xl px-3 py-2 text-sm focus:outline-none focus:border-purple-500"
>
<option value="false">Disabled</option>
<option value="true">Enabled</option>
</select>
</div>
</div>
<div className="pt-2 border-t border-white/10">
<label className="flex items-center gap-2 cursor-pointer">
<input
type="checkbox"
checked={includeCaption}
onChange={(e) => onIncludeCaptionChange(e.target.checked)}
className="accent-purple-500"
/>
<span className="text-sm text-gray-300">Include image caption</span>
</label>
</div>
</motion.div>
)
}

View File

@@ -0,0 +1,99 @@
import { useCallback } from 'react'
import { motion } from 'framer-motion'
import { useDropzone } from 'react-dropzone'
import { Upload, Image as ImageIcon, X } from 'lucide-react'
export default function ImageUpload({ onImageSelect, preview }) {
const onDrop = useCallback((acceptedFiles) => {
if (acceptedFiles?.[0]) {
onImageSelect(acceptedFiles[0])
}
}, [onImageSelect])
const { getRootProps, getInputProps, isDragActive } = useDropzone({
onDrop,
accept: {
'image/*': ['.png', '.jpg', '.jpeg', '.webp', '.gif', '.bmp']
},
multiple: false
})
return (
<div className="glass p-6 rounded-2xl space-y-4">
<div className="flex items-center justify-between">
<h3 className="font-semibold text-gray-200">Upload Image</h3>
<ImageIcon className="w-5 h-5 text-purple-400" />
</div>
{!preview ? (
<motion.div
{...getRootProps()}
className={`
relative border-2 border-dashed rounded-2xl p-12 text-center cursor-pointer
transition-all duration-300
${isDragActive
? 'border-purple-500 bg-purple-500/10'
: 'border-white/20 bg-white/5 hover:border-white/40 hover:bg-white/10'
}
`}
whileHover={{ scale: 1.02 }}
whileTap={{ scale: 0.98 }}
>
<input {...getInputProps()} />
<div className="space-y-4">
<motion.div
animate={{
y: isDragActive ? -10 : 0,
scale: isDragActive ? 1.1 : 1
}}
className="flex justify-center"
>
<div className="relative">
<div className="absolute inset-0 bg-gradient-to-r from-purple-500 to-cyan-500 rounded-2xl blur-xl opacity-50" />
<div className="relative bg-gradient-to-br from-purple-600 to-cyan-500 p-4 rounded-2xl">
<Upload className="w-8 h-8" />
</div>
</div>
</motion.div>
<div>
<p className="text-lg font-medium text-gray-200">
{isDragActive ? 'Drop it like it\'s hot! 🔥' : 'Drag & drop your image'}
</p>
<p className="text-sm text-gray-400 mt-1">
or click to browse PNG, JPG, WEBP up to 10MB
</p>
</div>
</div>
</motion.div>
) : (
<motion.div
initial={{ opacity: 0, scale: 0.9 }}
animate={{ opacity: 1, scale: 1 }}
className="relative group"
>
<img
src={preview}
alt="Preview"
className="w-full rounded-2xl border border-white/10"
/>
<motion.button
onClick={() => onImageSelect(null)}
className="absolute top-3 right-3 bg-red-500/80 backdrop-blur-sm p-2 rounded-full opacity-0 group-hover:opacity-100 transition-opacity"
whileHover={{ scale: 1.1 }}
whileTap={{ scale: 0.9 }}
>
<X className="w-4 h-4" />
</motion.button>
{/* Grounding overlay canvas */}
<canvas
id="preview-canvas"
className="absolute top-0 left-0 w-full h-full pointer-events-none"
/>
</motion.div>
)}
</div>
)
}

View File

@@ -0,0 +1,105 @@
import { motion } from 'framer-motion'
import { FileText, Eye, Search, Wand2 } from 'lucide-react'
// Catalogue of selectable OCR modes, in display order.
// `needsInput` is either `false` (no extra field) or the name of the extra
// field the mode requires: 'findTerm' or 'prompt'.
const modes = [
  {
    id: 'plain_ocr',
    name: 'Plain OCR',
    icon: FileText,
    color: 'from-blue-500 to-cyan-500',
    desc: 'Extract raw text',
    needsInput: false,
  },
  {
    id: 'describe',
    name: 'Describe',
    icon: Eye,
    color: 'from-violet-500 to-purple-500',
    desc: 'Image description',
    needsInput: false,
  },
  {
    id: 'find_ref',
    name: 'Find',
    icon: Search,
    color: 'from-yellow-500 to-orange-500',
    desc: 'Locate specific terms',
    needsInput: 'findTerm',
  },
  {
    id: 'freeform',
    name: 'Freeform',
    icon: Wand2,
    color: 'from-fuchsia-500 to-pink-500',
    desc: 'Custom prompt',
    needsInput: 'prompt',
  },
]
export default function ModeSelector({
mode,
onModeChange,
prompt,
onPromptChange,
findTerm,
onFindTermChange
}) {
const selectedMode = modes.find(m => m.id === mode)
const needsInput = selectedMode?.needsInput
return (
<div className="glass p-4 rounded-2xl space-y-3">
<h3 className="text-sm font-semibold text-gray-200">Mode</h3>
<div className="grid grid-cols-4 gap-2">
{modes.map((m) => {
const Icon = m.icon
const isSelected = mode === m.id
return (
<motion.button
key={m.id}
onClick={() => onModeChange(m.id)}
className={`
relative p-2 rounded-xl text-center transition-all
${isSelected
? 'glass border-white/20 shadow-lg'
: 'bg-white/5 border border-white/10 hover:border-white/20'
}
`}
whileHover={{ scale: 1.05 }}
whileTap={{ scale: 0.95 }}
>
{isSelected && (
<motion.div
layoutId="selected-mode"
className={`absolute inset-0 bg-gradient-to-br ${m.color} opacity-10 rounded-xl`}
transition={{ type: "spring", bounce: 0.2, duration: 0.6 }}
/>
)}
<div className="relative space-y-1">
<div className={`
w-8 h-8 mx-auto rounded-lg flex items-center justify-center
${isSelected
? `bg-gradient-to-br ${m.color}`
: 'bg-white/10'
}
`}>
<Icon className="w-4 h-4" />
</div>
<p className={`text-xs font-medium ${isSelected ? 'text-white' : 'text-gray-300'}`}>
{m.name}
</p>
</div>
</motion.button>
)
})}
</div>
{needsInput === 'findTerm' && (
<motion.div
initial={{ opacity: 0, height: 0 }}
animate={{ opacity: 1, height: 'auto' }}
exit={{ opacity: 0, height: 0 }}
>
<input
type="text"
value={findTerm}
onChange={(e) => onFindTermChange(e.target.value)}
placeholder="Enter term to find (e.g., Total, Invoice #)"
className="w-full bg-white/5 border border-white/10 rounded-xl px-3 py-2 text-sm focus:outline-none focus:border-purple-500 transition-colors"
/>
</motion.div>
)}
{needsInput === 'prompt' && (
<motion.div
initial={{ opacity: 0, height: 0 }}
animate={{ opacity: 1, height: 'auto' }}
exit={{ opacity: 0, height: 0 }}
>
<textarea
value={prompt}
onChange={(e) => onPromptChange(e.target.value)}
placeholder="Enter your custom prompt..."
className="w-full bg-white/5 border border-white/10 rounded-xl px-3 py-2 text-sm focus:outline-none focus:border-purple-500 transition-colors resize-none"
rows={2}
/>
</motion.div>
)}
</div>
)
}

View File

@@ -0,0 +1,302 @@
import { useEffect, useRef, useState, useCallback } from 'react'
import { motion, AnimatePresence } from 'framer-motion'
import { Copy, Download, Sparkles, Loader2, CheckCircle2, ChevronDown } from 'lucide-react'
import ReactMarkdown from 'react-markdown'
export default function ResultPanel({ result, loading, imagePreview, onCopy, onDownload }) {
const canvasRef = useRef(null)
const imgRef = useRef(null)
const [showAdvanced, setShowAdvanced] = useState(false)
const [imageLoaded, setImageLoaded] = useState(false)
// Check if text looks like markdown
const isMarkdown = result?.text && (
result.text.includes('##') ||
result.text.includes('**') ||
result.text.includes('```') ||
result.text.includes('- ') ||
result.text.includes('|')
)
// Draw boxes function
const drawBoxes = useCallback(() => {
if (!result?.boxes?.length || !canvasRef.current || !imgRef.current) {
console.log('❌ Cannot draw - missing:', {
hasBoxes: !!result?.boxes?.length,
hasCanvas: !!canvasRef.current,
hasImgRef: !!imgRef.current
})
return
}
console.log('🎨 Drawing boxes:', result.boxes)
const img = imgRef.current
const canvas = canvasRef.current
const ctx = canvas.getContext('2d')
console.log('📐 Image dimensions:', {
displayWidth: img.offsetWidth,
displayHeight: img.offsetHeight,
naturalWidth: img.naturalWidth,
naturalHeight: img.naturalHeight,
imageDims: result.image_dims
})
// Set canvas size to match displayed image
canvas.width = img.offsetWidth
canvas.height = img.offsetHeight
ctx.clearRect(0, 0, canvas.width, canvas.height)
// Calculate scale factors
const scaleX = img.offsetWidth / (result.image_dims?.w || img.naturalWidth)
const scaleY = img.offsetHeight / (result.image_dims?.h || img.naturalHeight)
console.log('📏 Scale factors:', { scaleX, scaleY })
// Draw boxes
result.boxes.forEach((box, idx) => {
const [x1, y1, x2, y2] = box.box
const colors = [
'#00ff00', '#00ffff', '#ff00ff', '#ffff00', '#ff0066'
]
const color = colors[idx % colors.length]
// Scale coordinates
const sx = x1 * scaleX
const sy = y1 * scaleY
const sw = (x2 - x1) * scaleX
const sh = (y2 - y1) * scaleY
console.log(`📦 Box ${idx} (${box.label}):`, {
original: [x1, y1, x2, y2],
scaled: [sx, sy, sx + sw, sy + sh],
dimensions: { width: sw, height: sh }
})
// Draw semi-transparent fill
ctx.fillStyle = color + '33'
ctx.fillRect(sx, sy, sw, sh)
// Draw thick neon border
ctx.strokeStyle = color
ctx.lineWidth = 4
ctx.shadowColor = color
ctx.shadowBlur = 10
ctx.strokeRect(sx, sy, sw, sh)
ctx.shadowBlur = 0
// Label background
if (box.label) {
ctx.font = 'bold 14px Inter'
const metrics = ctx.measureText(box.label)
const padding = 8
const labelHeight = 24
ctx.fillStyle = color
ctx.fillRect(sx, sy - labelHeight, metrics.width + padding * 2, labelHeight)
// Label text
ctx.fillStyle = '#000'
ctx.fillText(box.label, sx + padding, sy - 7)
}
})
console.log('✅ Finished drawing', result.boxes.length, 'boxes')
}, [result])
// Trigger drawing when image loads
useEffect(() => {
if (imageLoaded && result?.boxes?.length) {
console.log('🚀 Image loaded, drawing boxes now')
drawBoxes()
}
}, [imageLoaded, result, drawBoxes])
// Reset imageLoaded when result changes
useEffect(() => {
setImageLoaded(false)
}, [result])
// Redraw on window resize
useEffect(() => {
if (!imageLoaded || !result?.boxes?.length) return
const handleResize = () => {
console.log('📐 Window resized, redrawing')
drawBoxes()
}
window.addEventListener('resize', handleResize)
return () => window.removeEventListener('resize', handleResize)
}, [imageLoaded, result, drawBoxes])
return (
<div className="glass p-6 rounded-2xl space-y-4 h-full">
<div className="flex items-center justify-between">
<div className="flex items-center gap-2">
<Sparkles className="w-5 h-5 text-purple-400" />
<h3 className="font-semibold text-gray-200">Results</h3>
</div>
{result && (
<div className="flex gap-2">
<motion.button
onClick={onCopy}
className="glass glass-hover p-2 rounded-lg"
whileHover={{ scale: 1.05 }}
whileTap={{ scale: 0.95 }}
title="Copy to clipboard"
>
<Copy className="w-4 h-4" />
</motion.button>
<motion.button
onClick={onDownload}
className="glass glass-hover p-2 rounded-lg"
whileHover={{ scale: 1.05 }}
whileTap={{ scale: 0.95 }}
title="Download"
>
<Download className="w-4 h-4" />
</motion.button>
</div>
)}
</div>
<AnimatePresence mode="wait">
{loading ? (
<motion.div
key="loading"
initial={{ opacity: 0 }}
animate={{ opacity: 1 }}
exit={{ opacity: 0 }}
className="flex flex-col items-center justify-center py-20 space-y-4"
>
<div className="relative">
<motion.div
animate={{ rotate: 360 }}
transition={{ duration: 2, repeat: Infinity, ease: "linear" }}
className="w-16 h-16 border-4 border-purple-500/20 border-t-purple-500 rounded-full"
/>
<Loader2 className="w-8 h-8 absolute top-1/2 left-1/2 transform -translate-x-1/2 -translate-y-1/2 text-purple-400" />
</div>
<p className="text-sm text-gray-400 animate-pulse">
Processing your image with AI magic...
</p>
</motion.div>
) : result ? (
<motion.div
key="result"
initial={{ opacity: 0, y: 20 }}
animate={{ opacity: 1, y: 0 }}
exit={{ opacity: 0, y: -20 }}
className="space-y-4"
>
{/* Preview with boxes */}
{imagePreview && result.boxes && result.boxes.length > 0 && (
<div className="relative rounded-xl overflow-hidden border border-white/10 bg-black">
<img
ref={imgRef}
src={imagePreview}
alt="Result"
className="w-full block"
onLoad={() => {
console.log('🖼️ Image loaded, triggering draw')
setImageLoaded(true)
}}
/>
<canvas
ref={canvasRef}
className="absolute top-0 left-0 w-full h-full pointer-events-none"
style={{ display: 'block' }}
/>
</div>
)}
{/* Text result */}
<div className="bg-white/5 border border-white/10 rounded-xl p-4 max-h-96 overflow-y-auto">
{isMarkdown ? (
<div className="prose prose-invert prose-sm max-w-none">
<ReactMarkdown>{result.text}</ReactMarkdown>
</div>
) : (
<pre className="text-sm text-gray-200 whitespace-pre-wrap font-mono">
{result.text}
</pre>
)}
</div>
{/* Advanced Settings Dropdown */}
<details className="glass rounded-xl overflow-hidden">
<summary className="px-4 py-3 cursor-pointer flex items-center justify-between hover:bg-white/5 transition-colors">
<span className="text-sm font-medium text-gray-300">Advanced Settings & Metadata</span>
<ChevronDown className="w-4 h-4 text-gray-400" />
</summary>
<div className="px-4 py-3 border-t border-white/10 space-y-3">
{result.metadata && (
<div>
<p className="text-xs text-gray-400 mb-2">Processing Metadata</p>
<pre className="text-xs text-gray-500 whitespace-pre-wrap">
{JSON.stringify(result.metadata, null, 2)}
</pre>
</div>
)}
{result.boxes?.length > 0 && (
<div>
<p className="text-xs text-gray-400 mb-2">Detected Regions ({result.boxes.length})</p>
<div className="space-y-1">
{result.boxes.map((box, idx) => (
<div key={idx} className="text-xs text-gray-500">
{box.label}: [{box.box.map(n => Math.round(n)).join(', ')}]
</div>
))}
</div>
</div>
)}
</div>
</details>
{/* Success indicator */}
<motion.div
initial={{ scale: 0.9, opacity: 0 }}
animate={{ scale: 1, opacity: 1 }}
className="flex items-center justify-center gap-2 text-green-400"
>
<CheckCircle2 className="w-5 h-5" />
<span className="text-sm font-medium">Processing complete!</span>
</motion.div>
</motion.div>
) : (
<motion.div
key="empty"
initial={{ opacity: 0 }}
animate={{ opacity: 1 }}
exit={{ opacity: 0 }}
className="flex flex-col items-center justify-center py-20 space-y-4"
>
<div className="relative">
<motion.div
animate={{
scale: [1, 1.2, 1],
opacity: [0.5, 0.8, 0.5]
}}
transition={{ duration: 3, repeat: Infinity }}
className="w-20 h-20 bg-purple-500/20 rounded-full blur-xl"
/>
<Sparkles className="w-10 h-10 absolute top-1/2 left-1/2 transform -translate-x-1/2 -translate-y-1/2 text-purple-400" />
</div>
<div className="text-center">
<p className="text-lg font-medium text-gray-300">
Ready to process
</p>
<p className="text-sm text-gray-500 mt-1">
Upload an image and hit analyze to see the magic!
</p>
</div>
</motion.div>
)}
</AnimatePresence>
</div>
)
}

50
frontend/src/index.css Normal file
View File

@@ -0,0 +1,50 @@
/* Tailwind entry points: base, components and utilities layers. */
@tailwind base;
@tailwind components;
@tailwind utilities;
/* Base layer: page-wide background, text colour and font stack.
   `dark-200` is a custom colour added under theme.extend.colors.dark
   in the Tailwind config. */
@layer base {
body {
@apply bg-dark-200 text-white;
font-family: 'Inter', system-ui, -apple-system, sans-serif;
}
}
/* Components layer: named bundles of utilities reused across the UI. */
@layer components {
/* Translucent white surface with a backdrop blur and a faint border. */
.glass {
@apply bg-white/5 backdrop-blur-xl border border-white/10;
}
/* Hover feedback: slightly brighter fill and border. */
.glass-hover {
@apply transition-all hover:bg-white/10 hover:border-white/20;
}
/* Gradient-filled text: the background gradient is clipped to the glyphs
   and the text colour itself is made transparent. */
.gradient-text {
@apply bg-clip-text text-transparent bg-gradient-to-r from-cyan-400 via-purple-500 to-pink-500;
}
/* Gradient background with 2px padding; presumably wraps an opaque child so
   only the 2px rim shows as a border (confirm against usage). */
.gradient-border {
@apply relative bg-gradient-to-r from-cyan-500 via-purple-500 to-pink-500 p-[2px] rounded-2xl;
}
/* Low-opacity (20%) diagonal gradient wash. */
.gradient-bg {
@apply bg-gradient-to-br from-purple-900/20 via-pink-900/20 to-cyan-900/20;
}
}
/* Custom scrollbar (uses the ::-webkit-scrollbar pseudo-elements only) */
::-webkit-scrollbar {
width: 8px;
height: 8px;
}
::-webkit-scrollbar-track {
@apply bg-dark-100;
}
::-webkit-scrollbar-thumb {
@apply bg-purple-500/30 rounded-full;
}
::-webkit-scrollbar-thumb:hover {
@apply bg-purple-500/50;
}

10
frontend/src/main.jsx Normal file
View File

@@ -0,0 +1,10 @@
import React from 'react'
import ReactDOM from 'react-dom/client'
import App from './App.jsx'
import './index.css'
ReactDOM.createRoot(document.getElementById('root')).render(
<React.StrictMode>
<App />
</React.StrictMode>,
)

10
frontend/src/main.tsx Normal file
View File

@@ -0,0 +1,10 @@
import { StrictMode } from 'react'
import { createRoot } from 'react-dom/client'
import './index.css'
import App from './App.tsx'
// Mount <App /> into the #root element, wrapped in StrictMode. The non-null
// assertion tells TypeScript the element exists; it is not checked at runtime.
const rootElement = document.getElementById('root')!

createRoot(rootElement).render(
  <StrictMode>
    <App />
  </StrictMode>,
)

View File

@@ -0,0 +1,48 @@
// Custom near-black shades, exposed as the `dark-50` / `dark-100` /
// `dark-200` colour utilities.
const darkPalette = {
  50: '#18181b',
  100: '#0f0f12',
  200: '#09090b',
}

// Named animations; each refers to the keyframes of the same name below.
const animation = {
  'gradient': 'gradient 8s linear infinite',
  'float': 'float 6s ease-in-out infinite',
  'glow': 'glow 2s ease-in-out infinite alternate',
}

// One stop of the `gradient` keyframes: the oversized background is kept at
// a constant size and only its position moves.
const gradientStop = (position) => ({
  'background-size': '200% 200%',
  'background-position': position,
})

const keyframes = {
  gradient: {
    '0%, 100%': gradientStop('left center'),
    '50%': gradientStop('right center'),
  },
  float: {
    '0%, 100%': { transform: 'translateY(0px)' },
    '50%': { transform: 'translateY(-20px)' },
  },
  glow: {
    'from': {
      'text-shadow': '0 0 10px #fff, 0 0 20px #fff, 0 0 30px #e60073, 0 0 40px #e60073',
    },
    'to': {
      'text-shadow': '0 0 20px #fff, 0 0 30px #ff4da6, 0 0 40px #ff4da6, 0 0 50px #ff4da6',
    },
  },
}

/** @type {import('tailwindcss').Config} */
export default {
  // Files scanned for class names.
  content: [
    "./index.html",
    "./src/**/*.{js,ts,jsx,tsx}",
  ],
  theme: {
    extend: {
      colors: {
        dark: darkPalette,
      },
      animation,
      keyframes,
    },
  },
  plugins: [],
}

View File

@@ -0,0 +1,28 @@
{
"compilerOptions": {
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
"target": "ES2022",
"useDefineForClassFields": true,
"lib": ["ES2022", "DOM", "DOM.Iterable"],
"module": "ESNext",
"types": ["vite/client"],
"skipLibCheck": true,
/* Bundler mode */
"moduleResolution": "bundler",
"allowImportingTsExtensions": true,
"verbatimModuleSyntax": true,
"moduleDetection": "force",
"noEmit": true,
"jsx": "react-jsx",
/* Linting */
"strict": true,
"noUnusedLocals": true,
"noUnusedParameters": true,
"erasableSyntaxOnly": true,
"noFallthroughCasesInSwitch": true,
"noUncheckedSideEffectImports": true
},
"include": ["src"]
}

7
frontend/tsconfig.json Normal file
View File

@@ -0,0 +1,7 @@
{
"files": [],
"references": [
{ "path": "./tsconfig.app.json" },
{ "path": "./tsconfig.node.json" }
]
}

View File

@@ -0,0 +1,26 @@
{
"compilerOptions": {
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo",
"target": "ES2023",
"lib": ["ES2023"],
"module": "ESNext",
"types": ["node"],
"skipLibCheck": true,
/* Bundler mode */
"moduleResolution": "bundler",
"allowImportingTsExtensions": true,
"verbatimModuleSyntax": true,
"moduleDetection": "force",
"noEmit": true,
/* Linting */
"strict": true,
"noUnusedLocals": true,
"noUnusedParameters": true,
"erasableSyntaxOnly": true,
"noFallthroughCasesInSwitch": true,
"noUncheckedSideEffectImports": true
},
"include": ["vite.config.ts"]
}

20
frontend/vite.config.js Normal file
View File

@@ -0,0 +1,20 @@
import { defineConfig } from 'vite'
import react from '@vitejs/plugin-react'
// Host and port shared by the dev server and the preview server.
const HOST = '0.0.0.0'
const PORT = 3000

// Requests under /api are forwarded to the backend.
// NOTE(review): `backend` is presumably the Docker Compose service name — confirm.
const apiProxy = {
  '/api': {
    target: 'http://backend:8000',
    changeOrigin: true,
  }
}

export default defineConfig({
  plugins: [react()],
  server: {
    host: HOST,
    port: PORT,
    proxy: apiProxy
  },
  preview: {
    host: HOST,
    port: PORT
  }
})

7
frontend/vite.config.ts Normal file
View File

@@ -0,0 +1,7 @@
import { defineConfig } from 'vite'
import react from '@vitejs/plugin-react'
// Minimal Vite configuration: only the React plugin is enabled.
// Reference: https://vite.dev/config/
const plugins = [react()]

export default defineConfig({ plugins })