Compare commits
26 Commits
3dac0741b1
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
02185bef46 | ||
|
|
04bbbebd5a | ||
|
|
48f958de6c | ||
|
|
91c134faa7 | ||
|
|
38ac36b18e | ||
|
|
ab19725e0b | ||
|
|
a511db78cb | ||
|
|
07b2f2b6bc | ||
|
|
ae0ac3af59 | ||
|
|
4ab87d2e6f | ||
|
|
cc5ce0c6be | ||
|
|
02e3099388 | ||
|
|
dc5a1a4ff5 | ||
|
|
5ea18d76d6 | ||
|
|
1d15b5f0c1 | ||
|
|
cb704a2f27 | ||
|
|
3ca40a2255 | ||
|
|
6f86f872a9 | ||
|
|
7381ecd12e | ||
|
|
247a5e4b0e | ||
|
|
9356ba6d1b | ||
|
|
da7957d7d5 | ||
|
|
fd747e6c23 | ||
|
|
68147eb97c | ||
|
|
ba313ee808 | ||
|
|
bd19e09630 |
23
.env
Normal file
23
.env
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
# DeepSeek OCR Application Configuration
|
||||||
|
|
||||||
|
# API Configuration
|
||||||
|
API_HOST=0.0.0.0
|
||||||
|
API_PORT=8000
|
||||||
|
|
||||||
|
# Frontend Configuration
|
||||||
|
FRONTEND_PORT=3000
|
||||||
|
|
||||||
|
# Model Configuration
|
||||||
|
MODEL_NAME=deepseek-ai/DeepSeek-OCR
|
||||||
|
HF_HOME=/models
|
||||||
|
|
||||||
|
# CORS Configuration (comma-separated origins, defaults to http://localhost:3000)
|
||||||
|
CORS_ORIGINS=http://localhost:3000
|
||||||
|
|
||||||
|
# Upload Configuration
|
||||||
|
MAX_UPLOAD_SIZE_MB=100
|
||||||
|
|
||||||
|
# Processing Configuration
|
||||||
|
BASE_SIZE=1024
|
||||||
|
IMAGE_SIZE=640
|
||||||
|
CROP_MODE=true
|
||||||
22
.env.example
22
.env.example
@@ -11,12 +11,34 @@ FRONTEND_PORT=3000
|
|||||||
MODEL_NAME=deepseek-ai/DeepSeek-OCR
|
MODEL_NAME=deepseek-ai/DeepSeek-OCR
|
||||||
HF_HOME=/models
|
HF_HOME=/models
|
||||||
|
|
||||||
|
# OCR model selection
|
||||||
|
# Register the local DeepSeek-OCR model (set to false for an Ollama-only deployment)
|
||||||
|
ENABLE_DEEPSEEK_LOCAL=true
|
||||||
|
# External Ollama host the backend should call (no trailing slash)
|
||||||
|
OLLAMA_BASE_URL=http://host.docker.internal:11434
|
||||||
|
# Comma-separated Ollama vision model tags to surface in the UI.
|
||||||
|
# Pull these on the Ollama host first, e.g. `ollama pull glm-ocr`.
|
||||||
|
OLLAMA_MODELS=glm-ocr,llama3.2-vision,minicpm-v,qwen2.5vl
|
||||||
|
# Default model id selected in the UI (deepseek-local or ollama:<tag>)
|
||||||
|
DEFAULT_OCR_MODEL=deepseek-local
|
||||||
|
# Per-request timeout (seconds) for Ollama calls
|
||||||
|
OLLAMA_TIMEOUT=300
|
||||||
|
|
||||||
# CORS Configuration (comma-separated origins, defaults to http://localhost:3000)
|
# CORS Configuration (comma-separated origins, defaults to http://localhost:3000)
|
||||||
CORS_ORIGINS=http://localhost:3000
|
CORS_ORIGINS=http://localhost:3000
|
||||||
|
|
||||||
# Upload Configuration
|
# Upload Configuration
|
||||||
MAX_UPLOAD_SIZE_MB=100
|
MAX_UPLOAD_SIZE_MB=100
|
||||||
|
|
||||||
|
# PostgreSQL Configuration
|
||||||
|
POSTGRES_USER=ocr_user
|
||||||
|
POSTGRES_PASSWORD=ocr_password
|
||||||
|
POSTGRES_DB=ocr_db
|
||||||
|
DATABASE_URL=postgresql://ocr_user:ocr_password@postgres:5432/ocr_db
|
||||||
|
|
||||||
|
# OCR Image Storage (host path mounted into container)
|
||||||
|
OCR_IMAGES_DIR=/data/ocr_images
|
||||||
|
|
||||||
# Processing Configuration
|
# Processing Configuration
|
||||||
BASE_SIZE=1024
|
BASE_SIZE=1024
|
||||||
IMAGE_SIZE=640
|
IMAGE_SIZE=640
|
||||||
|
|||||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -46,7 +46,7 @@ yarn.lock
|
|||||||
pnpm-lock.yaml
|
pnpm-lock.yaml
|
||||||
|
|
||||||
# Environment
|
# Environment
|
||||||
.env
|
#.env
|
||||||
.env.local
|
.env.local
|
||||||
.env.development.local
|
.env.development.local
|
||||||
.env.test.local
|
.env.test.local
|
||||||
|
|||||||
45
README.md
45
README.md
@@ -172,6 +172,13 @@ FRONTEND_PORT=3000
|
|||||||
MODEL_NAME=deepseek-ai/DeepSeek-OCR
|
MODEL_NAME=deepseek-ai/DeepSeek-OCR
|
||||||
HF_HOME=/models
|
HF_HOME=/models
|
||||||
|
|
||||||
|
# OCR model selection (DeepSeek + Ollama)
|
||||||
|
ENABLE_DEEPSEEK_LOCAL=true # register the local GPU model
|
||||||
|
OLLAMA_BASE_URL=http://host.docker.internal:11434 # external Ollama host
|
||||||
|
OLLAMA_MODELS=glm-ocr,llama3.2-vision,minicpm-v,qwen2.5vl
|
||||||
|
DEFAULT_OCR_MODEL=deepseek-local # deepseek-local or ollama:<tag>
|
||||||
|
OLLAMA_TIMEOUT=300 # per-request timeout (seconds)
|
||||||
|
|
||||||
# Upload Configuration
|
# Upload Configuration
|
||||||
MAX_UPLOAD_SIZE_MB=100 # Maximum file upload size
|
MAX_UPLOAD_SIZE_MB=100 # Maximum file upload size
|
||||||
|
|
||||||
@@ -186,13 +193,47 @@ CROP_MODE=true # Enable dynamic cropping for large images
|
|||||||
- `API_HOST`: Backend API host (default: 0.0.0.0)
|
- `API_HOST`: Backend API host (default: 0.0.0.0)
|
||||||
- `API_PORT`: Backend API port (default: 8000)
|
- `API_PORT`: Backend API port (default: 8000)
|
||||||
- `FRONTEND_PORT`: Frontend port (default: 3000)
|
- `FRONTEND_PORT`: Frontend port (default: 3000)
|
||||||
- `MODEL_NAME`: HuggingFace model identifier
|
- `MODEL_NAME`: HuggingFace model identifier for the local DeepSeek-OCR model
|
||||||
- `HF_HOME`: Model cache directory
|
- `HF_HOME`: Model cache directory
|
||||||
|
- `ENABLE_DEEPSEEK_LOCAL`: Register the local DeepSeek-OCR model (set `false` for an Ollama-only deployment with no GPU model loaded)
|
||||||
|
- `OLLAMA_BASE_URL`: URL of an external Ollama server the backend calls for non-DeepSeek models
|
||||||
|
- `OLLAMA_MODELS`: Comma-separated Ollama vision model tags to expose in the UI (pull them on the Ollama host first, e.g. `ollama pull glm-ocr`)
|
||||||
|
- `DEFAULT_OCR_MODEL`: Model id selected by default (`deepseek-local` or `ollama:<tag>`)
|
||||||
|
- `OLLAMA_TIMEOUT`: Per-request timeout in seconds for Ollama calls
|
||||||
- `MAX_UPLOAD_SIZE_MB`: Maximum file upload size in megabytes
|
- `MAX_UPLOAD_SIZE_MB`: Maximum file upload size in megabytes
|
||||||
- `BASE_SIZE`: Base image processing size (affects memory usage)
|
- `BASE_SIZE`: Base image processing size (affects memory usage)
|
||||||
- `IMAGE_SIZE`: Tile size for dynamic cropping
|
- `IMAGE_SIZE`: Tile size for dynamic cropping
|
||||||
- `CROP_MODE`: Enable/disable dynamic image cropping
|
- `CROP_MODE`: Enable/disable dynamic image cropping
|
||||||
|
|
||||||
|
### Choosing an OCR Model
|
||||||
|
|
||||||
|
The **Model** selector (next to the Mode selector) chooses which backend runs the OCR:
|
||||||
|
|
||||||
|
- **DeepSeek-OCR (local GPU)** — the default. Loaded lazily on first use. Supports
|
||||||
|
every mode including grounding/bounding-box modes (Find), plus the Advanced
|
||||||
|
Settings (base size, crop mode, etc.).
|
||||||
|
- **Ollama models** — any vision model pulled on your Ollama host and listed in
|
||||||
|
`OLLAMA_MODELS` (e.g. `glm-ocr`, `llama3.2-vision`). These run remotely on the
|
||||||
|
Ollama server. They return **plain text only**: bounding boxes are not produced,
|
||||||
|
so grounding modes (Find) and the DeepSeek-specific Advanced Settings are ignored
|
||||||
|
/ disabled when an Ollama model is selected.
|
||||||
|
|
||||||
|
Setup for Ollama models:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# On the machine running Ollama
|
||||||
|
ollama pull glm-ocr
|
||||||
|
ollama pull llama3.2-vision
|
||||||
|
|
||||||
|
# Point the backend at it (in .env), then restart
|
||||||
|
OLLAMA_BASE_URL=http://host.docker.internal:11434
|
||||||
|
OLLAMA_MODELS=glm-ocr,llama3.2-vision
|
||||||
|
```
|
||||||
|
|
||||||
|
`GET /api/models` returns the registered models and their capabilities; the UI
|
||||||
|
populates the selector from it. The model used for each job is stored on the job
|
||||||
|
record (`ocr_model`) and shown in the Browse Jobs view.
|
||||||
|
|
||||||
## Tech Stack
|
## Tech Stack
|
||||||
|
|
||||||
### Frontend
|
### Frontend
|
||||||
@@ -377,6 +418,7 @@ For large images, the model uses dynamic cropping:
|
|||||||
|
|
||||||
**Parameters:**
|
**Parameters:**
|
||||||
- `image` (file, required) - Image file to process (up to 100MB)
|
- `image` (file, required) - Image file to process (up to 100MB)
|
||||||
|
- `model` (string) - OCR model id from `GET /api/models` (default: registry default). Grounding/Advanced settings apply to DeepSeek only.
|
||||||
- `mode` (string) - OCR mode: `plain_ocr` | `describe` | `find_ref` | `freeform`
|
- `mode` (string) - OCR mode: `plain_ocr` | `describe` | `find_ref` | `freeform`
|
||||||
- `prompt` (string) - Custom prompt for freeform mode
|
- `prompt` (string) - Custom prompt for freeform mode
|
||||||
- `grounding` (bool) - Enable bounding boxes (auto-enabled for find_ref)
|
- `grounding` (bool) - Enable bounding boxes (auto-enabled for find_ref)
|
||||||
@@ -416,6 +458,7 @@ Process PDF documents with OCR and export to various formats.
|
|||||||
|
|
||||||
**Parameters:**
|
**Parameters:**
|
||||||
- `pdf_file` (file, required) - PDF file to process (up to 100MB)
|
- `pdf_file` (file, required) - PDF file to process (up to 100MB)
|
||||||
|
- `model` (string) - OCR model id from `GET /api/models` (default: registry default)
|
||||||
- `mode` (string) - OCR mode: `plain_ocr` | `describe` | `find_ref` | `freeform`
|
- `mode` (string) - OCR mode: `plain_ocr` | `describe` | `find_ref` | `freeform`
|
||||||
- `prompt` (string) - Custom prompt for freeform mode
|
- `prompt` (string) - Custom prompt for freeform mode
|
||||||
- `output_format` (string) - Output format: `markdown` | `html` | `docx` | `json`
|
- `output_format` (string) - Output format: `markdown` | `html` | `docx` | `json`
|
||||||
|
|||||||
115
backend/database.py
Normal file
115
backend/database.py
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
import os
|
||||||
|
import psycopg2
|
||||||
|
import psycopg2.extras
|
||||||
|
from contextlib import contextmanager
|
||||||
|
from decouple import config as env_config
|
||||||
|
|
||||||
|
DATABASE_URL = env_config(
|
||||||
|
"DATABASE_URL",
|
||||||
|
default="postgresql://ocr_user:ocr_password@postgres:5432/ocr_db"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_conn():
|
||||||
|
return psycopg2.connect(DATABASE_URL, cursor_factory=psycopg2.extras.RealDictCursor)
|
||||||
|
|
||||||
|
|
||||||
|
def init_db():
|
||||||
|
"""Create tables if they don't exist. Called once at startup."""
|
||||||
|
conn = None
|
||||||
|
try:
|
||||||
|
conn = _get_conn()
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
cur.execute("""
|
||||||
|
CREATE TABLE IF NOT EXISTS ocr_jobs (
|
||||||
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||||
|
author TEXT,
|
||||||
|
book TEXT,
|
||||||
|
chapter TEXT,
|
||||||
|
page TEXT,
|
||||||
|
submitted_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||||
|
image_path TEXT NOT NULL,
|
||||||
|
original_filename TEXT,
|
||||||
|
ocr_text TEXT,
|
||||||
|
status TEXT NOT NULL DEFAULT 'unreviewed',
|
||||||
|
reviewed_text TEXT,
|
||||||
|
reviewer_name TEXT,
|
||||||
|
reviewed_at TIMESTAMPTZ,
|
||||||
|
mode TEXT
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
# Index for fast full-text-style searches on common fields
|
||||||
|
cur.execute("""
|
||||||
|
CREATE INDEX IF NOT EXISTS ocr_jobs_status_idx ON ocr_jobs(status)
|
||||||
|
""")
|
||||||
|
cur.execute("""
|
||||||
|
CREATE INDEX IF NOT EXISTS ocr_jobs_submitted_at_idx ON ocr_jobs(submitted_at DESC)
|
||||||
|
""")
|
||||||
|
# Add columns introduced after initial schema (safe to run repeatedly)
|
||||||
|
cur.execute("""
|
||||||
|
ALTER TABLE ocr_jobs
|
||||||
|
ADD COLUMN IF NOT EXISTS describe_text TEXT
|
||||||
|
""")
|
||||||
|
cur.execute("""
|
||||||
|
ALTER TABLE ocr_jobs
|
||||||
|
ADD COLUMN IF NOT EXISTS freeform_text TEXT
|
||||||
|
""")
|
||||||
|
cur.execute("""
|
||||||
|
ALTER TABLE ocr_jobs
|
||||||
|
ADD COLUMN IF NOT EXISTS qdrant_synced_at TIMESTAMPTZ
|
||||||
|
""")
|
||||||
|
cur.execute("""
|
||||||
|
ALTER TABLE ocr_jobs
|
||||||
|
ADD COLUMN IF NOT EXISTS updated_at TIMESTAMPTZ
|
||||||
|
""")
|
||||||
|
# Which OCR model produced this job (e.g. "deepseek-local", "ollama:glm-ocr")
|
||||||
|
cur.execute("""
|
||||||
|
ALTER TABLE ocr_jobs
|
||||||
|
ADD COLUMN IF NOT EXISTS ocr_model TEXT
|
||||||
|
""")
|
||||||
|
# Trigger function: stamp updated_at on every row update
|
||||||
|
cur.execute("""
|
||||||
|
CREATE OR REPLACE FUNCTION set_updated_at()
|
||||||
|
RETURNS TRIGGER AS $$
|
||||||
|
BEGIN
|
||||||
|
NEW.updated_at = NOW();
|
||||||
|
RETURN NEW;
|
||||||
|
END;
|
||||||
|
$$ LANGUAGE plpgsql
|
||||||
|
""")
|
||||||
|
cur.execute("""
|
||||||
|
CREATE OR REPLACE TRIGGER ocr_jobs_set_updated_at
|
||||||
|
BEFORE UPDATE ON ocr_jobs
|
||||||
|
FOR EACH ROW EXECUTE FUNCTION set_updated_at()
|
||||||
|
""")
|
||||||
|
# Unique constraint: prevent duplicate (author, chapter, page) submissions.
|
||||||
|
# Applies only when all three fields are non-null.
|
||||||
|
cur.execute("""
|
||||||
|
CREATE UNIQUE INDEX IF NOT EXISTS ocr_jobs_author_chapter_page_unique
|
||||||
|
ON ocr_jobs (author, chapter, page)
|
||||||
|
WHERE author IS NOT NULL AND chapter IS NOT NULL AND page IS NOT NULL
|
||||||
|
""")
|
||||||
|
conn.commit()
|
||||||
|
print("Database initialized.")
|
||||||
|
except Exception as exc:
|
||||||
|
print(f"Database init failed: {exc}")
|
||||||
|
if conn:
|
||||||
|
conn.rollback()
|
||||||
|
raise
|
||||||
|
finally:
|
||||||
|
if conn:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
@contextmanager
|
||||||
|
def get_db():
|
||||||
|
"""Yield a connection and auto-commit/rollback."""
|
||||||
|
conn = _get_conn()
|
||||||
|
try:
|
||||||
|
yield conn
|
||||||
|
conn.commit()
|
||||||
|
except Exception:
|
||||||
|
conn.rollback()
|
||||||
|
raise
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
847
backend/main.py
847
backend/main.py
@@ -1,16 +1,15 @@
|
|||||||
import os
|
import os
|
||||||
import re
|
import uuid
|
||||||
import tempfile
|
import tempfile
|
||||||
import shutil
|
|
||||||
import base64
|
import base64
|
||||||
from typing import List, Dict, Any, Optional
|
from typing import List, Dict, Any, Optional
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
from fastapi import FastAPI, File, UploadFile, Form, HTTPException
|
from fastapi import FastAPI, File, UploadFile, Form, HTTPException, Query
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
from fastapi.responses import JSONResponse, StreamingResponse
|
from fastapi.responses import JSONResponse, StreamingResponse, FileResponse
|
||||||
import torch
|
from pydantic import BaseModel
|
||||||
from transformers import AutoModel, AutoTokenizer
|
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
import uvicorn
|
import uvicorn
|
||||||
from decouple import config as env_config
|
from decouple import config as env_config
|
||||||
@@ -24,51 +23,41 @@ from pdf_utils import (
|
|||||||
clean_markdown_content
|
clean_markdown_content
|
||||||
)
|
)
|
||||||
from format_converter import DocumentConverter
|
from format_converter import DocumentConverter
|
||||||
|
from database import init_db, get_db
|
||||||
|
from providers import (
|
||||||
|
build_registry,
|
||||||
|
parse_detections,
|
||||||
|
clean_grounding_text,
|
||||||
|
ProviderError,
|
||||||
|
GROUNDING_MODES,
|
||||||
|
)
|
||||||
|
|
||||||
|
OCR_IMAGES_DIR = env_config("OCR_IMAGES_DIR", default="/data/ocr_images")
|
||||||
|
|
||||||
# -----------------------------
|
# -----------------------------
|
||||||
# Lifespan context for model loading
|
# Lifespan context
|
||||||
# -----------------------------
|
# -----------------------------
|
||||||
model = None
|
# The model registry holds all available OCR providers. Local models (e.g.
|
||||||
tokenizer = None
|
# DeepSeek-OCR) are loaded lazily on first use so an Ollama-only deployment
|
||||||
|
# starts instantly and never touches the GPU.
|
||||||
|
registry = None
|
||||||
|
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
async def lifespan(app: FastAPI):
|
async def lifespan(app: FastAPI):
|
||||||
"""Load model on startup, cleanup on shutdown"""
|
"""Build the model registry on startup."""
|
||||||
global model, tokenizer
|
global registry
|
||||||
|
|
||||||
# Environment setup
|
# Image storage directory
|
||||||
os.environ.pop("TRANSFORMERS_CACHE", None)
|
os.makedirs(OCR_IMAGES_DIR, exist_ok=True)
|
||||||
MODEL_NAME = env_config("MODEL_NAME", default="deepseek-ai/DeepSeek-OCR")
|
|
||||||
HF_HOME = env_config("HF_HOME", default="/models")
|
|
||||||
os.makedirs(HF_HOME, exist_ok=True)
|
|
||||||
|
|
||||||
# Load model
|
# Database
|
||||||
print(f"🚀 Loading {MODEL_NAME}...")
|
|
||||||
torch_dtype = torch.bfloat16
|
|
||||||
|
|
||||||
tokenizer = AutoTokenizer.from_pretrained(
|
|
||||||
MODEL_NAME,
|
|
||||||
trust_remote_code=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
model = AutoModel.from_pretrained(
|
|
||||||
MODEL_NAME,
|
|
||||||
trust_remote_code=True,
|
|
||||||
use_safetensors=True,
|
|
||||||
attn_implementation="eager",
|
|
||||||
torch_dtype=torch_dtype,
|
|
||||||
).eval().to("cuda")
|
|
||||||
|
|
||||||
# Pad token setup
|
|
||||||
try:
|
try:
|
||||||
if getattr(tokenizer, "pad_token_id", None) is None and getattr(tokenizer, "eos_token_id", None) is not None:
|
init_db()
|
||||||
tokenizer.pad_token = tokenizer.eos_token
|
except Exception as exc:
|
||||||
if getattr(model.config, "pad_token_id", None) is None and getattr(tokenizer, "pad_token_id", None) is not None:
|
print(f"Warning: database initialization failed: {exc}")
|
||||||
model.config.pad_token_id = tokenizer.pad_token_id
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
print("✅ Model loaded and ready!")
|
# OCR model registry (providers load their models lazily)
|
||||||
|
registry = build_registry()
|
||||||
|
|
||||||
yield
|
yield
|
||||||
|
|
||||||
@@ -97,155 +86,6 @@ app.add_middleware(
|
|||||||
allow_headers=["*"],
|
allow_headers=["*"],
|
||||||
)
|
)
|
||||||
|
|
||||||
# -----------------------------
|
|
||||||
# Prompt builder
|
|
||||||
# -----------------------------
|
|
||||||
def build_prompt(
|
|
||||||
mode: str,
|
|
||||||
user_prompt: str,
|
|
||||||
grounding: bool,
|
|
||||||
find_term: Optional[str],
|
|
||||||
schema: Optional[str],
|
|
||||||
include_caption: bool,
|
|
||||||
) -> str:
|
|
||||||
"""Build the prompt based on mode"""
|
|
||||||
parts: List[str] = ["<image>"]
|
|
||||||
mode_requires_grounding = mode in {"find_ref", "layout_map", "pii_redact"}
|
|
||||||
if grounding or mode_requires_grounding:
|
|
||||||
parts.append("<|grounding|>")
|
|
||||||
|
|
||||||
instruction = ""
|
|
||||||
if mode == "plain_ocr":
|
|
||||||
instruction = "Free OCR."
|
|
||||||
elif mode == "markdown":
|
|
||||||
instruction = "Convert the document to markdown."
|
|
||||||
elif mode == "tables_csv":
|
|
||||||
instruction = (
|
|
||||||
"Extract every table and output CSV only. "
|
|
||||||
"Use commas, minimal quoting. If multiple tables, separate with a line containing '---'."
|
|
||||||
)
|
|
||||||
elif mode == "tables_md":
|
|
||||||
instruction = "Extract every table as GitHub-flavored Markdown tables. Output only the tables."
|
|
||||||
elif mode == "kv_json":
|
|
||||||
schema_text = schema.strip() if schema else "{}"
|
|
||||||
instruction = (
|
|
||||||
"Extract key fields and return strict JSON only. "
|
|
||||||
f"Use this schema (fill the values): {schema_text}"
|
|
||||||
)
|
|
||||||
elif mode == "figure_chart":
|
|
||||||
instruction = (
|
|
||||||
"Parse the figure. First extract any numeric series as a two-column table (x,y). "
|
|
||||||
"Then summarize the chart in 2 sentences. Output the table, then a line '---', then the summary."
|
|
||||||
)
|
|
||||||
elif mode == "find_ref":
|
|
||||||
key = (find_term or "").strip() or "Total"
|
|
||||||
instruction = f"Locate <|ref|>{key}<|/ref|> in the image."
|
|
||||||
elif mode == "layout_map":
|
|
||||||
instruction = (
|
|
||||||
'Return a JSON array of blocks with fields {"type":["title","paragraph","table","figure"],'
|
|
||||||
'"box":[x1,y1,x2,y2]}. Do not include any text content.'
|
|
||||||
)
|
|
||||||
elif mode == "pii_redact":
|
|
||||||
instruction = (
|
|
||||||
'Find all occurrences of emails, phone numbers, postal addresses, and IBANs. '
|
|
||||||
'Return a JSON array of objects {label, text, box:[x1,y1,x2,y2]}.'
|
|
||||||
)
|
|
||||||
elif mode == "multilingual":
|
|
||||||
instruction = "Free OCR. Detect the language automatically and output in the same script."
|
|
||||||
elif mode == "describe":
|
|
||||||
instruction = "Describe this image. Focus on visible key elements."
|
|
||||||
elif mode == "freeform":
|
|
||||||
instruction = user_prompt.strip() if user_prompt else "OCR this image."
|
|
||||||
else:
|
|
||||||
instruction = "OCR this image."
|
|
||||||
|
|
||||||
if include_caption and mode not in {"describe"}:
|
|
||||||
instruction = instruction + "\nThen add a one-paragraph description of the image."
|
|
||||||
|
|
||||||
parts.append(instruction)
|
|
||||||
return "\n".join(parts)
|
|
||||||
|
|
||||||
# -----------------------------
|
|
||||||
# Grounding parser
|
|
||||||
# -----------------------------
|
|
||||||
# Match a full detection block and capture the coordinates as the entire list expression
|
|
||||||
# Examples of captured coords (including outer brackets):
|
|
||||||
# - [[312, 339, 480, 681]]
|
|
||||||
# - [[504, 700, 625, 910], [771, 570, 996, 996]]
|
|
||||||
# - [[110, 310, 255, 800], [312, 343, 479, 680], ...]
|
|
||||||
# Using a greedy bracket capture ensures we include all inner lists up to the last ']' before </|det|>
|
|
||||||
DET_BLOCK = re.compile(
|
|
||||||
r"<\|ref\|>(?P<label>.*?)<\|/ref\|>\s*<\|det\|>\s*(?P<coords>\[.*\])\s*<\|/det\|>",
|
|
||||||
re.DOTALL,
|
|
||||||
)
|
|
||||||
|
|
||||||
def clean_grounding_text(text: str) -> str:
|
|
||||||
"""Remove grounding tags from text for display, keeping labels"""
|
|
||||||
# Replace <|ref|>label<|/ref|><|det|>[...any nested lists...]<|/det|> with just the label
|
|
||||||
cleaned = re.sub(
|
|
||||||
r"<\|ref\|>(.*?)<\|/ref\|>\s*<\|det\|>\s*\[.*\]\s*<\|/det\|>",
|
|
||||||
r"\1",
|
|
||||||
text,
|
|
||||||
flags=re.DOTALL,
|
|
||||||
)
|
|
||||||
# Also remove any standalone grounding tags
|
|
||||||
cleaned = re.sub(r"<\|grounding\|>", "", cleaned)
|
|
||||||
return cleaned.strip()
|
|
||||||
|
|
||||||
def parse_detections(text: str, image_width: int, image_height: int) -> List[Dict[str, Any]]:
|
|
||||||
"""Parse grounding boxes from text and scale from 0-999 normalized coords to actual image dimensions
|
|
||||||
|
|
||||||
Handles both single and multiple bounding boxes:
|
|
||||||
- Single: <|ref|>label<|/ref|><|det|>[[x1,y1,x2,y2]]<|/det|>
|
|
||||||
- Multiple: <|ref|>label<|/ref|><|det|>[[x1,y1,x2,y2], [x1,y1,x2,y2], ...]<|/det|>
|
|
||||||
"""
|
|
||||||
boxes: List[Dict[str, Any]] = []
|
|
||||||
for m in DET_BLOCK.finditer(text or ""):
|
|
||||||
label = m.group("label").strip()
|
|
||||||
coords_str = m.group("coords").strip()
|
|
||||||
|
|
||||||
print(f"🔍 DEBUG: Found detection for '{label}'")
|
|
||||||
print(f"📦 Raw coords string (with brackets): {coords_str}")
|
|
||||||
|
|
||||||
try:
|
|
||||||
import ast
|
|
||||||
|
|
||||||
# Parse the full bracket expression directly (handles single and multiple)
|
|
||||||
parsed = ast.literal_eval(coords_str)
|
|
||||||
|
|
||||||
# Normalize to a list of lists
|
|
||||||
if (
|
|
||||||
isinstance(parsed, list)
|
|
||||||
and len(parsed) == 4
|
|
||||||
and all(isinstance(n, (int, float)) for n in parsed)
|
|
||||||
):
|
|
||||||
# Single box provided as [x1,y1,x2,y2]
|
|
||||||
box_coords = [parsed]
|
|
||||||
print("📦 Single box (flat list) detected")
|
|
||||||
elif isinstance(parsed, list):
|
|
||||||
box_coords = parsed
|
|
||||||
print(f"📦 Boxes detected: {len(box_coords)}")
|
|
||||||
else:
|
|
||||||
raise ValueError("Unsupported coords structure")
|
|
||||||
|
|
||||||
# Process each box
|
|
||||||
for idx, box in enumerate(box_coords):
|
|
||||||
if isinstance(box, (list, tuple)) and len(box) >= 4:
|
|
||||||
x1 = int(float(box[0]) / 999 * image_width)
|
|
||||||
y1 = int(float(box[1]) / 999 * image_height)
|
|
||||||
x2 = int(float(box[2]) / 999 * image_width)
|
|
||||||
y2 = int(float(box[3]) / 999 * image_height)
|
|
||||||
print(f" Box {idx+1}: {box} → [{x1}, {y1}, {x2}, {y2}]")
|
|
||||||
boxes.append({"label": label, "box": [x1, y1, x2, y2]})
|
|
||||||
else:
|
|
||||||
print(f" ⚠️ Skipping invalid box: {box}")
|
|
||||||
except Exception as e:
|
|
||||||
print(f"❌ Parsing failed: {e}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
print(f"🎯 Total boxes parsed: {len(boxes)}")
|
|
||||||
return boxes
|
|
||||||
|
|
||||||
# -----------------------------
|
# -----------------------------
|
||||||
# Routes
|
# Routes
|
||||||
# -----------------------------
|
# -----------------------------
|
||||||
@@ -255,11 +95,38 @@ async def root():
|
|||||||
|
|
||||||
@app.get("/health")
|
@app.get("/health")
|
||||||
async def health():
|
async def health():
|
||||||
return {"status": "healthy", "model_loaded": model is not None}
|
return {"status": "healthy", "models": registry.list_models() if registry else []}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/models")
|
||||||
|
async def list_models():
|
||||||
|
"""List the OCR models available for selection in the UI."""
|
||||||
|
if registry is None:
|
||||||
|
raise HTTPException(status_code=503, detail="Model registry not ready.")
|
||||||
|
return JSONResponse({"models": registry.list_models()})
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_provider(model_id: Optional[str], mode: str):
|
||||||
|
"""Look up the provider and reject capability mismatches (e.g. grounding)."""
|
||||||
|
if registry is None:
|
||||||
|
raise HTTPException(status_code=503, detail="Model registry not ready.")
|
||||||
|
try:
|
||||||
|
provider = registry.get(model_id)
|
||||||
|
except ProviderError as exc:
|
||||||
|
raise HTTPException(status_code=400, detail=str(exc))
|
||||||
|
|
||||||
|
if mode in GROUNDING_MODES and not provider.capabilities.get("grounding"):
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=400,
|
||||||
|
detail=f"Model '{provider.label}' does not support grounding modes (e.g. {mode}).",
|
||||||
|
)
|
||||||
|
return provider
|
||||||
|
|
||||||
|
|
||||||
@app.post("/api/ocr")
|
@app.post("/api/ocr")
|
||||||
async def ocr_inference(
|
async def ocr_inference(
|
||||||
image: UploadFile = File(...),
|
image: UploadFile = File(...),
|
||||||
|
model: Optional[str] = Form(None),
|
||||||
mode: str = Form("plain_ocr"),
|
mode: str = Form("plain_ocr"),
|
||||||
prompt: str = Form(""),
|
prompt: str = Form(""),
|
||||||
grounding: bool = Form(False),
|
grounding: bool = Form(False),
|
||||||
@@ -275,32 +142,18 @@ async def ocr_inference(
|
|||||||
Perform OCR inference on uploaded image
|
Perform OCR inference on uploaded image
|
||||||
|
|
||||||
- **image**: Image file to process
|
- **image**: Image file to process
|
||||||
|
- **model**: OCR model id (see GET /api/models); defaults to the registry default
|
||||||
- **mode**: OCR mode (plain_ocr, markdown, tables_csv, etc.)
|
- **mode**: OCR mode (plain_ocr, markdown, tables_csv, etc.)
|
||||||
- **prompt**: Custom prompt for freeform mode
|
- **prompt**: Custom prompt for freeform mode
|
||||||
- **grounding**: Enable grounding boxes
|
- **grounding**: Enable grounding boxes (DeepSeek only)
|
||||||
- **include_caption**: Add image description
|
- **include_caption**: Add image description
|
||||||
- **find_term**: Term to find (for find_ref mode)
|
- **find_term**: Term to find (for find_ref mode)
|
||||||
- **schema**: JSON schema (for kv_json mode)
|
- **schema**: JSON schema (for kv_json mode)
|
||||||
- **base_size**: Base processing size
|
- **base_size/image_size/crop_mode/test_compress**: DeepSeek processing options
|
||||||
- **image_size**: Image size parameter
|
|
||||||
- **crop_mode**: Enable crop mode
|
|
||||||
- **test_compress**: Test compression
|
|
||||||
"""
|
"""
|
||||||
if model is None or tokenizer is None:
|
provider = _resolve_provider(model, mode)
|
||||||
raise HTTPException(status_code=503, detail="Model not loaded yet")
|
|
||||||
|
|
||||||
# Build prompt
|
|
||||||
prompt_text = build_prompt(
|
|
||||||
mode=mode,
|
|
||||||
user_prompt=prompt,
|
|
||||||
grounding=grounding,
|
|
||||||
find_term=find_term,
|
|
||||||
schema=schema,
|
|
||||||
include_caption=include_caption,
|
|
||||||
)
|
|
||||||
|
|
||||||
tmp_img = None
|
tmp_img = None
|
||||||
out_dir = None
|
|
||||||
try:
|
try:
|
||||||
# Save uploaded file
|
# Save uploaded file
|
||||||
with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
|
with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
|
||||||
@@ -315,42 +168,27 @@ async def ocr_inference(
|
|||||||
except Exception:
|
except Exception:
|
||||||
orig_w = orig_h = None
|
orig_w = orig_h = None
|
||||||
|
|
||||||
out_dir = tempfile.mkdtemp(prefix="dsocr_")
|
# Run inference through the selected provider
|
||||||
|
text = provider.run(
|
||||||
# Run inference
|
tmp_img,
|
||||||
res = model.infer(
|
mode=mode,
|
||||||
tokenizer,
|
prompt=prompt,
|
||||||
prompt=prompt_text,
|
grounding=grounding,
|
||||||
image_file=tmp_img,
|
find_term=find_term,
|
||||||
output_path=out_dir,
|
schema=schema,
|
||||||
base_size=base_size,
|
include_caption=include_caption,
|
||||||
image_size=image_size,
|
options={
|
||||||
crop_mode=crop_mode,
|
"base_size": base_size,
|
||||||
save_results=False,
|
"image_size": image_size,
|
||||||
test_compress=test_compress,
|
"crop_mode": crop_mode,
|
||||||
eval_mode=True,
|
"test_compress": test_compress,
|
||||||
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
# Normalize response
|
|
||||||
if isinstance(res, str):
|
|
||||||
text = res.strip()
|
|
||||||
elif isinstance(res, dict) and "text" in res:
|
|
||||||
text = str(res["text"]).strip()
|
|
||||||
elif isinstance(res, (list, tuple)):
|
|
||||||
text = "\n".join(map(str, res)).strip()
|
|
||||||
else:
|
|
||||||
text = ""
|
|
||||||
|
|
||||||
# Fallback: check output file
|
|
||||||
if not text:
|
|
||||||
mmd = os.path.join(out_dir, "result.mmd")
|
|
||||||
if os.path.exists(mmd):
|
|
||||||
with open(mmd, "r", encoding="utf-8") as fh:
|
|
||||||
text = fh.read().strip()
|
|
||||||
if not text:
|
if not text:
|
||||||
text = "No text returned by model."
|
text = "No text returned by model."
|
||||||
|
|
||||||
# Parse grounding boxes with proper coordinate scaling
|
# Parse grounding boxes (no-op for providers/text without grounding tokens)
|
||||||
boxes = parse_detections(text, orig_w or 1, orig_h or 1) if ("<|det|>" in text or "<|ref|>" in text) else []
|
boxes = parse_detections(text, orig_w or 1, orig_h or 1) if ("<|det|>" in text or "<|ref|>" in text) else []
|
||||||
|
|
||||||
# Clean grounding tags from display text, but keep the labels
|
# Clean grounding tags from display text, but keep the labels
|
||||||
@@ -367,14 +205,21 @@ async def ocr_inference(
|
|||||||
"boxes": boxes,
|
"boxes": boxes,
|
||||||
"image_dims": {"w": orig_w, "h": orig_h},
|
"image_dims": {"w": orig_w, "h": orig_h},
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"model": provider.id,
|
||||||
|
"model_label": provider.label,
|
||||||
"mode": mode,
|
"mode": mode,
|
||||||
"grounding": grounding or (mode in {"find_ref","layout_map","pii_redact"}),
|
"grounding": grounding or (mode in GROUNDING_MODES),
|
||||||
"base_size": base_size,
|
"base_size": base_size,
|
||||||
"image_size": image_size,
|
"image_size": image_size,
|
||||||
"crop_mode": crop_mode
|
"crop_mode": crop_mode
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
except ProviderError as e:
|
||||||
|
print(f"OCR provider error: {e}")
|
||||||
|
raise HTTPException(status_code=502, detail=str(e))
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"OCR inference error: {type(e).__name__}: {str(e)}")
|
print(f"OCR inference error: {type(e).__name__}: {str(e)}")
|
||||||
raise HTTPException(status_code=500, detail="An internal error occurred during OCR processing.")
|
raise HTTPException(status_code=500, detail="An internal error occurred during OCR processing.")
|
||||||
@@ -385,12 +230,11 @@ async def ocr_inference(
|
|||||||
os.remove(tmp_img)
|
os.remove(tmp_img)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
if out_dir:
|
|
||||||
shutil.rmtree(out_dir, ignore_errors=True)
|
|
||||||
|
|
||||||
@app.post("/api/process-pdf")
|
@app.post("/api/process-pdf")
|
||||||
async def process_pdf(
|
async def process_pdf(
|
||||||
pdf_file: UploadFile = File(...),
|
pdf_file: UploadFile = File(...),
|
||||||
|
model: Optional[str] = Form(None),
|
||||||
mode: str = Form("plain_ocr"),
|
mode: str = Form("plain_ocr"),
|
||||||
prompt: str = Form(""),
|
prompt: str = Form(""),
|
||||||
output_format: str = Form("markdown"), # markdown, html, docx, json
|
output_format: str = Form("markdown"), # markdown, html, docx, json
|
||||||
@@ -417,8 +261,7 @@ async def process_pdf(
|
|||||||
- **image_size**: Image size parameter
|
- **image_size**: Image size parameter
|
||||||
- **crop_mode**: Enable crop mode
|
- **crop_mode**: Enable crop mode
|
||||||
"""
|
"""
|
||||||
if model is None or tokenizer is None:
|
provider = _resolve_provider(model, mode)
|
||||||
raise HTTPException(status_code=503, detail="Model not loaded yet")
|
|
||||||
|
|
||||||
# Validate output format
|
# Validate output format
|
||||||
if output_format not in ["markdown", "html", "docx", "json"]:
|
if output_format not in ["markdown", "html", "docx", "json"]:
|
||||||
@@ -441,56 +284,32 @@ async def process_pdf(
|
|||||||
for page_idx, img in enumerate(images):
|
for page_idx, img in enumerate(images):
|
||||||
print(f"🔍 Processing page {page_idx + 1}/{total_pages}...")
|
print(f"🔍 Processing page {page_idx + 1}/{total_pages}...")
|
||||||
|
|
||||||
# Build prompt for this page
|
|
||||||
prompt_text = build_prompt(
|
|
||||||
mode=mode,
|
|
||||||
user_prompt=prompt,
|
|
||||||
grounding=grounding,
|
|
||||||
find_term=None,
|
|
||||||
schema=None,
|
|
||||||
include_caption=include_caption,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Save image temporarily
|
# Save image temporarily
|
||||||
tmp_img = None
|
tmp_img = None
|
||||||
out_dir = None
|
|
||||||
try:
|
try:
|
||||||
with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
|
with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
|
||||||
img.save(tmp, format="PNG")
|
img.save(tmp, format="PNG")
|
||||||
tmp_img = tmp.name
|
tmp_img = tmp.name
|
||||||
|
|
||||||
orig_w, orig_h = img.size
|
orig_w, orig_h = img.size
|
||||||
out_dir = tempfile.mkdtemp(prefix="dsocr_pdf_")
|
|
||||||
|
|
||||||
# Run inference
|
# Run inference through the selected provider
|
||||||
res = model.infer(
|
text = provider.run(
|
||||||
tokenizer,
|
tmp_img,
|
||||||
prompt=prompt_text,
|
mode=mode,
|
||||||
image_file=tmp_img,
|
prompt=prompt,
|
||||||
output_path=out_dir,
|
grounding=grounding,
|
||||||
base_size=base_size,
|
find_term=None,
|
||||||
image_size=image_size,
|
schema=None,
|
||||||
crop_mode=crop_mode,
|
include_caption=include_caption,
|
||||||
save_results=False,
|
options={
|
||||||
test_compress=False,
|
"base_size": base_size,
|
||||||
eval_mode=True,
|
"image_size": image_size,
|
||||||
|
"crop_mode": crop_mode,
|
||||||
|
"test_compress": False,
|
||||||
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
# Normalize response
|
|
||||||
if isinstance(res, str):
|
|
||||||
text = res.strip()
|
|
||||||
elif isinstance(res, dict) and "text" in res:
|
|
||||||
text = str(res["text"]).strip()
|
|
||||||
elif isinstance(res, (list, tuple)):
|
|
||||||
text = "\n".join(map(str, res)).strip()
|
|
||||||
else:
|
|
||||||
text = ""
|
|
||||||
|
|
||||||
if not text:
|
|
||||||
mmd = os.path.join(out_dir, "result.mmd")
|
|
||||||
if os.path.exists(mmd):
|
|
||||||
with open(mmd, "r", encoding="utf-8") as fh:
|
|
||||||
text = fh.read().strip()
|
|
||||||
if not text:
|
if not text:
|
||||||
text = f"No text returned for page {page_idx + 1}."
|
text = f"No text returned for page {page_idx + 1}."
|
||||||
|
|
||||||
@@ -535,8 +354,6 @@ async def process_pdf(
|
|||||||
os.remove(tmp_img)
|
os.remove(tmp_img)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
if out_dir:
|
|
||||||
shutil.rmtree(out_dir, ignore_errors=True)
|
|
||||||
|
|
||||||
print(f"✅ Processed all {total_pages} pages")
|
print(f"✅ Processed all {total_pages} pages")
|
||||||
|
|
||||||
@@ -547,6 +364,8 @@ async def process_pdf(
|
|||||||
"total_pages": total_pages,
|
"total_pages": total_pages,
|
||||||
"pages": pages_content,
|
"pages": pages_content,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"model": provider.id,
|
||||||
|
"model_label": provider.label,
|
||||||
"mode": mode,
|
"mode": mode,
|
||||||
"grounding": grounding,
|
"grounding": grounding,
|
||||||
"extract_images": extract_images,
|
"extract_images": extract_images,
|
||||||
@@ -575,12 +394,468 @@ async def process_pdf(
|
|||||||
headers={"Content-Disposition": f"attachment; filename=ocr_result.docx"}
|
headers={"Content-Disposition": f"attachment; filename=ocr_result.docx"}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
except ProviderError as e:
|
||||||
|
print(f"PDF provider error: {e}")
|
||||||
|
raise HTTPException(status_code=502, detail=str(e))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
import traceback
|
import traceback
|
||||||
print(f"Error processing PDF: {e}")
|
print(f"Error processing PDF: {e}")
|
||||||
print(traceback.format_exc())
|
print(traceback.format_exc())
|
||||||
raise HTTPException(status_code=500, detail="An internal error occurred during PDF processing.")
|
raise HTTPException(status_code=500, detail="An internal error occurred during PDF processing.")
|
||||||
|
|
||||||
|
# -----------------------------
|
||||||
|
# Job management routes
|
||||||
|
# -----------------------------
|
||||||
|
|
||||||
|
class ReviewRequest(BaseModel):
    """JSON body accepted by PUT /api/jobs/{job_id}/review.

    The corrected text and the reviewer's name are required; the remaining
    fields are optional and default to None.
    """

    # Corrected text, written to ocr_jobs.reviewed_text.
    reviewed_text: str
    # Written to ocr_jobs.reviewer_name.
    reviewer_name: str
    # Job metadata the review is allowed to overwrite.
    author: Optional[str] = None
    book: Optional[str] = None
    chapter: Optional[str] = None
    page: Optional[str] = None
    # Additional text columns overwritten by the review.
    describe_text: Optional[str] = None
    freeform_text: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
def _job_row_to_dict(row) -> Dict[str, Any]:
    """Copy a DB row into a plain dict whose values can be JSON-encoded.

    ``datetime`` values become ISO-8601 strings, values whose type is named
    ``UUID`` become their string form, and everything else passes through
    unchanged. The input row is not modified.
    """

    def _serialise(value):
        if isinstance(value, datetime):
            return value.isoformat()
        if value is not None and hasattr(value, '__str__') and type(value).__name__ == 'UUID':
            return str(value)
        return value

    return {key: _serialise(value) for key, value in dict(row).items()}
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/api/jobs")
async def commit_job(
    image: UploadFile = File(...),
    author: str = Form(""),
    book: str = Form(""),
    chapter: str = Form(""),
    page: str = Form(""),
    ocr_text: str = Form(""),
    describe_text: str = Form(""),
    freeform_text: str = Form(""),
    mode: str = Form("plain_ocr"),
    ocr_model: str = Form(""),
):
    """Commit an OCR job: save the uploaded image and insert a DB record.

    The image is written to OCR_IMAGES_DIR under a freshly generated UUID and
    a row with status 'unreviewed' is inserted into ocr_jobs. Blank form
    fields (except ``mode``) are stored as NULL.

    Returns the inserted row as JSON with HTTP 201.

    Raises:
        HTTPException 500: the image could not be written or the insert failed.
        HTTPException 409: the insert hit a unique-constraint violation
            (author + chapter + page already exists).
    """
    job_id = str(uuid.uuid4())

    # Determine file extension from original filename or content type
    original_filename = image.filename or "image"
    ext = os.path.splitext(original_filename)[1].lower()
    if not ext:
        ct = (image.content_type or "").lower()
        ext_map = {
            "image/png": ".png", "image/jpeg": ".jpg", "image/jpg": ".jpg",
            "image/webp": ".webp", "image/gif": ".gif", "image/bmp": ".bmp",
        }
        ext = ext_map.get(ct, ".png")

    image_path = os.path.join(OCR_IMAGES_DIR, f"{job_id}{ext}")

    try:
        content = await image.read()
        with open(image_path, "wb") as f:
            f.write(content)
    except Exception as exc:
        # Log the cause (the client only sees a generic message) and do not
        # leave a partially written file behind.
        print(f"Job commit image save error: {type(exc).__name__}: {exc}")
        try:
            os.remove(image_path)
        except OSError:
            pass
        raise HTTPException(status_code=500, detail="Failed to save image file.")

    try:
        with get_db() as conn:
            with conn.cursor() as cur:
                cur.execute(
                    """
                    INSERT INTO ocr_jobs
                    (id, author, book, chapter, page, image_path, original_filename,
                    ocr_text, describe_text, freeform_text, mode, ocr_model, status)
                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, 'unreviewed')
                    RETURNING *
                    """,
                    (job_id, author or None, book or None, chapter or None,
                     page or None, image_path, original_filename,
                     ocr_text or None, describe_text or None, freeform_text or None,
                     mode, ocr_model or None),
                )
                row = cur.fetchone()
    except Exception as exc:
        # Clean up saved image if DB insert fails
        try:
            os.remove(image_path)
        except Exception:
            pass
        # Unique constraint violation (author + chapter + page already exists)
        if getattr(exc, 'pgcode', None) == '23505':
            raise HTTPException(
                status_code=409,
                detail="A job with this Author, Chapter, and Page already exists."
            )
        print(f"Job commit DB error: {exc}")
        raise HTTPException(status_code=500, detail="Failed to save job to database.")

    return JSONResponse(_job_row_to_dict(row), status_code=201)
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/jobs")
async def list_jobs(
    search: Optional[str] = Query(None, description="General text search across all fields"),
    author: Optional[str] = Query(None),
    book: Optional[str] = Query(None),
    chapter: Optional[str] = Query(None),
    status: Optional[str] = Query(None, description="unreviewed | reviewed"),
    limit: int = Query(20, ge=1, le=200),
    offset: int = Query(0, ge=0),
):
    """Search and list jobs; every filter is optional and they are ANDed together.

    ``search`` is a case-insensitive substring match over author, book,
    chapter, page, ocr_text and reviewer_name. ``author``/``book``/``chapter``
    are substring matches on their own column and ``status`` is an exact
    match. Results are ordered newest first and paged with ``limit``/``offset``.

    Returns JSON with ``total`` (matching rows), ``limit``, ``offset`` and
    ``jobs``. Raises HTTPException 500 on any database error.
    """
    clauses: List[str] = []
    args: List[Any] = []

    if search:
        pattern = f"%{search}%"
        clauses.append(
            "(author ILIKE %s OR book ILIKE %s OR chapter ILIKE %s "
            "OR page ILIKE %s OR ocr_text ILIKE %s OR reviewer_name ILIKE %s)"
        )
        args.extend([pattern] * 6)

    # Column names come from this fixed tuple only; values are always bound.
    for column, value in (("author", author), ("book", book), ("chapter", chapter)):
        if value:
            clauses.append(f"{column} ILIKE %s")
            args.append(f"%{value}%")

    if status:
        clauses.append("status = %s")
        args.append(status)

    where = ("WHERE " + " AND ".join(clauses)) if clauses else ""

    try:
        with get_db() as conn:
            with conn.cursor() as cur:
                # Total is computed with the same filters but without paging.
                cur.execute(
                    f"SELECT COUNT(*) AS total FROM ocr_jobs {where}",
                    args,
                )
                total = cur.fetchone()["total"]

                cur.execute(
                    f"""
                    SELECT id, author, book, chapter, page, submitted_at, status,
                    reviewer_name, reviewed_at, mode, ocr_model, original_filename
                    FROM ocr_jobs {where}
                    ORDER BY submitted_at DESC
                    LIMIT %s OFFSET %s
                    """,
                    args + [limit, offset],
                )
                jobs = [_job_row_to_dict(record) for record in cur.fetchall()]
    except Exception as exc:
        print(f"list_jobs DB error: {exc}")
        raise HTTPException(status_code=500, detail="Database error.")

    return JSONResponse({"total": total, "limit": limit, "offset": offset, "jobs": jobs})
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/jobs/suggestions")
async def job_suggestions():
    """Return the distinct authors, books, chapters and reviewers for autocomplete.

    Each list is sorted and has NULLs removed by the query; a missing
    aggregate is returned as an empty list. Raises HTTPException 500 on any
    database error.
    """
    try:
        with get_db() as conn:
            with conn.cursor() as cur:
                cur.execute("""
                    SELECT
                    array_remove(array_agg(DISTINCT author ORDER BY author), NULL) AS authors,
                    array_remove(array_agg(DISTINCT book ORDER BY book), NULL) AS books,
                    array_remove(array_agg(DISTINCT chapter ORDER BY chapter), NULL) AS chapters,
                    array_remove(array_agg(DISTINCT reviewer_name ORDER BY reviewer_name), NULL) AS reviewers
                    FROM ocr_jobs
                """)
                aggregates = cur.fetchone()
    except Exception as exc:
        print(f"suggestions DB error: {exc}")
        raise HTTPException(status_code=500, detail="Database error.")

    fields = ("authors", "books", "chapters", "reviewers")
    return JSONResponse({field: aggregates[field] or [] for field in fields})
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/jobs/{job_id}")
async def get_job(job_id: str):
    """Return the complete ocr_jobs record for ``job_id`` as JSON.

    Raises HTTPException 400 when ``job_id`` is not a valid UUID, 404 when no
    row matches, and 500 on any database error.
    """
    # Reject malformed IDs before touching the database.
    try:
        uuid.UUID(job_id)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid job ID.")

    try:
        with get_db() as conn:
            with conn.cursor() as cur:
                cur.execute("SELECT * FROM ocr_jobs WHERE id = %s", (job_id,))
                record = cur.fetchone()
    except Exception as exc:
        print(f"get_job DB error: {exc}")
        raise HTTPException(status_code=500, detail="Database error.")

    if record:
        return JSONResponse(_job_row_to_dict(record))

    raise HTTPException(status_code=404, detail="Job not found.")
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/jobs/{job_id}/image")
async def get_job_image(job_id: str):
    """Serve the stored image for a job.

    Raises HTTPException 400 when ``job_id`` is not a valid UUID, 404 when the
    job does not exist or its image is missing (no path recorded, or the file
    is not on disk), and 500 on any database error.
    """
    try:
        uuid.UUID(job_id)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid job ID.")

    try:
        with get_db() as conn:
            with conn.cursor() as cur:
                cur.execute("SELECT image_path FROM ocr_jobs WHERE id = %s", (job_id,))
                row = cur.fetchone()
    except Exception as exc:
        print(f"get_job_image DB error: {exc}")
        raise HTTPException(status_code=500, detail="Database error.")

    if not row:
        raise HTTPException(status_code=404, detail="Job not found.")

    path = row["image_path"]
    # A NULL/empty path must yield 404: os.path.isfile(None) raises TypeError,
    # which would otherwise surface as an unhandled 500.
    if not path or not os.path.isfile(path):
        raise HTTPException(status_code=404, detail="Image file not found on disk.")

    return FileResponse(path)
|
||||||
|
|
||||||
|
|
||||||
|
@app.put("/api/jobs/{job_id}/review")
async def review_job(job_id: str, body: ReviewRequest):
    """Mark a job as reviewed with the corrected text and reviewer name.

    Sets status to 'reviewed', stamps reviewed_at with NOW(), and overwrites
    author, book, chapter, page, describe_text and freeform_text with the
    values from ``body`` (blank author/book/chapter/page become NULL).

    Returns the updated row as JSON.

    Raises:
        HTTPException 400: ``job_id`` is not a valid UUID.
        HTTPException 404: no job has that ID.
        HTTPException 409: the edited author/chapter/page collides with an
            existing job (unique-constraint violation).
        HTTPException 500: any other database error.
    """
    try:
        uuid.UUID(job_id)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid job ID.")

    try:
        with get_db() as conn:
            with conn.cursor() as cur:
                cur.execute(
                    """
                    UPDATE ocr_jobs
                    SET status = 'reviewed',
                    reviewed_text = %s,
                    reviewer_name = %s,
                    reviewed_at = NOW(),
                    author = %s,
                    book = %s,
                    chapter = %s,
                    page = %s,
                    describe_text = %s,
                    freeform_text = %s
                    WHERE id = %s
                    RETURNING *
                    """,
                    (
                        body.reviewed_text,
                        body.reviewer_name,
                        body.author or None,
                        body.book or None,
                        body.chapter or None,
                        body.page or None,
                        body.describe_text,
                        body.freeform_text,
                        job_id,
                    ),
                )
                row = cur.fetchone()
    except Exception as exc:
        # This update can change author/chapter/page, so it can hit the same
        # unique constraint as job creation; report it as a conflict (pgcode
        # 23505) rather than a generic server error.
        if getattr(exc, 'pgcode', None) == '23505':
            raise HTTPException(
                status_code=409,
                detail="A job with this Author, Chapter, and Page already exists."
            )
        print(f"review_job DB error: {exc}")
        raise HTTPException(status_code=500, detail="Database error.")

    if not row:
        raise HTTPException(status_code=404, detail="Job not found.")

    return JSONResponse(_job_row_to_dict(row))
|
||||||
|
|
||||||
|
|
||||||
|
class StatusRequest(BaseModel):
    """JSON body accepted by PUT /api/jobs/{job_id}/status."""

    # Target status; the route accepts only 'reviewed' or 'unreviewed'.
    status: str
    # Must be non-blank when status is 'reviewed' (enforced by the route).
    reviewer_name: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
@app.put("/api/jobs/{job_id}/status")
async def set_job_status(job_id: str, body: StatusRequest):
    """Flip a job between 'reviewed' and 'unreviewed' without editing its text.

    Marking 'reviewed' needs a non-blank reviewer_name and stamps reviewed_at
    with NOW(). Marking 'unreviewed' clears reviewed_at and leaves every other
    column (including reviewed_text) alone.

    Returns the updated row as JSON. Raises HTTPException 400 for a malformed
    ID, an unknown status or a missing reviewer name, 404 when the job does
    not exist, and 500 on any database error.
    """
    try:
        uuid.UUID(job_id)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid job ID.")

    if body.status not in ("reviewed", "unreviewed"):
        raise HTTPException(status_code=400, detail="status must be 'reviewed' or 'unreviewed'.")

    marking_reviewed = body.status == "reviewed"
    if marking_reviewed and not (body.reviewer_name or "").strip():
        raise HTTPException(status_code=400, detail="Reviewer name is required to mark reviewed.")

    # Pick the statement up front so the DB section is a single execute.
    if marking_reviewed:
        statement = """
                    UPDATE ocr_jobs
                    SET status = 'reviewed',
                    reviewer_name = %s,
                    reviewed_at = NOW()
                    WHERE id = %s
                    RETURNING *
                    """
        bound = (body.reviewer_name.strip(), job_id)
    else:
        statement = """
                    UPDATE ocr_jobs
                    SET status = 'unreviewed',
                    reviewed_at = NULL
                    WHERE id = %s
                    RETURNING *
                    """
        bound = (job_id,)

    try:
        with get_db() as conn:
            with conn.cursor() as cur:
                cur.execute(statement, bound)
                updated = cur.fetchone()
    except Exception as exc:
        print(f"set_job_status DB error: {exc}")
        raise HTTPException(status_code=500, detail="Database error.")

    if not updated:
        raise HTTPException(status_code=404, detail="Job not found.")

    return JSONResponse(_job_row_to_dict(updated))
|
||||||
|
|
||||||
|
|
||||||
|
class JobDescribeRequest(BaseModel):
    """JSON body accepted by POST /api/jobs/{job_id}/describe."""

    # Optional model identifier, passed to _resolve_provider by the route.
    model: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/api/jobs/{job_id}/describe")
async def describe_job(job_id: str, body: JobDescribeRequest):
    """Run 'describe' mode on a job's stored image and store the result.

    The provider is chosen from ``body.model``. Its output has grounding tags
    stripped (when present) and is written to the job's describe_text column.

    Returns the updated row as JSON. Raises HTTPException 400 for a malformed
    ID, 404 when the job or its image file is missing, 502 on a ProviderError,
    and 500 on any other inference or database error.
    """
    try:
        uuid.UUID(job_id)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid job ID.")

    # Find where this job's image lives.
    try:
        with get_db() as conn:
            with conn.cursor() as cur:
                cur.execute("SELECT image_path FROM ocr_jobs WHERE id = %s", (job_id,))
                job = cur.fetchone()
    except Exception as exc:
        print(f"describe_job lookup DB error: {exc}")
        raise HTTPException(status_code=500, detail="Database error.")

    if not job:
        raise HTTPException(status_code=404, detail="Job not found.")
    stored_image = job["image_path"]
    if not (stored_image and os.path.isfile(stored_image)):
        raise HTTPException(status_code=404, detail="Image file not found on disk.")

    provider = _resolve_provider(body.model, "describe")

    # Fixed inference options for this route.
    run_options = {"base_size": 1024, "image_size": 640, "crop_mode": True, "test_compress": False}
    try:
        raw = provider.run(
            stored_image,
            mode="describe",
            prompt="",
            grounding=False,
            find_term=None,
            schema=None,
            include_caption=False,
            options=run_options,
        )
    except ProviderError as e:
        print(f"describe_job provider error: {e}")
        raise HTTPException(status_code=502, detail=str(e))
    except Exception as e:
        print(f"describe_job inference error: {type(e).__name__}: {e}")
        raise HTTPException(status_code=500, detail="An internal error occurred during description.")

    # Only run the tag cleaner when the output actually carries tags.
    has_tags = "<|ref|>" in raw or "<|grounding|>" in raw
    description = clean_grounding_text(raw) if has_tags else raw

    # Save the description on the job row.
    try:
        with get_db() as conn:
            with conn.cursor() as cur:
                cur.execute(
                    "UPDATE ocr_jobs SET describe_text = %s WHERE id = %s RETURNING *",
                    (description, job_id),
                )
                saved = cur.fetchone()
    except Exception as exc:
        print(f"describe_job save DB error: {exc}")
        raise HTTPException(status_code=500, detail="Database error.")

    if not saved:
        raise HTTPException(status_code=404, detail="Job not found.")

    return JSONResponse(_job_row_to_dict(saved))
|
||||||
|
|
||||||
|
|
||||||
|
@app.delete("/api/jobs/{job_id}")
async def delete_job(job_id: str):
    """Remove a job row and, best-effort, the image file it points to.

    Returns ``{"deleted": job_id}``. Raises HTTPException 400 for a malformed
    ID, 404 when no row was deleted, and 500 on any database error. A failure
    to remove the image file is ignored.
    """
    try:
        uuid.UUID(job_id)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid job ID.")

    try:
        with get_db() as conn:
            with conn.cursor() as cur:
                cur.execute(
                    "DELETE FROM ocr_jobs WHERE id = %s RETURNING image_path",
                    (job_id,),
                )
                removed = cur.fetchone()
    except Exception as exc:
        print(f"delete_job DB error: {exc}")
        raise HTTPException(status_code=500, detail="Database error.")

    if not removed:
        raise HTTPException(status_code=404, detail="Job not found.")

    # The row is already gone; failing to delete the file must not fail the request.
    try:
        stored_image = removed["image_path"]
        if stored_image and os.path.isfile(stored_image):
            os.remove(stored_image)
    except Exception:
        pass

    return JSONResponse({"deleted": job_id})
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
host = env_config("API_HOST", default="0.0.0.0")
|
host = env_config("API_HOST", default="0.0.0.0")
|
||||||
port = env_config("API_PORT", default=8000, cast=int)
|
port = env_config("API_PORT", default=8000, cast=int)
|
||||||
|
|||||||
489
backend/providers.py
Normal file
489
backend/providers.py
Normal file
@@ -0,0 +1,489 @@
|
|||||||
|
"""
|
||||||
|
OCR provider abstraction.
|
||||||
|
|
||||||
|
Each provider knows how to turn an image + a semantic OCR request (mode, prompt,
|
||||||
|
options) into raw model text. DeepSeek-specific prompt tokens and grounding-box
|
||||||
|
parsing live here too so the FastAPI routes stay model-agnostic.
|
||||||
|
|
||||||
|
Two providers ship today:
|
||||||
|
- DeepSeekLocalProvider -> the local HF transformers DeepSeek-OCR model (GPU)
|
||||||
|
- OllamaProvider -> any vision model served by an external Ollama host
|
||||||
|
|
||||||
|
The registry is built from environment variables at startup (see build_registry()).
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import base64
|
||||||
|
import tempfile
|
||||||
|
import shutil
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from typing import List, Dict, Any, Optional
|
||||||
|
|
||||||
|
from decouple import config as env_config
|
||||||
|
|
||||||
|
# httpx is only needed when an Ollama model is actually used; treat it as an
# optional import (None when missing) so the backend can run DeepSeek-only
# without the dependency installed.
|
||||||
|
try:
|
||||||
|
import httpx
|
||||||
|
except Exception: # pragma: no cover - exercised only when httpx is missing
|
||||||
|
httpx = None
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Prompt builders
|
||||||
|
# =============================================================================
|
||||||
|
def build_prompt(
    mode: str,
    user_prompt: str,
    grounding: bool,
    find_term: Optional[str],
    schema: Optional[str],
    include_caption: bool,
) -> str:
    """Assemble the DeepSeek-OCR prompt, special tokens included, for ``mode``.

    The prompt is newline-joined: ``<image>``, then ``<|grounding|>`` when
    grounding was requested or the mode is find_ref / layout_map / pii_redact,
    then the instruction text for the mode.
    """
    mode_requires_grounding = mode in {"find_ref", "layout_map", "pii_redact"}
    use_grounding = grounding or mode_requires_grounding

    instruction = _instruction_for_mode(mode, user_prompt, find_term, schema, include_caption)

    segments = ["<image>", "<|grounding|>", instruction] if use_grounding else ["<image>", instruction]
    return "\n".join(segments)
|
||||||
|
|
||||||
|
|
||||||
|
def build_ollama_prompt(
    mode: str,
    user_prompt: str,
    find_term: Optional[str],
    schema: Optional[str],
    include_caption: bool,
) -> str:
    """Return a natural-language instruction for a generic vision model.

    Unlike the DeepSeek prompt this carries no special tokens. Modes with a
    fixed wording are looked up in a table; kv_json, find_ref and freeform
    interpolate ``schema``, ``find_term`` and ``user_prompt`` respectively.
    Unknown modes fall back to a plain transcription request. When
    ``include_caption`` is set (and the mode is not 'describe') a request for
    a one-paragraph description is appended.
    """
    fallback = "Transcribe the text in this image."

    fixed_wording = {
        "plain_ocr": (
            "Transcribe all of the text in this image exactly as it appears, "
            "preserving line breaks and reading order. Output only the transcribed "
            "text with no commentary."
        ),
        "markdown": (
            "Convert this document image to clean GitHub-flavored Markdown, "
            "preserving headings, lists, and tables. Output only the Markdown."
        ),
        "tables_csv": (
            "Extract every table in this image and output CSV only. Use commas with "
            "minimal quoting. If there are multiple tables, separate them with a line "
            "containing '---'. Output only the CSV."
        ),
        "tables_md": (
            "Extract every table in this image as GitHub-flavored Markdown tables. "
            "Output only the tables."
        ),
        "figure_chart": (
            "Parse the figure in this image. First extract any numeric series as a "
            "two-column table (x,y). Then add a line containing '---' followed by a "
            "two-sentence summary of the chart."
        ),
        "layout_map": (
            'Identify the layout blocks in this image and return a JSON array of '
            'objects {"type": one of ["title","paragraph","table","figure"]}. '
            "Do not include the text content."
        ),
        "pii_redact": (
            "Find all emails, phone numbers, postal addresses, and IBANs in this image. "
            'Return a JSON array of objects {"label", "text"}.'
        ),
        "multilingual": (
            "Transcribe all of the text in this image exactly, detecting the language "
            "automatically and preserving the original script. Output only the text."
        ),
        "describe": "Describe this image, focusing on the key visible elements.",
    }

    if mode == "kv_json":
        schema_text = schema.strip() if schema else "{}"
        instruction = (
            "Extract the key fields from this image and return strict JSON only "
            f"(no prose). Use this schema, filling in the values: {schema_text}"
        )
    elif mode == "find_ref":
        # A blank search term falls back to "Total".
        key = (find_term or "").strip() or "Total"
        instruction = (
            f"Find every occurrence of '{key}' in this image and quote the surrounding "
            "text for each match. If it does not appear, say so."
        )
    elif mode == "freeform":
        instruction = user_prompt.strip() if user_prompt else fallback
    else:
        instruction = fixed_wording.get(mode, fallback)

    if include_caption and mode != "describe":
        instruction += "\nThen add a one-paragraph description of the image."

    return instruction
|
||||||
|
|
||||||
|
|
||||||
|
def _instruction_for_mode(
    mode: str,
    user_prompt: str,
    find_term: Optional[str],
    schema: Optional[str],
    include_caption: bool,
) -> str:
    """Return the DeepSeek instruction text for ``mode``, without prefix tokens.

    The ``<image>`` / ``<|grounding|>`` prefix is not added here. Fixed
    wordings come from a lookup table; kv_json, find_ref and freeform
    interpolate ``schema``, ``find_term`` and ``user_prompt``. Unknown modes
    fall back to "OCR this image.". When ``include_caption`` is set (and the
    mode is not 'describe') a request for a one-paragraph description is
    appended.
    """
    fallback = "OCR this image."

    fixed_wording = {
        "plain_ocr": "Free OCR.",
        "markdown": "Convert the document to markdown.",
        "tables_csv": (
            "Extract every table and output CSV only. "
            "Use commas, minimal quoting. If multiple tables, separate with a line containing '---'."
        ),
        "tables_md": "Extract every table as GitHub-flavored Markdown tables. Output only the tables.",
        "figure_chart": (
            "Parse the figure. First extract any numeric series as a two-column table (x,y). "
            "Then summarize the chart in 2 sentences. Output the table, then a line '---', then the summary."
        ),
        "layout_map": (
            'Return a JSON array of blocks with fields {"type":["title","paragraph","table","figure"],'
            '"box":[x1,y1,x2,y2]}. Do not include any text content.'
        ),
        "pii_redact": (
            'Find all occurrences of emails, phone numbers, postal addresses, and IBANs. '
            'Return a JSON array of objects {label, text, box:[x1,y1,x2,y2]}.'
        ),
        "multilingual": "Free OCR. Detect the language automatically and output in the same script.",
        "describe": "Describe this image. Focus on visible key elements.",
    }

    if mode == "kv_json":
        schema_text = schema.strip() if schema else "{}"
        instruction = (
            "Extract key fields and return strict JSON only. "
            f"Use this schema (fill the values): {schema_text}"
        )
    elif mode == "find_ref":
        # A blank search term falls back to "Total".
        key = (find_term or "").strip() or "Total"
        instruction = f"Locate <|ref|>{key}<|/ref|> in the image."
    elif mode == "freeform":
        instruction = user_prompt.strip() if user_prompt else fallback
    else:
        instruction = fixed_wording.get(mode, fallback)

    if include_caption and mode != "describe":
        instruction = instruction + "\nThen add a one-paragraph description of the image."

    return instruction
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Grounding parser (DeepSeek-specific; no-op on plain text)
|
||||||
|
# =============================================================================
|
||||||
|
DET_BLOCK = re.compile(
|
||||||
|
r"<\|ref\|>(?P<label>.*?)<\|/ref\|>\s*<\|det\|>\s*(?P<coords>\[.*\])\s*<\|/det\|>",
|
||||||
|
re.DOTALL,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def clean_grounding_text(text: str) -> str:
    """Strip grounding markup from model output for display, keeping the labels.

    Every ``<|ref|>LABEL<|/ref|><|det|>[...]<|/det|>`` block is replaced by its
    ``LABEL``; any ``<|grounding|>`` marker is removed; the result is stripped of
    surrounding whitespace. Text without grounding tags is returned unchanged
    apart from that strip. ``None`` is treated as an empty string.
    """
    # The coords part must be lazy: with DOTALL a greedy ``\[.*\]`` would span from
    # the first block's "[" to the last block's "]" and swallow all text in between.
    cleaned = re.sub(
        r"<\|ref\|>(.*?)<\|/ref\|>\s*<\|det\|>\s*\[.*?\]\s*<\|/det\|>",
        r"\1",
        text or "",
        flags=re.DOTALL,
    )
    cleaned = cleaned.replace("<|grounding|>", "")
    return cleaned.strip()
|
||||||
|
|
||||||
|
|
||||||
|
def parse_detections(text: str, image_width: int, image_height: int) -> List[Dict[str, Any]]:
    """Extract labelled boxes from grounding markup and convert them to pixels.

    Each ``DET_BLOCK`` match contributes its label and a coordinate literal that
    is either a single ``[x1, y1, x2, y2]`` list or a list of such boxes. Values
    are divided by 999 and multiplied by the image width (x) or height (y).
    A block whose coordinates cannot be parsed is reported with ``print`` and
    skipped; boxes already collected are kept.

    Returns:
        A list of ``{"label": str, "box": [x1, y1, x2, y2]}`` dicts (ints).
    """
    import ast

    def _to_pixels(value, extent):
        # Same arithmetic order as the inline form: normalise, scale, truncate.
        return int(float(value) / 999 * extent)

    detections: List[Dict[str, Any]] = []
    for match in DET_BLOCK.finditer(text or ""):
        label = match.group("label").strip()
        raw_coords = match.group("coords").strip()

        try:
            parsed = ast.literal_eval(raw_coords)
            if not isinstance(parsed, list):
                raise ValueError("Unsupported coords structure")

            # A flat list of exactly four numbers is one box; anything else is
            # treated as a list of candidate boxes.
            is_single_box = len(parsed) == 4 and all(
                isinstance(n, (int, float)) for n in parsed
            )
            candidates = [parsed] if is_single_box else parsed

            for candidate in candidates:
                if not isinstance(candidate, (list, tuple)) or len(candidate) < 4:
                    continue
                x1 = _to_pixels(candidate[0], image_width)
                y1 = _to_pixels(candidate[1], image_height)
                x2 = _to_pixels(candidate[2], image_width)
                y2 = _to_pixels(candidate[3], image_height)
                detections.append({"label": label, "box": [x1, y1, x2, y2]})
        except Exception as e:
            print(f"❌ Grounding parse failed: {e}")
            continue

    return detections
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Providers
|
||||||
|
# =============================================================================
|
||||||
|
# Mode identifiers grouped as "grounding" modes.
# NOTE(review): the consumers of this set are outside this chunk — confirm how it
# is used before adding or removing a mode.
GROUNDING_MODES = {"find_ref", "layout_map", "pii_redact"}
|
||||||
|
|
||||||
|
|
||||||
|
class ProviderError(Exception):
    """Raised when a provider cannot fulfil a request.

    Examples in this module: the Ollama backend is unreachable or answers with
    an HTTP error, or an unknown model id is requested from the registry.
    """
|
||||||
|
|
||||||
|
|
||||||
|
class OCRProvider(ABC):
    """Abstract backend that turns an image plus an OCR request into raw model text.

    Concrete providers set ``id``, ``label`` and ``capabilities`` and implement
    :meth:`run`.
    """

    # Identifier used to select this provider.
    id: str
    # Human-readable name for display.
    label: str
    # Feature flags describing what this provider supports.
    capabilities: Dict[str, Any]

    @abstractmethod
    def run(
        self,
        image_path: str,
        *,
        mode: str,
        prompt: str,
        grounding: bool,
        find_term: Optional[str],
        schema: Optional[str],
        include_caption: bool,
        options: Dict[str, Any],
    ) -> str:
        """Run the model on ``image_path`` and return its raw text output.

        All request fields after ``image_path`` are keyword-only.
        """

    def info(self) -> Dict[str, Any]:
        """Return this provider's ``id``, ``label`` and ``capabilities`` as a dict."""
        return dict(id=self.id, label=self.label, capabilities=self.capabilities)
|
||||||
|
|
||||||
|
|
||||||
|
class DeepSeekLocalProvider(OCRProvider):
    """DeepSeek-OCR served by a local Hugging Face transformers model.

    The model and tokenizer are not loaded at construction time; the first call
    to :meth:`run` loads them and moves the model to ``"cuda"``.
    """

    def __init__(self):
        self._model = None
        self._tokenizer = None
        self.id = "deepseek-local"
        self.label = "DeepSeek-OCR (local GPU)"
        self.capabilities = {"grounding": True, "advanced_settings": True}

    @property
    def loaded(self) -> bool:
        """True once both the model and the tokenizer are available."""
        return not (self._model is None or self._tokenizer is None)

    def _ensure_loaded(self):
        """Load tokenizer and model on first use; no-op when already loaded."""
        if self.loaded:
            return

        # Heavy imports kept local so an Ollama-only deployment never needs torch.
        import torch
        from transformers import AutoModel, AutoTokenizer

        os.environ.pop("TRANSFORMERS_CACHE", None)
        model_name = env_config("MODEL_NAME", default="deepseek-ai/DeepSeek-OCR")
        hf_home = env_config("HF_HOME", default="/models")
        os.makedirs(hf_home, exist_ok=True)

        print(f"🚀 Loading {model_name}...")
        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
        model = AutoModel.from_pretrained(
            model_name,
            trust_remote_code=True,
            use_safetensors=True,
            attn_implementation="eager",
            torch_dtype=torch.bfloat16,
        )
        model = model.eval().to("cuda")

        # Best-effort pad-token setup: reuse EOS as PAD on the tokenizer when no
        # pad token is set, then mirror the pad id onto the model config.
        # Failures here are deliberately ignored.
        try:
            tokenizer_lacks_pad = getattr(tokenizer, "pad_token_id", None) is None
            if tokenizer_lacks_pad and getattr(tokenizer, "eos_token_id", None) is not None:
                tokenizer.pad_token = tokenizer.eos_token
            if getattr(model.config, "pad_token_id", None) is None:
                if getattr(tokenizer, "pad_token_id", None) is not None:
                    model.config.pad_token_id = tokenizer.pad_token_id
        except Exception:
            pass

        self._model = model
        self._tokenizer = tokenizer
        print("✅ DeepSeek-OCR loaded and ready!")

    @staticmethod
    def _result_to_text(res) -> str:
        """Normalise the value returned by ``infer`` into a stripped string."""
        if isinstance(res, str):
            return res.strip()
        if isinstance(res, dict) and "text" in res:
            return str(res["text"]).strip()
        if isinstance(res, (list, tuple)):
            return "\n".join(map(str, res)).strip()
        return ""

    def run(self, image_path, *, mode, prompt, grounding, find_term, schema, include_caption, options):
        """Build the DeepSeek prompt, run inference and return the model text.

        ``options`` may carry ``base_size``, ``image_size``, ``crop_mode`` and
        ``test_compress``; missing keys fall back to 1024, 640, True and False.
        If the inference call yields no text, ``result.mmd`` in the temporary
        output directory is read instead when it exists. The temporary directory
        is always removed.
        """
        self._ensure_loaded()

        prompt_text = build_prompt(
            mode=mode,
            user_prompt=prompt,
            grounding=grounding,
            find_term=find_term,
            schema=schema,
            include_caption=include_caption,
        )

        out_dir = tempfile.mkdtemp(prefix="dsocr_")
        try:
            res = self._model.infer(
                self._tokenizer,
                prompt=prompt_text,
                image_file=image_path,
                output_path=out_dir,
                base_size=int(options.get("base_size", 1024)),
                image_size=int(options.get("image_size", 640)),
                crop_mode=bool(options.get("crop_mode", True)),
                save_results=False,
                test_compress=bool(options.get("test_compress", False)),
                eval_mode=True,
            )

            text = self._result_to_text(res)
            if text:
                return text

            # Nothing usable was returned: fall back to the file in out_dir, if any.
            mmd = os.path.join(out_dir, "result.mmd")
            if os.path.exists(mmd):
                with open(mmd, "r", encoding="utf-8") as fh:
                    text = fh.read().strip()
            return text
        finally:
            shutil.rmtree(out_dir, ignore_errors=True)
|
||||||
|
|
||||||
|
|
||||||
|
class OllamaProvider(OCRProvider):
    """A single vision model served by an external Ollama host.

    Args:
        tag: Ollama model tag; also used to derive the provider id ``ollama:<tag>``.
        base_url: Base URL of the Ollama host; a trailing slash is removed.
        label: Optional display label; defaults to ``"<tag> (Ollama)"``.
    """

    def __init__(self, tag: str, base_url: str, label: Optional[str] = None):
        self.tag = tag
        self.base_url = base_url.rstrip("/")
        self.id = f"ollama:{tag}"
        self.label = label or f"{tag} (Ollama)"
        # Generic vision models don't emit DeepSeek grounding tokens.
        self.capabilities = {"grounding": False, "advanced_settings": False}

    def run(self, image_path, *, mode, prompt, grounding, find_term, schema, include_caption, options):
        """Send the image and prompt to ``<base_url>/api/generate`` and return the reply text.

        The image is sent base64-encoded in a non-streaming request. ``grounding``
        and ``options`` are accepted for interface compatibility and not used.

        Returns:
            The stripped ``response`` field of the reply, or ``""`` when absent.

        Raises:
            ProviderError: httpx is unavailable, the host cannot be reached, the
                host answers with an HTTP error status, or the reply body is not
                a JSON object.
        """
        if httpx is None:
            raise ProviderError("httpx is not installed; cannot reach Ollama.")

        prompt_text = build_ollama_prompt(
            mode=mode,
            user_prompt=prompt,
            find_term=find_term,
            schema=schema,
            include_caption=include_caption,
        )

        with open(image_path, "rb") as f:
            img_b64 = base64.b64encode(f.read()).decode("utf-8")

        payload = {
            "model": self.tag,
            "prompt": prompt_text,
            "images": [img_b64],
            "stream": False,
        }

        timeout = float(env_config("OLLAMA_TIMEOUT", default=300.0, cast=float))
        try:
            resp = httpx.post(f"{self.base_url}/api/generate", json=payload, timeout=timeout)
            resp.raise_for_status()
        except httpx.HTTPStatusError as e:
            detail = ""
            try:
                detail = e.response.json().get("error", "")
            except Exception:
                # Error body was not a JSON object; fall back to a raw excerpt.
                detail = e.response.text[:200]
            raise ProviderError(f"Ollama returned {e.response.status_code}: {detail}") from e
        except httpx.HTTPError as e:
            raise ProviderError(f"Could not reach Ollama at {self.base_url}: {e}") from e

        # A 2xx reply can still carry a non-JSON body (e.g. a proxy's HTML page).
        # Decoding errors are ValueError subclasses; surface them as ProviderError
        # so callers only have one exception type to handle.
        try:
            data = resp.json()
        except ValueError as e:
            raise ProviderError(f"Ollama at {self.base_url} returned a non-JSON response.") from e
        if not isinstance(data, dict):
            raise ProviderError(f"Ollama at {self.base_url} returned an unexpected JSON payload.")

        return (data.get("response") or "").strip()
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Registry
|
||||||
|
# =============================================================================
|
||||||
|
class ModelRegistry:
    """Lookup table of OCR providers keyed by provider id, with a default entry."""

    def __init__(self, providers: List[OCRProvider], default_id: str):
        """Index ``providers`` by id and resolve the default.

        If ``default_id`` is not among the registered ids, the first registered
        provider's id is used; with no providers the default is ``None``.
        """
        index: Dict[str, OCRProvider] = {}
        for provider in providers:
            index[provider.id] = provider
        self._providers = index

        if default_id in index:
            self.default_id = default_id
        else:
            # Configured default is not registered: use the first provider, if any.
            self.default_id = next(iter(index), None)

    def get(self, model_id: Optional[str]) -> OCRProvider:
        """Return the provider for ``model_id``, or the default when it is falsy.

        Raises:
            ProviderError: no provider is registered under the chosen id.
        """
        chosen = model_id or self.default_id
        provider = self._providers.get(chosen)
        if provider is not None:
            return provider
        raise ProviderError(f"Unknown model '{chosen}'.")

    def list_models(self) -> List[Dict[str, Any]]:
        """Return every provider's ``info()`` dict with an added ``default`` flag."""

        def _describe(provider):
            entry = provider.info()
            entry["default"] = (provider.id == self.default_id)
            return entry

        return [_describe(p) for p in self._providers.values()]
|
||||||
|
|
||||||
|
|
||||||
|
def build_registry() -> ModelRegistry:
    """Assemble the provider registry from configuration values.

    Env:
        ENABLE_DEEPSEEK_LOCAL - register the local DeepSeek-OCR model (default: true);
                                accepted truthy spellings are "1", "true", "yes"
        OLLAMA_BASE_URL       - Ollama host (default: http://host.docker.internal:11434)
        OLLAMA_MODELS         - comma-separated tags to surface (e.g. "glm-ocr,llama3.2-vision")
        DEFAULT_OCR_MODEL     - id to select by default (default: deepseek-local)

    When nothing ends up registered, the local DeepSeek provider is registered
    anyway and made the default so the application can still start.
    """
    providers: List[OCRProvider] = []

    deepseek_flag = env_config("ENABLE_DEEPSEEK_LOCAL", default="true")
    if deepseek_flag.strip().lower() in {"1", "true", "yes"}:
        providers.append(DeepSeekLocalProvider())

    base_url = env_config("OLLAMA_BASE_URL", default="http://host.docker.internal:11434")
    raw_tags = env_config("OLLAMA_MODELS", default="")
    # Blank entries (e.g. from a trailing comma) are dropped.
    stripped_tags = (piece.strip() for piece in raw_tags.split(","))
    providers.extend(
        OllamaProvider(tag=tag, base_url=base_url) for tag in stripped_tags if tag
    )

    default_id = env_config("DEFAULT_OCR_MODEL", default="deepseek-local")
    if len(providers) == 0:
        # Defensive: nothing configured. Register DeepSeek so the app still starts.
        providers.append(DeepSeekLocalProvider())
        default_id = "deepseek-local"

    registry = ModelRegistry(providers, default_id)
    print(f"🧠 OCR models registered: {[p.id for p in providers]} (default: {registry.default_id})")
    return registry
|
||||||
@@ -15,3 +15,5 @@ PyMuPDF>=1.23.0
|
|||||||
img2pdf>=0.5.0
|
img2pdf>=0.5.0
|
||||||
python-docx>=1.1.0
|
python-docx>=1.1.0
|
||||||
markdown>=3.5.0
|
markdown>=3.5.0
|
||||||
|
psycopg2-binary>=2.9.0
|
||||||
|
httpx>=0.27.0
|
||||||
|
|||||||
@@ -1,4 +1,19 @@
|
|||||||
services:
|
services:
|
||||||
|
postgres:
|
||||||
|
image: postgres:16-alpine
|
||||||
|
container_name: deepseek-ocr-postgres
|
||||||
|
environment:
|
||||||
|
POSTGRES_USER: ${POSTGRES_USER:-ocr_user}
|
||||||
|
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-ocr_password}
|
||||||
|
POSTGRES_DB: ${POSTGRES_DB:-ocr_db}
|
||||||
|
volumes:
|
||||||
|
- postgres_data:/var/lib/postgresql/data
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-ocr_user} -d ${POSTGRES_DB:-ocr_db}"]
|
||||||
|
interval: 5s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 10
|
||||||
|
|
||||||
backend:
|
backend:
|
||||||
build: ./backend
|
build: ./backend
|
||||||
container_name: deepseek-ocr-backend
|
container_name: deepseek-ocr-backend
|
||||||
@@ -10,8 +25,23 @@ services:
|
|||||||
API_HOST: ${API_HOST:-0.0.0.0}
|
API_HOST: ${API_HOST:-0.0.0.0}
|
||||||
API_PORT: ${API_PORT:-8000}
|
API_PORT: ${API_PORT:-8000}
|
||||||
MAX_UPLOAD_SIZE_MB: ${MAX_UPLOAD_SIZE_MB:-100}
|
MAX_UPLOAD_SIZE_MB: ${MAX_UPLOAD_SIZE_MB:-100}
|
||||||
|
DATABASE_URL: ${DATABASE_URL:-postgresql://ocr_user:ocr_password@postgres:5432/ocr_db}
|
||||||
|
OCR_IMAGES_DIR: ${OCR_IMAGES_DIR:-/data/ocr_images}
|
||||||
|
ENABLE_DEEPSEEK_LOCAL: ${ENABLE_DEEPSEEK_LOCAL:-true}
|
||||||
|
OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-http://host.docker.internal:11434}
|
||||||
|
OLLAMA_MODELS: ${OLLAMA_MODELS:-}
|
||||||
|
DEFAULT_OCR_MODEL: ${DEFAULT_OCR_MODEL:-deepseek-local}
|
||||||
|
OLLAMA_TIMEOUT: ${OLLAMA_TIMEOUT:-300}
|
||||||
|
# Lets the container reach an Ollama server running on the Docker host
|
||||||
|
# (works out of the box on Docker Desktop; required for Linux engines).
|
||||||
|
extra_hosts:
|
||||||
|
- "host.docker.internal:host-gateway"
|
||||||
volumes:
|
volumes:
|
||||||
- ./models:/models
|
- ./models:/models
|
||||||
|
- ./ocr_images:/data/ocr_images
|
||||||
|
depends_on:
|
||||||
|
postgres:
|
||||||
|
condition: service_healthy
|
||||||
deploy:
|
deploy:
|
||||||
resources:
|
resources:
|
||||||
reservations:
|
reservations:
|
||||||
@@ -22,8 +52,6 @@ services:
|
|||||||
shm_size: "4g"
|
shm_size: "4g"
|
||||||
ports:
|
ports:
|
||||||
- "${API_PORT:-8000}:${API_PORT:-8000}"
|
- "${API_PORT:-8000}:${API_PORT:-8000}"
|
||||||
networks:
|
|
||||||
- ocr-network
|
|
||||||
|
|
||||||
frontend:
|
frontend:
|
||||||
build: ./frontend
|
build: ./frontend
|
||||||
@@ -32,9 +60,10 @@ services:
|
|||||||
- "${FRONTEND_PORT:-3000}:80"
|
- "${FRONTEND_PORT:-3000}:80"
|
||||||
depends_on:
|
depends_on:
|
||||||
- backend
|
- backend
|
||||||
networks:
|
|
||||||
- ocr-network
|
volumes:
|
||||||
|
postgres_data:
|
||||||
|
|
||||||
networks:
|
networks:
|
||||||
ocr-network:
|
default:
|
||||||
driver: bridge
|
name: rw-research
|
||||||
|
|||||||
@@ -1,18 +1,35 @@
|
|||||||
import { useState, useCallback } from 'react'
|
import { useState, useCallback, useEffect } from 'react'
|
||||||
|
import { useSuggestions } from './hooks/useSuggestions'
|
||||||
|
import { useModels } from './hooks/useModels'
|
||||||
import { motion, AnimatePresence } from 'framer-motion'
|
import { motion, AnimatePresence } from 'framer-motion'
|
||||||
import { Sparkles, Zap, Loader2, Settings, Image as ImageIcon, FileText } from 'lucide-react'
|
import {
|
||||||
|
Sparkles, Zap, Loader2, Settings, Image as ImageIcon, FileText,
|
||||||
|
Layers, ChevronLeft, CheckCircle2, Database,
|
||||||
|
} from 'lucide-react'
|
||||||
import ImageUpload from './components/ImageUpload'
|
import ImageUpload from './components/ImageUpload'
|
||||||
import ModeSelector from './components/ModeSelector'
|
import ModeSelector from './components/ModeSelector'
|
||||||
|
import ModelSelector from './components/ModelSelector'
|
||||||
import ResultPanel from './components/ResultPanel'
|
import ResultPanel from './components/ResultPanel'
|
||||||
import AdvancedSettings from './components/AdvancedSettings'
|
import AdvancedSettings from './components/AdvancedSettings'
|
||||||
import PDFProcessor from './components/PDFProcessor'
|
import PDFProcessor from './components/PDFProcessor'
|
||||||
|
import MetadataForm from './components/MetadataForm'
|
||||||
|
import JobsPanel from './components/JobsPanel'
|
||||||
import axios from 'axios'
|
import axios from 'axios'
|
||||||
|
|
||||||
const API_BASE = import.meta.env.VITE_API_URL || '/api'
|
const API_BASE = import.meta.env.VITE_API_URL || '/api'
|
||||||
|
|
||||||
|
const INPUT_CLASS =
|
||||||
|
'w-full bg-white/5 border border-white/10 rounded-lg px-3 py-2 text-sm text-gray-200 ' +
|
||||||
|
'placeholder-gray-600 focus:outline-none focus:border-purple-500/50 transition-colors'
|
||||||
|
|
||||||
function App() {
|
function App() {
|
||||||
|
const [view, setView] = useState('new_job')
|
||||||
|
|
||||||
|
// OCR state
|
||||||
|
const { models, loading: modelsLoading } = useModels()
|
||||||
|
const [model, setModel] = useState(null)
|
||||||
const [mode, setMode] = useState('plain_ocr')
|
const [mode, setMode] = useState('plain_ocr')
|
||||||
const [fileType, setFileType] = useState('image') // 'image' or 'pdf'
|
const [fileType, setFileType] = useState('image')
|
||||||
const [image, setImage] = useState(null)
|
const [image, setImage] = useState(null)
|
||||||
const [imagePreview, setImagePreview] = useState(null)
|
const [imagePreview, setImagePreview] = useState(null)
|
||||||
const [result, setResult] = useState(null)
|
const [result, setResult] = useState(null)
|
||||||
@@ -21,22 +38,39 @@ function App() {
|
|||||||
const [showAdvanced, setShowAdvanced] = useState(false)
|
const [showAdvanced, setShowAdvanced] = useState(false)
|
||||||
const [includeCaption, setIncludeCaption] = useState(false)
|
const [includeCaption, setIncludeCaption] = useState(false)
|
||||||
|
|
||||||
// Form state
|
|
||||||
const [prompt, setPrompt] = useState('')
|
const [prompt, setPrompt] = useState('')
|
||||||
const [findTerm, setFindTerm] = useState('')
|
const [findTerm, setFindTerm] = useState('')
|
||||||
const [advancedSettings, setAdvancedSettings] = useState({
|
const [advancedSettings, setAdvancedSettings] = useState({
|
||||||
base_size: 1024,
|
base_size: 1024, image_size: 640, crop_mode: true, test_compress: false,
|
||||||
image_size: 640,
|
|
||||||
crop_mode: true,
|
|
||||||
test_compress: false
|
|
||||||
})
|
})
|
||||||
|
|
||||||
const handleFileTypeChange = useCallback((newType) => {
|
const suggestions = useSuggestions()
|
||||||
// Clear current file when switching types
|
|
||||||
setImage(null)
|
const [metadata, setMetadata] = useState({ author: '', book: '', chapter: '', page: '' })
|
||||||
if (imagePreview) {
|
// Results accumulated per mode: { plain_ocr: 'text', describe: 'text', freeform: 'text' }
|
||||||
URL.revokeObjectURL(imagePreview)
|
const [modeResults, setModeResults] = useState({})
|
||||||
|
const [editedResults, setEditedResults] = useState({})
|
||||||
|
const [activeResultMode, setActiveResultMode] = useState(null)
|
||||||
|
const [commitLoading, setCommitLoading] = useState(false)
|
||||||
|
const [commitResult, setCommitResult] = useState(null)
|
||||||
|
|
||||||
|
// Modes that produce editable text output and can be committed to the DB
|
||||||
|
const COMMITTABLE_MODES = new Set(['plain_ocr', 'describe'])
|
||||||
|
const MODE_LABELS = { plain_ocr: 'OCR Text', describe: 'Description' }
|
||||||
|
|
||||||
|
// Pick the default model once the list loads
|
||||||
|
useEffect(() => {
|
||||||
|
if (!model && models.length > 0) {
|
||||||
|
setModel((models.find(m => m.default) || models[0]).id)
|
||||||
}
|
}
|
||||||
|
}, [models, model])
|
||||||
|
|
||||||
|
// Show the full-screen result view once at least one committable mode has a result
|
||||||
|
const showResultView = view === 'new_job' && Object.keys(modeResults).length > 0
|
||||||
|
|
||||||
|
const handleFileTypeChange = useCallback((newType) => {
|
||||||
|
setImage(null)
|
||||||
|
if (imagePreview) URL.revokeObjectURL(imagePreview)
|
||||||
setImagePreview(null)
|
setImagePreview(null)
|
||||||
setError(null)
|
setError(null)
|
||||||
setResult(null)
|
setResult(null)
|
||||||
@@ -45,42 +79,38 @@ function App() {
|
|||||||
|
|
||||||
const handleImageSelect = useCallback((file) => {
|
const handleImageSelect = useCallback((file) => {
|
||||||
if (file === null) {
|
if (file === null) {
|
||||||
// Clear everything when removing image
|
|
||||||
setImage(null)
|
setImage(null)
|
||||||
if (imagePreview && fileType === 'image') {
|
if (imagePreview && fileType === 'image') URL.revokeObjectURL(imagePreview)
|
||||||
URL.revokeObjectURL(imagePreview)
|
|
||||||
}
|
|
||||||
setImagePreview(null)
|
setImagePreview(null)
|
||||||
setError(null)
|
setError(null)
|
||||||
setResult(null)
|
setResult(null)
|
||||||
|
setModeResults({})
|
||||||
|
setEditedResults({})
|
||||||
|
setActiveResultMode(null)
|
||||||
|
setCommitResult(null)
|
||||||
} else {
|
} else {
|
||||||
setImage(file)
|
setImage(file)
|
||||||
// Only create preview URL for images, not PDFs
|
setImagePreview(fileType === 'image' ? URL.createObjectURL(file) : file)
|
||||||
if (fileType === 'image') {
|
|
||||||
setImagePreview(URL.createObjectURL(file))
|
|
||||||
} else {
|
|
||||||
setImagePreview(file) // Just store the file for PDFs
|
|
||||||
}
|
|
||||||
setError(null)
|
setError(null)
|
||||||
setResult(null)
|
setResult(null)
|
||||||
|
setModeResults({})
|
||||||
|
setEditedResults({})
|
||||||
|
setActiveResultMode(null)
|
||||||
|
setCommitResult(null)
|
||||||
}
|
}
|
||||||
}, [imagePreview, fileType])
|
}, [imagePreview, fileType])
|
||||||
|
|
||||||
const handleSubmit = async () => {
|
const handleSubmit = async () => {
|
||||||
if (!image) {
|
if (!image) { setError('Please upload an image first'); return }
|
||||||
setError('Please upload an image first')
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
setLoading(true)
|
setLoading(true)
|
||||||
setError(null)
|
setError(null)
|
||||||
|
setCommitResult(null)
|
||||||
try {
|
try {
|
||||||
const formData = new FormData()
|
const formData = new FormData()
|
||||||
formData.append('image', image)
|
formData.append('image', image)
|
||||||
|
if (model) formData.append('model', model)
|
||||||
formData.append('mode', mode)
|
formData.append('mode', mode)
|
||||||
formData.append('prompt', prompt)
|
formData.append('prompt', prompt)
|
||||||
// Enable grounding only for find mode
|
|
||||||
formData.append('grounding', mode === 'find_ref')
|
formData.append('grounding', mode === 'find_ref')
|
||||||
formData.append('include_caption', includeCaption)
|
formData.append('include_caption', includeCaption)
|
||||||
formData.append('find_term', findTerm)
|
formData.append('find_term', findTerm)
|
||||||
@@ -91,12 +121,16 @@ function App() {
|
|||||||
formData.append('test_compress', advancedSettings.test_compress)
|
formData.append('test_compress', advancedSettings.test_compress)
|
||||||
|
|
||||||
const response = await axios.post(`${API_BASE}/ocr`, formData, {
|
const response = await axios.post(`${API_BASE}/ocr`, formData, {
|
||||||
headers: {
|
headers: { 'Content-Type': 'multipart/form-data' },
|
||||||
'Content-Type': 'multipart/form-data',
|
|
||||||
},
|
|
||||||
})
|
})
|
||||||
|
|
||||||
setResult(response.data)
|
setResult(response.data)
|
||||||
|
if (COMMITTABLE_MODES.has(mode)) {
|
||||||
|
const text = response.data.text || ''
|
||||||
|
setModeResults(prev => ({ ...prev, [mode]: text }))
|
||||||
|
setEditedResults(prev => ({ ...prev, [mode]: text }))
|
||||||
|
setActiveResultMode(mode)
|
||||||
|
}
|
||||||
|
setCommitResult(null)
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
setError(err.response?.data?.detail || err.message || 'An error occurred')
|
setError(err.response?.data?.detail || err.message || 'An error occurred')
|
||||||
} finally {
|
} finally {
|
||||||
@@ -104,31 +138,61 @@ function App() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const handleCopy = useCallback(() => {
|
const handleNewAnalysis = () => {
|
||||||
if (result?.text) {
|
setResult(null)
|
||||||
navigator.clipboard.writeText(result.text)
|
setModeResults({})
|
||||||
|
setEditedResults({})
|
||||||
|
setActiveResultMode(null)
|
||||||
|
setCommitResult(null)
|
||||||
|
}
|
||||||
|
|
||||||
|
const handleCommitJob = useCallback(async () => {
|
||||||
|
if (!image) return
|
||||||
|
setCommitLoading(true)
|
||||||
|
setCommitResult(null)
|
||||||
|
try {
|
||||||
|
const formData = new FormData()
|
||||||
|
formData.append('image', image)
|
||||||
|
formData.append('author', metadata.author)
|
||||||
|
formData.append('book', metadata.book)
|
||||||
|
formData.append('chapter', metadata.chapter)
|
||||||
|
formData.append('page', metadata.page)
|
||||||
|
formData.append('ocr_text', editedResults.plain_ocr || '')
|
||||||
|
formData.append('describe_text', editedResults.describe || '')
|
||||||
|
formData.append('freeform_text', editedResults.freeform || '')
|
||||||
|
formData.append('mode', mode)
|
||||||
|
if (model) formData.append('ocr_model', model)
|
||||||
|
|
||||||
|
const response = await axios.post(`${API_BASE}/jobs`, formData, {
|
||||||
|
headers: { 'Content-Type': 'multipart/form-data' },
|
||||||
|
})
|
||||||
|
setCommitResult({ success: true, job: response.data })
|
||||||
|
} catch (err) {
|
||||||
|
setCommitResult({ success: false, error: err.response?.data?.detail || err.message })
|
||||||
|
} finally {
|
||||||
|
setCommitLoading(false)
|
||||||
}
|
}
|
||||||
}, [result])
|
}, [image, editedResults, metadata, mode, model])
|
||||||
|
|
||||||
|
const handleCopy = useCallback(() => {
|
||||||
|
const text = (activeResultMode && editedResults[activeResultMode]) || result?.text
|
||||||
|
if (text) navigator.clipboard.writeText(text)
|
||||||
|
}, [activeResultMode, editedResults, result])
|
||||||
|
|
||||||
const handleDownload = useCallback(() => {
|
const handleDownload = useCallback(() => {
|
||||||
if (!result?.text) return
|
const text = (activeResultMode && editedResults[activeResultMode]) || result?.text
|
||||||
|
if (!text) return
|
||||||
const extensions = {
|
const ext = { plain_ocr: 'txt', describe: 'txt', find_ref: 'txt', freeform: 'txt' }[mode] || 'txt'
|
||||||
plain_ocr: 'txt',
|
const blob = new Blob([text], { type: 'text/plain' })
|
||||||
describe: 'txt',
|
|
||||||
find_ref: 'txt',
|
|
||||||
freeform: 'txt',
|
|
||||||
}
|
|
||||||
|
|
||||||
const ext = extensions[mode] || 'txt'
|
|
||||||
const blob = new Blob([result.text], { type: 'text/plain' })
|
|
||||||
const url = URL.createObjectURL(blob)
|
const url = URL.createObjectURL(blob)
|
||||||
const a = document.createElement('a')
|
const a = document.createElement('a')
|
||||||
a.href = url
|
a.href = url
|
||||||
a.download = `deepseek-ocr-result.${ext}`
|
a.download = `deepseek-ocr-result.${ext}`
|
||||||
a.click()
|
a.click()
|
||||||
URL.revokeObjectURL(url)
|
URL.revokeObjectURL(url)
|
||||||
}, [result, mode])
|
}, [activeResultMode, editedResults, result, mode])
|
||||||
|
|
||||||
|
const metaField = (key) => (e) => setMetadata(m => ({ ...m, [key]: e.target.value }))
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="min-h-screen relative overflow-hidden">
|
<div className="min-h-screen relative overflow-hidden">
|
||||||
@@ -138,27 +202,13 @@ function App() {
|
|||||||
<div className="absolute inset-0 bg-[url('data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iNjAiIGhlaWdodD0iNjAiIHZpZXdCb3g9IjAgMCA2MCA2MCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48ZyBmaWxsPSJub25lIiBmaWxsLXJ1bGU9ImV2ZW5vZGQiPjxwYXRoIGQ9Ik0zNiAxOGMzLjMxIDAgNiAyLjY5IDYgNnMtMi42OSA2LTYgNi02LTIuNjktNi02IDIuNjktNiA2LTZ6TTI0IDZjMy4zMSAwIDYgMi42OSA2IDZzLTIuNjkgNi02IDYtNi0yLjY5LTYtNiAyLjY5LTYgNi02ek00OCAzNmMzLjMxIDAgNiAyLjY5IDYgNnMtMi42OSA2LTYgNi02LTIuNjktNi02IDIuNjktNiA2LTZ6IiBzdHJva2U9InJnYmEoMTQ3LCA1MSwgMjM0LCAwLjEpIiBzdHJva2Utd2lkdGg9IjIiLz48L2c+PC9zdmc+')] opacity-30" />
|
<div className="absolute inset-0 bg-[url('data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iNjAiIGhlaWdodD0iNjAiIHZpZXdCb3g9IjAgMCA2MCA2MCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48ZyBmaWxsPSJub25lIiBmaWxsLXJ1bGU9ImV2ZW5vZGQiPjxwYXRoIGQ9Ik0zNiAxOGMzLjMxIDAgNiAyLjY5IDYgNnMtMi42OSA2LTYgNi02LTIuNjktNi02IDIuNjktNiA2LTZ6TTI0IDZjMy4zMSAwIDYgMi42OSA2IDZzLTIuNjkgNi02IDYtNi0yLjY5LTYtNiAyLjY5LTYgNi02ek00OCAzNmMzLjMxIDAgNiAyLjY5IDYgNnMtMi42OSA2LTYgNi02LTIuNjktNi02IDIuNjktNiA2LTZ6IiBzdHJva2U9InJnYmEoMTQ3LCA1MSwgMjM0LCAwLjEpIiBzdHJva2Utd2lkdGg9IjIiLz48L2c+PC9zdmc+')] opacity-30" />
|
||||||
<motion.div
|
<motion.div
|
||||||
className="absolute top-20 left-20 w-96 h-96 bg-purple-500/10 rounded-full blur-3xl"
|
className="absolute top-20 left-20 w-96 h-96 bg-purple-500/10 rounded-full blur-3xl"
|
||||||
animate={{
|
animate={{ scale: [1, 1.2, 1], opacity: [0.3, 0.5, 0.3] }}
|
||||||
scale: [1, 1.2, 1],
|
transition={{ duration: 8, repeat: Infinity, ease: 'easeInOut' }}
|
||||||
opacity: [0.3, 0.5, 0.3],
|
|
||||||
}}
|
|
||||||
transition={{
|
|
||||||
duration: 8,
|
|
||||||
repeat: Infinity,
|
|
||||||
ease: "easeInOut"
|
|
||||||
}}
|
|
||||||
/>
|
/>
|
||||||
<motion.div
|
<motion.div
|
||||||
className="absolute bottom-20 right-20 w-96 h-96 bg-cyan-500/10 rounded-full blur-3xl"
|
className="absolute bottom-20 right-20 w-96 h-96 bg-cyan-500/10 rounded-full blur-3xl"
|
||||||
animate={{
|
animate={{ scale: [1.2, 1, 1.2], opacity: [0.5, 0.3, 0.5] }}
|
||||||
scale: [1.2, 1, 1.2],
|
transition={{ duration: 8, repeat: Infinity, ease: 'easeInOut' }}
|
||||||
opacity: [0.5, 0.3, 0.5],
|
|
||||||
}}
|
|
||||||
transition={{
|
|
||||||
duration: 8,
|
|
||||||
repeat: Infinity,
|
|
||||||
ease: "easeInOut"
|
|
||||||
}}
|
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@@ -166,11 +216,7 @@ function App() {
|
|||||||
<header className="sticky top-0 z-50 glass border-b border-white/10">
|
<header className="sticky top-0 z-50 glass border-b border-white/10">
|
||||||
<div className="max-w-7xl mx-auto px-6 py-4">
|
<div className="max-w-7xl mx-auto px-6 py-4">
|
||||||
<div className="flex items-center justify-between">
|
<div className="flex items-center justify-between">
|
||||||
<motion.div
|
<motion.div className="flex items-center gap-3" initial={{ opacity: 0, x: -20 }} animate={{ opacity: 1, x: 0 }}>
|
||||||
className="flex items-center gap-3"
|
|
||||||
initial={{ opacity: 0, x: -20 }}
|
|
||||||
animate={{ opacity: 1, x: 0 }}
|
|
||||||
>
|
|
||||||
<div className="relative">
|
<div className="relative">
|
||||||
<div className="absolute inset-0 bg-gradient-to-r from-purple-500 to-cyan-500 rounded-xl blur-lg opacity-75" />
|
<div className="absolute inset-0 bg-gradient-to-r from-purple-500 to-cyan-500 rounded-xl blur-lg opacity-75" />
|
||||||
<div className="relative bg-gradient-to-br from-purple-600 to-cyan-500 p-2 rounded-xl">
|
<div className="relative bg-gradient-to-br from-purple-600 to-cyan-500 p-2 rounded-xl">
|
||||||
@@ -182,173 +228,348 @@ function App() {
|
|||||||
<p className="text-xs text-gray-400">Next-Gen Vision AI</p>
|
<p className="text-xs text-gray-400">Next-Gen Vision AI</p>
|
||||||
</div>
|
</div>
|
||||||
</motion.div>
|
</motion.div>
|
||||||
|
|
||||||
|
<nav className="flex gap-2">
|
||||||
|
{showResultView && (
|
||||||
|
<motion.button
|
||||||
|
onClick={handleNewAnalysis}
|
||||||
|
className="flex items-center gap-2 px-4 py-2 rounded-xl text-sm font-medium glass text-gray-400 hover:bg-white/5 transition-all"
|
||||||
|
whileHover={{ scale: 1.02 }} whileTap={{ scale: 0.98 }}
|
||||||
|
>
|
||||||
|
<ChevronLeft className="w-4 h-4" />
|
||||||
|
New Analysis
|
||||||
|
</motion.button>
|
||||||
|
)}
|
||||||
|
<motion.button
|
||||||
|
onClick={() => setView('new_job')}
|
||||||
|
className={`flex items-center gap-2 px-4 py-2 rounded-xl text-sm font-medium transition-all ${view === 'new_job' ? 'bg-gradient-to-r from-purple-600 to-cyan-600 text-white' : 'glass text-gray-400 hover:bg-white/5'}`}
|
||||||
|
whileHover={{ scale: 1.02 }} whileTap={{ scale: 0.98 }}
|
||||||
|
>
|
||||||
|
<Zap className="w-4 h-4" />
|
||||||
|
New Job
|
||||||
|
</motion.button>
|
||||||
|
<motion.button
|
||||||
|
onClick={() => setView('jobs')}
|
||||||
|
className={`flex items-center gap-2 px-4 py-2 rounded-xl text-sm font-medium transition-all ${view === 'jobs' ? 'bg-gradient-to-r from-purple-600 to-cyan-600 text-white' : 'glass text-gray-400 hover:bg-white/5'}`}
|
||||||
|
whileHover={{ scale: 1.02 }} whileTap={{ scale: 0.98 }}
|
||||||
|
>
|
||||||
|
<Layers className="w-4 h-4" />
|
||||||
|
Browse Jobs
|
||||||
|
</motion.button>
|
||||||
|
</nav>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</header>
|
</header>
|
||||||
|
|
||||||
{/* Main Content */}
|
{/* Main Content */}
|
||||||
<main className="max-w-7xl mx-auto px-6 py-8">
|
<main className="max-w-7xl mx-auto px-6 py-6">
|
||||||
<div className="grid lg:grid-cols-2 gap-6">
|
<AnimatePresence>
|
||||||
{/* Left Panel - Upload & Controls */}
|
|
||||||
<motion.div
|
|
||||||
initial={{ opacity: 0, y: 20 }}
|
|
||||||
animate={{ opacity: 1, y: 0 }}
|
|
||||||
transition={{ delay: 0.1 }}
|
|
||||||
className="space-y-6"
|
|
||||||
>
|
|
||||||
{/* File Type Toggle */}
|
|
||||||
<div className="glass p-4 rounded-2xl">
|
|
||||||
<div className="grid grid-cols-2 gap-2">
|
|
||||||
<motion.button
|
|
||||||
onClick={() => handleFileTypeChange('image')}
|
|
||||||
className={`p-3 rounded-xl text-sm font-medium transition-all flex items-center justify-center gap-2 ${
|
|
||||||
fileType === 'image'
|
|
||||||
? 'bg-gradient-to-r from-purple-600 to-cyan-600 text-white'
|
|
||||||
: 'glass text-gray-400 hover:bg-white/5'
|
|
||||||
}`}
|
|
||||||
whileHover={{ scale: 1.02 }}
|
|
||||||
whileTap={{ scale: 0.98 }}
|
|
||||||
>
|
|
||||||
<ImageIcon className="w-4 h-4" />
|
|
||||||
Image OCR
|
|
||||||
</motion.button>
|
|
||||||
<motion.button
|
|
||||||
onClick={() => handleFileTypeChange('pdf')}
|
|
||||||
className={`p-3 rounded-xl text-sm font-medium transition-all flex items-center justify-center gap-2 ${
|
|
||||||
fileType === 'pdf'
|
|
||||||
? 'bg-gradient-to-r from-purple-600 to-cyan-600 text-white'
|
|
||||||
: 'glass text-gray-400 hover:bg-white/5'
|
|
||||||
}`}
|
|
||||||
whileHover={{ scale: 1.02 }}
|
|
||||||
whileTap={{ scale: 0.98 }}
|
|
||||||
>
|
|
||||||
<FileText className="w-4 h-4" />
|
|
||||||
PDF Processing
|
|
||||||
</motion.button>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{/* Mode Selector with integrated inputs */}
|
{/* ── Full-screen OCR result view ── */}
|
||||||
<ModeSelector
|
{showResultView ? (
|
||||||
mode={mode}
|
<motion.div
|
||||||
onModeChange={setMode}
|
key="ocr_result"
|
||||||
prompt={prompt}
|
initial={{ opacity: 0, y: 20 }}
|
||||||
onPromptChange={setPrompt}
|
animate={{ opacity: 1, y: 0 }}
|
||||||
findTerm={findTerm}
|
exit={{ opacity: 0, y: -20 }}
|
||||||
onFindTermChange={setFindTerm}
|
className="flex flex-col gap-4"
|
||||||
/>
|
|
||||||
|
|
||||||
{/* Image/PDF Upload */}
|
|
||||||
<ImageUpload
|
|
||||||
onImageSelect={handleImageSelect}
|
|
||||||
preview={imagePreview}
|
|
||||||
fileType={fileType}
|
|
||||||
/>
|
|
||||||
|
|
||||||
{/* Advanced Settings Toggle */}
|
|
||||||
<motion.button
|
|
||||||
onClick={() => setShowAdvanced(!showAdvanced)}
|
|
||||||
className="w-full glass px-4 py-3 rounded-2xl flex items-center justify-between hover:bg-white/5 transition-colors"
|
|
||||||
whileHover={{ scale: 1.01 }}
|
|
||||||
whileTap={{ scale: 0.99 }}
|
|
||||||
>
|
>
|
||||||
<div className="flex items-center gap-2">
|
{/* Run additional modes */}
|
||||||
<Settings className="w-4 h-4 text-purple-400" />
|
<div className="glass p-4 rounded-2xl flex-shrink-0">
|
||||||
<span className="text-sm font-medium text-gray-300">Advanced Settings</span>
|
<div className="mb-3">
|
||||||
</div>
|
<ModelSelector
|
||||||
<motion.div
|
models={models} value={model} onChange={setModel} loading={modelsLoading}
|
||||||
animate={{ rotate: showAdvanced ? 180 : 0 }}
|
/>
|
||||||
transition={{ duration: 0.3 }}
|
</div>
|
||||||
>
|
<ModeSelector mode={mode} onModeChange={setMode} />
|
||||||
<svg className="w-4 h-4 text-gray-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
<div className="flex items-center gap-3 mt-3">
|
||||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 9l-7 7-7-7" />
|
<motion.button
|
||||||
</svg>
|
onClick={handleSubmit}
|
||||||
</motion.div>
|
disabled={loading}
|
||||||
</motion.button>
|
className={`flex items-center gap-2 px-5 py-2 rounded-xl font-medium text-sm transition-all ${loading ? 'opacity-50 cursor-not-allowed bg-white/5' : 'bg-gradient-to-r from-purple-600 to-cyan-600'}`}
|
||||||
|
whileHover={!loading ? { scale: 1.02 } : {}}
|
||||||
{/* Advanced Settings Panel */}
|
whileTap={!loading ? { scale: 0.98 } : {}}
|
||||||
<AnimatePresence>
|
|
||||||
{showAdvanced && (
|
|
||||||
<AdvancedSettings
|
|
||||||
settings={advancedSettings}
|
|
||||||
onSettingsChange={setAdvancedSettings}
|
|
||||||
includeCaption={includeCaption}
|
|
||||||
onIncludeCaptionChange={setIncludeCaption}
|
|
||||||
/>
|
|
||||||
)}
|
|
||||||
</AnimatePresence>
|
|
||||||
|
|
||||||
{/* Action Button / PDF Processor */}
|
|
||||||
{fileType === 'pdf' ? (
|
|
||||||
<PDFProcessor
|
|
||||||
pdfFile={image}
|
|
||||||
mode={mode}
|
|
||||||
prompt={prompt}
|
|
||||||
advancedSettings={advancedSettings}
|
|
||||||
includeCaption={includeCaption}
|
|
||||||
/>
|
|
||||||
) : (
|
|
||||||
<>
|
|
||||||
<motion.button
|
|
||||||
onClick={handleSubmit}
|
|
||||||
disabled={!image || loading}
|
|
||||||
className={`w-full relative overflow-hidden rounded-2xl p-[2px] ${
|
|
||||||
!image || loading ? 'opacity-50 cursor-not-allowed' : ''
|
|
||||||
}`}
|
|
||||||
whileHover={!loading && image ? { scale: 1.02 } : {}}
|
|
||||||
whileTap={!loading && image ? { scale: 0.98 } : {}}
|
|
||||||
>
|
|
||||||
<div className="absolute inset-0 bg-gradient-to-r from-purple-600 via-pink-600 to-cyan-600 animate-gradient" />
|
|
||||||
<div className="relative bg-dark-100 px-8 py-4 rounded-2xl flex items-center justify-center gap-3">
|
|
||||||
{loading ? (
|
|
||||||
<>
|
|
||||||
<Loader2 className="w-5 h-5 animate-spin" />
|
|
||||||
<span className="font-semibold">Processing Magic...</span>
|
|
||||||
</>
|
|
||||||
) : (
|
|
||||||
<>
|
|
||||||
<Zap className="w-5 h-5" />
|
|
||||||
<span className="font-semibold">Analyze Image</span>
|
|
||||||
</>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
</motion.button>
|
|
||||||
|
|
||||||
{error && (
|
|
||||||
<motion.div
|
|
||||||
initial={{ opacity: 0, y: -10 }}
|
|
||||||
animate={{ opacity: 1, y: 0 }}
|
|
||||||
className="glass p-4 rounded-2xl border-red-500/50 bg-red-500/10"
|
|
||||||
>
|
>
|
||||||
<p className="text-sm text-red-400">{error}</p>
|
{loading
|
||||||
</motion.div>
|
? <><Loader2 className="w-4 h-4 animate-spin" /> Processing...</>
|
||||||
)}
|
: <><Zap className="w-4 h-4" /> Analyze</>}
|
||||||
</>
|
</motion.button>
|
||||||
)}
|
{error && <p className="text-sm text-red-400">{error}</p>}
|
||||||
</motion.div>
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
{/* Right Panel - Results */}
|
{/* Image + Text */}
|
||||||
<motion.div
|
<div className="grid gap-6" style={{ gridTemplateColumns: '1fr 1fr', height: '130vh' }}>
|
||||||
initial={{ opacity: 0, y: 20 }}
|
{imagePreview && typeof imagePreview === 'string' ? (
|
||||||
animate={{ opacity: 1, y: 0 }}
|
<div className="glass rounded-2xl overflow-hidden flex items-center justify-center bg-black/20 h-full">
|
||||||
transition={{ delay: 0.2 }}
|
<img
|
||||||
>
|
src={imagePreview}
|
||||||
<ResultPanel
|
alt="Source"
|
||||||
result={result}
|
className="w-full h-full object-contain"
|
||||||
loading={loading}
|
/>
|
||||||
imagePreview={imagePreview}
|
</div>
|
||||||
onCopy={handleCopy}
|
) : (
|
||||||
onDownload={handleDownload}
|
<div className="glass rounded-2xl flex items-center justify-center h-full">
|
||||||
/>
|
<p className="text-gray-500 text-sm">No preview</p>
|
||||||
</motion.div>
|
</div>
|
||||||
</div>
|
)}
|
||||||
|
<div className="glass rounded-2xl p-4 flex flex-col h-full">
|
||||||
|
{/* Mode tabs — only shown when multiple modes have results */}
|
||||||
|
{Object.keys(modeResults).length > 1 && (
|
||||||
|
<div className="flex gap-1 mb-3 flex-shrink-0">
|
||||||
|
{Object.keys(modeResults).map(m => (
|
||||||
|
<button
|
||||||
|
key={m}
|
||||||
|
onClick={() => setActiveResultMode(m)}
|
||||||
|
className={`px-3 py-1 rounded-lg text-xs font-medium transition-colors ${
|
||||||
|
activeResultMode === m
|
||||||
|
? 'bg-purple-600 text-white'
|
||||||
|
: 'bg-white/5 text-gray-400 hover:bg-white/10'
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
{MODE_LABELS[m] || m}
|
||||||
|
</button>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
<p className="text-xs text-gray-400 mb-2 flex-shrink-0">
|
||||||
|
{MODE_LABELS[activeResultMode] || 'Result'}
|
||||||
|
<span className="text-purple-400 ml-1">(edit before committing)</span>
|
||||||
|
</p>
|
||||||
|
{loading && COMMITTABLE_MODES.has(mode) ? (
|
||||||
|
<div className="flex-1 flex items-center justify-center">
|
||||||
|
<Loader2 className="w-8 h-8 animate-spin text-purple-400" />
|
||||||
|
</div>
|
||||||
|
) : (
|
||||||
|
<textarea
|
||||||
|
value={activeResultMode ? (editedResults[activeResultMode] ?? '') : ''}
|
||||||
|
onChange={e => setEditedResults(prev => ({ ...prev, [activeResultMode]: e.target.value }))}
|
||||||
|
className="flex-1 w-full bg-transparent text-sm text-gray-200 font-mono resize-none focus:outline-none min-h-0"
|
||||||
|
placeholder="Run a mode to see results here..."
|
||||||
|
/>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Metadata row */}
|
||||||
|
<div className="glass p-4 rounded-2xl flex-shrink-0">
|
||||||
|
<datalist id="rv-authors">
|
||||||
|
{suggestions.authors.map(a => <option key={a} value={a} />)}
|
||||||
|
</datalist>
|
||||||
|
<datalist id="rv-books">
|
||||||
|
{(suggestions.books || []).map(b => <option key={b} value={b} />)}
|
||||||
|
</datalist>
|
||||||
|
<datalist id="rv-chapters">
|
||||||
|
{suggestions.chapters.map(c => <option key={c} value={c} />)}
|
||||||
|
</datalist>
|
||||||
|
<div className="grid grid-cols-4 gap-4">
|
||||||
|
{[
|
||||||
|
{ key: 'author', label: 'Author', placeholder: 'Author name', list: 'rv-authors' },
|
||||||
|
{ key: 'book', label: 'Book', placeholder: 'Book title', list: 'rv-books' },
|
||||||
|
{ key: 'chapter', label: 'Chapter', placeholder: 'Chapter', list: 'rv-chapters' },
|
||||||
|
{ key: 'page', label: 'Page', placeholder: 'Page number', list: undefined },
|
||||||
|
].map(({ key, label, placeholder, list }) => (
|
||||||
|
<div key={key}>
|
||||||
|
<label className="text-xs text-gray-400 mb-1 block">{label}</label>
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
list={list}
|
||||||
|
value={metadata[key]}
|
||||||
|
onChange={metaField(key)}
|
||||||
|
placeholder={placeholder}
|
||||||
|
className={INPUT_CLASS}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Commit row */}
|
||||||
|
<div className="flex items-center gap-4 flex-shrink-0">
|
||||||
|
<AnimatePresence>
|
||||||
|
{commitResult?.success && (
|
||||||
|
<motion.div
|
||||||
|
initial={{ opacity: 0, x: -10 }} animate={{ opacity: 1, x: 0 }} exit={{ opacity: 0 }}
|
||||||
|
className="flex-1 glass p-3 rounded-xl bg-green-500/10 border border-green-500/20"
|
||||||
|
>
|
||||||
|
<p className="text-xs text-green-400">
|
||||||
|
Job saved — ID: <span className="font-mono">{commitResult.job?.id}</span>
|
||||||
|
</p>
|
||||||
|
</motion.div>
|
||||||
|
)}
|
||||||
|
{commitResult && !commitResult.success && (
|
||||||
|
<motion.div
|
||||||
|
initial={{ opacity: 0, x: -10 }} animate={{ opacity: 1, x: 0 }} exit={{ opacity: 0 }}
|
||||||
|
className="flex-1 glass p-3 rounded-xl bg-red-500/10 border border-red-500/20"
|
||||||
|
>
|
||||||
|
<p className="text-xs text-red-400">{commitResult.error}</p>
|
||||||
|
</motion.div>
|
||||||
|
)}
|
||||||
|
</AnimatePresence>
|
||||||
|
<motion.button
|
||||||
|
onClick={handleCommitJob}
|
||||||
|
disabled={commitLoading || commitResult?.success}
|
||||||
|
className={`flex items-center gap-2 px-6 py-3 rounded-xl font-medium text-sm transition-all flex-shrink-0 ${
|
||||||
|
commitLoading || commitResult?.success
|
||||||
|
? 'opacity-50 cursor-not-allowed bg-white/5'
|
||||||
|
: 'bg-gradient-to-r from-blue-600 to-indigo-600 hover:from-blue-500 hover:to-indigo-500'
|
||||||
|
}`}
|
||||||
|
whileHover={!commitLoading && !commitResult?.success ? { scale: 1.02 } : {}}
|
||||||
|
whileTap={!commitLoading && !commitResult?.success ? { scale: 0.98 } : {}}
|
||||||
|
>
|
||||||
|
{commitLoading ? (
|
||||||
|
<><Loader2 className="w-4 h-4 animate-spin" /> Committing...</>
|
||||||
|
) : commitResult?.success ? (
|
||||||
|
<><CheckCircle2 className="w-4 h-4" /> Committed</>
|
||||||
|
) : (
|
||||||
|
<><Database className="w-4 h-4" /> Commit Job</>
|
||||||
|
)}
|
||||||
|
</motion.button>
|
||||||
|
</div>
|
||||||
|
</motion.div>
|
||||||
|
|
||||||
|
) : view === 'jobs' ? (
|
||||||
|
<motion.div
|
||||||
|
key="jobs"
|
||||||
|
initial={{ opacity: 0, y: 20 }}
|
||||||
|
animate={{ opacity: 1, y: 0 }}
|
||||||
|
exit={{ opacity: 0, y: -20 }}
|
||||||
|
>
|
||||||
|
<JobsPanel />
|
||||||
|
</motion.div>
|
||||||
|
|
||||||
|
) : (
|
||||||
|
/* ── Upload / Controls layout ── */
|
||||||
|
<motion.div
|
||||||
|
key="new_job"
|
||||||
|
initial={{ opacity: 0, y: 20 }}
|
||||||
|
animate={{ opacity: 1, y: 0 }}
|
||||||
|
exit={{ opacity: 0, y: -20 }}
|
||||||
|
>
|
||||||
|
<div className="grid lg:grid-cols-2 gap-6">
|
||||||
|
{/* Left Panel */}
|
||||||
|
<motion.div
|
||||||
|
initial={{ opacity: 0, y: 20 }}
|
||||||
|
animate={{ opacity: 1, y: 0 }}
|
||||||
|
transition={{ delay: 0.1 }}
|
||||||
|
className="space-y-6"
|
||||||
|
>
|
||||||
|
{/* File Type Toggle */}
|
||||||
|
<div className="glass p-4 rounded-2xl">
|
||||||
|
<div className="grid grid-cols-2 gap-2">
|
||||||
|
<motion.button
|
||||||
|
onClick={() => handleFileTypeChange('image')}
|
||||||
|
className={`p-3 rounded-xl text-sm font-medium transition-all flex items-center justify-center gap-2 ${fileType === 'image' ? 'bg-gradient-to-r from-purple-600 to-cyan-600 text-white' : 'glass text-gray-400 hover:bg-white/5'}`}
|
||||||
|
whileHover={{ scale: 1.02 }} whileTap={{ scale: 0.98 }}
|
||||||
|
>
|
||||||
|
<ImageIcon className="w-4 h-4" /> Image OCR
|
||||||
|
</motion.button>
|
||||||
|
<motion.button
|
||||||
|
onClick={() => handleFileTypeChange('pdf')}
|
||||||
|
className={`p-3 rounded-xl text-sm font-medium transition-all flex items-center justify-center gap-2 ${fileType === 'pdf' ? 'bg-gradient-to-r from-purple-600 to-cyan-600 text-white' : 'glass text-gray-400 hover:bg-white/5'}`}
|
||||||
|
whileHover={{ scale: 1.02 }} whileTap={{ scale: 0.98 }}
|
||||||
|
>
|
||||||
|
<FileText className="w-4 h-4" /> PDF Processing
|
||||||
|
</motion.button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<MetadataForm metadata={metadata} onChange={setMetadata} suggestions={suggestions} />
|
||||||
|
|
||||||
|
<ModelSelector
|
||||||
|
models={models} value={model} onChange={setModel} loading={modelsLoading}
|
||||||
|
/>
|
||||||
|
|
||||||
|
<ModeSelector mode={mode} onModeChange={setMode} />
|
||||||
|
|
||||||
|
<ImageUpload onImageSelect={handleImageSelect} preview={imagePreview} fileType={fileType} />
|
||||||
|
|
||||||
|
<motion.button
|
||||||
|
onClick={() => setShowAdvanced(!showAdvanced)}
|
||||||
|
className="w-full glass px-4 py-3 rounded-2xl flex items-center justify-between hover:bg-white/5 transition-colors"
|
||||||
|
whileHover={{ scale: 1.01 }} whileTap={{ scale: 0.99 }}
|
||||||
|
>
|
||||||
|
<div className="flex items-center gap-2">
|
||||||
|
<Settings className="w-4 h-4 text-purple-400" />
|
||||||
|
<span className="text-sm font-medium text-gray-300">Advanced Settings</span>
|
||||||
|
</div>
|
||||||
|
<motion.div animate={{ rotate: showAdvanced ? 180 : 0 }} transition={{ duration: 0.3 }}>
|
||||||
|
<svg className="w-4 h-4 text-gray-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 9l-7 7-7-7" />
|
||||||
|
</svg>
|
||||||
|
</motion.div>
|
||||||
|
</motion.button>
|
||||||
|
|
||||||
|
<AnimatePresence>
|
||||||
|
{showAdvanced && (
|
||||||
|
<AdvancedSettings
|
||||||
|
settings={advancedSettings} onSettingsChange={setAdvancedSettings}
|
||||||
|
includeCaption={includeCaption} onIncludeCaptionChange={setIncludeCaption}
|
||||||
|
/>
|
||||||
|
)}
|
||||||
|
</AnimatePresence>
|
||||||
|
|
||||||
|
{fileType === 'pdf' ? (
|
||||||
|
<PDFProcessor
|
||||||
|
pdfFile={image} mode={mode} prompt={prompt} model={model}
|
||||||
|
advancedSettings={advancedSettings} includeCaption={includeCaption}
|
||||||
|
/>
|
||||||
|
) : (
|
||||||
|
<>
|
||||||
|
<motion.button
|
||||||
|
onClick={handleSubmit}
|
||||||
|
disabled={!image || loading}
|
||||||
|
className={`w-full relative overflow-hidden rounded-2xl p-[2px] ${!image || loading ? 'opacity-50 cursor-not-allowed' : ''}`}
|
||||||
|
whileHover={!loading && image ? { scale: 1.02 } : {}}
|
||||||
|
whileTap={!loading && image ? { scale: 0.98 } : {}}
|
||||||
|
>
|
||||||
|
<div className="absolute inset-0 bg-gradient-to-r from-purple-600 via-pink-600 to-cyan-600 animate-gradient" />
|
||||||
|
<div className="relative bg-dark-100 px-8 py-4 rounded-2xl flex items-center justify-center gap-3">
|
||||||
|
{loading ? (
|
||||||
|
<><Loader2 className="w-5 h-5 animate-spin" /><span className="font-semibold">Processing Magic...</span></>
|
||||||
|
) : (
|
||||||
|
<><Zap className="w-5 h-5" /><span className="font-semibold">Analyze Image</span></>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</motion.button>
|
||||||
|
|
||||||
|
{error && (
|
||||||
|
<motion.div
|
||||||
|
initial={{ opacity: 0, y: -10 }} animate={{ opacity: 1, y: 0 }}
|
||||||
|
className="glass p-4 rounded-2xl border-red-500/50 bg-red-500/10"
|
||||||
|
>
|
||||||
|
<p className="text-sm text-red-400">{error}</p>
|
||||||
|
</motion.div>
|
||||||
|
)}
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
</motion.div>
|
||||||
|
|
||||||
|
{/* Right Panel - Results (non-plain_ocr modes or loading) */}
|
||||||
|
<motion.div
|
||||||
|
initial={{ opacity: 0, y: 20 }}
|
||||||
|
animate={{ opacity: 1, y: 0 }}
|
||||||
|
transition={{ delay: 0.2 }}
|
||||||
|
>
|
||||||
|
<ResultPanel
|
||||||
|
result={result}
|
||||||
|
loading={loading}
|
||||||
|
imagePreview={imagePreview}
|
||||||
|
onCopy={handleCopy}
|
||||||
|
onDownload={handleDownload}
|
||||||
|
/>
|
||||||
|
</motion.div>
|
||||||
|
</div>
|
||||||
|
</motion.div>
|
||||||
|
)}
|
||||||
|
</AnimatePresence>
|
||||||
</main>
|
</main>
|
||||||
|
|
||||||
{/* Footer */}
|
{/* Footer */}
|
||||||
<footer className="mt-20 border-t border-white/10 glass">
|
<footer className="mt-20 border-t border-white/10 glass">
|
||||||
<div className="max-w-7xl mx-auto px-6 py-8 text-center space-y-2">
|
<div className="max-w-7xl mx-auto px-6 py-8 text-center space-y-2">
|
||||||
<p className="text-sm text-gray-400">
|
<p className="text-sm text-gray-400">
|
||||||
Powered by <span className="gradient-text font-semibold">DeepSeek-OCR</span> •
|
Powered by <span className="gradient-text font-semibold">DeepSeek-OCR</span> •
|
||||||
Built with <span className="text-pink-400">♥</span> using React + FastAPI
|
Built with <span className="text-pink-400">♥</span> using React + FastAPI
|
||||||
</p>
|
</p>
|
||||||
<p className="text-xs text-gray-500">
|
<p className="text-xs text-gray-500">
|
||||||
|
|||||||
665
frontend/src/components/JobsPanel.jsx
Normal file
665
frontend/src/components/JobsPanel.jsx
Normal file
@@ -0,0 +1,665 @@
|
|||||||
|
import { useState, useEffect, useCallback } from 'react'
|
||||||
|
import { useSuggestions } from '../hooks/useSuggestions'
|
||||||
|
import { useModels } from '../hooks/useModels'
|
||||||
|
import { motion, AnimatePresence } from 'framer-motion'
|
||||||
|
import {
|
||||||
|
Search, ChevronLeft, ChevronRight, CheckCircle2, Clock,
|
||||||
|
FileText, Loader2, Save, RefreshCw, Trash2, Sparkles,
|
||||||
|
} from 'lucide-react'
|
||||||
|
import axios from 'axios'
|
||||||
|
|
||||||
|
// Base URL prefix for backend requests: the VITE_API_URL env value when it is
// set to a truthy string, otherwise the relative '/api' path.
const API_BASE = import.meta.env.VITE_API_URL || '/api'
|
||||||
|
|
||||||
|
// Tailwind utility classes kept in one string so they can be reused via `className`.
const INPUT_CLASS = [
  'w-full bg-white/5 border border-white/10 rounded-lg px-3 py-2 text-sm text-gray-200',
  'placeholder-gray-600 focus:outline-none focus:border-purple-500/50 transition-colors',
].join(' ')

// Text / background / border colour classes for each known job status.
// StatusBadge looks a status up here and falls back to plain gray text when
// the status has no entry.
const STATUS_COLORS = {
  unreviewed: 'text-amber-400 bg-amber-400/10 border-amber-400/30',
  reviewed: 'text-green-400 bg-green-400/10 border-green-400/30',
}
|
||||||
|
|
||||||
|
function StatusBadge({ status }) {
|
||||||
|
const Icon = status === 'reviewed' ? CheckCircle2 : Clock
|
||||||
|
return (
|
||||||
|
<span className={`inline-flex items-center gap-1 px-2 py-0.5 rounded-full text-xs border ${STATUS_COLORS[status] || 'text-gray-400'}`}>
|
||||||
|
<Icon className="w-3 h-3" />
|
||||||
|
{status}
|
||||||
|
</span>
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─────────────────────────────────────────────────────────────
|
||||||
|
// Full-screen Job Detail
|
||||||
|
// ─────────────────────────────────────────────────────────────
|
||||||
|
function JobDetail({ jobId, onClose, onReviewed, onDeleted, suggestions = {} }) {
|
||||||
|
const { models } = useModels()
|
||||||
|
const [job, setJob] = useState(null)
|
||||||
|
const [loading, setLoading] = useState(true)
|
||||||
|
const [error, setError] = useState(null)
|
||||||
|
|
||||||
|
const [describeModel, setDescribeModel] = useState('')
|
||||||
|
const [generatingDescribe, setGeneratingDescribe] = useState(false)
|
||||||
|
|
||||||
|
const [editedText, setEditedText] = useState('')
|
||||||
|
const [editDescribeText, setEditDescribeText] = useState('')
|
||||||
|
const [editFreeformText, setEditFreeformText] = useState('')
|
||||||
|
const [activeTab, setActiveTab] = useState('ocr')
|
||||||
|
const [editAuthor, setEditAuthor] = useState('')
|
||||||
|
const [editBook, setEditBook] = useState('')
|
||||||
|
const [editChapter, setEditChapter] = useState('')
|
||||||
|
const [editPage, setEditPage] = useState('')
|
||||||
|
const [reviewerName, setReviewerName] = useState('')
|
||||||
|
|
||||||
|
const [submitting, setSubmitting] = useState(false)
|
||||||
|
const [saveResult, setSaveResult] = useState(null)
|
||||||
|
const [confirmDelete, setConfirmDelete] = useState(false)
|
||||||
|
const [deleting, setDeleting] = useState(false)
|
||||||
|
const [togglingStatus, setTogglingStatus] = useState(false)
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
let cancelled = false
|
||||||
|
setLoading(true)
|
||||||
|
setError(null)
|
||||||
|
setSaveResult(null)
|
||||||
|
|
||||||
|
axios.get(`${API_BASE}/jobs/${jobId}`)
|
||||||
|
.then(res => {
|
||||||
|
if (!cancelled) {
|
||||||
|
const d = res.data
|
||||||
|
setJob(d)
|
||||||
|
setEditedText(d.reviewed_text ?? d.ocr_text ?? '')
|
||||||
|
setEditDescribeText(d.describe_text ?? '')
|
||||||
|
setEditFreeformText(d.freeform_text ?? '')
|
||||||
|
setEditAuthor(d.author || '')
|
||||||
|
setEditBook(d.book || '')
|
||||||
|
setEditChapter(d.chapter || '')
|
||||||
|
setEditPage(d.page || '')
|
||||||
|
setReviewerName(d.reviewer_name || '')
|
||||||
|
// Default to the OCR tab when there's OCR text, otherwise Description
|
||||||
|
if (d.reviewed_text || d.ocr_text) setActiveTab('ocr')
|
||||||
|
else setActiveTab('describe')
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.catch(err => {
|
||||||
|
if (!cancelled) setError(err.response?.data?.detail || err.message)
|
||||||
|
})
|
||||||
|
.finally(() => { if (!cancelled) setLoading(false) })
|
||||||
|
|
||||||
|
return () => { cancelled = true }
|
||||||
|
}, [jobId])
|
||||||
|
|
||||||
|
// Default the Describe model to the job's original model (if available) or the registry default
|
||||||
|
useEffect(() => {
|
||||||
|
if (!describeModel && models.length > 0) {
|
||||||
|
const def = models.find(m => m.default) || models[0]
|
||||||
|
const fromJob = job?.ocr_model && models.some(m => m.id === job.ocr_model) ? job.ocr_model : null
|
||||||
|
setDescribeModel(fromJob || def.id)
|
||||||
|
}
|
||||||
|
}, [models, job, describeModel])
|
||||||
|
|
||||||
|
const handleGenerateDescribe = async () => {
|
||||||
|
setGeneratingDescribe(true)
|
||||||
|
setSaveResult(null)
|
||||||
|
try {
|
||||||
|
const res = await axios.post(`${API_BASE}/jobs/${jobId}/describe`, {
|
||||||
|
model: describeModel || null,
|
||||||
|
})
|
||||||
|
setJob(res.data)
|
||||||
|
setEditDescribeText(res.data.describe_text || '')
|
||||||
|
onReviewed(res.data)
|
||||||
|
} catch (err) {
|
||||||
|
setSaveResult({ success: false, error: err.response?.data?.detail || err.message })
|
||||||
|
} finally {
|
||||||
|
setGeneratingDescribe(false)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const handleSave = async () => {
|
||||||
|
if (!reviewerName.trim()) {
|
||||||
|
setSaveResult({ success: false, error: 'Reviewer name is required.' })
|
||||||
|
return
|
||||||
|
}
|
||||||
|
setSubmitting(true)
|
||||||
|
setSaveResult(null)
|
||||||
|
try {
|
||||||
|
const res = await axios.put(`${API_BASE}/jobs/${jobId}/review`, {
|
||||||
|
reviewed_text: editedText,
|
||||||
|
reviewer_name: reviewerName.trim(),
|
||||||
|
author: editAuthor,
|
||||||
|
book: editBook,
|
||||||
|
chapter: editChapter,
|
||||||
|
page: editPage,
|
||||||
|
describe_text: editDescribeText || null,
|
||||||
|
freeform_text: editFreeformText || null,
|
||||||
|
})
|
||||||
|
setJob(res.data)
|
||||||
|
setSaveResult({ success: true })
|
||||||
|
onReviewed(res.data)
|
||||||
|
} catch (err) {
|
||||||
|
setSaveResult({ success: false, error: err.response?.data?.detail || err.message })
|
||||||
|
} finally {
|
||||||
|
setSubmitting(false)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const handleToggleStatus = async () => {
|
||||||
|
// Marking reviewed accepts BOTH the reviewed document text and the description,
|
||||||
|
// so it goes through the full review save (not a status-only flip).
|
||||||
|
if (!isReviewed) {
|
||||||
|
setTogglingStatus(true)
|
||||||
|
try {
|
||||||
|
await handleSave()
|
||||||
|
} finally {
|
||||||
|
setTogglingStatus(false)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reverting to unreviewed preserves the saved reviewed text and description.
|
||||||
|
setTogglingStatus(true)
|
||||||
|
setSaveResult(null)
|
||||||
|
try {
|
||||||
|
const res = await axios.put(`${API_BASE}/jobs/${jobId}/status`, {
|
||||||
|
status: 'unreviewed',
|
||||||
|
reviewer_name: reviewerName.trim() || null,
|
||||||
|
})
|
||||||
|
setJob(res.data)
|
||||||
|
setReviewerName(res.data.reviewer_name || '')
|
||||||
|
onReviewed(res.data)
|
||||||
|
} catch (err) {
|
||||||
|
setSaveResult({ success: false, error: err.response?.data?.detail || err.message })
|
||||||
|
} finally {
|
||||||
|
setTogglingStatus(false)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const handleDelete = async () => {
|
||||||
|
setDeleting(true)
|
||||||
|
try {
|
||||||
|
await axios.delete(`${API_BASE}/jobs/${jobId}`)
|
||||||
|
onDeleted(jobId)
|
||||||
|
} catch (err) {
|
||||||
|
setSaveResult({ success: false, error: err.response?.data?.detail || err.message })
|
||||||
|
setConfirmDelete(false)
|
||||||
|
} finally {
|
||||||
|
setDeleting(false)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const isReviewed = job?.status === 'reviewed'
|
||||||
|
|
||||||
|
return (
|
||||||
|
<motion.div
|
||||||
|
key={jobId}
|
||||||
|
initial={{ opacity: 0, y: 20 }}
|
||||||
|
animate={{ opacity: 1, y: 0 }}
|
||||||
|
exit={{ opacity: 0, y: -20 }}
|
||||||
|
className="flex flex-col gap-4"
|
||||||
|
>
|
||||||
|
{/* Top bar */}
|
||||||
|
<div className="flex items-center gap-4 flex-shrink-0">
|
||||||
|
<motion.button
|
||||||
|
onClick={onClose}
|
||||||
|
className="flex items-center gap-2 glass glass-hover px-4 py-2 rounded-xl text-sm text-gray-300"
|
||||||
|
whileHover={{ scale: 1.02 }} whileTap={{ scale: 0.98 }}
|
||||||
|
>
|
||||||
|
<ChevronLeft className="w-4 h-4" />
|
||||||
|
Back to results
|
||||||
|
</motion.button>
|
||||||
|
{job && (
|
||||||
|
<>
|
||||||
|
<StatusBadge status={job.status} />
|
||||||
|
<motion.button
|
||||||
|
onClick={handleToggleStatus}
|
||||||
|
disabled={togglingStatus}
|
||||||
|
title={isReviewed ? 'Revert to unreviewed' : 'Mark as reviewed'}
|
||||||
|
className={`flex items-center gap-1 px-3 py-1.5 rounded-lg text-xs font-medium transition-colors disabled:opacity-50 ${
|
||||||
|
isReviewed
|
||||||
|
? 'glass glass-hover text-amber-400 hover:bg-amber-500/10'
|
||||||
|
: 'glass glass-hover text-green-400 hover:bg-green-500/10'
|
||||||
|
}`}
|
||||||
|
whileHover={!togglingStatus ? { scale: 1.02 } : {}}
|
||||||
|
whileTap={!togglingStatus ? { scale: 0.98 } : {}}
|
||||||
|
>
|
||||||
|
{togglingStatus ? (
|
||||||
|
<Loader2 className="w-3.5 h-3.5 animate-spin" />
|
||||||
|
) : isReviewed ? (
|
||||||
|
<Clock className="w-3.5 h-3.5" />
|
||||||
|
) : (
|
||||||
|
<CheckCircle2 className="w-3.5 h-3.5" />
|
||||||
|
)}
|
||||||
|
{isReviewed ? 'Mark Unreviewed' : 'Mark Reviewed'}
|
||||||
|
</motion.button>
|
||||||
|
<span className="text-xs text-gray-500 font-mono hidden sm:block">{job.id}</span>
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
<div className="ml-auto flex items-center gap-2">
|
||||||
|
{confirmDelete ? (
|
||||||
|
<>
|
||||||
|
<span className="text-xs text-red-400">Delete this job permanently?</span>
|
||||||
|
<motion.button
|
||||||
|
onClick={handleDelete}
|
||||||
|
disabled={deleting}
|
||||||
|
className="flex items-center gap-1 px-3 py-2 rounded-xl text-sm font-medium bg-red-600 hover:bg-red-500 disabled:opacity-50"
|
||||||
|
whileHover={{ scale: 1.02 }} whileTap={{ scale: 0.98 }}
|
||||||
|
>
|
||||||
|
{deleting ? <Loader2 className="w-4 h-4 animate-spin" /> : <Trash2 className="w-4 h-4" />}
|
||||||
|
Confirm
|
||||||
|
</motion.button>
|
||||||
|
<motion.button
|
||||||
|
onClick={() => setConfirmDelete(false)}
|
||||||
|
className="px-3 py-2 rounded-xl text-sm glass glass-hover text-gray-300"
|
||||||
|
whileHover={{ scale: 1.02 }} whileTap={{ scale: 0.98 }}
|
||||||
|
>
|
||||||
|
Cancel
|
||||||
|
</motion.button>
|
||||||
|
</>
|
||||||
|
) : (
|
||||||
|
<motion.button
|
||||||
|
onClick={() => setConfirmDelete(true)}
|
||||||
|
className="flex items-center gap-2 px-3 py-2 rounded-xl text-sm glass glass-hover text-red-400 hover:bg-red-500/10"
|
||||||
|
whileHover={{ scale: 1.02 }} whileTap={{ scale: 0.98 }}
|
||||||
|
>
|
||||||
|
<Trash2 className="w-4 h-4" />
|
||||||
|
Delete
|
||||||
|
</motion.button>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{loading && (
|
||||||
|
<div className="flex-1 flex items-center justify-center">
|
||||||
|
<Loader2 className="w-8 h-8 animate-spin text-purple-400" />
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{error && (
|
||||||
|
<div className="glass p-4 rounded-xl border-red-500/30 bg-red-500/10 flex-shrink-0">
|
||||||
|
<p className="text-sm text-red-400">{error}</p>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{job && !loading && (
|
||||||
|
<>
|
||||||
|
{/* Image + Text */}
|
||||||
|
<div className="grid gap-6" style={{ gridTemplateColumns: '1fr 1fr', height: '130vh' }}>
|
||||||
|
<div className="glass rounded-2xl overflow-hidden flex items-center justify-center bg-black/20 h-full">
|
||||||
|
<img
|
||||||
|
src={`${API_BASE}/jobs/${job.id}/image`}
|
||||||
|
alt="Job source"
|
||||||
|
className="w-full h-full object-contain"
|
||||||
|
onError={e => { e.target.style.display = 'none' }}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div className="glass rounded-2xl p-4 flex flex-col h-full">
|
||||||
|
{/* Tabs — only show tabs that have content */}
|
||||||
|
{(() => {
|
||||||
|
const tabs = [
|
||||||
|
job.ocr_text || job.reviewed_text ? { id: 'ocr', label: 'OCR Text' } : null,
|
||||||
|
{ id: 'describe', label: 'Description' },
|
||||||
|
].filter(Boolean)
|
||||||
|
return tabs.length > 1 ? (
|
||||||
|
<div className="flex gap-1 mb-3 flex-shrink-0">
|
||||||
|
{tabs.map(t => (
|
||||||
|
<button
|
||||||
|
key={t.id}
|
||||||
|
onClick={() => setActiveTab(t.id)}
|
||||||
|
className={`px-3 py-1 rounded-lg text-xs font-medium transition-colors ${
|
||||||
|
activeTab === t.id
|
||||||
|
? 'bg-purple-600 text-white'
|
||||||
|
: 'bg-white/5 text-gray-400 hover:bg-white/10'
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
{t.label}
|
||||||
|
</button>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
) : null
|
||||||
|
})()}
|
||||||
|
|
||||||
|
<p className="text-xs text-gray-400 mb-2 flex-shrink-0">
|
||||||
|
{{ ocr: isReviewed ? 'Reviewed Text' : 'OCR Text', describe: 'Description' }[activeTab]}
|
||||||
|
<span className="text-purple-400 ml-1">(editable)</span>
|
||||||
|
</p>
|
||||||
|
|
||||||
|
{activeTab === 'ocr' && (
|
||||||
|
<>
|
||||||
|
<textarea
|
||||||
|
value={editedText}
|
||||||
|
onChange={e => setEditedText(e.target.value)}
|
||||||
|
className="flex-1 w-full bg-transparent text-sm text-gray-200 font-mono resize-none focus:outline-none min-h-0"
|
||||||
|
placeholder="OCR text..."
|
||||||
|
/>
|
||||||
|
{isReviewed && job.ocr_text && (
|
||||||
|
<details className="flex-shrink-0 mt-2 border-t border-white/10 pt-2">
|
||||||
|
<summary className="cursor-pointer text-xs text-gray-500 hover:text-gray-400 transition-colors">
|
||||||
|
Original OCR Text
|
||||||
|
</summary>
|
||||||
|
<pre className="text-xs text-gray-600 whitespace-pre-wrap font-mono mt-1 max-h-28 overflow-y-auto">
|
||||||
|
{job.ocr_text}
|
||||||
|
</pre>
|
||||||
|
</details>
|
||||||
|
)}
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
{activeTab === 'describe' && (
|
||||||
|
<>
|
||||||
|
<div className="flex items-center gap-2 mb-2 flex-shrink-0">
|
||||||
|
<select
|
||||||
|
value={describeModel}
|
||||||
|
onChange={e => setDescribeModel(e.target.value)}
|
||||||
|
disabled={generatingDescribe || models.length === 0}
|
||||||
|
className="bg-white/5 border border-white/10 rounded-lg px-2 py-1.5 text-xs text-gray-200 focus:outline-none focus:border-purple-500/50"
|
||||||
|
>
|
||||||
|
{models.length === 0 && <option value="">No models</option>}
|
||||||
|
{models.map(m => (
|
||||||
|
<option key={m.id} value={m.id}>{m.label}{m.default ? ' (default)' : ''}</option>
|
||||||
|
))}
|
||||||
|
</select>
|
||||||
|
<motion.button
|
||||||
|
onClick={handleGenerateDescribe}
|
||||||
|
disabled={generatingDescribe || !describeModel}
|
||||||
|
className={`flex items-center gap-1.5 px-3 py-1.5 rounded-lg text-xs font-medium transition-all ${
|
||||||
|
generatingDescribe || !describeModel
|
||||||
|
? 'opacity-50 cursor-not-allowed bg-white/5'
|
||||||
|
: 'bg-gradient-to-r from-violet-600 to-purple-600 hover:from-violet-500 hover:to-purple-500'
|
||||||
|
}`}
|
||||||
|
whileHover={!generatingDescribe && describeModel ? { scale: 1.02 } : {}}
|
||||||
|
whileTap={!generatingDescribe && describeModel ? { scale: 0.98 } : {}}
|
||||||
|
title="Run Describe on this job's image and save it"
|
||||||
|
>
|
||||||
|
{generatingDescribe
|
||||||
|
? <><Loader2 className="w-3.5 h-3.5 animate-spin" /> Generating…</>
|
||||||
|
: <><Sparkles className="w-3.5 h-3.5" /> Generate Description</>}
|
||||||
|
</motion.button>
|
||||||
|
</div>
|
||||||
|
<textarea
|
||||||
|
value={editDescribeText}
|
||||||
|
onChange={e => setEditDescribeText(e.target.value)}
|
||||||
|
className="flex-1 w-full bg-transparent text-sm text-gray-200 font-mono resize-none focus:outline-none min-h-0"
|
||||||
|
placeholder="No description yet — pick a model and click Generate Description, or type one here."
|
||||||
|
/>
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Metadata + reviewer row */}
|
||||||
|
<div className="glass p-4 rounded-2xl flex-shrink-0">
|
||||||
|
<datalist id="jd-authors">
|
||||||
|
{(suggestions.authors || []).map(a => <option key={a} value={a} />)}
|
||||||
|
</datalist>
|
||||||
|
<datalist id="jd-books">
|
||||||
|
{(suggestions.books || []).map(b => <option key={b} value={b} />)}
|
||||||
|
</datalist>
|
||||||
|
<datalist id="jd-chapters">
|
||||||
|
{(suggestions.chapters || []).map(c => <option key={c} value={c} />)}
|
||||||
|
</datalist>
|
||||||
|
<datalist id="jd-reviewers">
|
||||||
|
{(suggestions.reviewers || []).map(r => <option key={r} value={r} />)}
|
||||||
|
</datalist>
|
||||||
|
<div className="grid grid-cols-6 gap-4">
|
||||||
|
<div>
|
||||||
|
<label className="text-xs text-gray-400 mb-1 block">Author</label>
|
||||||
|
<input type="text" list="jd-authors" value={editAuthor} onChange={e => setEditAuthor(e.target.value)} placeholder="Author" className={INPUT_CLASS} />
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label className="text-xs text-gray-400 mb-1 block">Book</label>
|
||||||
|
<input type="text" list="jd-books" value={editBook} onChange={e => setEditBook(e.target.value)} placeholder="Book title" className={INPUT_CLASS} />
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label className="text-xs text-gray-400 mb-1 block">Chapter</label>
|
||||||
|
<input type="text" list="jd-chapters" value={editChapter} onChange={e => setEditChapter(e.target.value)} placeholder="Chapter" className={INPUT_CLASS} />
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label className="text-xs text-gray-400 mb-1 block">Page</label>
|
||||||
|
<input type="text" value={editPage} onChange={e => setEditPage(e.target.value)} placeholder="Page" className={INPUT_CLASS} />
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label className="text-xs text-gray-400 mb-1 block">Reviewer</label>
|
||||||
|
<input type="text" list="jd-reviewers" value={reviewerName} onChange={e => setReviewerName(e.target.value)} placeholder="Your name" className={INPUT_CLASS} />
|
||||||
|
</div>
|
||||||
|
<div className="flex flex-col justify-end">
|
||||||
|
<motion.button
|
||||||
|
onClick={handleSave}
|
||||||
|
disabled={submitting || !reviewerName.trim()}
|
||||||
|
className={`w-full flex items-center justify-center gap-2 px-4 py-2 rounded-lg font-medium text-sm transition-all ${
|
||||||
|
submitting || !reviewerName.trim()
|
||||||
|
? 'opacity-50 cursor-not-allowed bg-white/5'
|
||||||
|
: isReviewed
|
||||||
|
? 'bg-gradient-to-r from-blue-600 to-indigo-600 hover:from-blue-500 hover:to-indigo-500'
|
||||||
|
: 'bg-gradient-to-r from-green-600 to-emerald-600 hover:from-green-500 hover:to-emerald-500'
|
||||||
|
}`}
|
||||||
|
whileHover={!submitting && reviewerName.trim() ? { scale: 1.02 } : {}}
|
||||||
|
whileTap={!submitting && reviewerName.trim() ? { scale: 0.98 } : {}}
|
||||||
|
>
|
||||||
|
{submitting ? (
|
||||||
|
<><Loader2 className="w-4 h-4 animate-spin" /> Saving...</>
|
||||||
|
) : isReviewed ? (
|
||||||
|
<><Save className="w-4 h-4" /> Save Changes</>
|
||||||
|
) : (
|
||||||
|
<><CheckCircle2 className="w-4 h-4" /> Mark Reviewed</>
|
||||||
|
)}
|
||||||
|
</motion.button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{!isReviewed && (
|
||||||
|
<p className="text-xs text-gray-500 mt-2">
|
||||||
|
Marking reviewed accepts both the reviewed document text and the description.
|
||||||
|
</p>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{saveResult && (
|
||||||
|
<motion.div
|
||||||
|
initial={{ opacity: 0, y: -4 }} animate={{ opacity: 1, y: 0 }}
|
||||||
|
className={`mt-3 p-2 rounded-lg text-xs ${saveResult.success ? 'bg-green-500/10 text-green-400' : 'bg-red-500/10 text-red-400'}`}
|
||||||
|
>
|
||||||
|
{saveResult.success
|
||||||
|
? (isReviewed ? 'Changes saved!' : 'Job marked as reviewed!')
|
||||||
|
: saveResult.error}
|
||||||
|
</motion.div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Read-only info row */}
|
||||||
|
<div className="flex gap-6 mt-3 pt-3 border-t border-white/10">
|
||||||
|
{job.submitted_at && (
|
||||||
|
<span className="text-xs text-gray-500">Submitted: {new Date(job.submitted_at).toLocaleString()}</span>
|
||||||
|
)}
|
||||||
|
{isReviewed && job.reviewed_at && (
|
||||||
|
<span className="text-xs text-gray-500">Last reviewed: {new Date(job.reviewed_at).toLocaleString()}</span>
|
||||||
|
)}
|
||||||
|
{job.mode && <span className="text-xs text-gray-500">Mode: {job.mode}</span>}
|
||||||
|
{job.ocr_model && <span className="text-xs text-gray-500">Model: {job.ocr_model}</span>}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
</motion.div>
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─────────────────────────────────────────────────────────────
|
||||||
|
// Search / List view
|
||||||
|
// ─────────────────────────────────────────────────────────────
|
||||||
|
export default function JobsPanel() {
|
||||||
|
const suggestions = useSuggestions()
|
||||||
|
const [search, setSearch] = useState('')
|
||||||
|
const [filterStatus, setFilterStatus] = useState('')
|
||||||
|
const [filterAuthor, setFilterAuthor] = useState('')
|
||||||
|
const [filterBook, setFilterBook] = useState('')
|
||||||
|
const [jobs, setJobs] = useState([])
|
||||||
|
const [total, setTotal] = useState(0)
|
||||||
|
const [page, setPage] = useState(0)
|
||||||
|
const [loading, setLoading] = useState(false)
|
||||||
|
const [error, setError] = useState(null)
|
||||||
|
const [selectedJobId, setSelectedJobId] = useState(null)
|
||||||
|
|
||||||
|
const LIMIT = 20
|
||||||
|
|
||||||
|
const fetchJobs = useCallback(async (pageNum = 0) => {
|
||||||
|
setLoading(true)
|
||||||
|
setError(null)
|
||||||
|
try {
|
||||||
|
const params = new URLSearchParams()
|
||||||
|
if (search.trim()) params.set('search', search.trim())
|
||||||
|
if (filterStatus) params.set('status', filterStatus)
|
||||||
|
if (filterAuthor.trim()) params.set('author', filterAuthor.trim())
|
||||||
|
if (filterBook.trim()) params.set('book', filterBook.trim())
|
||||||
|
params.set('limit', LIMIT)
|
||||||
|
params.set('offset', pageNum * LIMIT)
|
||||||
|
|
||||||
|
const res = await axios.get(`${API_BASE}/jobs?${params}`)
|
||||||
|
setJobs(res.data.jobs)
|
||||||
|
setTotal(res.data.total)
|
||||||
|
setPage(pageNum)
|
||||||
|
} catch (err) {
|
||||||
|
setError(err.response?.data?.detail || err.message)
|
||||||
|
} finally {
|
||||||
|
setLoading(false)
|
||||||
|
}
|
||||||
|
}, [search, filterStatus, filterAuthor, filterBook])
|
||||||
|
|
||||||
|
useEffect(() => { fetchJobs(0) }, []) // eslint-disable-line react-hooks/exhaustive-deps
|
||||||
|
|
||||||
|
const handleReviewed = (updatedJob) => {
|
||||||
|
setJobs(prev => prev.map(j => j.id === updatedJob.id ? { ...j, ...updatedJob } : j))
|
||||||
|
}
|
||||||
|
|
||||||
|
const totalPages = Math.ceil(total / LIMIT)
|
||||||
|
|
||||||
|
// When a job is selected show full-screen detail
|
||||||
|
if (selectedJobId) {
|
||||||
|
return (
|
||||||
|
<AnimatePresence mode="wait">
|
||||||
|
<JobDetail
|
||||||
|
key={selectedJobId}
|
||||||
|
jobId={selectedJobId}
|
||||||
|
onClose={() => setSelectedJobId(null)}
|
||||||
|
onReviewed={handleReviewed}
|
||||||
|
onDeleted={(id) => {
|
||||||
|
setJobs(prev => prev.filter(j => j.id !== id))
|
||||||
|
setTotal(prev => prev - 1)
|
||||||
|
setSelectedJobId(null)
|
||||||
|
}}
|
||||||
|
suggestions={suggestions}
|
||||||
|
/>
|
||||||
|
</AnimatePresence>
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
return (
|
||||||
|
<motion.div
|
||||||
|
key="job_list"
|
||||||
|
initial={{ opacity: 0, y: 20 }}
|
||||||
|
animate={{ opacity: 1, y: 0 }}
|
||||||
|
exit={{ opacity: 0, y: -20 }}
|
||||||
|
className="space-y-4"
|
||||||
|
>
|
||||||
|
{/* Search form */}
|
||||||
|
<div className="glass p-4 rounded-2xl space-y-3">
|
||||||
|
<form onSubmit={e => { e.preventDefault(); fetchJobs(0) }} className="flex gap-2">
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
value={search}
|
||||||
|
onChange={e => setSearch(e.target.value)}
|
||||||
|
placeholder="Search all fields..."
|
||||||
|
className={`${INPUT_CLASS} flex-1`}
|
||||||
|
/>
|
||||||
|
<motion.button
|
||||||
|
type="submit"
|
||||||
|
className="flex items-center gap-2 px-4 py-2 rounded-lg bg-gradient-to-r from-purple-600 to-cyan-600 text-sm font-medium"
|
||||||
|
whileHover={{ scale: 1.02 }} whileTap={{ scale: 0.98 }}
|
||||||
|
>
|
||||||
|
<Search className="w-4 h-4" /> Search
|
||||||
|
</motion.button>
|
||||||
|
</form>
|
||||||
|
|
||||||
|
<datalist id="jp-authors">
|
||||||
|
{suggestions.authors.map(a => <option key={a} value={a} />)}
|
||||||
|
</datalist>
|
||||||
|
<datalist id="jp-books">
|
||||||
|
{(suggestions.books || []).map(b => <option key={b} value={b} />)}
|
||||||
|
</datalist>
|
||||||
|
<div className="grid grid-cols-3 gap-2">
|
||||||
|
<select value={filterStatus} onChange={e => setFilterStatus(e.target.value)} className={INPUT_CLASS}>
|
||||||
|
<option value="">All statuses</option>
|
||||||
|
<option value="unreviewed">Unreviewed</option>
|
||||||
|
<option value="reviewed">Reviewed</option>
|
||||||
|
</select>
|
||||||
|
<input type="text" list="jp-authors" value={filterAuthor} onChange={e => setFilterAuthor(e.target.value)} placeholder="Author..." className={INPUT_CLASS} />
|
||||||
|
<input type="text" list="jp-books" value={filterBook} onChange={e => setFilterBook(e.target.value)} placeholder="Book..." className={INPUT_CLASS} />
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div className="flex items-center justify-between">
|
||||||
|
<span className="text-xs text-gray-500">{total} job{total !== 1 ? 's' : ''} found</span>
|
||||||
|
<button onClick={() => fetchJobs(page)} className="flex items-center gap-1 text-xs text-gray-400 hover:text-gray-200 transition-colors">
|
||||||
|
<RefreshCw className="w-3 h-3" /> Refresh
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{loading && <div className="flex justify-center py-8"><Loader2 className="w-6 h-6 animate-spin text-purple-400" /></div>}
|
||||||
|
|
||||||
|
{error && (
|
||||||
|
<div className="glass p-4 rounded-xl border-red-500/30 bg-red-500/10">
|
||||||
|
<p className="text-sm text-red-400">{error}</p>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{!loading && !error && jobs.length === 0 && (
|
||||||
|
<div className="glass p-8 rounded-2xl text-center">
|
||||||
|
<FileText className="w-10 h-10 mx-auto mb-3 text-gray-600" />
|
||||||
|
<p className="text-gray-400">No jobs found</p>
|
||||||
|
<p className="text-xs text-gray-500 mt-1">Commit your first OCR job from the New Job tab</p>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Results grid */}
|
||||||
|
<div className="grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-3 xl:grid-cols-4 gap-3">
|
||||||
|
<AnimatePresence>
|
||||||
|
{jobs.map(job => (
|
||||||
|
<motion.button
|
||||||
|
key={job.id}
|
||||||
|
onClick={() => setSelectedJobId(job.id)}
|
||||||
|
className="text-left glass p-4 rounded-xl border border-white/5 hover:border-white/20 hover:bg-white/5 transition-all"
|
||||||
|
initial={{ opacity: 0, y: 10 }}
|
||||||
|
animate={{ opacity: 1, y: 0 }}
|
||||||
|
exit={{ opacity: 0 }}
|
||||||
|
whileHover={{ scale: 1.02 }}
|
||||||
|
whileTap={{ scale: 0.98 }}
|
||||||
|
layout
|
||||||
|
>
|
||||||
|
<div className="flex items-start justify-between gap-2 mb-2">
|
||||||
|
<StatusBadge status={job.status} />
|
||||||
|
</div>
|
||||||
|
{job.book && <p className="text-sm font-medium text-gray-200 truncate">{job.book}</p>}
|
||||||
|
<div className="flex items-center gap-2 mt-0.5">
|
||||||
|
{job.chapter && <span className="text-xs text-gray-500">Ch. {job.chapter}</span>}
|
||||||
|
{job.page && <span className="text-xs text-gray-500">p. {job.page}</span>}
|
||||||
|
</div>
|
||||||
|
{job.author && <p className="text-xs text-gray-400 mt-1">{job.author}</p>}
|
||||||
|
<div className="flex items-center justify-between mt-2">
|
||||||
|
<p className="text-xs text-gray-600 font-mono">{new Date(job.submitted_at).toLocaleDateString()}</p>
|
||||||
|
{job.ocr_model && <span className="text-[10px] text-gray-500 truncate ml-2">{job.ocr_model}</span>}
|
||||||
|
</div>
|
||||||
|
</motion.button>
|
||||||
|
))}
|
||||||
|
</AnimatePresence>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{totalPages > 1 && (
|
||||||
|
<div className="flex items-center justify-center gap-3">
|
||||||
|
<button onClick={() => fetchJobs(page - 1)} disabled={page === 0} className="glass glass-hover p-2 rounded-lg disabled:opacity-30">
|
||||||
|
<ChevronLeft className="w-4 h-4" />
|
||||||
|
</button>
|
||||||
|
<span className="text-sm text-gray-400">Page {page + 1} of {totalPages}</span>
|
||||||
|
<button onClick={() => fetchJobs(page + 1)} disabled={page >= totalPages - 1} className="glass glass-hover p-2 rounded-lg disabled:opacity-30">
|
||||||
|
<ChevronRight className="w-4 h-4" />
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</motion.div>
|
||||||
|
)
|
||||||
|
}
|
||||||
77
frontend/src/components/MetadataForm.jsx
Normal file
77
frontend/src/components/MetadataForm.jsx
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
import { BookOpen } from 'lucide-react'
|
||||||
|
|
||||||
|
export default function MetadataForm({ metadata, onChange, suggestions = {} }) {
|
||||||
|
const { author, book, chapter, page } = metadata
|
||||||
|
const { authors = [], books = [], chapters = [] } = suggestions
|
||||||
|
|
||||||
|
const field = (key) => (e) => onChange({ ...metadata, [key]: e.target.value })
|
||||||
|
|
||||||
|
const inputClass =
|
||||||
|
'w-full bg-white/5 border border-white/10 rounded-lg px-3 py-2 text-sm text-gray-200 ' +
|
||||||
|
'placeholder-gray-600 focus:outline-none focus:border-purple-500/50 transition-colors'
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className="glass p-4 rounded-2xl space-y-3">
|
||||||
|
<div className="flex items-center gap-2">
|
||||||
|
<BookOpen className="w-4 h-4 text-purple-400" />
|
||||||
|
<h3 className="text-sm font-medium text-gray-300">Job Metadata</h3>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<datalist id="mf-authors">
|
||||||
|
{authors.map(a => <option key={a} value={a} />)}
|
||||||
|
</datalist>
|
||||||
|
<datalist id="mf-books">
|
||||||
|
{books.map(b => <option key={b} value={b} />)}
|
||||||
|
</datalist>
|
||||||
|
<datalist id="mf-chapters">
|
||||||
|
{chapters.map(c => <option key={c} value={c} />)}
|
||||||
|
</datalist>
|
||||||
|
|
||||||
|
<div className="grid grid-cols-2 gap-3">
|
||||||
|
<div>
|
||||||
|
<label className="text-xs text-gray-400 mb-1 block">Author</label>
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
list="mf-authors"
|
||||||
|
value={author}
|
||||||
|
onChange={field('author')}
|
||||||
|
placeholder="Author name"
|
||||||
|
className={inputClass}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label className="text-xs text-gray-400 mb-1 block">Book</label>
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
list="mf-books"
|
||||||
|
value={book}
|
||||||
|
onChange={field('book')}
|
||||||
|
placeholder="Book title"
|
||||||
|
className={inputClass}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label className="text-xs text-gray-400 mb-1 block">Chapter</label>
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
list="mf-chapters"
|
||||||
|
value={chapter}
|
||||||
|
onChange={field('chapter')}
|
||||||
|
placeholder="Chapter"
|
||||||
|
className={inputClass}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label className="text-xs text-gray-400 mb-1 block">Page</label>
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
value={page}
|
||||||
|
onChange={field('page')}
|
||||||
|
placeholder="Page number"
|
||||||
|
className={inputClass}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)
|
||||||
|
}
|
||||||
@@ -1,29 +1,17 @@
|
|||||||
import { motion } from 'framer-motion'
|
import { motion } from 'framer-motion'
|
||||||
import { FileText, Eye, Search, Wand2 } from 'lucide-react'
|
import { FileText, Eye } from 'lucide-react'
|
||||||
|
|
||||||
const modes = [
|
const modes = [
|
||||||
{ id: 'plain_ocr', name: 'Plain OCR', icon: FileText, color: 'from-blue-500 to-cyan-500', desc: 'Extract raw text', needsInput: false },
|
{ id: 'plain_ocr', name: 'Plain OCR', icon: FileText, color: 'from-blue-500 to-cyan-500', desc: 'Extract raw text' },
|
||||||
{ id: 'describe', name: 'Describe', icon: Eye, color: 'from-violet-500 to-purple-500', desc: 'Image description', needsInput: false },
|
{ id: 'describe', name: 'Describe', icon: Eye, color: 'from-violet-500 to-purple-500', desc: 'Image description' },
|
||||||
{ id: 'find_ref', name: 'Find', icon: Search, color: 'from-yellow-500 to-orange-500', desc: 'Locate specific terms', needsInput: 'findTerm' },
|
|
||||||
{ id: 'freeform', name: 'Freeform', icon: Wand2, color: 'from-fuchsia-500 to-pink-500', desc: 'Custom prompt', needsInput: 'prompt' },
|
|
||||||
]
|
]
|
||||||
|
|
||||||
export default function ModeSelector({
|
export default function ModeSelector({ mode, onModeChange }) {
|
||||||
mode,
|
|
||||||
onModeChange,
|
|
||||||
prompt,
|
|
||||||
onPromptChange,
|
|
||||||
findTerm,
|
|
||||||
onFindTermChange
|
|
||||||
}) {
|
|
||||||
const selectedMode = modes.find(m => m.id === mode)
|
|
||||||
const needsInput = selectedMode?.needsInput
|
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="glass p-4 rounded-2xl space-y-3">
|
<div className="glass p-4 rounded-2xl space-y-3">
|
||||||
<h3 className="text-sm font-semibold text-gray-200">Mode</h3>
|
<h3 className="text-sm font-semibold text-gray-200">Mode</h3>
|
||||||
|
|
||||||
<div className="grid grid-cols-4 gap-2">
|
<div className="grid grid-cols-2 gap-2">
|
||||||
{modes.map((m) => {
|
{modes.map((m) => {
|
||||||
const Icon = m.icon
|
const Icon = m.icon
|
||||||
const isSelected = mode === m.id
|
const isSelected = mode === m.id
|
||||||
@@ -32,6 +20,7 @@ export default function ModeSelector({
|
|||||||
<motion.button
|
<motion.button
|
||||||
key={m.id}
|
key={m.id}
|
||||||
onClick={() => onModeChange(m.id)}
|
onClick={() => onModeChange(m.id)}
|
||||||
|
title={m.desc}
|
||||||
className={`
|
className={`
|
||||||
relative p-2 rounded-xl text-center transition-all
|
relative p-2 rounded-xl text-center transition-all
|
||||||
${isSelected
|
${isSelected
|
||||||
@@ -68,38 +57,6 @@ export default function ModeSelector({
|
|||||||
)
|
)
|
||||||
})}
|
})}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{needsInput === 'findTerm' && (
|
|
||||||
<motion.div
|
|
||||||
initial={{ opacity: 0, height: 0 }}
|
|
||||||
animate={{ opacity: 1, height: 'auto' }}
|
|
||||||
exit={{ opacity: 0, height: 0 }}
|
|
||||||
>
|
|
||||||
<input
|
|
||||||
type="text"
|
|
||||||
value={findTerm}
|
|
||||||
onChange={(e) => onFindTermChange(e.target.value)}
|
|
||||||
placeholder="Enter term to find (e.g., Total, Invoice #)"
|
|
||||||
className="w-full bg-white/5 border border-white/10 rounded-xl px-3 py-2 text-sm focus:outline-none focus:border-purple-500 transition-colors"
|
|
||||||
/>
|
|
||||||
</motion.div>
|
|
||||||
)}
|
|
||||||
|
|
||||||
{needsInput === 'prompt' && (
|
|
||||||
<motion.div
|
|
||||||
initial={{ opacity: 0, height: 0 }}
|
|
||||||
animate={{ opacity: 1, height: 'auto' }}
|
|
||||||
exit={{ opacity: 0, height: 0 }}
|
|
||||||
>
|
|
||||||
<textarea
|
|
||||||
value={prompt}
|
|
||||||
onChange={(e) => onPromptChange(e.target.value)}
|
|
||||||
placeholder="Enter your custom prompt..."
|
|
||||||
className="w-full bg-white/5 border border-white/10 rounded-xl px-3 py-2 text-sm focus:outline-none focus:border-purple-500 transition-colors resize-none"
|
|
||||||
rows={2}
|
|
||||||
/>
|
|
||||||
</motion.div>
|
|
||||||
)}
|
|
||||||
</div>
|
</div>
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|||||||
33
frontend/src/components/ModelSelector.jsx
Normal file
33
frontend/src/components/ModelSelector.jsx
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
import { Cpu } from 'lucide-react'
|
||||||
|
|
||||||
|
const SELECT_CLASS =
|
||||||
|
'w-full bg-white/5 border border-white/10 rounded-lg px-3 py-2 text-sm text-gray-200 ' +
|
||||||
|
'focus:outline-none focus:border-purple-500/50 transition-colors'
|
||||||
|
|
||||||
|
// Dropdown to pick which OCR model runs the analysis.
|
||||||
|
// `models` comes from the useModels() hook; `value` is the selected model id.
|
||||||
|
export default function ModelSelector({ models, value, onChange, loading }) {
|
||||||
|
return (
|
||||||
|
<div className="glass p-4 rounded-2xl space-y-3">
|
||||||
|
<div className="flex items-center gap-2">
|
||||||
|
<Cpu className="w-4 h-4 text-purple-400" />
|
||||||
|
<h3 className="text-sm font-semibold text-gray-200">Model</h3>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<select
|
||||||
|
value={value || ''}
|
||||||
|
onChange={e => onChange(e.target.value)}
|
||||||
|
disabled={loading || models.length === 0}
|
||||||
|
className={SELECT_CLASS}
|
||||||
|
>
|
||||||
|
{loading && <option value="">Loading models…</option>}
|
||||||
|
{!loading && models.length === 0 && <option value="">No models available</option>}
|
||||||
|
{models.map(m => (
|
||||||
|
<option key={m.id} value={m.id}>
|
||||||
|
{m.label}{m.default ? ' (default)' : ''}
|
||||||
|
</option>
|
||||||
|
))}
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
)
|
||||||
|
}
|
||||||
@@ -5,7 +5,7 @@ import axios from 'axios'
|
|||||||
|
|
||||||
const API_BASE = import.meta.env.VITE_API_URL || '/api'
|
const API_BASE = import.meta.env.VITE_API_URL || '/api'
|
||||||
|
|
||||||
function PDFProcessor({ pdfFile, mode, prompt, advancedSettings, includeCaption }) {
|
function PDFProcessor({ pdfFile, mode, prompt, model, advancedSettings, includeCaption }) {
|
||||||
const [processing, setProcessing] = useState(false)
|
const [processing, setProcessing] = useState(false)
|
||||||
const [progress, setProgress] = useState(0)
|
const [progress, setProgress] = useState(0)
|
||||||
const [result, setResult] = useState(null)
|
const [result, setResult] = useState(null)
|
||||||
@@ -29,6 +29,7 @@ function PDFProcessor({ pdfFile, mode, prompt, advancedSettings, includeCaption
|
|||||||
try {
|
try {
|
||||||
const formData = new FormData()
|
const formData = new FormData()
|
||||||
formData.append('pdf_file', pdfFile)
|
formData.append('pdf_file', pdfFile)
|
||||||
|
if (model) formData.append('model', model)
|
||||||
formData.append('mode', mode)
|
formData.append('mode', mode)
|
||||||
formData.append('prompt', prompt)
|
formData.append('prompt', prompt)
|
||||||
formData.append('output_format', outputFormat)
|
formData.append('output_format', outputFormat)
|
||||||
@@ -80,7 +81,7 @@ function PDFProcessor({ pdfFile, mode, prompt, advancedSettings, includeCaption
|
|||||||
} finally {
|
} finally {
|
||||||
setProcessing(false)
|
setProcessing(false)
|
||||||
}
|
}
|
||||||
}, [pdfFile, mode, prompt, outputFormat, includeCaption, advancedSettings])
|
}, [pdfFile, mode, prompt, model, outputFormat, includeCaption, advancedSettings])
|
||||||
|
|
||||||
const handleDownloadJSON = useCallback(() => {
|
const handleDownloadJSON = useCallback(() => {
|
||||||
if (!result || outputFormat !== 'json') return
|
if (!result || outputFormat !== 'json') return
|
||||||
|
|||||||
@@ -205,7 +205,7 @@ export default function ResultPanel({ result, loading, imagePreview, onCopy, onD
|
|||||||
exit={{ opacity: 0, y: -20 }}
|
exit={{ opacity: 0, y: -20 }}
|
||||||
className="space-y-4"
|
className="space-y-4"
|
||||||
>
|
>
|
||||||
{/* Preview with boxes */}
|
{/* Preview with boxes (grounding modes) */}
|
||||||
{imagePreview && result.boxes && result.boxes.length > 0 && (
|
{imagePreview && result.boxes && result.boxes.length > 0 && (
|
||||||
<div className="relative rounded-xl overflow-hidden border border-white/10 bg-black">
|
<div className="relative rounded-xl overflow-hidden border border-white/10 bg-black">
|
||||||
<img
|
<img
|
||||||
@@ -226,15 +226,13 @@ export default function ResultPanel({ result, loading, imagePreview, onCopy, onD
|
|||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{/* Text result */}
|
{/* Rendered text result */}
|
||||||
<div className="bg-white/5 border border-white/10 rounded-xl p-4 max-h-96 overflow-y-auto">
|
<div className="bg-white/5 border border-white/10 rounded-xl p-4 max-h-96 overflow-y-auto">
|
||||||
{isHTML ? (
|
{isHTML ? (
|
||||||
<div
|
<div
|
||||||
className="prose prose-invert prose-sm max-w-none"
|
className="prose prose-invert prose-sm max-w-none"
|
||||||
dangerouslySetInnerHTML={{ __html: DOMPurify.sanitize(result.text) }}
|
dangerouslySetInnerHTML={{ __html: DOMPurify.sanitize(result.text) }}
|
||||||
style={{
|
style={{ color: '#e5e7eb' }}
|
||||||
color: '#e5e7eb',
|
|
||||||
}}
|
|
||||||
/>
|
/>
|
||||||
) : isMarkdown ? (
|
) : isMarkdown ? (
|
||||||
<div className="prose prose-invert prose-sm max-w-none">
|
<div className="prose prose-invert prose-sm max-w-none">
|
||||||
|
|||||||
24
frontend/src/hooks/useModels.js
Normal file
24
frontend/src/hooks/useModels.js
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
import { useState, useEffect } from 'react'
|
||||||
|
|
||||||
|
const API_BASE = import.meta.env.VITE_API_URL || '/api'
|
||||||
|
|
||||||
|
// Hook that loads the selectable OCR models from `${API_BASE}/models` once on
// mount. Returns { models, loading }: `models` starts as [] and `loading`
// starts as true, turning false when the request settles (success or failure).
// Each model: { id, label, capabilities: { grounding, advanced_settings }, default }
export function useModels() {
  const [models, setModels] = useState([])
  const [loading, setLoading] = useState(true)

  useEffect(() => {
    // Flipped by the cleanup below; every state update checks it first so a
    // response arriving after unmount is ignored.
    let cancelled = false

    // Non-2xx responses yield null instead of a parsed body.
    const parseBody = (response) => {
      if (!response.ok) return null
      return response.json()
    }

    const storeModels = (payload) => {
      if (cancelled) return
      if (payload?.models) setModels(payload.models)
    }

    const finish = () => {
      if (!cancelled) setLoading(false)
    }

    fetch(`${API_BASE}/models`)
      .then(parseBody)
      .then(storeModels)
      .catch(() => {}) // failures leave `models` at its current value
      .finally(finish)

    return () => {
      cancelled = true
    }
  }, [])

  return { models, loading }
}
|
||||||
16
frontend/src/hooks/useSuggestions.js
Normal file
16
frontend/src/hooks/useSuggestions.js
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
import { useState, useEffect } from 'react'
|
||||||
|
|
||||||
|
const API_BASE = import.meta.env.VITE_API_URL || '/api'
|
||||||
|
|
||||||
|
/**
 * Loads autocomplete suggestions for the job metadata fields.
 *
 * Requests `${API_BASE}/jobs/suggestions` once on mount. The `authors`,
 * `books`, `chapters` and `reviewers` entries of the returned object are
 * always arrays: they start empty and are only replaced by array values from
 * the response, so callers can call `.map()` on them without guarding.
 *
 * @returns {{authors: Array, books: Array, chapters: Array, reviewers: Array}}
 */
export function useSuggestions() {
  const [suggestions, setSuggestions] = useState({ authors: [], books: [], chapters: [], reviewers: [] })

  useEffect(() => {
    // Flipped by the cleanup so a response that arrives after unmount is ignored.
    let cancelled = false

    // Use the response value only when it really is a list.
    const asList = (value, fallback) => (Array.isArray(value) ? value : fallback)

    fetch(`${API_BASE}/jobs/suggestions`)
      .then(r => (r.ok ? r.json() : null))
      .then(data => {
        if (cancelled || !data || typeof data !== 'object') return
        // Extra keys in the payload are passed through, but the four list
        // keys can never become undefined/null because of a partial payload.
        setSuggestions(prev => ({
          ...prev,
          ...data,
          authors: asList(data.authors, prev.authors),
          books: asList(data.books, prev.books),
          chapters: asList(data.chapters, prev.chapters),
          reviewers: asList(data.reviewers, prev.reviewers),
        }))
      })
      .catch(() => {}) // best effort: on failure the lists stay as they are

    return () => { cancelled = true }
  }, [])

  return suggestions
}
|
||||||
Reference in New Issue
Block a user