Remove Freeform and Find from UI. Allow Description to be added to Reviewed job

2026-06-29 13:09:01 +01:00
parent 48f958de6c
commit 04bbbebd5a
10 changed files with 394 additions and 403 deletions
--- a/.env.example
+++ b/.env.example
@@ -11,6 +11,19 @@ FRONTEND_PORT=3000
 MODEL_NAME=deepseek-ai/DeepSeek-OCR
 HF_HOME=/models
 # OCR model selection
 # Register the local DeepSeek-OCR model (set to false for an Ollama-only deployment)
 ENABLE_DEEPSEEK_LOCAL=true
 # External Ollama host the backend should call (no trailing slash)
 OLLAMA_BASE_URL=http://host.docker.internal:11434
 # Comma-separated Ollama vision model tags to surface in the UI.
 # Pull these on the Ollama host first, e.g. `ollama pull glm-ocr`.
 OLLAMA_MODELS=glm-ocr,llama3.2-vision,minicpm-v,qwen2.5vl
 # Default model id selected in the UI (deepseek-local or ollama:<tag>)
 DEFAULT_OCR_MODEL=deepseek-local
 # Per-request timeout (seconds) for Ollama calls
 OLLAMA_TIMEOUT=300
 # CORS Configuration (comma-separated origins, defaults to http://localhost:3000)
 CORS_ORIGINS=http://localhost:3000
--- a/README.md
+++ b/README.md
@@ -172,6 +172,13 @@ FRONTEND_PORT=3000
 MODEL_NAME=deepseek-ai/DeepSeek-OCR
 HF_HOME=/models
 # OCR model selection (DeepSeek + Ollama)
 ENABLE_DEEPSEEK_LOCAL=true                          # register the local GPU model
 OLLAMA_BASE_URL=http://host.docker.internal:11434   # external Ollama host
 OLLAMA_MODELS=glm-ocr,llama3.2-vision,minicpm-v,qwen2.5vl
 DEFAULT_OCR_MODEL=deepseek-local                    # deepseek-local or ollama:<tag>
 OLLAMA_TIMEOUT=300                                  # per-request timeout (seconds)
 # Upload Configuration
 MAX_UPLOAD_SIZE_MB=100  # Maximum file upload size
@@ -186,13 +193,47 @@ CROP_MODE=true         # Enable dynamic cropping for large images
 - `API_HOST`: Backend API host (default: 0.0.0.0)
 - `API_PORT`: Backend API port (default: 8000)
 - `FRONTEND_PORT`: Frontend port (default: 3000)
- `MODEL_NAME`: HuggingFace model identifier
+- `MODEL_NAME`: HuggingFace model identifier for the local DeepSeek-OCR model
 - `HF_HOME`: Model cache directory
 - `ENABLE_DEEPSEEK_LOCAL`: Register the local DeepSeek-OCR model (set `false` for an Ollama-only deployment with no GPU model loaded)
 - `OLLAMA_BASE_URL`: URL of an external Ollama server the backend calls for non-DeepSeek models
 - `OLLAMA_MODELS`: Comma-separated Ollama vision model tags to expose in the UI (pull them on the Ollama host first, e.g. `ollama pull glm-ocr`)
 - `DEFAULT_OCR_MODEL`: Model id selected by default (`deepseek-local` or `ollama:<tag>`)
 - `OLLAMA_TIMEOUT`: Per-request timeout in seconds for Ollama calls
 - `MAX_UPLOAD_SIZE_MB`: Maximum file upload size in megabytes
 - `BASE_SIZE`: Base image processing size (affects memory usage)
 - `IMAGE_SIZE`: Tile size for dynamic cropping
 - `CROP_MODE`: Enable/disable dynamic image cropping
 ### Choosing an OCR Model
 The **Model** selector (next to the Mode selector) chooses which backend runs the OCR:
 - **DeepSeek-OCR (local GPU)** — the default. Loaded lazily on first use. Supports
  every mode including grounding/bounding-box modes (Find), plus the Advanced
  Settings (base size, crop mode, etc.).
 - **Ollama models** — any vision model pulled on your Ollama host and listed in
  `OLLAMA_MODELS` (e.g. `glm-ocr`, `llama3.2-vision`). These run remotely on the
   Ollama server. They return **plain text only**: bounding boxes are not produced,
   so grounding modes (Find) are unavailable when an Ollama model is selected (the
   API rejects them with HTTP 400), and the DeepSeek-specific Advanced Settings are ignored.
 Setup for Ollama models:
 ```bash
 # On the machine running Ollama
 ollama pull glm-ocr
 ollama pull llama3.2-vision
 # Point the backend at it (in .env), then restart
 OLLAMA_BASE_URL=http://host.docker.internal:11434
 OLLAMA_MODELS=glm-ocr,llama3.2-vision
 ```
 `GET /api/models` returns the registered models and their capabilities; the UI
 populates the selector from it. The model used for each job is stored on the job
 record (`ocr_model`) and shown in the Browse Jobs view.
 ## Tech Stack
 ### Frontend
@@ -377,6 +418,7 @@ For large images, the model uses dynamic cropping:
 **Parameters:**
 - `image` (file, required) - Image file to process (up to 100MB)
 - `model` (string) - OCR model id from `GET /api/models` (default: registry default). Grounding/Advanced settings apply to DeepSeek only.
 - `mode` (string) - OCR mode: `plain_ocr` | `describe` | `find_ref` | `freeform`
 - `prompt` (string) - Custom prompt for freeform mode
 - `grounding` (bool) - Enable bounding boxes (auto-enabled for find_ref)
@@ -416,6 +458,7 @@ Process PDF documents with OCR and export to various formats.
 **Parameters:**
 - `pdf_file` (file, required) - PDF file to process (up to 100MB)
 - `model` (string) - OCR model id from `GET /api/models` (default: registry default)
 - `mode` (string) - OCR mode: `plain_ocr` | `describe` | `find_ref` | `freeform`
 - `prompt` (string) - Custom prompt for freeform mode
 - `output_format` (string) - Output format: `markdown` | `html` | `docx` | `json`
--- a/backend/database.py
+++ b/backend/database.py
@@ -62,6 +62,11 @@ def init_db():
                ALTER TABLE ocr_jobs
                ADD COLUMN IF NOT EXISTS updated_at TIMESTAMPTZ
            """)
            # Which OCR model produced this job (e.g. "deepseek-local", "ollama:glm-ocr")
            cur.execute("""
                ALTER TABLE ocr_jobs
                ADD COLUMN IF NOT EXISTS ocr_model TEXT
            """)
            # Trigger function: stamp updated_at on every row update
            cur.execute("""
                CREATE OR REPLACE FUNCTION set_updated_at()
--- a/backend/main.py
+++ b/backend/main.py
@@ -1,8 +1,6 @@
 import os
 import re
 import uuid
 import tempfile
 import shutil
 import base64
 from typing import List, Dict, Any, Optional
 from contextlib import asynccontextmanager
@@ -12,8 +10,6 @@ from fastapi import FastAPI, File, UploadFile, Form, HTTPException, Query
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, StreamingResponse, FileResponse
 from pydantic import BaseModel
 import torch
 from transformers import AutoModel, AutoTokenizer
 from PIL import Image
 import uvicorn
 from decouple import config as env_config
@@ -28,19 +24,28 @@ from pdf_utils import (
 )
 from format_converter import DocumentConverter
 from database import init_db, get_db
 from providers import (
    build_registry,
    parse_detections,
    clean_grounding_text,
    ProviderError,
    GROUNDING_MODES,
 )
 OCR_IMAGES_DIR = env_config("OCR_IMAGES_DIR", default="/data/ocr_images")
 # -----------------------------
-# Lifespan context for model loading
+# Lifespan context
 # -----------------------------
-model = None
+# The model registry holds all available OCR providers. Local models (e.g.
-tokenizer = None
+# DeepSeek-OCR) are loaded lazily on first use so an Ollama-only deployment
 # starts instantly and never touches the GPU.
 registry = None
@asynccontextmanager
 async def lifespan(app: FastAPI):
-    """Load model on startup, cleanup on shutdown"""
+    """Build the model registry on startup."""
-    global model, tokenizer
+    global registry
    # Image storage directory
    os.makedirs(OCR_IMAGES_DIR, exist_ok=True)
@@ -51,39 +56,8 @@ async def lifespan(app: FastAPI):
    except Exception as exc:
        print(f"Warning: database initialization failed: {exc}")
-    # Environment setup
+    # OCR model registry (providers load their models lazily)
-    os.environ.pop("TRANSFORMERS_CACHE", None)
+    registry = build_registry()
    MODEL_NAME = env_config("MODEL_NAME", default="deepseek-ai/DeepSeek-OCR")
    HF_HOME = env_config("HF_HOME", default="/models")
    os.makedirs(HF_HOME, exist_ok=True)
    # Load model
    print(f"🚀 Loading {MODEL_NAME}...")
    torch_dtype = torch.bfloat16
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_NAME,
        trust_remote_code=True,
    )
    model = AutoModel.from_pretrained(
        MODEL_NAME,
        trust_remote_code=True,
        use_safetensors=True,
        attn_implementation="eager",
        torch_dtype=torch_dtype,
    ).eval().to("cuda")
    # Pad token setup
    try:
        if getattr(tokenizer, "pad_token_id", None) is None and getattr(tokenizer, "eos_token_id", None) is not None:
            tokenizer.pad_token = tokenizer.eos_token
        if getattr(model.config, "pad_token_id", None) is None and getattr(tokenizer, "pad_token_id", None) is not None:
            model.config.pad_token_id = tokenizer.pad_token_id
    except Exception:
        pass
    print("✅ Model loaded and ready!")
    yield
@@ -112,155 +86,6 @@ app.add_middleware(
    allow_headers=["*"],
 )
 # -----------------------------
 # Prompt builder
 # -----------------------------
 def build_prompt(
    mode: str,
    user_prompt: str,
    grounding: bool,
    find_term: Optional[str],
    schema: Optional[str],
    include_caption: bool,
 ) -> str:
    """Assemble the newline-joined model prompt for the requested OCR mode.
    The prompt always starts with the <image> token, optionally followed by the
    <|grounding|> token (when `grounding` is set or the mode is one of find_ref,
    layout_map, pii_redact), and ends with the mode-specific instruction.
    - mode: selects the instruction; unknown modes fall back to "OCR this image."
    - user_prompt: instruction text used verbatim (stripped) in freeform mode
    - find_term: term to locate in find_ref mode (defaults to "Total" when blank)
    - schema: JSON schema text embedded in the kv_json instruction ("{}" when missing)
    - include_caption: append a request for a description, except in describe mode
    """
    # Instructions that do not depend on any caller-supplied text.
    fixed_instructions = {
        "plain_ocr": "Free OCR.",
        "markdown": "Convert the document to markdown.",
        "tables_csv": (
            "Extract every table and output CSV only. "
            "Use commas, minimal quoting. If multiple tables, separate with a line containing '---'."
        ),
        "tables_md": "Extract every table as GitHub-flavored Markdown tables. Output only the tables.",
        "figure_chart": (
            "Parse the figure. First extract any numeric series as a two-column table (x,y). "
            "Then summarize the chart in 2 sentences. Output the table, then a line '---', then the summary."
        ),
        "layout_map": (
            'Return a JSON array of blocks with fields {"type":["title","paragraph","table","figure"],'
            '"box":[x1,y1,x2,y2]}. Do not include any text content.'
        ),
        "pii_redact": (
            'Find all occurrences of emails, phone numbers, postal addresses, and IBANs. '
            'Return a JSON array of objects {label, text, box:[x1,y1,x2,y2]}.'
        ),
        "multilingual": "Free OCR. Detect the language automatically and output in the same script.",
        "describe": "Describe this image. Focus on visible key elements.",
    }
    wants_grounding_token = grounding or mode in {"find_ref", "layout_map", "pii_redact"}
    # Modes whose instruction embeds caller-supplied text are handled explicitly.
    if mode == "kv_json":
        schema_text = schema.strip() if schema else "{}"
        instruction = (
            "Extract key fields and return strict JSON only. "
            f"Use this schema (fill the values): {schema_text}"
        )
    elif mode == "find_ref":
        target = (find_term or "").strip() or "Total"
        instruction = f"Locate <|ref|>{target}<|/ref|> in the image."
    elif mode == "freeform":
        instruction = user_prompt.strip() if user_prompt else "OCR this image."
    else:
        instruction = fixed_instructions.get(mode, "OCR this image.")
    # Describe mode already yields a description, so it never gets the caption suffix.
    if include_caption and mode != "describe":
        instruction += "\nThen add a one-paragraph description of the image."
    segments: List[str] = ["<image>"]
    if wants_grounding_token:
        segments.append("<|grounding|>")
    segments.append(instruction)
    return "\n".join(segments)
 # -----------------------------
 # Grounding parser
 # -----------------------------
 # Match one detection block and capture its coordinates as a whole list expression.
 # Examples of captured coords (including outer brackets):
 #  - [[312, 339, 480, 681]]
 #  - [[504, 700, 625, 910], [771, 570, 996, 996]]
 #  - [[110, 310, 255, 800], [312, 343, 479, 680], ...]
 # The bracket capture is lazy but anchored by the <|/det|> that follows it, so it
 # still spans every inner list of ONE block while stopping at that block's own
 # closing tag. (A greedy capture runs on to the last <|/det|> in the text and fuses
 # separate detections into a single match whose coords cannot be parsed.)
 DET_BLOCK = re.compile(
    r"<\|ref\|>(?P<label>.*?)<\|/ref\|>\s*<\|det\|>\s*(?P<coords>\[.*?\])\s*<\|/det\|>",
    re.DOTALL,
 )
 def clean_grounding_text(text: str) -> str:
    """Remove grounding tags from text for display, keeping labels.
    Every <|ref|>label<|/ref|><|det|>[...]<|/det|> block is replaced by its label,
    standalone <|grounding|> tokens are dropped, and the result is stripped.
    Text between separate detection blocks is preserved.
    """
    # Reuse DET_BLOCK so display cleaning and box parsing agree on what a block is.
    # A callable replacement inserts the label literally (no backslash processing).
    cleaned = DET_BLOCK.sub(lambda m: m.group("label"), text)
    # Also remove any standalone grounding tags
    cleaned = cleaned.replace("<|grounding|>", "")
    return cleaned.strip()
 def parse_detections(text: str, image_width: int, image_height: int) -> List[Dict[str, Any]]:
    """Parse grounding boxes from text and scale from 0-999 normalized coords to actual image dimensions.
    Handles both single and multiple bounding boxes:
    - Single: <|ref|>label<|/ref|><|det|>[[x1,y1,x2,y2]]<|/det|>
    - Multiple: <|ref|>label<|/ref|><|det|>[[x1,y1,x2,y2], [x1,y1,x2,y2], ...]<|/det|>
    Returns a list of {"label": str, "box": [x1, y1, x2, y2]} dicts in pixel
    coordinates, one per box, in order of appearance. A block whose coords cannot
    be parsed is logged and skipped; empty or None text yields an empty list.
    """
    # Function-scope import (hoisted out of the loop) keeps the block self-contained.
    import ast
    boxes: List[Dict[str, Any]] = []
    for m in DET_BLOCK.finditer(text or ""):
        label = m.group("label").strip()
        coords_str = m.group("coords").strip()
        print(f"🔍 DEBUG: Found detection for '{label}'")
        print(f"📦 Raw coords string (with brackets): {coords_str}")
        try:
            # literal_eval only accepts Python literals, so model output is never executed
            parsed = ast.literal_eval(coords_str)
            # Normalize to a list of lists
            if (
                isinstance(parsed, list)
                and len(parsed) == 4
                and all(isinstance(n, (int, float)) for n in parsed)
            ):
                # Single box provided as [x1,y1,x2,y2]
                box_coords = [parsed]
                print("📦 Single box (flat list) detected")
            elif isinstance(parsed, list):
                box_coords = parsed
                print(f"📦 Boxes detected: {len(box_coords)}")
            else:
                raise ValueError("Unsupported coords structure")
            # Scale each box from the 0-999 grid to pixel coordinates
            for idx, box in enumerate(box_coords):
                if isinstance(box, (list, tuple)) and len(box) >= 4:
                    x1 = int(float(box[0]) / 999 * image_width)
                    y1 = int(float(box[1]) / 999 * image_height)
                    x2 = int(float(box[2]) / 999 * image_width)
                    y2 = int(float(box[3]) / 999 * image_height)
                    print(f"  Box {idx+1}: {box} → [{x1}, {y1}, {x2}, {y2}]")
                    boxes.append({"label": label, "box": [x1, y1, x2, y2]})
                else:
                    print(f"  ⚠️ Skipping invalid box: {box}")
        except Exception as e:
            # Best effort: one malformed block must not discard the others
            print(f"❌ Parsing failed: {e}")
            continue
    print(f"🎯 Total boxes parsed: {len(boxes)}")
    return boxes
 # -----------------------------
 # Routes
 # -----------------------------
@@ -270,11 +95,38 @@ async def root():
@app.get("/health")
 async def health():
-    return {"status": "healthy", "model_loaded": model is not None}
+    return {"status": "healthy", "models": registry.list_models() if registry else []}
@app.get("/api/models")
 async def list_models():
    """List the OCR models available for selection in the UI."""
    # The registry is only populated by the lifespan hook; report 503 until then.
    if registry is not None:
        available = registry.list_models()
        return JSONResponse({"models": available})
    raise HTTPException(status_code=503, detail="Model registry not ready.")
 def _resolve_provider(model_id: Optional[str], mode: str):
    """Return the provider registered for `model_id`, validated against `mode`.
    Raises HTTPException 503 while the registry has not been built, 400 when the
    registry lookup raises ProviderError, and 400 when `mode` is a grounding mode
    but the provider does not advertise the "grounding" capability.
    """
    if registry is None:
        raise HTTPException(status_code=503, detail="Model registry not ready.")
    try:
        selected = registry.get(model_id)
    except ProviderError as lookup_error:
        # Surface the registry's own message to the client as a bad request.
        raise HTTPException(status_code=400, detail=str(lookup_error))
    if mode in GROUNDING_MODES:
        # Capabilities are only consulted for modes that actually need grounding.
        if not selected.capabilities.get("grounding"):
            message = f"Model '{selected.label}' does not support grounding modes (e.g. {mode})."
            raise HTTPException(status_code=400, detail=message)
    return selected
@app.post("/api/ocr")
 async def ocr_inference(
    image: UploadFile = File(...),
    model: Optional[str] = Form(None),
    mode: str = Form("plain_ocr"),
    prompt: str = Form(""),
    grounding: bool = Form(False),
@@ -290,32 +142,18 @@ async def ocr_inference(
    Perform OCR inference on uploaded image
    - **image**: Image file to process
    - **model**: OCR model id (see GET /api/models); defaults to the registry default
    - **mode**: OCR mode (plain_ocr, markdown, tables_csv, etc.)
    - **prompt**: Custom prompt for freeform mode
-    - **grounding**: Enable grounding boxes
+    - **grounding**: Enable grounding boxes (DeepSeek only)
    - **include_caption**: Add image description
    - **find_term**: Term to find (for find_ref mode)
    - **schema**: JSON schema (for kv_json mode)
-    - **base_size**: Base processing size
+    - **base_size/image_size/crop_mode/test_compress**: DeepSeek processing options
    - **image_size**: Image size parameter
    - **crop_mode**: Enable crop mode
    - **test_compress**: Test compression
    """
-    if model is None or tokenizer is None:
+    provider = _resolve_provider(model, mode)
        raise HTTPException(status_code=503, detail="Model not loaded yet")
    # Build prompt
    prompt_text = build_prompt(
        mode=mode,
        user_prompt=prompt,
        grounding=grounding,
        find_term=find_term,
        schema=schema,
        include_caption=include_caption,
    )
    tmp_img = None
    out_dir = None
    try:
        # Save uploaded file
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
@@ -330,42 +168,27 @@ async def ocr_inference(
        except Exception:
            orig_w = orig_h = None
-        out_dir = tempfile.mkdtemp(prefix="dsocr_")
+        # Run inference through the selected provider
-        
+        text = provider.run(
-        # Run inference
+            tmp_img,
-        res = model.infer(
+            mode=mode,
-            tokenizer,
+            prompt=prompt,
-            prompt=prompt_text,
+            grounding=grounding,
-            image_file=tmp_img,
+            find_term=find_term,
-            output_path=out_dir,
+            schema=schema,
-            base_size=base_size,
+            include_caption=include_caption,
-            image_size=image_size,
+            options={
-            crop_mode=crop_mode,
+                "base_size": base_size,
-            save_results=False,
+                "image_size": image_size,
-            test_compress=test_compress,
+                "crop_mode": crop_mode,
-            eval_mode=True,
+                "test_compress": test_compress,
            },
        )
        # Normalize response
        if isinstance(res, str):
            text = res.strip()
        elif isinstance(res, dict) and "text" in res:
            text = str(res["text"]).strip()
        elif isinstance(res, (list, tuple)):
            text = "\n".join(map(str, res)).strip()
        else:
            text = ""
        # Fallback: check output file
        if not text:
            mmd = os.path.join(out_dir, "result.mmd")
            if os.path.exists(mmd):
                with open(mmd, "r", encoding="utf-8") as fh:
                    text = fh.read().strip()
        if not text:
            text = "No text returned by model."
-        # Parse grounding boxes with proper coordinate scaling
+        # Parse grounding boxes (no-op for providers/text without grounding tokens)
        boxes = parse_detections(text, orig_w or 1, orig_h or 1) if ("<|det|>" in text or "<|ref|>" in text) else []
        # Clean grounding tags from display text, but keep the labels
@@ -382,14 +205,21 @@ async def ocr_inference(
            "boxes": boxes,
            "image_dims": {"w": orig_w, "h": orig_h},
            "metadata": {
                "model": provider.id,
                "model_label": provider.label,
                "mode": mode,
-                "grounding": grounding or (mode in {"find_ref","layout_map","pii_redact"}),
+                "grounding": grounding or (mode in GROUNDING_MODES),
                "base_size": base_size,
                "image_size": image_size,
                "crop_mode": crop_mode
            }
        })
    except ProviderError as e:
        print(f"OCR provider error: {e}")
        raise HTTPException(status_code=502, detail=str(e))
    except HTTPException:
        raise
    except Exception as e:
        print(f"OCR inference error: {type(e).__name__}: {str(e)}")
        raise HTTPException(status_code=500, detail="An internal error occurred during OCR processing.")
@@ -400,12 +230,11 @@ async def ocr_inference(
                os.remove(tmp_img)
            except Exception:
                pass
        if out_dir:
            shutil.rmtree(out_dir, ignore_errors=True)
@app.post("/api/process-pdf")
 async def process_pdf(
    pdf_file: UploadFile = File(...),
    model: Optional[str] = Form(None),
    mode: str = Form("plain_ocr"),
    prompt: str = Form(""),
    output_format: str = Form("markdown"),  # markdown, html, docx, json
@@ -432,8 +261,7 @@ async def process_pdf(
    - **image_size**: Image size parameter
    - **crop_mode**: Enable crop mode
    """
-    if model is None or tokenizer is None:
+    provider = _resolve_provider(model, mode)
        raise HTTPException(status_code=503, detail="Model not loaded yet")
    # Validate output format
    if output_format not in ["markdown", "html", "docx", "json"]:
@@ -456,56 +284,32 @@ async def process_pdf(
        for page_idx, img in enumerate(images):
            print(f"🔍 Processing page {page_idx + 1}/{total_pages}...")
            # Build prompt for this page
            prompt_text = build_prompt(
                mode=mode,
                user_prompt=prompt,
                grounding=grounding,
                find_term=None,
                schema=None,
                include_caption=include_caption,
            )
            # Save image temporarily
            tmp_img = None
            out_dir = None
            try:
                with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
                    img.save(tmp, format="PNG")
                    tmp_img = tmp.name
                orig_w, orig_h = img.size
                out_dir = tempfile.mkdtemp(prefix="dsocr_pdf_")
-                # Run inference
+                # Run inference through the selected provider
-                res = model.infer(
+                text = provider.run(
-                    tokenizer,
+                    tmp_img,
-                    prompt=prompt_text,
+                    mode=mode,
-                    image_file=tmp_img,
+                    prompt=prompt,
-                    output_path=out_dir,
+                    grounding=grounding,
-                    base_size=base_size,
+                    find_term=None,
-                    image_size=image_size,
+                    schema=None,
-                    crop_mode=crop_mode,
+                    include_caption=include_caption,
-                    save_results=False,
+                    options={
-                    test_compress=False,
+                        "base_size": base_size,
-                    eval_mode=True,
+                        "image_size": image_size,
                        "crop_mode": crop_mode,
                        "test_compress": False,
                    },
                )
                # Normalize response
                if isinstance(res, str):
                    text = res.strip()
                elif isinstance(res, dict) and "text" in res:
                    text = str(res["text"]).strip()
                elif isinstance(res, (list, tuple)):
                    text = "\n".join(map(str, res)).strip()
                else:
                    text = ""
                if not text:
                    mmd = os.path.join(out_dir, "result.mmd")
                    if os.path.exists(mmd):
                        with open(mmd, "r", encoding="utf-8") as fh:
                            text = fh.read().strip()
                if not text:
                    text = f"No text returned for page {page_idx + 1}."
@@ -550,8 +354,6 @@ async def process_pdf(
                        os.remove(tmp_img)
                    except Exception:
                        pass
                if out_dir:
                    shutil.rmtree(out_dir, ignore_errors=True)
        print(f"✅ Processed all {total_pages} pages")
@@ -562,6 +364,8 @@ async def process_pdf(
                "total_pages": total_pages,
                "pages": pages_content,
                "metadata": {
                    "model": provider.id,
                    "model_label": provider.label,
                    "mode": mode,
                    "grounding": grounding,
                    "extract_images": extract_images,
@@ -590,6 +394,9 @@ async def process_pdf(
                headers={"Content-Disposition": f"attachment; filename=ocr_result.docx"}
            )
    except ProviderError as e:
        print(f"PDF provider error: {e}")
        raise HTTPException(status_code=502, detail=str(e))
    except Exception as e:
        import traceback
        print(f"Error processing PDF: {e}")
@@ -633,6 +440,7 @@ async def commit_job(
    describe_text: str = Form(""),
    freeform_text: str = Form(""),
    mode: str = Form("plain_ocr"),
    ocr_model: str = Form(""),
 ):
    """Commit an OCR job: save the image and insert a DB record."""
    job_id = str(uuid.uuid4())
@@ -664,13 +472,14 @@ async def commit_job(
                    """
                    INSERT INTO ocr_jobs
                        (id, author, book, chapter, page, image_path, original_filename,
-                         ocr_text, describe_text, freeform_text, mode, status)
+                         ocr_text, describe_text, freeform_text, mode, ocr_model, status)
-                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, 'unreviewed')
+                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, 'unreviewed')
                    RETURNING *
                    """,
                    (job_id, author or None, book or None, chapter or None,
                     page or None, image_path, original_filename,
-                     ocr_text or None, describe_text or None, freeform_text or None, mode),
+                     ocr_text or None, describe_text or None, freeform_text or None,
                     mode, ocr_model or None),
                )
                row = cur.fetchone()
    except Exception as exc:
@@ -743,7 +552,7 @@ async def list_jobs(
                cur.execute(
                    f"""
                    SELECT id, author, book, chapter, page, submitted_at, status,
-                           reviewer_name, reviewed_at, mode, original_filename
+                           reviewer_name, reviewed_at, mode, ocr_model, original_filename
                    FROM ocr_jobs {where}
                    ORDER BY submitted_at DESC
                    LIMIT %s OFFSET %s
@@ -945,6 +754,75 @@ async def set_job_status(job_id: str, body: StatusRequest):
    return JSONResponse(_job_row_to_dict(row))
 # Request body for POST /api/jobs/{job_id}/describe.
 class JobDescribeRequest(BaseModel):
    # OCR model id to run the description with. The value is handed to
    # _resolve_provider unchanged, so None is allowed; presumably the registry
    # then picks its default model (confirm in providers).
    model: Optional[str] = None
@app.post("/api/jobs/{job_id}/describe")
 async def describe_job(job_id: str, body: JobDescribeRequest):
    """Run Describe mode on a job's stored image and save the result to describe_text."""
    # Refuse anything that is not a well-formed UUID before touching the database.
    try:
        uuid.UUID(job_id)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid job ID.")
    # Step 1: find where this job's image was stored.
    try:
        with get_db() as conn, conn.cursor() as cur:
            cur.execute("SELECT image_path FROM ocr_jobs WHERE id = %s", (job_id,))
            job_row = cur.fetchone()
    except Exception as db_error:
        print(f"describe_job lookup DB error: {db_error}")
        raise HTTPException(status_code=500, detail="Database error.")
    if not job_row:
        raise HTTPException(status_code=404, detail="Job not found.")
    stored_image = job_row["image_path"]
    if not stored_image or not os.path.isfile(stored_image):
        raise HTTPException(status_code=404, detail="Image file not found on disk.")
    # Step 2: run the describe prompt through the requested (or default) provider.
    provider = _resolve_provider(body.model, "describe")
    describe_options = {
        "base_size": 1024,
        "image_size": 640,
        "crop_mode": True,
        "test_compress": False,
    }
    try:
        raw_text = provider.run(
            stored_image,
            mode="describe",
            prompt="",
            grounding=False,
            find_term=None,
            schema=None,
            include_caption=False,
            options=describe_options,
        )
    except ProviderError as provider_error:
        print(f"describe_job provider error: {provider_error}")
        raise HTTPException(status_code=502, detail=str(provider_error))
    except Exception as run_error:
        print(f"describe_job inference error: {type(run_error).__name__}: {run_error}")
        raise HTTPException(status_code=500, detail="An internal error occurred during description.")
    # Only pay for tag cleanup when the output actually carries grounding markup.
    if "<|ref|>" in raw_text or "<|grounding|>" in raw_text:
        display_text = clean_grounding_text(raw_text)
    else:
        display_text = raw_text
    # Step 3: persist the generated description on the job.
    try:
        with get_db() as conn, conn.cursor() as cur:
            cur.execute(
                "UPDATE ocr_jobs SET describe_text = %s WHERE id = %s RETURNING *",
                (display_text, job_id),
            )
            saved_row = cur.fetchone()
    except Exception as db_error:
        print(f"describe_job save DB error: {db_error}")
        raise HTTPException(status_code=500, detail="Database error.")
    # The row can disappear between lookup and update (e.g. a concurrent delete).
    if not saved_row:
        raise HTTPException(status_code=404, detail="Job not found.")
    return JSONResponse(_job_row_to_dict(saved_row))
@app.delete("/api/jobs/{job_id}")
 async def delete_job(job_id: str):
    """Delete a job record and its stored image."""
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -16,3 +16,4 @@ img2pdf>=0.5.0
 python-docx>=1.1.0
 markdown>=3.5.0
 psycopg2-binary>=2.9.0
 httpx>=0.27.0
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -27,6 +27,15 @@ services:
      MAX_UPLOAD_SIZE_MB: ${MAX_UPLOAD_SIZE_MB:-100}
      DATABASE_URL: ${DATABASE_URL:-postgresql://ocr_user:ocr_password@postgres:5432/ocr_db}
      OCR_IMAGES_DIR: ${OCR_IMAGES_DIR:-/data/ocr_images}
      ENABLE_DEEPSEEK_LOCAL: ${ENABLE_DEEPSEEK_LOCAL:-true}
      OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-http://host.docker.internal:11434}
      OLLAMA_MODELS: ${OLLAMA_MODELS:-}
      DEFAULT_OCR_MODEL: ${DEFAULT_OCR_MODEL:-deepseek-local}
      OLLAMA_TIMEOUT: ${OLLAMA_TIMEOUT:-300}
    # Lets the container reach an Ollama server running on the Docker host
    # (works out of the box on Docker Desktop; required for Linux engines).
    extra_hosts:
      - "host.docker.internal:host-gateway"
    volumes:
      - ./models:/models
      - ./ocr_images:/data/ocr_images
--- a/frontend/src/App.jsx
+++ b/frontend/src/App.jsx
@@ -1,5 +1,6 @@
-import { useState, useCallback } from 'react'
+import { useState, useCallback, useEffect } from 'react'
 import { useSuggestions } from './hooks/useSuggestions'
 import { useModels } from './hooks/useModels'
 import { motion, AnimatePresence } from 'framer-motion'
 import {
  Sparkles, Zap, Loader2, Settings, Image as ImageIcon, FileText,
@@ -7,6 +8,7 @@ import {
 } from 'lucide-react'
 import ImageUpload from './components/ImageUpload'
 import ModeSelector from './components/ModeSelector'
 import ModelSelector from './components/ModelSelector'
 import ResultPanel from './components/ResultPanel'
 import AdvancedSettings from './components/AdvancedSettings'
 import PDFProcessor from './components/PDFProcessor'
@@ -24,6 +26,8 @@ function App() {
  const [view, setView] = useState('new_job')
  // OCR state
  const { models, loading: modelsLoading } = useModels()
  const [model, setModel] = useState(null)
  const [mode, setMode] = useState('plain_ocr')
  const [fileType, setFileType] = useState('image')
  const [image, setImage] = useState(null)
@@ -51,8 +55,15 @@ function App() {
  const [commitResult, setCommitResult] = useState(null)
  // Modes that produce editable text output and can be committed to the DB
-  const COMMITTABLE_MODES = new Set(['plain_ocr', 'describe', 'freeform'])
+  const COMMITTABLE_MODES = new Set(['plain_ocr', 'describe'])
-  const MODE_LABELS = { plain_ocr: 'OCR Text', describe: 'Description', freeform: 'Freeform' }
+  const MODE_LABELS = { plain_ocr: 'OCR Text', describe: 'Description' }
  // Pick the default model once the list loads
  useEffect(() => {
    if (!model && models.length > 0) {
      setModel((models.find(m => m.default) || models[0]).id)
    }
  }, [models, model])
  // Show the full-screen result view once at least one committable mode has a result
  const showResultView = view === 'new_job' && Object.keys(modeResults).length > 0
@@ -97,6 +108,7 @@ function App() {
    try {
      const formData = new FormData()
      formData.append('image', image)
      if (model) formData.append('model', model)
      formData.append('mode', mode)
      formData.append('prompt', prompt)
      formData.append('grounding', mode === 'find_ref')
@@ -149,6 +161,7 @@ function App() {
      formData.append('describe_text', editedResults.describe || '')
      formData.append('freeform_text', editedResults.freeform || '')
      formData.append('mode', mode)
      if (model) formData.append('ocr_model', model)
      const response = await axios.post(`${API_BASE}/jobs`, formData, {
        headers: { 'Content-Type': 'multipart/form-data' },
@@ -159,7 +172,7 @@ function App() {
    } finally {
      setCommitLoading(false)
    }
-  }, [image, editedResults, metadata, mode])
+  }, [image, editedResults, metadata, mode, model])
  const handleCopy = useCallback(() => {
    const text = (activeResultMode && editedResults[activeResultMode]) || result?.text
@@ -263,11 +276,12 @@ function App() {
            >
              {/* Run additional modes */}
              <div className="glass p-4 rounded-2xl flex-shrink-0">
-                <ModeSelector
+                <div className="mb-3">
-                  mode={mode} onModeChange={setMode}
+                  <ModelSelector
-                  prompt={prompt} onPromptChange={setPrompt}
+                    models={models} value={model} onChange={setModel} loading={modelsLoading}
-                  findTerm={findTerm} onFindTermChange={setFindTerm}
+                  />
-                />
+                </div>
                <ModeSelector mode={mode} onModeChange={setMode} />
                <div className="flex items-center gap-3 mt-3">
                  <motion.button
                    onClick={handleSubmit}
@@ -462,12 +476,12 @@ function App() {
                  <MetadataForm metadata={metadata} onChange={setMetadata} suggestions={suggestions} />
-                  <ModeSelector
+                  <ModelSelector
-                    mode={mode} onModeChange={setMode}
+                    models={models} value={model} onChange={setModel} loading={modelsLoading}
                    prompt={prompt} onPromptChange={setPrompt}
                    findTerm={findTerm} onFindTermChange={setFindTerm}
                  />
                  <ModeSelector mode={mode} onModeChange={setMode} />
                  <ImageUpload onImageSelect={handleImageSelect} preview={imagePreview} fileType={fileType} />
                  <motion.button
@@ -497,7 +511,7 @@ function App() {
                  {fileType === 'pdf' ? (
                    <PDFProcessor
-                      pdfFile={image} mode={mode} prompt={prompt}
+                      pdfFile={image} mode={mode} prompt={prompt} model={model}
                      advancedSettings={advancedSettings} includeCaption={includeCaption}
                    />
                  ) : (
--- a/frontend/src/components/JobsPanel.jsx
+++ b/frontend/src/components/JobsPanel.jsx
@@ -1,9 +1,10 @@
 import { useState, useEffect, useCallback } from 'react'
 import { useSuggestions } from '../hooks/useSuggestions'
 import { useModels } from '../hooks/useModels'
 import { motion, AnimatePresence } from 'framer-motion'
 import {
  Search, ChevronLeft, ChevronRight, CheckCircle2, Clock,
-  FileText, Loader2, Save, RefreshCw, Trash2,
+  FileText, Loader2, Save, RefreshCw, Trash2, Sparkles,
 } from 'lucide-react'
 import axios from 'axios'
@@ -32,10 +33,14 @@ function StatusBadge({ status }) {
 // Full-screen Job Detail
 // ─────────────────────────────────────────────────────────────
 function JobDetail({ jobId, onClose, onReviewed, onDeleted, suggestions = {} }) {
  const { models } = useModels()
  const [job, setJob] = useState(null)
  const [loading, setLoading] = useState(true)
  const [error, setError] = useState(null)
  const [describeModel, setDescribeModel] = useState('')
  const [generatingDescribe, setGeneratingDescribe] = useState(false)
  const [editedText, setEditedText]         = useState('')
  const [editDescribeText, setEditDescribeText] = useState('')
  const [editFreeformText, setEditFreeformText] = useState('')
@@ -71,10 +76,9 @@ function JobDetail({ jobId, onClose, onReviewed, onDeleted, suggestions = {} })
          setEditChapter(d.chapter || '')
          setEditPage(d.page || '')
          setReviewerName(d.reviewer_name || '')
-          // Default to first tab that has content
+          // Default to the OCR tab when there's OCR text, otherwise Description
          if (d.reviewed_text || d.ocr_text) setActiveTab('ocr')
-          else if (d.describe_text) setActiveTab('describe')
+          else setActiveTab('describe')
          else if (d.freeform_text) setActiveTab('freeform')
        }
      })
      .catch(err => {
@@ -85,6 +89,32 @@ function JobDetail({ jobId, onClose, onReviewed, onDeleted, suggestions = {} })
    return () => { cancelled = true }
  }, [jobId])
  // Default the Describe model to the job's original model (if available) or the registry default
  useEffect(() => {
    if (!describeModel && models.length > 0) {
      const def = models.find(m => m.default) || models[0]
      const fromJob = job?.ocr_model && models.some(m => m.id === job.ocr_model) ? job.ocr_model : null
      setDescribeModel(fromJob || def.id)
    }
  }, [models, job, describeModel])
  const handleGenerateDescribe = async () => {
    setGeneratingDescribe(true)
    setSaveResult(null)
    try {
      const res = await axios.post(`${API_BASE}/jobs/${jobId}/describe`, {
        model: describeModel || null,
      })
      setJob(res.data)
      setEditDescribeText(res.data.describe_text || '')
      onReviewed(res.data)
    } catch (err) {
      setSaveResult({ success: false, error: err.response?.data?.detail || err.message })
    } finally {
      setGeneratingDescribe(false)
    }
  }
  const handleSave = async () => {
    if (!reviewerName.trim()) {
      setSaveResult({ success: false, error: 'Reviewer name is required.' })
@@ -114,16 +144,24 @@ function JobDetail({ jobId, onClose, onReviewed, onDeleted, suggestions = {} })
  }
  const handleToggleStatus = async () => {
-    const next = isReviewed ? 'unreviewed' : 'reviewed'
+    // Marking reviewed accepts BOTH the reviewed document text and the description,
-    if (next === 'reviewed' && !reviewerName.trim()) {
+    // so it goes through the full review save (not a status-only flip).
-      setSaveResult({ success: false, error: 'Reviewer name is required to mark reviewed.' })
+    if (!isReviewed) {
      setTogglingStatus(true)
      try {
        await handleSave()
      } finally {
        setTogglingStatus(false)
      }
      return
    }
    // Reverting to unreviewed preserves the saved reviewed text and description.
    setTogglingStatus(true)
    setSaveResult(null)
    try {
      const res = await axios.put(`${API_BASE}/jobs/${jobId}/status`, {
-        status: next,
+        status: 'unreviewed',
        reviewer_name: reviewerName.trim() || null,
      })
      setJob(res.data)
@@ -259,8 +297,7 @@ function JobDetail({ jobId, onClose, onReviewed, onDeleted, suggestions = {} })
              {(() => {
                const tabs = [
                  job.ocr_text || job.reviewed_text ? { id: 'ocr', label: 'OCR Text' } : null,
-                  job.describe_text != null ? { id: 'describe', label: 'Description' } : null,
+                  { id: 'describe', label: 'Description' },
                  job.freeform_text != null ? { id: 'freeform', label: 'Freeform' } : null,
                ].filter(Boolean)
                return tabs.length > 1 ? (
                  <div className="flex gap-1 mb-3 flex-shrink-0">
@@ -282,7 +319,7 @@ function JobDetail({ jobId, onClose, onReviewed, onDeleted, suggestions = {} })
              })()}
              <p className="text-xs text-gray-400 mb-2 flex-shrink-0">
-                {{ ocr: isReviewed ? 'Reviewed Text' : 'OCR Text', describe: 'Description', freeform: 'Freeform' }[activeTab]}
+                {{ ocr: isReviewed ? 'Reviewed Text' : 'OCR Text', describe: 'Description' }[activeTab]}
                <span className="text-purple-400 ml-1">(editable)</span>
              </p>
@@ -307,20 +344,43 @@ function JobDetail({ jobId, onClose, onReviewed, onDeleted, suggestions = {} })
                </>
              )}
              {activeTab === 'describe' && (
-                <textarea
+                <>
-                  value={editDescribeText}
+                  <div className="flex items-center gap-2 mb-2 flex-shrink-0">
-                  onChange={e => setEditDescribeText(e.target.value)}
+                    <select
-                  className="flex-1 w-full bg-transparent text-sm text-gray-200 font-mono resize-none focus:outline-none min-h-0"
+                      value={describeModel}
-                  placeholder="Description text..."
+                      onChange={e => setDescribeModel(e.target.value)}
-                />
+                      disabled={generatingDescribe || models.length === 0}
-              )}
+                      className="bg-white/5 border border-white/10 rounded-lg px-2 py-1.5 text-xs text-gray-200 focus:outline-none focus:border-purple-500/50"
-              {activeTab === 'freeform' && (
+                    >
-                <textarea
+                      {models.length === 0 && <option value="">No models</option>}
-                  value={editFreeformText}
+                      {models.map(m => (
-                  onChange={e => setEditFreeformText(e.target.value)}
+                        <option key={m.id} value={m.id}>{m.label}{m.default ? ' (default)' : ''}</option>
-                  className="flex-1 w-full bg-transparent text-sm text-gray-200 font-mono resize-none focus:outline-none min-h-0"
+                      ))}
-                  placeholder="Freeform result..."
+                    </select>
-                />
+                    <motion.button
                      onClick={handleGenerateDescribe}
                      disabled={generatingDescribe || !describeModel}
                      className={`flex items-center gap-1.5 px-3 py-1.5 rounded-lg text-xs font-medium transition-all ${
                        generatingDescribe || !describeModel
                          ? 'opacity-50 cursor-not-allowed bg-white/5'
                          : 'bg-gradient-to-r from-violet-600 to-purple-600 hover:from-violet-500 hover:to-purple-500'
                      }`}
                      whileHover={!generatingDescribe && describeModel ? { scale: 1.02 } : {}}
                      whileTap={!generatingDescribe && describeModel ? { scale: 0.98 } : {}}
                      title="Run Describe on this job's image and save it"
                    >
                      {generatingDescribe
                        ? <><Loader2 className="w-3.5 h-3.5 animate-spin" /> Generating…</>
                        : <><Sparkles className="w-3.5 h-3.5" /> Generate Description</>}
                    </motion.button>
                  </div>
                  <textarea
                    value={editDescribeText}
                    onChange={e => setEditDescribeText(e.target.value)}
                    className="flex-1 w-full bg-transparent text-sm text-gray-200 font-mono resize-none focus:outline-none min-h-0"
                    placeholder="No description yet — pick a model and click Generate Description, or type one here."
                  />
                </>
              )}
            </div>
          </div>
@@ -385,6 +445,12 @@ function JobDetail({ jobId, onClose, onReviewed, onDeleted, suggestions = {} })
              </div>
            </div>
            {!isReviewed && (
              <p className="text-xs text-gray-500 mt-2">
                Marking reviewed accepts both the reviewed document text and the description.
              </p>
            )}
            {saveResult && (
              <motion.div
                initial={{ opacity: 0, y: -4 }} animate={{ opacity: 1, y: 0 }}
@@ -405,6 +471,7 @@ function JobDetail({ jobId, onClose, onReviewed, onDeleted, suggestions = {} })
                <span className="text-xs text-gray-500">Last reviewed: {new Date(job.reviewed_at).toLocaleString()}</span>
              )}
              {job.mode && <span className="text-xs text-gray-500">Mode: {job.mode}</span>}
              {job.ocr_model && <span className="text-xs text-gray-500">Model: {job.ocr_model}</span>}
            </div>
          </div>
        </>
@@ -573,7 +640,10 @@ export default function JobsPanel() {
                {job.page && <span className="text-xs text-gray-500">p. {job.page}</span>}
              </div>
              {job.author && <p className="text-xs text-gray-400 mt-1">{job.author}</p>}
-              <p className="text-xs text-gray-600 mt-2 font-mono">{new Date(job.submitted_at).toLocaleDateString()}</p>
+              <div className="flex items-center justify-between mt-2">
                <p className="text-xs text-gray-600 font-mono">{new Date(job.submitted_at).toLocaleDateString()}</p>
                {job.ocr_model && <span className="text-[10px] text-gray-500 truncate ml-2">{job.ocr_model}</span>}
              </div>
            </motion.button>
          ))}
        </AnimatePresence>
--- a/frontend/src/components/ModeSelector.jsx
+++ b/frontend/src/components/ModeSelector.jsx
@@ -1,29 +1,17 @@
 import { motion } from 'framer-motion'
-import { FileText, Eye, Search, Wand2 } from 'lucide-react'
+import { FileText, Eye } from 'lucide-react'
 const modes = [
-  { id: 'plain_ocr', name: 'Plain OCR', icon: FileText, color: 'from-blue-500 to-cyan-500', desc: 'Extract raw text', needsInput: false },
+  { id: 'plain_ocr', name: 'Plain OCR', icon: FileText, color: 'from-blue-500 to-cyan-500', desc: 'Extract raw text' },
-  { id: 'describe', name: 'Describe', icon: Eye, color: 'from-violet-500 to-purple-500', desc: 'Image description', needsInput: false },
+  { id: 'describe', name: 'Describe', icon: Eye, color: 'from-violet-500 to-purple-500', desc: 'Image description' },
  { id: 'find_ref', name: 'Find', icon: Search, color: 'from-yellow-500 to-orange-500', desc: 'Locate specific terms', needsInput: 'findTerm' },
  { id: 'freeform', name: 'Freeform', icon: Wand2, color: 'from-fuchsia-500 to-pink-500', desc: 'Custom prompt', needsInput: 'prompt' },
 ]
-export default function ModeSelector({ 
+export default function ModeSelector({ mode, onModeChange }) {
  mode, 
  onModeChange, 
  prompt, 
  onPromptChange,
  findTerm,
  onFindTermChange
 }) {
  const selectedMode = modes.find(m => m.id === mode)
  const needsInput = selectedMode?.needsInput
  return (
    <div className="glass p-4 rounded-2xl space-y-3">
      <h3 className="text-sm font-semibold text-gray-200">Mode</h3>
-      <div className="grid grid-cols-4 gap-2">
+      <div className="grid grid-cols-2 gap-2">
        {modes.map((m) => {
          const Icon = m.icon
          const isSelected = mode === m.id
@@ -32,6 +20,7 @@ export default function ModeSelector({
            <motion.button
              key={m.id}
              onClick={() => onModeChange(m.id)}
              title={m.desc}
              className={`
                relative p-2 rounded-xl text-center transition-all
                ${isSelected
@@ -68,38 +57,6 @@ export default function ModeSelector({
          )
        })}
      </div>
      {needsInput === 'findTerm' && (
        <motion.div
          initial={{ opacity: 0, height: 0 }}
          animate={{ opacity: 1, height: 'auto' }}
          exit={{ opacity: 0, height: 0 }}
        >
          <input
            type="text"
            value={findTerm}
            onChange={(e) => onFindTermChange(e.target.value)}
            placeholder="Enter term to find (e.g., Total, Invoice #)"
            className="w-full bg-white/5 border border-white/10 rounded-xl px-3 py-2 text-sm focus:outline-none focus:border-purple-500 transition-colors"
          />
        </motion.div>
      )}
      {needsInput === 'prompt' && (
        <motion.div
          initial={{ opacity: 0, height: 0 }}
          animate={{ opacity: 1, height: 'auto' }}
          exit={{ opacity: 0, height: 0 }}
        >
          <textarea
            value={prompt}
            onChange={(e) => onPromptChange(e.target.value)}
            placeholder="Enter your custom prompt..."
            className="w-full bg-white/5 border border-white/10 rounded-xl px-3 py-2 text-sm focus:outline-none focus:border-purple-500 transition-colors resize-none"
            rows={2}
          />
        </motion.div>
      )}
    </div>
  )
 }
--- a/frontend/src/components/PDFProcessor.jsx
+++ b/frontend/src/components/PDFProcessor.jsx
@@ -5,7 +5,7 @@ import axios from 'axios'
 const API_BASE = import.meta.env.VITE_API_URL || '/api'
-function PDFProcessor({ pdfFile, mode, prompt, advancedSettings, includeCaption }) {
+function PDFProcessor({ pdfFile, mode, prompt, model, advancedSettings, includeCaption }) {
  const [processing, setProcessing] = useState(false)
  const [progress, setProgress] = useState(0)
  const [result, setResult] = useState(null)
@@ -29,6 +29,7 @@ function PDFProcessor({ pdfFile, mode, prompt, advancedSettings, includeCaption
    try {
      const formData = new FormData()
      formData.append('pdf_file', pdfFile)
      if (model) formData.append('model', model)
      formData.append('mode', mode)
      formData.append('prompt', prompt)
      formData.append('output_format', outputFormat)
@@ -80,7 +81,7 @@ function PDFProcessor({ pdfFile, mode, prompt, advancedSettings, includeCaption
    } finally {
      setProcessing(false)
    }
-  }, [pdfFile, mode, prompt, outputFormat, includeCaption, advancedSettings])
+  }, [pdfFile, mode, prompt, model, outputFormat, includeCaption, advancedSettings])
  const handleDownloadJSON = useCallback(() => {
    if (!result || outputFormat !== 'json') return