"""PostgreSQL connection helpers and schema bootstrap for the ``ocr_jobs`` table."""

import os
from contextlib import contextmanager

import psycopg2
import psycopg2.extras
from decouple import config as env_config

# Connection string read from the environment (via python-decouple), with a
# fallback default used when DATABASE_URL is not set.
DATABASE_URL = env_config(
    "DATABASE_URL",
    default="postgresql://ocr_user:ocr_password@postgres:5432/ocr_db",
)


def _get_conn():
    """Open a new connection to DATABASE_URL whose cursors return dict rows."""
    return psycopg2.connect(
        DATABASE_URL,
        cursor_factory=psycopg2.extras.RealDictCursor,
    )


def init_db() -> None:
    """Create tables if they don't exist. Called once at startup.

    Creates the ``ocr_jobs`` table and its indexes inside a single
    transaction. Every statement uses ``IF NOT EXISTS``, so re-running it
    against an already-initialized database is harmless.

    Raises:
        Exception: any error raised while connecting or executing the DDL is
            reported on stdout and then re-raised unchanged.
    """
    try:
        # get_db() owns commit / rollback / close, so the transaction handling
        # here is identical to every other caller's.
        with get_db() as conn:
            with conn.cursor() as cur:
                # NOTE(review): gen_random_uuid() is built in from
                # PostgreSQL 13; older servers need the pgcrypto extension.
                # Confirm the target server version.
                cur.execute("""
                    CREATE TABLE IF NOT EXISTS ocr_jobs (
                        id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
                        author TEXT,
                        book TEXT,
                        chapter TEXT,
                        page TEXT,
                        submitted_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
                        image_path TEXT NOT NULL,
                        original_filename TEXT,
                        ocr_text TEXT,
                        status TEXT NOT NULL DEFAULT 'unreviewed',
                        reviewed_text TEXT,
                        reviewer_name TEXT,
                        reviewed_at TIMESTAMPTZ,
                        mode TEXT
                    )
                """)

                # Plain indexes on the status column and on submission time
                # (newest first).
                cur.execute("""
                    CREATE INDEX IF NOT EXISTS ocr_jobs_status_idx
                    ON ocr_jobs(status)
                """)
                cur.execute("""
                    CREATE INDEX IF NOT EXISTS ocr_jobs_submitted_at_idx
                    ON ocr_jobs(submitted_at DESC)
                """)

                # Unique constraint: prevent duplicate (author, chapter, page)
                # submissions. Applies only when all three fields are non-null.
                # NOTE(review): `book` is not part of the key -- confirm that
                # is intended.
                cur.execute("""
                    CREATE UNIQUE INDEX IF NOT EXISTS ocr_jobs_author_chapter_page_unique
                    ON ocr_jobs (author, chapter, page)
                    WHERE author IS NOT NULL
                      AND chapter IS NOT NULL
                      AND page IS NOT NULL
                """)
    except Exception as exc:
        print(f"Database init failed: {exc}")
        raise
    else:
        # Only reached after get_db() has committed successfully.
        print("Database initialized.")


@contextmanager
def get_db():
    """Yield a connection and auto-commit/rollback.

    Commits when the ``with`` body finishes normally, rolls back when it
    raises an ``Exception`` (which is then re-raised), and closes the
    connection in every case.

    Yields:
        The connection returned by ``_get_conn()``.
    """
    conn = _get_conn()
    try:
        yield conn
        conn.commit()
    except Exception:
        # A connection that is already closed or broken cannot be rolled back;
        # calling rollback() on it would raise and replace the original error
        # the caller needs to see.
        if not conn.closed:
            conn.rollback()
        raise
    finally:
        conn.close()