Add .env.example for setting ports, fix upload limit, fix bounding box, allow dismissing the previous image, and expect HTML (not Markdown) model output. Update README with NVIDIA driver/container instructions.

This commit is contained in:
Ray Dumasia
2025-10-21 21:35:17 +01:00
parent e02338436b
commit 3efc4da7ff
9 changed files with 399 additions and 101 deletions

View File

@@ -4,6 +4,9 @@ server {
root /usr/share/nginx/html;
index index.html;
# Allow larger file uploads (100MB)
client_max_body_size 100M;
# Gzip compression
gzip on;
gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript;

View File

@@ -27,11 +27,22 @@ function App() {
})
const handleImageSelect = useCallback((file) => {
setImage(file)
setImagePreview(URL.createObjectURL(file))
setError(null)
setResult(null)
}, [])
if (file === null) {
// Clear everything when removing image
setImage(null)
if (imagePreview) {
URL.revokeObjectURL(imagePreview)
}
setImagePreview(null)
setError(null)
setResult(null)
} else {
setImage(file)
setImagePreview(URL.createObjectURL(file))
setError(null)
setResult(null)
}
}, [imagePreview])
const handleSubmit = async () => {
if (!image) {
@@ -47,7 +58,8 @@ function App() {
formData.append('image', image)
formData.append('mode', mode)
formData.append('prompt', prompt)
formData.append('grounding', mode === 'find_ref') // Auto-enable for find mode
// Enable grounding only for find mode
formData.append('grounding', mode === 'find_ref')
formData.append('include_caption', false)
formData.append('find_term', findTerm)
formData.append('schema', '')
@@ -81,12 +93,9 @@ function App() {
const extensions = {
plain_ocr: 'txt',
markdown: 'md',
tables_csv: 'csv',
tables_md: 'md',
kv_json: 'json',
layout_map: 'json',
pii_redact: 'json',
describe: 'txt',
find_ref: 'txt',
freeform: 'txt',
}
const ext = extensions[mode] || 'txt'

View File

@@ -71,27 +71,28 @@ export default function ImageUpload({ onImageSelect, preview }) {
<motion.div
initial={{ opacity: 0, scale: 0.9 }}
animate={{ opacity: 1, scale: 1 }}
className="relative group"
className="relative group rounded-2xl overflow-hidden"
>
<img
src={preview}
alt="Preview"
className="w-full rounded-2xl border border-white/10"
/>
<motion.button
onClick={() => onImageSelect(null)}
className="absolute top-3 right-3 bg-red-500/80 backdrop-blur-sm p-2 rounded-full opacity-0 group-hover:opacity-100 transition-opacity"
whileHover={{ scale: 1.1 }}
whileTap={{ scale: 0.9 }}
>
<X className="w-4 h-4" />
</motion.button>
{/* Grounding overlay canvas */}
<canvas
id="preview-canvas"
className="absolute top-0 left-0 w-full h-full pointer-events-none"
/>
<div className="absolute top-3 right-3 flex gap-2">
<motion.button
onClick={(e) => {
e.stopPropagation()
onImageSelect(null)
}}
className="bg-red-500/90 backdrop-blur-sm px-3 py-2 rounded-full opacity-100 hover:bg-red-600 transition-colors flex items-center gap-2 shadow-lg"
whileHover={{ scale: 1.05 }}
whileTap={{ scale: 0.95 }}
title="Remove image"
>
<X className="w-4 h-4" />
<span className="text-sm font-medium">Remove</span>
</motion.button>
</div>
</motion.div>
)}
</div>

View File

@@ -14,7 +14,7 @@ export default function ModeSelector({
prompt,
onPromptChange,
findTerm,
onFindTermChange
onFindTermChange
}) {
const selectedMode = modes.find(m => m.id === mode)
const needsInput = selectedMode?.needsInput

View File

@@ -9,8 +9,19 @@ export default function ResultPanel({ result, loading, imagePreview, onCopy, onD
const [showAdvanced, setShowAdvanced] = useState(false)
const [imageLoaded, setImageLoaded] = useState(false)
// Check if text looks like markdown
const isMarkdown = result?.text && (
// Check if text looks like HTML (model outputs HTML, not markdown)
const isHTML = result?.text && (
result.text.includes('<table') ||
result.text.includes('<tr>') ||
result.text.includes('<td>') ||
result.text.includes('<div') ||
result.text.includes('<p>') ||
result.text.includes('<h1') ||
result.text.includes('<h2')
)
// Also check if it looks like markdown (for backwards compatibility)
const isMarkdown = result?.text && !isHTML && (
result.text.includes('##') ||
result.text.includes('**') ||
result.text.includes('```') ||
@@ -216,7 +227,15 @@ export default function ResultPanel({ result, loading, imagePreview, onCopy, onD
{/* Text result */}
<div className="bg-white/5 border border-white/10 rounded-xl p-4 max-h-96 overflow-y-auto">
{isMarkdown ? (
{isHTML ? (
<div
className="prose prose-invert prose-sm max-w-none"
dangerouslySetInnerHTML={{ __html: result.text }}
style={{
color: '#e5e7eb',
}}
/>
) : isMarkdown ? (
<div className="prose prose-invert prose-sm max-w-none">
<ReactMarkdown>{result.text}</ReactMarkdown>
</div>
@@ -227,10 +246,39 @@ export default function ResultPanel({ result, loading, imagePreview, onCopy, onD
)}
</div>
{/* Raw Response Viewer */}
{result.raw_text && (
<details className="glass rounded-xl overflow-hidden">
<summary className="px-4 py-3 cursor-pointer flex items-center justify-between hover:bg-white/5 transition-colors">
<span className="text-sm font-medium text-gray-300">🔍 Raw Model Response</span>
<ChevronDown className="w-4 h-4 text-gray-400" />
</summary>
<div className="px-4 py-3 border-t border-white/10 space-y-2">
<p className="text-xs text-gray-400 mb-2">Unprocessed output from the model (useful for debugging)</p>
<div className="bg-black/30 rounded-lg p-3 max-h-64 overflow-y-auto">
<pre className="text-xs text-green-400 font-mono whitespace-pre-wrap break-words select-all">
{result.raw_text}
</pre>
</div>
<div className="flex gap-2 mt-2">
<button
onClick={() => navigator.clipboard.writeText(result.raw_text)}
className="text-xs px-3 py-1 bg-white/5 hover:bg-white/10 rounded-lg transition-colors"
>
Copy Raw
</button>
<span className="text-xs text-gray-500 py-1">
{result.raw_text.length} characters
</span>
</div>
</div>
</details>
)}
{/* Advanced Settings Dropdown */}
<details className="glass rounded-xl overflow-hidden">
<summary className="px-4 py-3 cursor-pointer flex items-center justify-between hover:bg-white/5 transition-colors">
<span className="text-sm font-medium text-gray-300">Advanced Settings & Metadata</span>
<span className="text-sm font-medium text-gray-300"> Metadata & Debug Info</span>
<ChevronDown className="w-4 h-4 text-gray-400" />
</summary>
<div className="px-4 py-3 border-t border-white/10 space-y-3">
@@ -244,14 +292,21 @@ export default function ResultPanel({ result, loading, imagePreview, onCopy, onD
)}
{result.boxes?.length > 0 && (
<div>
<p className="text-xs text-gray-400 mb-2">Detected Regions ({result.boxes.length})</p>
<div className="space-y-1">
<p className="text-xs text-gray-400 mb-2">Parsed Bounding Boxes ({result.boxes.length})</p>
<div className="bg-black/30 rounded-lg p-2 space-y-1 max-h-32 overflow-y-auto">
{result.boxes.map((box, idx) => (
<div key={idx} className="text-xs text-gray-500">
{box.label}: [{box.box.map(n => Math.round(n)).join(', ')}]
<div key={idx} className="text-xs font-mono">
<span className="text-cyan-400">Box {idx + 1}:</span>{' '}
<span className="text-purple-400">{box.label}</span>{' '}
<span className="text-gray-500">
[{box.box.map(n => Math.round(n)).join(', ')}]
</span>
</div>
))}
</div>
<p className="text-xs text-gray-500 mt-2">
Coordinates are scaled from model output (0-999) to image pixels
</p>
</div>
)}
</div>