perf: resize images server-side before sending to Claude vision
- Resize to 1568px max (Claude's efficient ceiling) via Pillow
- Convert PNG screenshots to JPEG q85 (~5MB → ~200KB typical)
- Cap at 3 images per message (~4,800 token budget max)
- Graceful fallback if Pillow unavailable (Claude auto-resizes)
- Add Pillow + libjpeg/zlib deps to requirements + Dockerfile

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -10,6 +10,8 @@ RUN apt-get update && apt-get install -y \
|
||||
libcairo2-dev \
|
||||
libgdk-pixbuf-2.0-dev \
|
||||
libffi-dev \
|
||||
libjpeg-dev \
|
||||
zlib1g-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install Python dependencies
|
||||
|
||||
@@ -50,21 +50,86 @@ router = APIRouter(prefix="/assistant", tags=["assistant-chat"])
|
||||
|
||||
# Upload content types that are forwarded to Claude as vision input.
VISION_CONTENT_TYPES = {"image/gif", "image/jpeg", "image/png", "image/webp"}

# Token cost of one image for Claude vision is (width × height) / 750.
# Claude downsizes anything whose longest edge exceeds 1568px on its own, so
# shrinking here does not lose detail the model would have seen; it only keeps
# multi-MB base64 payloads off the wire.
MAX_IMAGE_DIMENSION = 1568  # longest edge, in pixels, after server-side resize
MAX_IMAGES_PER_MESSAGE = 3  # upper bound on images per message (token budget)
|
||||
|
||||
|
||||
def _resize_image_for_vision(file_data: bytes, content_type: str) -> tuple[bytes, str]:
    """Shrink and re-encode an uploaded image before it is sent to Claude vision.

    The image is scaled down so its longest edge is at most
    ``MAX_IMAGE_DIMENSION`` pixels (aspect ratio preserved). PNGs in mode
    ``RGBA`` or ``P`` (typical for screenshots) are flattened onto a white
    background and re-encoded as JPEG at quality 85; every other image keeps
    its own format.

    This is strictly best-effort: if Pillow is not installed, if the image
    cannot be decoded or encoded, or if the re-encoded result is not smaller
    than the input, the original bytes and ``content_type`` are returned
    unchanged.

    Args:
        file_data: Raw bytes of the uploaded image.
        content_type: Media type recorded for the upload (e.g. ``image/png``).

    Returns:
        ``(image_bytes, media_type)``. ``media_type`` always describes the
        encoding of ``image_bytes``; it differs from ``content_type`` when the
        image was converted to another format.
    """
    try:
        from io import BytesIO

        from PIL import Image
    except ImportError:
        # Pillow not installed — send original (Claude auto-resizes)
        logger.debug("Pillow not available, sending original image to Claude")
        return file_data, content_type

    # Pillow format name -> media type for the formats we may emit.
    media_type_by_format = {
        "PNG": "image/png",
        "JPEG": "image/jpeg",
        "GIF": "image/gif",
        "WEBP": "image/webp",
    }

    try:
        img = Image.open(BytesIO(file_data))
        # Capture the format now: convert()/resize() return new images whose
        # .format is None, which previously made resized GIF/WebP images get
        # saved as PNG while still being labelled with their old media type.
        source_format = img.format

        if content_type == "image/png" and img.mode in ("RGBA", "P"):
            # JPEG has no alpha channel. Composite onto white instead of just
            # dropping alpha, otherwise transparent regions usually turn black.
            # Flattening before the resize also lets palette ("P") images be
            # resampled with LANCZOS rather than nearest-neighbour.
            rgba = img.convert("RGBA")
            flattened = Image.new("RGB", rgba.size, (255, 255, 255))
            flattened.paste(rgba, mask=rgba.getchannel("A"))
            img = flattened
            save_format = "JPEG"
        elif content_type == "image/jpeg":
            save_format = "JPEG"
        elif source_format in media_type_by_format:
            save_format = source_format
        else:
            save_format = "PNG"

        # Only resize if larger than Claude's max efficient dimension
        w, h = img.size
        longest = max(w, h)
        if longest > MAX_IMAGE_DIMENSION:
            ratio = MAX_IMAGE_DIMENSION / longest
            # Clamp to 1px: extreme aspect ratios would otherwise round the
            # short edge down to 0, which resize() rejects.
            new_size = (max(1, int(w * ratio)), max(1, int(h * ratio)))
            img = img.resize(new_size, Image.LANCZOS)

        buf = BytesIO()
        if save_format == "JPEG":
            img.save(buf, format="JPEG", quality=85, optimize=True)
        else:
            img.save(buf, format=save_format, optimize=True)
        result = buf.getvalue()
    except Exception:
        # Deliberate best-effort boundary: any decode/encode failure falls
        # back to the original upload, but keep the traceback for diagnosis.
        logger.warning("Image resize failed, sending original", exc_info=True)
        return file_data, content_type

    # Only use resized version if it's actually smaller
    if len(result) < len(file_data):
        return result, media_type_by_format[save_format]
    return file_data, content_type
|
||||
|
||||
|
||||
async def _fetch_upload_images(
|
||||
upload_ids: list[UUID],
|
||||
account_id: UUID,
|
||||
db: AsyncSession,
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Fetch uploaded images from S3 and return as base64-encoded dicts for Claude vision."""
|
||||
"""Fetch uploaded images from S3 and return as base64-encoded dicts for Claude vision.
|
||||
|
||||
Resizes images server-side to reduce network payload and applies a per-message
|
||||
cap to control token budget (~1,600 tokens per full-res image).
|
||||
"""
|
||||
if not upload_ids or not settings.STORAGE_ENDPOINT:
|
||||
return []
|
||||
|
||||
from app.services import storage_service
|
||||
|
||||
# Cap the number of images to limit token cost
|
||||
capped_ids = upload_ids[:MAX_IMAGES_PER_MESSAGE]
|
||||
if len(upload_ids) > MAX_IMAGES_PER_MESSAGE:
|
||||
logger.info(
|
||||
"Capped images from %d to %d for token budget",
|
||||
len(upload_ids), MAX_IMAGES_PER_MESSAGE,
|
||||
)
|
||||
|
||||
result = await db.execute(
|
||||
select(FileUpload).where(
|
||||
FileUpload.id.in_(upload_ids),
|
||||
FileUpload.id.in_(capped_ids),
|
||||
FileUpload.account_id == account_id,
|
||||
FileUpload.content_type.in_(VISION_CONTENT_TYPES),
|
||||
)
|
||||
@@ -75,9 +140,12 @@ async def _fetch_upload_images(
|
||||
for upload in uploads:
|
||||
try:
|
||||
file_data = storage_service.download_file(upload.storage_key)
|
||||
resized_data, media_type = _resize_image_for_vision(
|
||||
file_data, upload.content_type
|
||||
)
|
||||
images.append({
|
||||
"media_type": upload.content_type,
|
||||
"data": base64.b64encode(file_data).decode("ascii"),
|
||||
"media_type": media_type,
|
||||
"data": base64.b64encode(resized_data).decode("ascii"),
|
||||
})
|
||||
except Exception:
|
||||
logger.warning("Failed to fetch upload %s from S3", upload.id)
|
||||
|
||||
@@ -54,3 +54,6 @@ apscheduler>=3.10.4
|
||||
|
||||
# Object Storage
|
||||
boto3>=1.34.0
|
||||
|
||||
# Image processing (vision upload resize)
|
||||
Pillow>=10.0.0
|
||||
|
||||
Reference in New Issue
Block a user