perf: resize images server-side before sending to Claude vision

- Resize to 1568px max (Claude's efficient ceiling) via Pillow
- Convert PNG screenshots to JPEG q85 (~5MB → ~200KB typical)
- Cap at 3 images per message (~4,800 token budget max)
- Graceful fallback if Pillow unavailable (Claude auto-resizes)
- Add Pillow + libjpeg/zlib deps to requirements + Dockerfile

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
chihlasm
2026-03-24 04:46:02 +00:00
parent 3b682069d3
commit 1c0f912cf6
3 changed files with 77 additions and 4 deletions

View File

@@ -10,6 +10,8 @@ RUN apt-get update && apt-get install -y \
libcairo2-dev \
libgdk-pixbuf-2.0-dev \
libffi-dev \
libjpeg-dev \
zlib1g-dev \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies

View File

@@ -50,21 +50,86 @@ router = APIRouter(prefix="/assistant", tags=["assistant-chat"])
# MIME types that may be forwarded to Claude as vision input; uploads with any
# other content type are filtered out when images are fetched.
VISION_CONTENT_TYPES = {"image/png", "image/jpeg", "image/gif", "image/webp"}
# Claude vision costs roughly (width × height) / 750 tokens per image, and
# Claude itself downscales anything larger than 1568px on the longest edge.
# Resizing server-side therefore loses no detail Claude would have used, and
# avoids sending multi-MB base64 payloads over the wire.
MAX_IMAGE_DIMENSION = 1568 # Longest edge in pixels; Claude's max efficient resolution
MAX_IMAGES_PER_MESSAGE = 3 # Per-message image cap to control the token budget
def _resize_image_for_vision(file_data: bytes, content_type: str) -> tuple[bytes, str]:
    """Shrink and re-encode an image before it is sent to Claude vision.

    The image is scaled down (aspect ratio preserved) so its longest edge is
    at most ``MAX_IMAGE_DIMENSION`` pixels. PNGs in ``RGBA`` or ``P`` mode
    (typical for screenshots) are flattened onto a white background and
    re-encoded as JPEG at quality 85; every other input keeps its declared
    media type.

    This is strictly best-effort: the original bytes and content type are
    returned unchanged when Pillow is not installed, when ``content_type`` is
    not one this function knows how to encode, when processing fails for any
    reason, or when the re-encoded result is not smaller than the input.

    Args:
        file_data: Raw encoded image bytes.
        content_type: Declared MIME type of ``file_data``.

    Returns:
        A ``(image_bytes, media_type)`` tuple. Whenever re-encoded bytes are
        returned, they are encoded in the format named by ``media_type``.
    """
    try:
        from io import BytesIO

        from PIL import Image
    except ImportError:
        # Pillow not installed — send original (Claude auto-resizes)
        logger.debug("Pillow not available, sending original image to Claude")
        return file_data, content_type

    # Pillow encoder name for each media type this function may emit.
    pillow_formats = {
        "image/png": "PNG",
        "image/jpeg": "JPEG",
        "image/gif": "GIF",
        "image/webp": "WEBP",
    }

    try:
        img = Image.open(BytesIO(file_data))

        # Alpha/palette PNGs are usually screenshots, which compress far
        # better as JPEG.
        out_type = content_type
        if content_type == "image/png" and img.mode in ("RGBA", "P"):
            out_type = "image/jpeg"

        # Derive the encoder from the media type we will report rather than
        # from img.format: resize()/convert() return images whose format is
        # None, which previously caused resized GIF/WebP data to be written
        # as PNG while still being labelled with the original media type.
        save_format = pillow_formats.get(out_type)
        if save_format is None:
            # Unknown media type: we cannot guarantee bytes and label agree.
            return file_data, content_type

        # JPEG has no alpha channel. Composite onto white instead of just
        # dropping alpha, which would expose whatever RGB values sit under
        # transparent pixels (commonly black). Done before resizing so that
        # palette images are scaled with LANCZOS rather than nearest-neighbour.
        if save_format == "JPEG" and img.mode not in ("RGB", "L", "CMYK"):
            rgba = img.convert("RGBA")
            flattened = Image.new("RGB", rgba.size, (255, 255, 255))
            flattened.paste(rgba, mask=rgba.getchannel("A"))
            img = flattened

        # Only resize if larger than Claude's max efficient dimension.
        width, height = img.size
        longest_edge = max(width, height)
        if longest_edge > MAX_IMAGE_DIMENSION:
            scale = MAX_IMAGE_DIMENSION / longest_edge
            # Clamp to 1px so extreme aspect ratios never produce a 0-sized
            # edge, which would make resize() raise.
            new_size = (
                max(1, int(width * scale)),
                max(1, int(height * scale)),
            )
            img = img.resize(new_size, Image.LANCZOS)

        buf = BytesIO()
        if save_format == "JPEG":
            img.save(buf, format="JPEG", quality=85, optimize=True)
        else:
            img.save(buf, format=save_format, optimize=True)
        encoded = buf.getvalue()
    except Exception:
        # Deliberate best-effort boundary: a bad image must not block the
        # chat request, but keep the traceback so failures are diagnosable.
        logger.warning("Image resize failed, sending original", exc_info=True)
        return file_data, content_type

    # Only use the re-encoded version if it is actually smaller.
    if len(encoded) < len(file_data):
        return encoded, out_type
    return file_data, content_type
async def _fetch_upload_images(
upload_ids: list[UUID],
account_id: UUID,
db: AsyncSession,
) -> list[dict[str, Any]]:
"""Fetch uploaded images from S3 and return as base64-encoded dicts for Claude vision."""
"""Fetch uploaded images from S3 and return as base64-encoded dicts for Claude vision.
Resizes images server-side to reduce network payload and applies a per-message
cap to control token budget (~1,600 tokens per full-res image).
"""
if not upload_ids or not settings.STORAGE_ENDPOINT:
return []
from app.services import storage_service
# Cap the number of images to limit token cost
capped_ids = upload_ids[:MAX_IMAGES_PER_MESSAGE]
if len(upload_ids) > MAX_IMAGES_PER_MESSAGE:
logger.info(
"Capped images from %d to %d for token budget",
len(upload_ids), MAX_IMAGES_PER_MESSAGE,
)
result = await db.execute(
select(FileUpload).where(
FileUpload.id.in_(upload_ids),
FileUpload.id.in_(capped_ids),
FileUpload.account_id == account_id,
FileUpload.content_type.in_(VISION_CONTENT_TYPES),
)
@@ -75,9 +140,12 @@ async def _fetch_upload_images(
for upload in uploads:
try:
file_data = storage_service.download_file(upload.storage_key)
resized_data, media_type = _resize_image_for_vision(
file_data, upload.content_type
)
images.append({
"media_type": upload.content_type,
"data": base64.b64encode(file_data).decode("ascii"),
"media_type": media_type,
"data": base64.b64encode(resized_data).decode("ascii"),
})
except Exception:
logger.warning("Failed to fetch upload %s from S3", upload.id)

View File

@@ -54,3 +54,6 @@ apscheduler>=3.10.4
# Object Storage
boto3>=1.34.0
# Image processing (vision upload resize)
Pillow>=10.0.0