feat: wire PDF and text file content into AI chat messages

PDF uploads were stored in S3 and had text extracted during upload, but fetch_upload_images() filtered exclusively for image MIME types, so document content never reached the AI.

- Add fetch_upload_documents() in storage_service.py to retrieve extracted_content for PDFs and text files
- Update ai_sessions.py chat endpoint to call both fetch_upload_images and fetch_upload_documents, injecting document text as context
- Add PDF text extraction in _generate_ai_description (pypdf)
- Add pypdf>=4.0.0 to requirements.txt
- Fix test_db teardown to avoid connection pool issues
- Add 5 tests for fetch_upload_documents

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-27 21:02:56 +00:00
parent 3cea949519
commit 11de850054
6 changed files with 324 additions and 12 deletions
--- a/backend/app/api/endpoints/ai_sessions.py
+++ b/backend/app/api/endpoints/ai_sessions.py
@@ -280,18 +280,28 @@ async def send_chat_message(
    user_id = current_user.id
    account_id = current_user.account_id

-    # Fetch attached images from S3 (if any)
+    # Fetch attached uploads from S3 (if any)
    images = None
+    message = data.message
    if data.upload_ids:
-        from app.services.storage_service import fetch_upload_images
+        from app.services.storage_service import fetch_upload_images, fetch_upload_documents
        images = await fetch_upload_images(data.upload_ids, account_id, db) or None

+        # Inject document text (PDFs, text files) as context in the message
+        documents = await fetch_upload_documents(data.upload_ids, account_id, db)
+        if documents:
+            doc_parts = []
+            for doc in documents:
+                doc_parts.append(f"--- Attached file: {doc['filename']} ---\n{doc['text']}")
+            doc_context = "\n\n".join(doc_parts)
+            message = f"{message}\n\n[Attached document content]\n{doc_context}"
+
    try:
        ai_content, suggested_flows, session, fork_metadata, actions_data, questions_data = await unified_chat_service.send_chat_message(
            session_id=session_id,
            user_id=user_id,
            account_id=account_id,
-            message=data.message,
+            message=message,
            db=db,
            images=images,
        )