feat: add .docx upload support with text extraction

- Add DOCX MIME type to ALLOWED_DOCUMENT_TYPES in storage_service.py
- Add python-docx text extraction in _generate_ai_description
- Extract shared _store_document_content helper for PDF/DOCX
- Add python-docx>=1.1.0 to requirements.txt
- Add tests for docx upload acceptance and document fetch

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
chihlasm
2026-03-27 21:08:12 +00:00
parent 11de850054
commit 217e70cb81
4 changed files with 102 additions and 17 deletions

View File

@@ -16,7 +16,8 @@ logger = logging.getLogger(__name__)
# MIME types accepted for upload, grouped by category.
ALLOWED_IMAGE_TYPES = {"image/png", "image/jpeg", "image/gif", "image/webp"}
# NOTE(review): "application/octet-stream" is a generic binary type; it is
# presumably listed here because some clients send plain-text files with it --
# confirm against the upload handler.
ALLOWED_TEXT_TYPES = {"text/plain", "text/csv", "application/octet-stream"}
# MIME type of Word .docx files, named once so other code can reference it
# without repeating the long literal.
DOCX_MIME = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
ALLOWED_DOCUMENT_TYPES = {"application/pdf", DOCX_MIME}
# Union of every category above.
ALLOWED_TYPES = ALLOWED_IMAGE_TYPES | ALLOWED_TEXT_TYPES | ALLOWED_DOCUMENT_TYPES
MAX_IMAGE_SIZE = 5 * 1024 * 1024 # 5MB