From 03390ed59fb4d06ee3bedd08dc449c6e8111f7b2 Mon Sep 17 00:00:00 2001 From: chihlasm Date: Wed, 11 Mar 2026 23:29:51 -0400 Subject: [PATCH] feat: enable Markdown (.md) file upload in KB Accelerator Moved md from Phase 2 extensions to allowed formats, added extraction handler (reuses txt handler), and updated plan_limits defaults to include md for all plans. Co-Authored-By: Claude Opus 4.6 --- .../056_add_md_to_kb_allowed_formats.py | 44 +++++++++++++++++++ backend/app/api/endpoints/kb_accelerator.py | 6 +-- backend/app/core/kb_extraction_service.py | 1 + backend/app/models/plan_limits.py | 2 +- 4 files changed, 49 insertions(+), 4 deletions(-) create mode 100644 backend/alembic/versions/056_add_md_to_kb_allowed_formats.py diff --git a/backend/alembic/versions/056_add_md_to_kb_allowed_formats.py b/backend/alembic/versions/056_add_md_to_kb_allowed_formats.py new file mode 100644 index 00000000..e0bb2ffa --- /dev/null +++ b/backend/alembic/versions/056_add_md_to_kb_allowed_formats.py @@ -0,0 +1,44 @@ +"""Add md to kb_allowed_formats defaults + +Revision ID: 056 +Revises: 055 +Create Date: 2026-03-12 +""" +from alembic import op +import sqlalchemy as sa + +revision = "056" +down_revision = "055" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # Update the server default so rows inserted without an explicit value also get "md" + op.alter_column( + "plan_limits", + "kb_allowed_formats", + server_default=sa.text("'[\"txt\",\"paste\",\"md\"]'::jsonb"), + ) + # Append "md" to every existing row whose array does not already contain it (any value, not only the old default ["txt","paste"]) + op.execute( + """ + UPDATE plan_limits + SET kb_allowed_formats = kb_allowed_formats || '["md"]'::jsonb + WHERE NOT kb_allowed_formats @> '"md"'::jsonb + """ + ) + + +def downgrade() -> None: + op.alter_column( + "plan_limits", + "kb_allowed_formats", + server_default=sa.text("'[\"txt\",\"paste\"]'::jsonb"), + ) + op.execute( + """ + UPDATE plan_limits + SET kb_allowed_formats = kb_allowed_formats - 'md' + """ + ) diff --git a/backend/app/api/endpoints/kb_accelerator.py 
b/backend/app/api/endpoints/kb_accelerator.py index 4544479b..977c82f0 100644 --- a/backend/app/api/endpoints/kb_accelerator.py +++ b/backend/app/api/endpoints/kb_accelerator.py @@ -57,6 +57,7 @@ MAX_UPLOAD_SIZE = 10 * 1024 * 1024 ALLOWED_EXTENSIONS = { "txt": ["text/plain"], + "md": ["text/markdown", "text/plain"], "docx": ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"], } @@ -64,7 +65,6 @@ ALLOWED_EXTENSIONS = { PHASE2_EXTENSIONS = { "pdf": ["application/pdf"], "html": ["text/html"], - "md": ["text/markdown", "text/plain"], } @@ -110,7 +110,7 @@ async def _check_lifetime_limit(user: User, limits: PlanLimits, db: AsyncSession async def _check_format_allowed(source_format: str, limits: PlanLimits) -> None: - allowed = limits.kb_allowed_formats or ["txt", "paste"] + allowed = limits.kb_allowed_formats or ["txt", "paste", "md"] if source_format not in allowed: raise HTTPException( status_code=status.HTTP_403_FORBIDDEN, @@ -205,7 +205,7 @@ async def get_quota( kb_accelerator_enabled=False, lifetime_conversions_used=committed_count, lifetime_conversions_limit=0, - allowed_formats=["txt", "paste"], + allowed_formats=["txt", "paste", "md"], detailed_analysis=False, conversational_refinement=False, step_library_matching=False, diff --git a/backend/app/core/kb_extraction_service.py b/backend/app/core/kb_extraction_service.py index 6bba8094..ef905530 100644 --- a/backend/app/core/kb_extraction_service.py +++ b/backend/app/core/kb_extraction_service.py @@ -159,6 +159,7 @@ def _extract_docx(content_bytes: bytes) -> ExtractResult: # Registry of format handlers — extend for Phase 2 FORMAT_HANDLERS: dict[str, ExtractHandler] = { "txt": _extract_txt, + "md": _extract_txt, "paste": _extract_paste, "docx": _extract_docx, } diff --git a/backend/app/models/plan_limits.py b/backend/app/models/plan_limits.py index 63f2c894..09bb01ee 100644 --- a/backend/app/models/plan_limits.py +++ b/backend/app/models/plan_limits.py @@ -23,7 +23,7 @@ class 
PlanLimits(Base): kb_accelerator_enabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False, server_default=text("false")) kb_max_lifetime_conversions: Mapped[int | None] = mapped_column(Integer, nullable=True) kb_batch_max_size: Mapped[int | None] = mapped_column(Integer, nullable=True) - kb_allowed_formats: Mapped[list] = mapped_column(JSONB, nullable=False, default=lambda: ["txt", "paste"], server_default=text("'[\"txt\",\"paste\"]'::jsonb")) + kb_allowed_formats: Mapped[list] = mapped_column(JSONB, nullable=False, default=lambda: ["txt", "paste", "md"], server_default=text("'[\"txt\",\"paste\",\"md\"]'::jsonb")) kb_detailed_analysis: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False, server_default=text("false")) kb_conversational_refinement: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False, server_default=text("false")) kb_step_library_matching: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False, server_default=text("false"))