All previously nullable account_id columns are now NOT NULL. The tree_embeddings and feedback tables were backfilled before the constraint was applied. Global content was assigned to the platform sentinel account (00000000-...-0001) in the preceding migration. Tables updated: users, trees, tree_categories, tree_tags, step_categories, step_library, tree_embeddings, feedback. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
73 lines
2.4 KiB
Python
73 lines
2.4 KiB
Python
"""Tree embedding storage for RAG-powered AI assistant.
|
|
|
|
Stores vector embeddings of tree content chunks for semantic search.
|
|
Each tree is split into multiple chunks (node, solution, tree_summary)
|
|
and embedded via Voyage AI for cosine similarity retrieval.
|
|
"""
|
|
import uuid
|
|
from datetime import datetime, timezone
|
|
from typing import Optional, Any
|
|
|
|
from sqlalchemy import String, Text, DateTime, ForeignKey, Index
|
|
from sqlalchemy.orm import Mapped, mapped_column
|
|
from sqlalchemy.dialects.postgresql import UUID, JSONB
|
|
|
|
from app.core.database import Base
|
|
|
|
# pgvector column type — imported defensively so that this module can still be
# imported when pgvector is not installed locally (Vector is then None).
|
|
try:
|
|
from pgvector.sqlalchemy import Vector
|
|
except ImportError:
|
|
Vector = None
|
|
|
|
|
|
class TreeEmbedding(Base):
    """ORM row for one embedded chunk of a tree's content.

    Each row belongs to exactly one tree and one account, carries the raw
    chunk text plus the name of the model used to embed it, and keeps
    free-form metadata in a JSONB column. Rows are removed automatically
    when the owning tree or account is deleted (``ON DELETE CASCADE``).

    NOTE: no ``embedding`` attribute is mapped on this class; see the comment
    above ``meta`` for how the vector column is managed.
    """

    __tablename__ = "tree_embeddings"

    # Plain single-column indexes for lookups by owning account and by tree.
    __table_args__ = (
        Index("ix_tree_embeddings_account_id", "account_id"),
        Index("ix_tree_embeddings_tree_id", "tree_id"),
    )

    # Surrogate primary key; the UUID is generated in Python at insert time.
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )

    # Owning tree (required).
    tree_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), ForeignKey("trees.id", ondelete="CASCADE"), nullable=False
    )

    # Owning account (required).
    account_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), ForeignKey("accounts.id", ondelete="CASCADE"), nullable=False
    )

    # Kind of chunk; the expected values are listed in the column comment.
    chunk_type: Mapped[str] = mapped_column(
        String(30), nullable=False, comment="node | solution | tree_summary"
    )

    # Optional node descriptors — presumably only set for node-level chunks;
    # confirm against the code that writes these rows.
    node_type: Mapped[Optional[str]] = mapped_column(String(30), nullable=True)
    node_id: Mapped[Optional[str]] = mapped_column(String(100), nullable=True)

    # The text that was embedded (required).
    chunk_text: Mapped[str] = mapped_column(Text, nullable=False)

    # Name of the embedding model; new rows default to "voyage-3.5".
    embedding_model: Mapped[str] = mapped_column(
        String(50),
        nullable=False,
        default="voyage-3.5",
    )

    # The embedding vector column is created by a migration with the
    # vector(1024) type. It is intentionally not declared as a typed attribute
    # on this model and is handled directly in queries.

    # Free-form metadata; `dict` is passed as a callable so every new row
    # gets its own empty dict.
    meta: Mapped[dict[str, Any]] = mapped_column(JSONB, nullable=False, default=dict)

    # Timezone-aware audit timestamps, evaluated in Python as "now" in UTC.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
    )
    # Set on insert and refreshed on every ORM-issued UPDATE.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
        onupdate=lambda: datetime.now(timezone.utc),
    )
|