Implements three-phase AI assistant feature:
- Phase 0: RAG infrastructure with pgvector embeddings, Voyage AI integration, tree chunking service, and semantic search over the team's flow library
- Phase 1: In-session copilot panel during flow navigation with contextual AI help, current step awareness, and suggested related flows
- Phase 2: Standalone AI chat page with persistent conversation history, pin/delete, and configurable retention policies (account-level)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
166 lines
5.0 KiB
Python
166 lines
5.0 KiB
Python
"""Tree chunker — converts tree_structure JSON into embeddable text chunks.
|
|
|
|
Produces three chunk types:
|
|
- tree_summary: Name + description + tags + type overview
|
|
- node: Individual node content with breadcrumb path context
|
|
- solution: Full solution/action text with path context
|
|
"""
|
|
import logging
|
|
from typing import Any
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# Truncation limits keep breadcrumbs and option lists compact in chunk text.
_BREADCRUMB_LABEL_MAX = 80
_OPTION_LABEL_MAX = 100

# Node types that produce a chunk. Tuples (not sets) so that an unexpected,
# unhashable "type" value in the JSON simply fails the membership test.
_BRANCHING_TYPES = ("question", "decision")
_LEAF_TYPES = ("action", "solution", "info", "warning")
_STEP_TYPES = ("step", "section_header")
_CHUNKED_TYPES = _BRANCHING_TYPES + _LEAF_TYPES + _STEP_TYPES


def _node_text(node: dict, primary: str, fallback: str) -> str:
    """Return the node's text from ``primary``, falling back to ``fallback``.

    ``dict.get(key, default)`` only applies the default when the key is
    missing, so a JSON ``null`` would otherwise leak through as ``None`` and
    break slicing. A ``None`` value is therefore treated like a missing key.
    Non-string values are coerced with ``str`` so they can be sliced safely.
    """
    value = node.get(primary)
    if value is None:
        value = node.get(fallback)
    if value is None:
        return ""
    return value if isinstance(value, str) else str(value)


def _get_breadcrumb(node: dict, parent_path: str = "") -> str:
    """Build a breadcrumb path string for a node.

    Args:
        node: Node dict; its ``content`` (or, failing that, ``label``) is
            used as the breadcrumb segment, truncated to 80 characters.
        parent_path: Breadcrumb of the parent node, or "" for a root.

    Returns:
        ``"<parent_path> > <segment>"`` when a parent path is given,
        otherwise just the segment.
    """
    segment = _node_text(node, "content", "label")[:_BREADCRUMB_LABEL_MAX]
    if parent_path:
        return f"{parent_path} > {segment}"
    return segment


def _chunk_node(
    node: dict,
    tree_name: str,
    tree_type: str,
    tags: list[str],
    parent_path: str = "",
) -> list[dict[str, Any]]:
    """Recursively chunk a node and its children.

    Args:
        node: Node dict read via the keys ``type``, ``id``, ``content``,
            ``label``, ``description`` and ``children``.
        tree_name: Name of the flow, embedded in every chunk.
        tree_type: Flow type, embedded in every chunk.
        tags: Tag names appended to every chunk when non-empty.
        parent_path: Breadcrumb of the parent node ("" for a root).

    Returns:
        Chunk dicts (keys: chunk_type, node_type, node_id, chunk_text) for
        this node, if its type is one of the chunked types, followed by the
        chunks of its dict children in order. Nodes of any other type emit
        no chunk themselves but are still traversed.
    """
    node_type = node.get("type", "unknown")
    node_id = node.get("id", "")
    content = _node_text(node, "content", "label")
    breadcrumb = _get_breadcrumb(node, parent_path)

    # Normalise children once: a null / non-list value means "no children"
    # both for the option labels and for the recursion below.
    children = node.get("children")
    if not isinstance(children, list):
        children = []
    child_nodes = [child for child in children if isinstance(child, dict)]

    chunks: list[dict[str, Any]] = []

    if node_type in _CHUNKED_TYPES:
        text_parts = [f"[{node_type}] {content}"]

        if node_type in _BRANCHING_TYPES:
            # Options prefer the child's label over its content.
            option_labels = [
                _node_text(child, "label", "content")[:_OPTION_LABEL_MAX]
                for child in child_nodes
            ]
            if option_labels:
                text_parts.append(f"Options: {', '.join(option_labels)}")
        elif node_type in _STEP_TYPES and node.get("description"):
            # The description sits directly under the header line.
            text_parts.append(str(node["description"]))

        text_parts.append(f"Path: {breadcrumb}")
        text_parts.append(f"Flow: {tree_name} | Type: {tree_type}")
        if tags:
            text_parts.append(f"Tags: {', '.join(tags)}")

        chunks.append({
            # Only solution nodes get their own chunk type.
            "chunk_type": "solution" if node_type == "solution" else "node",
            "node_type": node_type,
            "node_id": node_id,
            "chunk_text": "\n".join(text_parts),
        })

    # Recurse into children, extending the breadcrumb with this node.
    for child in child_nodes:
        chunks.extend(
            _chunk_node(child, tree_name, tree_type, tags, breadcrumb)
        )

    # NOTE: nodes linked via next_node_id are not followed here; per the
    # original comment they are meant to be handled at the tree level.

    return chunks
|
|
|
|
|
|
def chunk_tree(
|
|
tree_name: str,
|
|
tree_type: str,
|
|
description: str | None,
|
|
tags: list[str],
|
|
tree_structure: dict[str, Any],
|
|
) -> list[dict[str, Any]]:
|
|
"""Convert a tree into embeddable text chunks.
|
|
|
|
Args:
|
|
tree_name: Name of the flow.
|
|
tree_type: troubleshooting | procedural | maintenance.
|
|
description: Flow description.
|
|
tags: List of tag names.
|
|
tree_structure: The tree_structure JSONB content.
|
|
|
|
Returns:
|
|
List of chunk dicts with keys: chunk_type, node_type, node_id, chunk_text.
|
|
"""
|
|
chunks = []
|
|
|
|
# Tree summary chunk
|
|
summary_parts = [
|
|
f"Flow: {tree_name}",
|
|
f"Type: {tree_type}",
|
|
]
|
|
if description:
|
|
summary_parts.append(f"Description: {description}")
|
|
if tags:
|
|
summary_parts.append(f"Tags: {', '.join(tags)}")
|
|
|
|
chunks.append({
|
|
"chunk_type": "tree_summary",
|
|
"node_type": None,
|
|
"node_id": None,
|
|
"chunk_text": "\n".join(summary_parts),
|
|
})
|
|
|
|
# Chunk the tree structure nodes
|
|
root = tree_structure
|
|
if isinstance(root, dict):
|
|
# Handle both flat structure and nested
|
|
if "children" in root or "type" in root:
|
|
chunks.extend(
|
|
_chunk_node(root, tree_name, tree_type, tags)
|
|
)
|
|
# Handle steps array (procedural flows)
|
|
if "steps" in root and isinstance(root["steps"], list):
|
|
for step in root["steps"]:
|
|
if isinstance(step, dict):
|
|
chunks.extend(
|
|
_chunk_node(step, tree_name, tree_type, tags)
|
|
)
|
|
|
|
return chunks
|