Files
resolutionflow/backend/app/services/tree_chunker.py
Michael Chihlas 1aa60dada2 feat: add AI assistant with in-session copilot and standalone chat with RAG
Implements three-phase AI assistant feature:
- Phase 0: RAG infrastructure with pgvector embeddings, Voyage AI integration,
  tree chunking service, and semantic search over team's flow library
- Phase 1: In-session copilot panel during flow navigation with contextual
  AI help, current step awareness, and suggested related flows
- Phase 2: Standalone AI chat page with persistent conversation history,
  pin/delete, and configurable retention policies (account-level)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-04 01:36:36 -05:00

166 lines
5.0 KiB
Python

"""Tree chunker — converts tree_structure JSON into embeddable text chunks.
Produces three chunk types:
- tree_summary: Name + description + tags + type overview
- node: Individual node content with breadcrumb path context
- solution: Full text of a solution node with path context (action/info/warning nodes are emitted as node chunks)
"""
import logging
from typing import Any
logger = logging.getLogger(__name__)
def _get_breadcrumb(node: dict, parent_path: str = "") -> str:
"""Build a breadcrumb path string for a node."""
content = node.get("content", node.get("label", ""))[:80]
if parent_path:
return f"{parent_path} > {content}"
return content
def _chunk_node(
    node: dict,
    tree_name: str,
    tree_type: str,
    tags: list[str],
    parent_path: str = "",
) -> list[dict[str, Any]]:
    """Recursively chunk a node and its children.

    Emits at most one chunk for ``node`` itself, followed by the chunks of
    every dict entry in its ``children`` list (depth-first, in order).

    Chunk text layout, one item per line:
        ``[<type>] <content>``
        ``Options: ...``   (question/decision nodes with dict children)
        ``<description>``  (step/section_header nodes with a description)
        ``Path: <breadcrumb>``
        ``Flow: <tree_name> | Type: <tree_type>``
        ``Tags: ...``      (only when ``tags`` is non-empty)

    Nodes whose ``type`` is not one of the recognized types produce no chunk
    of their own, but their children are still visited.

    Args:
        node: Node dict from the tree structure.
        tree_name: Name of the flow the node belongs to.
        tree_type: Type of the flow.
        tags: Tag names attached to the flow.
        parent_path: Breadcrumb of the parent node; empty for a root node.

    Returns:
        List of chunk dicts with keys ``chunk_type``, ``node_type``,
        ``node_id`` and ``chunk_text``.
    """
    node_type = node.get("type", "unknown")
    node_id = node.get("id", "")

    # Prefer "content", fall back to "label". An explicit JSON null is treated
    # like a missing key so it never renders as the literal text "None".
    content = node.get("content")
    if content is None:
        content = node.get("label")
    if content is None:
        content = ""

    breadcrumb = _get_breadcrumb(node, parent_path)

    # "children" may be absent, null, or malformed; only a real list is used,
    # and only its dict entries are treated as nodes.
    raw_children = node.get("children")
    if not isinstance(raw_children, list):
        raw_children = []
    child_nodes = [child for child in raw_children if isinstance(child, dict)]

    emits_chunk = True
    detail = None  # optional second line of the chunk text
    if node_type in ("question", "decision"):
        option_labels = []
        for child in child_nodes:
            label = child.get("label")
            if label is None:
                label = child.get("content")
            if label is None:
                label = ""
            option_labels.append(str(label)[:100])
        if option_labels:
            detail = f"Options: {', '.join(option_labels)}"
    elif node_type in ("step", "section_header"):
        if node.get("description"):
            # str() keeps the final join safe for non-string descriptions.
            detail = str(node["description"])
    elif node_type not in ("action", "solution", "info", "warning"):
        emits_chunk = False

    chunks: list[dict[str, Any]] = []
    if emits_chunk:
        text_parts = [f"[{node_type}] {content}"]
        if detail is not None:
            text_parts.append(detail)
        text_parts.append(f"Path: {breadcrumb}")
        text_parts.append(f"Flow: {tree_name} | Type: {tree_type}")
        if tags:
            text_parts.append(f"Tags: {', '.join(tags)}")
        chunks.append({
            # Only solution nodes get their own chunk type.
            "chunk_type": "solution" if node_type == "solution" else "node",
            "node_type": node_type,
            "node_id": node_id,
            "chunk_text": "\n".join(text_parts),
        })

    # Recurse into children, passing this node's breadcrumb as their parent path.
    for child in child_nodes:
        chunks.extend(
            _chunk_node(child, tree_name, tree_type, tags, breadcrumb)
        )

    # NOTE(review): next_node_id links are not followed here. The original
    # comment says they are handled at the tree level -- confirm that the
    # caller actually does so.
    return chunks
def chunk_tree(
tree_name: str,
tree_type: str,
description: str | None,
tags: list[str],
tree_structure: dict[str, Any],
) -> list[dict[str, Any]]:
"""Convert a tree into embeddable text chunks.
Args:
tree_name: Name of the flow.
tree_type: troubleshooting | procedural | maintenance.
description: Flow description.
tags: List of tag names.
tree_structure: The tree_structure JSONB content.
Returns:
List of chunk dicts with keys: chunk_type, node_type, node_id, chunk_text.
"""
chunks = []
# Tree summary chunk
summary_parts = [
f"Flow: {tree_name}",
f"Type: {tree_type}",
]
if description:
summary_parts.append(f"Description: {description}")
if tags:
summary_parts.append(f"Tags: {', '.join(tags)}")
chunks.append({
"chunk_type": "tree_summary",
"node_type": None,
"node_id": None,
"chunk_text": "\n".join(summary_parts),
})
# Chunk the tree structure nodes
root = tree_structure
if isinstance(root, dict):
# Handle both flat structure and nested
if "children" in root or "type" in root:
chunks.extend(
_chunk_node(root, tree_name, tree_type, tags)
)
# Handle steps array (procedural flows)
if "steps" in root and isinstance(root["steps"], list):
for step in root["steps"]:
if isinstance(step, dict):
chunks.extend(
_chunk_node(step, tree_name, tree_type, tags)
)
return chunks