#!/usr/bin/env python3 """ ResolutionFlow Decision Trees - Batch 2: Networking, Active Directory, Microsoft 365. This script adds new troubleshooting decision trees to complement the original seed_trees.py trees. Covers common MSP scenarios across three categories: - Networking (DNS, DHCP, VPN, Bandwidth, Wireless, Firewall) - Active Directory / Entra ID (Lockouts, Replication, GPO, Sync, Domain Join, Auth) - Microsoft 365 (Teams, OneDrive, Mail Flow, SharePoint, MFA, Licensing) Run from the backend directory: python -m scripts.seed_trees_v2 --email admin@example.com --password YourPass123 Requirements: - Backend server must be running (uvicorn app.main:app) """ import asyncio import argparse import httpx from typing import Any # Import AD trees from separate module (file is too large for one file) from scripts.seed_trees_ad import ( get_repeated_lockout_tree, get_ad_replication_tree, get_gpo_not_applying_tree, get_entra_id_sync_tree, get_domain_join_tree, get_kerberos_auth_tree, ) # Import additional networking trees from separate module from scripts.seed_trees_networking import ( get_bandwidth_slow_internet_tree, get_wireless_connectivity_tree, get_firewall_blocking_tree, ) # Import M365 trees from separate module from scripts.seed_trees_m365 import ( get_teams_call_quality_tree, get_onedrive_sync_tree, get_mail_flow_tree, get_sharepoint_permissions_tree, get_mfa_lockout_tree, get_license_assignment_tree, ) # API Configuration API_BASE_URL = "http://localhost:8000/api/v1" ADMIN_EMAIL = None ADMIN_PASSWORD = None # ============================================================================= # TREE STRUCTURE NORMALIZATION # ============================================================================= def normalize_node(node: dict[str, Any]) -> None: """Recursively fix node fields to match the backend validation schema. - Action nodes: copies 'description' to 'action' if 'action' is missing - Solution nodes: copies 'description' to 'solution' if 'solution' is missing - Decision nodes with only 1 child: duplicates the child with an 'Other' option """ node_type = node.get("type") if node_type == "action": if "action" not in node and "description" in node: node["action"] = node["description"] elif node_type == "solution": if "solution" not in node and "description" in node: node["solution"] = node["description"] elif node_type == "decision": children = node.get("children", []) if len(children) == 1: # Add a generic second branch so validation passes fallback = { "id": children[0]["id"] + "_alt", "type": "solution", "title": "Escalate for Further Investigation", "solution": "The issue does not match the expected scenario. Escalate to a senior engineer or gather additional information before proceeding." } children.append(fallback) # Also add an option for the new branch if options exist options = node.get("options", []) if options and len(options) == 1: options.append({ "id": options[0]["id"] + "_alt", "label": "None of the above / Not sure", "next_node_id": fallback["id"] }) # Recurse into children for child in node.get("children", []): normalize_node(child) def normalize_tree_structure(tree_data: dict[str, Any]) -> dict[str, Any]: """Normalize an entire tree's structure before sending to the API.""" if "tree_structure" in tree_data: normalize_node(tree_data["tree_structure"]) return tree_data # ============================================================================= # GLOBAL CATEGORY MANAGEMENT # ============================================================================= def slugify(name: str) -> str: """Convert a category name to a URL-safe slug.""" import re slug = re.sub(r'[^a-zA-Z0-9 ]', '', name.lower()) slug = re.sub(r' +', '-', slug.strip()) return slug async def ensure_global_categories( client: httpx.AsyncClient, token: str, category_names: list[str] ) -> dict[str, str]: """Ensure global categories exist and return a name -> UUID mapping. Creates any categories that don't already exist. Returns dict like {"Networking": "uuid-here", "Microsoft 365": "uuid-here"} """ headers = {"Authorization": f"Bearer {token}"} category_map: dict[str, str] = {} # Fetch existing global categories resp = await client.get(f"{API_BASE_URL}/admin/categories/global", headers=headers) if resp.status_code == 200: for cat in resp.json(): category_map[cat["name"]] = cat["id"] # Create any missing categories for name in category_names: if name not in category_map: slug = slugify(name) create_resp = await client.post( f"{API_BASE_URL}/admin/categories/global", json={"name": name, "slug": slug, "description": f"Troubleshooting trees for {name}"}, headers=headers ) if create_resp.status_code == 201: cat_data = create_resp.json() category_map[name] = cat_data["id"] print(f" [NEW] Created global category: {name}") elif create_resp.status_code == 409: # Slug conflict — already exists, re-fetch resp2 = await client.get(f"{API_BASE_URL}/admin/categories/global", headers=headers) if resp2.status_code == 200: for cat in resp2.json(): if cat["name"] == name: category_map[name] = cat["id"] break print(f" [OK] Category already exists: {name}") else: print(f" [WARN] Failed to create category '{name}': {create_resp.text}") else: print(f" [OK] Category exists: {name}") return category_map # ============================================================================= # NETWORKING TREES # ============================================================================= def get_dns_resolution_tree() -> dict[str, Any]: """ DNS Resolution Failures - Comprehensive networking tree. Covers client-side DNS issues, server-side DNS problems, conditional forwarders, split-brain DNS, and common misconfigurations. """ return { "name": "DNS Resolution Failures", "description": "Troubleshoot DNS resolution issues including failed lookups, slow resolution, wrong answers, and DNS server problems. Covers both client-side and server-side diagnostics with PowerShell and nslookup commands.", "category": "Networking", "tree_structure": { "id": "root", "type": "decision", "question": "What type of DNS issue is the user experiencing?", "help_text": "Identify whether this affects a single user, multiple users, or a specific resource. This determines if it's a client-side or server-side issue.", "options": [ {"id": "single_user", "label": "Single user can't resolve names", "next_node_id": "check_single_client"}, {"id": "multiple_users", "label": "Multiple users affected", "next_node_id": "check_dns_server"}, {"id": "specific_resource", "label": "One specific hostname won't resolve", "next_node_id": "check_specific_record"}, {"id": "intermittent", "label": "DNS works sometimes, fails other times", "next_node_id": "check_intermittent"} ], "children": [ { "id": "check_single_client", "type": "action", "title": "Check Client DNS Configuration", "description": "Verify the client's DNS settings and basic connectivity.\n\n**PowerShell:**\n```\nGet-DnsClientServerAddress -AddressFamily IPv4 | Format-Table InterfaceAlias,ServerAddresses\n\nGet-Service -Name Dnscache\n\nResolve-DnsName google.com\n```\n\n**Quick check:** Can the user ping the DNS server IP directly?\n```\nTest-Connection -ComputerName -Count 2\n```", "next_node_id": "client_dns_result" }, { "id": "client_dns_result", "type": "decision", "question": "What did the client DNS check reveal?", "help_text": "Compare the DNS servers configured against your environment's expected values", "options": [ {"id": "wrong_dns", "label": "DNS servers are wrong / DHCP-assigned are incorrect", "next_node_id": "fix_client_dns"}, {"id": "cant_reach_dns", "label": "Can't ping the DNS server", "next_node_id": "check_network_path"}, {"id": "dns_ok_no_resolve", "label": "DNS config looks correct but still can't resolve", "next_node_id": "flush_dns_cache"}, {"id": "dns_service_stopped", "label": "DNS Client service is stopped", "next_node_id": "restart_dns_client"} ], "children": [ { "id": "fix_client_dns", "type": "action", "title": "Correct Client DNS Configuration", "description": "The client has incorrect DNS servers configured.\n\n**If using DHCP (preferred):**\n```\nipconfig /release\nipconfig /renew\n```\n\n**If DHCP is assigning wrong DNS**, check the DHCP server scope options.\n\n**If static DNS is needed:**\n```\nSet-DnsClientServerAddress -InterfaceAlias \"Ethernet\" -ServerAddresses \"10.0.0.10\",\"10.0.0.11\"\n```\n\n**Verify after change:**\n```\nResolve-DnsName google.com\nResolve-DnsName your-internal-server.domain.local\n```", "next_node_id": "verify_dns_resolution" }, { "id": "check_network_path", "type": "action", "title": "Diagnose Network Path to DNS Server", "description": "The client can't reach the DNS server. This is a network connectivity issue.\n\n**Commands:**\n```\nTest-NetConnection -ComputerName -Port 53 -InformationLevel Detailed\n\ntracert \n```\n\n**Common causes:**\n- VLAN misconfiguration\n- Firewall blocking port 53 (UDP/TCP)\n- Network cable / WiFi disconnect\n- VPN tunnel down", "next_node_id": "network_path_result" }, { "id": "network_path_result", "type": "decision", "question": "Can the client reach the DNS server on port 53?", "help_text": "Test-NetConnection will show TcpTestSucceeded: True/False", "options": [ {"id": "port_blocked", "label": "Port 53 is blocked (TcpTestSucceeded: False)", "next_node_id": "escalate_firewall"}, {"id": "no_route", "label": "No route / request times out completely", "next_node_id": "escalate_network"}, {"id": "port_open", "label": "Port 53 is open but DNS still fails", "next_node_id": "check_dns_server"} ], "children": [ { "id": "escalate_firewall", "type": "solution", "title": "Escalate: Firewall Blocking DNS Traffic", "description": "Port 53 (DNS) is being blocked between the client and DNS server.\n\n**Likely causes:**\n- Host-based firewall on client or server\n- Network firewall / ACL blocking UDP 53 and/or TCP 53\n- New firewall rule change\n\n**Actions:**\n1. Check Windows Firewall: `Get-NetFirewallRule | Where-Object {$_.DisplayName -like '*DNS*'}`\n2. Escalate to network team with source IP, destination IP, port 53, traceroute output\n\n**Ticket Notes:** Include traceroute output and Test-NetConnection results." }, { "id": "escalate_network", "type": "solution", "title": "Escalate: Network Routing Issue", "description": "The client cannot reach the DNS server at all.\n\n**Check before escalating:**\n1. Is the client on the correct VLAN? `Get-NetAdapter | Select Name,Status,LinkSpeed`\n2. Does the client have a valid IP? `ipconfig /all`\n3. Can the client ping its default gateway?\n\n**If no gateway connectivity:** Local network issue (cable, switch port, WiFi)\n**If gateway works but DNS unreachable:** Routing issue between subnets\n\n**Escalate to:** Network Engineering team" } ] }, { "id": "flush_dns_cache", "type": "action", "title": "Flush DNS Cache and Re-register", "description": "DNS config looks correct. The issue may be a stale cache.\n\n**PowerShell (run as Administrator):**\n```\nClear-DnsClientCache\nipconfig /registerdns\nGet-DnsClientCache | Measure-Object\n```\n\n**Also check the hosts file for overrides:**\n```\nGet-Content C:\\Windows\\System32\\drivers\\etc\\hosts | Where-Object {$_ -notmatch '^#' -and $_ -ne ''}\n```\n\nA stale hosts file entry will override DNS every time.", "next_node_id": "post_flush_check" }, { "id": "post_flush_check", "type": "decision", "question": "Did flushing DNS and checking the hosts file resolve the issue?", "help_text": "Test: Resolve-DnsName ", "options": [ {"id": "resolved", "label": "Yes, DNS is working now", "next_node_id": "solution_cache_flush"}, {"id": "hosts_entry", "label": "Found a bad hosts file entry", "next_node_id": "fix_hosts_file"}, {"id": "still_failing", "label": "Still not resolving", "next_node_id": "nslookup_deep_dive"} ], "children": [ { "id": "solution_cache_flush", "type": "solution", "title": "Resolved: Stale DNS Cache", "description": "The issue was caused by a stale DNS cache entry.\n\n**Root cause:** DNS cache held an outdated or incorrect record. Common after server IP changes, DNS record updates, or VPN cycling.\n\n**Resolution:** Flushed DNS client cache with `Clear-DnsClientCache`.\n\n**Prevention:** If recurring for many users, consider lowering TTL values on frequently-changed records." }, { "id": "fix_hosts_file", "type": "action", "title": "Remove Bad Hosts File Entry", "description": "A static entry in the hosts file is overriding DNS.\n\n**Edit (run Notepad as Administrator):**\n```\nnotepad C:\\Windows\\System32\\drivers\\etc\\hosts\n```\n\nRemove or comment out the offending line by adding `#` at the beginning.\n\n**Common culprits:** Old dev/test entries, malware-added entries, legacy workarounds.\n\n**After editing:** `Clear-DnsClientCache`\n\n**Security note:** If the hosts file has entries you don't recognize, run a malware scan.", "next_node_id": "verify_dns_resolution" }, { "id": "nslookup_deep_dive", "type": "action", "title": "Deep DNS Diagnostics with nslookup", "description": "Standard troubleshooting hasn't resolved it. Compare DNS responses.\n\n**Test against different DNS servers:**\n```\nnslookup problematic-hostname \nnslookup problematic-hostname 8.8.8.8\n\nResolve-DnsName -Name problematic-hostname -Type A -Server \n```\n\n**Compare results:** If public DNS resolves it but internal doesn't, the record is missing from your internal DNS.", "next_node_id": "nslookup_result" }, { "id": "nslookup_result", "type": "decision", "question": "What do the nslookup comparisons show?", "help_text": "Compare internal DNS server response vs public DNS (8.8.8.8)", "options": [ {"id": "internal_missing", "label": "Internal DNS can't resolve, public DNS can", "next_node_id": "check_forwarders"}, {"id": "both_fail", "label": "Neither internal nor public DNS can resolve", "next_node_id": "check_name_validity"}, {"id": "wrong_ip", "label": "DNS returns wrong IP address", "next_node_id": "check_stale_record"}, {"id": "internal_only", "label": "It's an internal name (no public record expected)", "next_node_id": "check_dns_zone"} ], "children": [ { "id": "check_forwarders", "type": "solution", "title": "Check DNS Forwarders Configuration", "description": "Internal DNS can't resolve external names — likely a forwarder issue.\n\n**On the DNS Server:**\n```\nGet-DnsServerForwarder\nTest-NetConnection -ComputerName 8.8.8.8 -Port 53\n```\n\n**Common causes:** Forwarders unreachable, root hints disabled with no forwarders, conditional forwarder misconfigured.\n\n**Fix:** Update forwarders to reliable public DNS (8.8.8.8, 1.1.1.1).\n\n**Escalate to:** Systems Administration if DNS server changes are needed." }, { "id": "check_name_validity", "type": "solution", "title": "Verify Hostname is Valid", "description": "Neither internal nor public DNS can resolve this name.\n\n**Check:**\n1. Is the hostname spelled correctly?\n2. Does the DNS record actually exist?\n3. Has the record had time to propagate? (up to 48 hours for new records)\n4. Use: https://mxtoolbox.com/DNSLookup.aspx\n\n**Ticket Notes:** Document the exact hostname and test results." }, { "id": "check_stale_record", "type": "solution", "title": "Escalate: Stale or Incorrect DNS Record", "description": "DNS is returning the wrong IP address.\n\n**Possible causes:** Server migrated but DNS not updated, DNS scavenging disabled, dynamic DNS registration from wrong host.\n\n**Gather:**\n```\nResolve-DnsName -Name hostname -Type A | Select Name,IPAddress,TTL\n```\n\n**Escalate to:** DNS Administrator with current wrong IP and expected correct IP." }, { "id": "check_dns_zone", "type": "solution", "title": "Escalate: Missing Internal DNS Record", "description": "Internal hostname doesn't have a DNS record.\n\n**For the DNS admin:**\n```\nGet-DnsServerZone | Where-Object {$_.ZoneName -like '*yourdomain*'}\nGet-DnsServerResourceRecord -ZoneName 'yourdomain.local' -Name 'hostname'\n```\n\n**If dynamic DNS:** Re-register from the target machine: `ipconfig /registerdns`\n\n**Escalate to:** DNS Administrator to create the missing record." } ] } ] }, { "id": "restart_dns_client", "type": "action", "title": "Restart DNS Client Service", "description": "The DNS Client service (Dnscache) is stopped.\n\n**PowerShell (Administrator):**\n```\nStart-Service -Name Dnscache\nSet-Service -Name Dnscache -StartupType Automatic\nGet-Service -Name Dnscache\n```\n\n**Note:** This service should ALWAYS be running. Investigate why it was stopped.", "next_node_id": "verify_dns_resolution" } ] }, { "id": "check_dns_server", "type": "action", "title": "Check DNS Server Health", "description": "Multiple users affected — check the DNS server itself.\n\n**On the DNS Server (PowerShell):**\n```\nGet-Service -Name DNS\n\nTest-DnsServer -IPAddress -ZoneName yourdomain.local\n\nGet-WinEvent -FilterHashtable @{LogName='DNS Server';Level=2} -MaxEvents 10\n```", "next_node_id": "dns_server_result" }, { "id": "dns_server_result", "type": "decision", "question": "What is the DNS server status?", "help_text": "Check the service, overall server health, and event logs", "options": [ {"id": "service_stopped", "label": "DNS Server service is stopped/crashed", "next_node_id": "restart_dns_server"}, {"id": "server_unreachable", "label": "DNS server is unreachable / down", "next_node_id": "dns_server_down"}, {"id": "service_running_errors", "label": "Service running but event log shows errors", "next_node_id": "dns_event_errors"}, {"id": "server_looks_ok", "label": "Server and service look healthy", "next_node_id": "check_dns_zones"} ], "children": [ { "id": "restart_dns_server", "type": "action", "title": "Restart DNS Server Service", "description": "**CAUTION:** Restarting DNS affects all users relying on this server.\n\n```\nRestart-Service -Name DNS -Force\nGet-Service -Name DNS\nResolve-DnsName google.com -Server localhost\n```\n\n**If it won't start:**\n```\nGet-NetTCPConnection -LocalPort 53\nGet-NetUDPEndpoint -LocalPort 53\n```\n\n**Important:** If a secondary DNS server exists, verify clients can failover.", "next_node_id": "dns_restart_result" }, { "id": "dns_restart_result", "type": "decision", "question": "Did the DNS Server service restart successfully?", "help_text": "Verify with: Get-Service -Name DNS", "options": [ {"id": "restart_ok", "label": "Yes, service running and resolving", "next_node_id": "solution_dns_service_restart"}, {"id": "restart_fail", "label": "Service won't start", "next_node_id": "escalate_dns_critical"} ], "children": [ { "id": "solution_dns_service_restart", "type": "solution", "title": "Resolved: DNS Server Service Restarted", "description": "The DNS Server service was stopped and has been restarted.\n\n**Post-resolution:**\n1. Monitor for the next few hours\n2. Set up monitoring alerts for DNS service status\n3. Review event logs for recurring errors\n4. Investigate why it stopped\n\n**Ticket Notes:** DNS service was stopped, restarted successfully. Root cause investigation needed." }, { "id": "escalate_dns_critical", "type": "solution", "title": "CRITICAL: DNS Server Won't Start", "description": "**Priority: CRITICAL**\n\n**Immediate actions:**\n1. Ensure secondary DNS is handling queries\n2. If no secondary, consider pointing clients to 8.8.8.8 temporarily\n3. Check disk space, Windows updates, event logs\n\n**Escalate to:** Senior Systems Administrator\n**Communication:** Notify affected users of degraded DNS." } ] }, { "id": "dns_server_down", "type": "solution", "title": "CRITICAL: DNS Server Unreachable", "description": "**Priority: CRITICAL**\n\n**Immediate actions:**\n1. Check secondary DNS server\n2. Check via iLO/iDRAC/IPMI or hypervisor\n3. Contact datacenter if hosted\n\n**Temporary workaround** (emergency only):\n```\nSet-DhcpServerv4OptionValue -ScopeId -DnsServer 8.8.8.8,1.1.1.1\n```\nThis breaks internal name resolution.\n\n**Escalate to:** Infrastructure team immediately" }, { "id": "dns_event_errors", "type": "solution", "title": "Investigate DNS Server Event Log Errors", "description": "DNS service is running but logging errors.\n\n**Common errors:**\n- **Event 4015:** Zone transfer failed — check connectivity between DNS servers\n- **Event 4004:** Zone not loaded — corrupt zone file\n- **Event 7062:** No forwarders reachable — check internet connectivity\n- **Event 4512:** LDAP-integrated zone error — check AD replication: `repadmin /replsummary`\n\n**Escalate to:** DNS/Systems Administrator with event log exports." }, { "id": "check_dns_zones", "type": "solution", "title": "Check DNS Zones and Records", "description": "DNS server looks healthy. Check the zones.\n\n**On DNS server:**\n```\nGet-DnsServerZone | Format-Table ZoneName,ZoneType,DynamicUpdate\n\nGet-DnsServerResourceRecord -ZoneName 'yourdomain.local' -Name 'problemhost'\n```\n\n**For AD-integrated zones:** Verify AD replication is healthy.\n\n**Escalate to:** DNS Administrator with zone configuration output." } ] }, { "id": "check_specific_record", "type": "action", "title": "Test the Specific Hostname", "description": "One specific hostname won't resolve. Identify whether the record exists.\n\n**Commands:**\n```\nResolve-DnsName -Name 'problematic-hostname' -Type A\nnslookup problematic-hostname \nnslookup problematic-hostname 8.8.8.8\nGet-DnsClientGlobalSetting | Select SuffixSearchList\n```\n\n**Key question:** Is this an internal name (.local / .corp) or external?", "next_node_id": "specific_record_result" }, { "id": "specific_record_result", "type": "decision", "question": "Is this an internal or external hostname?", "help_text": "Internal names typically end in .local, .corp, .internal or your AD domain suffix", "options": [ {"id": "internal_name", "label": "Internal hostname (.local / AD domain)", "next_node_id": "check_dns_zones"}, {"id": "external_name", "label": "External / public hostname", "next_node_id": "check_external_dns"}, {"id": "unsure", "label": "Not sure / no suffix", "next_node_id": "check_suffix_search"} ], "children": [ { "id": "check_external_dns", "type": "solution", "title": "Test External DNS Resolution Path", "description": "Public hostname failing. Compare resolution paths.\n\n```\nResolve-DnsName -Name 'example.com' -Server \nResolve-DnsName -Name 'example.com' -Server 8.8.8.8\n```\n\n**If public DNS works but internal doesn't:** Check content filter/DNS filter, conditional forwarders, or split-brain DNS configuration.\n\n**Escalate to:** DNS/Network admin with comparison results." }, { "id": "check_suffix_search", "type": "solution", "title": "Check DNS Suffix Search List", "description": "Short names may not resolve if the DNS suffix search list is incomplete.\n\n```\nGet-DnsClientGlobalSetting | Select SuffixSearchList\nipconfig /all | findstr 'Search'\n```\n\n**Common issue:** Suffix search list doesn't include the right domain.\n\n**Fix:** Set via DHCP (scope option 015) or GPO (Computer Config > Admin Templates > Network > DNS Client)." } ] }, { "id": "check_intermittent", "type": "decision", "question": "Is the intermittent failure affecting internal names, external names, or both?", "help_text": "Test both an internal hostname and google.com multiple times", "options": [ {"id": "internal_intermittent", "label": "Only internal names fail intermittently", "next_node_id": "check_multiple_dns_servers"}, {"id": "external_intermittent", "label": "Only external names fail intermittently", "next_node_id": "check_forwarder_health"}, {"id": "both_intermittent", "label": "Both fail intermittently", "next_node_id": "check_dns_load"} ], "children": [ { "id": "check_multiple_dns_servers", "type": "solution", "title": "Check Multiple DNS Server Consistency", "description": "If you have multiple DNS servers, one may be out of sync.\n\n```\nResolve-DnsName -Name 'testhost.yourdomain.local' -Server 10.0.0.10\nResolve-DnsName -Name 'testhost.yourdomain.local' -Server 10.0.0.11\n```\n\n**If results differ:** Check AD replication:\n```\nrepadmin /replsummary\nrepadmin /showrepl\n```\n\n**Temporary workaround:** Point affected clients to the working DNS server only.\n\n**Escalate to:** Senior Systems Admin with replication data." }, { "id": "check_forwarder_health", "type": "solution", "title": "Check Upstream DNS / Forwarder Health", "description": "External resolution is intermittent — likely a forwarder or upstream issue.\n\n```\nGet-DnsServerForwarder\nResolve-DnsName google.com -Server \nTest-Connection -ComputerName -Count 20 | Measure-Object -Property ResponseTime -Average\n```\n\n**If using ISP DNS:** Try switching to 8.8.8.8, 1.1.1.1.\n**If packet loss to forwarders:** Network/ISP issue." }, { "id": "check_dns_load", "type": "solution", "title": "Investigate DNS Server Performance", "description": "Both internal and external resolution intermittent — server may be overloaded.\n\n```\nGet-Counter '\\Processor(_Total)\\% Processor Time','\\Memory\\Available MBytes'\nGet-Counter '\\DNS\\Total Query Received/sec','\\DNS\\Recursive Queries/sec'\n```\n\n**If overloaded:** Add another DNS server, check for amplification attacks, review recursive settings.\n\n**Escalate to:** Systems Administration with performance data." } ] }, { "id": "verify_dns_resolution", "type": "decision", "question": "Is DNS resolving correctly now?", "help_text": "Test: Resolve-DnsName google.com AND Resolve-DnsName ", "options": [ {"id": "all_resolved", "label": "Yes, both internal and external resolve", "next_node_id": "solution_resolved"}, {"id": "still_issues", "label": "Still having issues", "next_node_id": "check_dns_server"} ], "children": [ { "id": "solution_resolved", "type": "solution", "title": "DNS Resolution Issue Resolved", "description": "DNS is now working correctly.\n\n**Document in ticket:**\n- Root cause identified\n- Steps taken to resolve\n- Configuration changes made\n\n**If widespread issue:** Send communication to affected users confirming resolution." } ] } ] } } def get_dhcp_issues_tree() -> dict[str, Any]: """ DHCP Lease Issues / No IP Address - Networking tree. Covers APIPA addresses, scope exhaustion, relay agents, and DHCP server problems. """ return { "name": "DHCP Lease Issues / No IP Address", "description": "Troubleshoot DHCP problems including clients not getting IP addresses, APIPA (169.254.x.x) addresses, scope exhaustion, and DHCP server failures. Covers both client-side and server-side diagnostics.", "category": "Networking", "tree_structure": { "id": "root", "type": "decision", "question": "How many devices are affected by the DHCP issue?", "help_text": "This determines whether it's a client-specific issue or a DHCP server/infrastructure problem.", "options": [ {"id": "single", "label": "Single device can't get an IP", "next_node_id": "check_single_device"}, {"id": "multiple_same_subnet", "label": "Multiple devices on the same subnet", "next_node_id": "check_dhcp_scope"}, {"id": "multiple_subnets", "label": "Multiple subnets affected", "next_node_id": "check_dhcp_server_health"}, {"id": "new_device", "label": "Brand new device / just reimaged", "next_node_id": "check_new_device"} ], "children": [ { "id": "check_single_device", "type": "action", "title": "Check Device Network Configuration", "description": "Start with basic network adapter diagnostics.\n\n**PowerShell:**\n```\nipconfig /all\nGet-NetIPInterface -AddressFamily IPv4 | Select InterfaceAlias,Dhcp\nGet-NetAdapter | Select Name,Status,LinkSpeed,MediaConnectionState\n```\n\n**Look for:**\n- 169.254.x.x (APIPA) = DHCP request failed\n- Is DHCP enabled on the adapter?\n- Is the adapter showing 'Up' status?", "next_node_id": "single_device_result" }, { "id": "single_device_result", "type": "decision", "question": "What does the device's network configuration show?", "help_text": "Check ipconfig /all output carefully", "options": [ {"id": "apipa", "label": "169.254.x.x address (APIPA)", "next_node_id": "try_dhcp_renew"}, {"id": "static_ip", "label": "Static IP configured (DHCP not enabled)", "next_node_id": "fix_static_to_dhcp"}, {"id": "no_adapter", "label": "Network adapter disabled or missing", "next_node_id": "fix_adapter"}, {"id": "has_ip_not_working", "label": "Has a DHCP IP but network isn't working", "next_node_id": "check_wrong_scope"}, {"id": "no_link", "label": "No media / disconnected", "next_node_id": "check_physical"} ], "children": [ { "id": "try_dhcp_renew", "type": "action", "title": "Release and Renew DHCP Lease", "description": "APIPA address means DHCP requests are failing.\n\n**PowerShell (Administrator):**\n```\nipconfig /release\nStart-Sleep -Seconds 5\nipconfig /renew\nipconfig /all\n```\n\n**Error meanings:**\n- \"No DHCP server could be contacted\" = Server unreachable\n- \"The semaphore timeout period has expired\" = Network connectivity issue\n\n**While waiting:** Check if other devices on same switch/VLAN have IPs.", "next_node_id": "dhcp_renew_result" }, { "id": "dhcp_renew_result", "type": "decision", "question": "Did the DHCP renew succeed?", "help_text": "Does the device now have a valid (non-169.254) IP?", "options": [ {"id": "got_ip", "label": "Yes, received a valid IP", "next_node_id": "solution_dhcp_renewed"}, {"id": "still_apipa", "label": "Still showing 169.254.x.x", "next_node_id": "check_dhcp_reachability"}, {"id": "error_msg", "label": "Got an error message", "next_node_id": "check_dhcp_reachability"} ], "children": [ { "id": "solution_dhcp_renewed", "type": "solution", "title": "Resolved: DHCP Lease Renewed", "description": "Device successfully obtained an IP address.\n\n**Likely caused by:** Temporary network glitch, expired lease from sleep/hibernation, brief DHCP server unavailability.\n\n**Verify full connectivity:**\n```\nping \nping 8.8.8.8\nnslookup google.com\n```\n\n**Ticket Notes:** Single device DHCP failure, resolved with release/renew." }, { "id": "check_dhcp_reachability", "type": "action", "title": "Test DHCP Server Reachability", "description": "Can't get a lease. Check DHCP server connectivity.\n\n```\nTest-Connection -ComputerName -Count 2\narp -a\n```\n\n**Key check:** If the device is on a different subnet from the DHCP server, is there a DHCP relay/IP helper configured on the router?", "next_node_id": "dhcp_reach_result" }, { "id": "dhcp_reach_result", "type": "decision", "question": "Can the device communicate with anything on the network?", "help_text": "Check ARP table and ping known hosts on same subnet", "options": [ {"id": "can_reach_local", "label": "Can reach local devices but not DHCP server", "next_node_id": "check_relay_agent"}, {"id": "cant_reach_anything", "label": "Can't reach anything", "next_node_id": "check_physical"}, {"id": "can_reach_dhcp", "label": "Can ping DHCP server but still no lease", "next_node_id": "check_dhcp_scope"} ], "children": [ { "id": "check_relay_agent", "type": "solution", "title": "Check DHCP Relay Agent / IP Helper", "description": "DHCP broadcasts aren't reaching the server. The relay agent may be misconfigured.\n\n**On the router/L3 switch:**\n```\n# Cisco IOS\nshow running-config interface vlan \n# Look for: ip helper-address \n```\n\n**Escalate to:** Network Engineering to verify relay/helper\n**Include:** VLAN ID, subnet, DHCP server IP, affected location" } ] } ] }, { "id": "fix_static_to_dhcp", "type": "action", "title": "Switch from Static IP to DHCP", "description": "Device has a static IP instead of DHCP.\n\n**PowerShell (Administrator):**\n```\nSet-NetIPInterface -InterfaceAlias 'Ethernet' -Dhcp Enabled\nRemove-NetIPAddress -InterfaceAlias 'Ethernet' -Confirm:$false\nSet-DnsClientServerAddress -InterfaceAlias 'Ethernet' -ResetServerAddresses\nipconfig /renew\n```\n\n**Before changing:** Verify this device doesn't NEED a static IP (servers, printers, etc.)", "next_node_id": "verify_dhcp_working" }, { "id": "fix_adapter", "type": "action", "title": "Enable or Troubleshoot Network Adapter", "description": "Network adapter is disabled or not detected.\n\n```\nGet-NetAdapter -IncludeHidden | Select Name,Status,InterfaceDescription\nEnable-NetAdapter -Name 'Ethernet'\nGet-PnpDevice -Class Net | Select Status,FriendlyName\n```\n\n**If missing from Device Manager:** Check BIOS NIC setting, try different USB port, or install drivers.\n**If showing error:** Uninstall device > Scan for hardware changes.", "next_node_id": "verify_dhcp_working" }, { "id": "check_wrong_scope", "type": "solution", "title": "Verify IP is from Correct Scope", "description": "Device has a DHCP IP but it may be from the wrong scope/VLAN.\n\n```\nipconfig /all\nipconfig /all | findstr 'DHCP Server'\n```\n\n**Compare:** Is the IP in the expected range for this location/VLAN?\n\n**Common cause:** Wrong switch port/VLAN or DHCP scope options misconfigured.\n\n**Escalate to:** Network team with device MAC address and current IP info." }, { "id": "check_physical", "type": "action", "title": "Check Physical Network Connection", "description": "No network connectivity at all.\n\n**Check in order:**\n1. Ethernet cable plugged in firmly? Try different cable.\n2. Switch port link light on? Try different port.\n3. WiFi enabled and connected to correct SSID?\n4. NIC link/activity LEDs?\n\n```\nGet-NetAdapter | Select Name,Status,LinkSpeed\nnetsh wlan show interfaces\n```\n\n**If using dock/dongle:** Try connecting directly.", "next_node_id": "physical_result" }, { "id": "physical_result", "type": "decision", "question": "Did fixing the physical connection restore network?", "help_text": "Does adapter show 'Up' status?", "options": [ {"id": "fixed_physical", "label": "Yes, adapter is up", "next_node_id": "try_dhcp_renew"}, {"id": "still_no_link", "label": "Still no link", "next_node_id": "escalate_hardware"}, {"id": "link_up_no_dhcp", "label": "Link up but still no IP", "next_node_id": "try_dhcp_renew"} ], "children": [ { "id": "escalate_hardware", "type": "solution", "title": "Escalate: Possible Hardware Failure", "description": "No link despite checking cables and ports.\n\n**Try before escalating:**\n1. Different cable + different switch port\n2. USB Ethernet adapter as workaround\n3. Uninstall/reinstall NIC driver\n\n**Escalate to:** Desktop Support for hardware assessment." } ] } ] }, { "id": "check_dhcp_scope", "type": "action", "title": "Check DHCP Scope Health", "description": "Multiple devices affected. Check the DHCP scope.\n\n**On DHCP Server:**\n```\nGet-DhcpServerv4ScopeStatistics | Format-Table ScopeId,Free,InUse,PercentageInUse\n\nGet-DhcpServerv4ScopeStatistics | Where-Object {$_.Free -eq 0}\n\nGet-DhcpServerv4Lease -ScopeId | Sort LeaseExpiryTime\n```\n\n**Key metric:** PercentageInUse at 100% = scope exhausted.", "next_node_id": "scope_health_result" }, { "id": "scope_health_result", "type": "decision", "question": "What is the DHCP scope status?", "help_text": "Check scope statistics output", "options": [ {"id": "exhausted", "label": "Scope is 100% full", "next_node_id": "fix_scope_exhaustion"}, {"id": "scope_disabled", "label": "Scope is deactivated", "next_node_id": "activate_scope"}, {"id": "scope_ok", "label": "Has free addresses and is active", "next_node_id": "check_dhcp_server_health"}, {"id": "scope_missing", "label": "No scope for this subnet", "next_node_id": "escalate_missing_scope"} ], "children": [ { "id": "fix_scope_exhaustion", "type": "action", "title": "Address DHCP Scope Exhaustion", "description": "No more IPs available.\n\n```\nGet-DhcpServerv4Lease -ScopeId | Where-Object {$_.AddressState -eq 'InactiveReservation'}\n\nGet-DhcpServerv4Scope -ScopeId | Select LeaseDuration\n```\n\n**Options:**\n1. Reduce lease duration (8 hours for offices)\n2. Delete stale/inactive leases\n3. Expand the scope range\n4. Create exclusions for devices moved to static IPs\n\n**Be careful** expanding scopes — don't overlap with other subnets.", "next_node_id": "scope_fix_result" }, { "id": "scope_fix_result", "type": "decision", "question": "Were you able to free up addresses?", "help_text": "Check updated free count in scope statistics", "options": [ {"id": "freed_addresses", "label": "Yes, scope has free addresses", "next_node_id": "solution_scope_fixed"}, {"id": "need_expansion", "label": "Need to expand scope", "next_node_id": "escalate_scope_expansion"} ], "children": [ { "id": "solution_scope_fixed", "type": "solution", "title": "Resolved: DHCP Scope Addresses Freed", "description": "Cleaned up stale leases.\n\n**Post-resolution:**\n1. Have users run `ipconfig /renew`\n2. Monitor scope utilization\n3. Set up threshold alerts (warn at 80%, critical at 90%)\n4. Regular audit of stale leases\n\n**Ticket Notes:** DHCP scope exhaustion, cleaned stale leases." }, { "id": "escalate_scope_expansion", "type": "solution", "title": "Escalate: DHCP Scope Needs Expansion", "description": "More devices than current range supports.\n\n**Options:**\n1. Expand IP range (if subnet allows)\n2. Reduce lease duration\n3. Move to larger subnet (requires IP redesign)\n4. Segment network (separate VLANs)\n\n**Escalate to:** Network Engineering\n**Temporary workaround:** Static IPs for critical devices." } ] }, { "id": "activate_scope", "type": "action", "title": "Activate DHCP Scope", "description": "Scope is deactivated. Verify it SHOULD be active first.\n\n```\nGet-DhcpServerv4Scope -ScopeId | Select ScopeId,Name,State\nSet-DhcpServerv4Scope -ScopeId -State Active\n```\n\n**After activating:** Have users run `ipconfig /renew`", "next_node_id": "verify_dhcp_working" }, { "id": "escalate_missing_scope", "type": "solution", "title": "Escalate: No DHCP Scope for This Subnet", "description": "No scope configured for this subnet.\n\n**Possible causes:** New VLAN without scope, DHCP migration missed this scope, accidental deletion.\n\n**Escalate to:** DHCP Administrator / Network Engineering\n**Include:** Subnet, VLAN ID, expected IP range, gateway IP\n**Temporary workaround:** Static IPs for critical devices." } ] }, { "id": "check_dhcp_server_health", "type": "action", "title": "Check DHCP Server Health", "description": "Scope has addresses but devices can't get IPs. Check the server.\n\n```\nGet-Service -Name DHCPServer\nGet-DhcpServerInDC\nGet-WinEvent -FilterHashtable @{LogName='Microsoft-Windows-DHCP Server Events/Operational';Level=2,3} -MaxEvents 10\nGet-NetUDPEndpoint -LocalPort 67\n```\n\n**Check if DHCP failover is configured** — partner server too.", "next_node_id": "dhcp_server_result" }, { "id": "dhcp_server_result", "type": "decision", "question": "What is the DHCP server status?", "help_text": "Check service, authorization, and event logs", "options": [ {"id": "service_stopped", "label": "DHCP Server service stopped", "next_node_id": "restart_dhcp_service"}, {"id": "not_authorized", "label": "Not authorized in AD", "next_node_id": "authorize_dhcp"}, {"id": "server_healthy", "label": "Server healthy, service running", "next_node_id": "check_dhcp_conflicts"}, {"id": "server_down", "label": "Server completely unreachable", "next_node_id": "escalate_dhcp_server_down"} ], "children": [ { "id": "restart_dhcp_service", "type": "action", "title": "Restart DHCP Server Service", "description": "```\nRestart-Service -Name DHCPServer -Force\nGet-Service -Name DHCPServer\n```\n\n**After restart:** Have clients run `ipconfig /renew`\n\n**If failover partner exists:** It should have been handling leases.", "next_node_id": "verify_dhcp_working" }, { "id": "authorize_dhcp", "type": "solution", "title": "Escalate: DHCP Server Authorization", "description": "DHCP server not authorized in AD. Unauthorized servers cannot issue leases.\n\n**Requires Domain Admin:**\n```\nAdd-DhcpServerInDC -DnsName dhcpserver.yourdomain.local -IPAddress \n```\n\n**Escalate to:** Domain Administrator\n**Priority:** High" }, { "id": "check_dhcp_conflicts", "type": "solution", "title": "Check for IP Conflicts or Rogue DHCP", "description": "Server healthy with available addresses but clients can't get leases.\n\n**Check for rogue DHCP:**\n```\nipconfig /all | findstr 'DHCP Server'\n```\nIf unexpected IP, there's a rogue DHCP server.\n\n**Check for IP conflicts:**\n```\nGet-DhcpServerv4Lease -ScopeId | Where-Object {$_.AddressState -like '*Decline*'}\n```\n\n**Other possibilities:** MAC address filtering, DHCP policies restricting leases.\n\n**Escalate to:** Network/Systems Admin with audit log findings." }, { "id": "escalate_dhcp_server_down", "type": "solution", "title": "CRITICAL: DHCP Server Down", "description": "**Priority: CRITICAL**\n\n**Immediate actions:**\n1. Check failover DHCP server\n2. Check hypervisor or iLO/iDRAC for server access\n\n**Mitigation:** Existing devices keep leases until expiration. New devices get APIPA.\n\n**Escalate to:** Infrastructure team immediately\n**Communication:** New network connections may fail." } ] }, { "id": "check_new_device", "type": "decision", "question": "Is the new device connected via Ethernet or WiFi?", "help_text": "New/reimaged devices may need specific network access configuration", "options": [ {"id": "new_ethernet", "label": "Ethernet (wired)", "next_node_id": "check_port_security"}, {"id": "new_wifi", "label": "WiFi (wireless)", "next_node_id": "check_wifi_auth"} ], "children": [ { "id": "check_port_security", "type": "decision", "question": "Does your environment use 802.1X or MAC filtering?", "help_text": "Many organizations require device authentication before network access", "options": [ {"id": "has_nac", "label": "Yes, 802.1X / NAC", "next_node_id": "register_device_nac"}, {"id": "has_mac_filter", "label": "Yes, MAC whitelist", "next_node_id": "register_mac"}, {"id": "no_security", "label": "No, open wired access", "next_node_id": "check_single_device"} ], "children": [ { "id": "register_device_nac", "type": "solution", "title": "Register Device in NAC / 802.1X", "description": "New devices must be registered in NAC.\n\n1. Get MAC: `Get-NetAdapter | Select Name,MacAddress`\n2. Register in NAC platform (Cisco ISE, ClearPass, etc.)\n3. Assign correct policy/group\n4. Reconnect cable or restart NIC\n\n**Escalate to:** Network Security if you don't have NAC access." }, { "id": "register_mac", "type": "solution", "title": "Add MAC Address to Whitelist", "description": "Switch port or DHCP requires MAC registration.\n\n```\nGet-NetAdapter | Select Name,MacAddress\n```\n\nRegister in DHCP MAC filtering, switch port security, or network management platform.\n\n**After registration:** Disconnect/reconnect cable, then `ipconfig /renew`" } ] }, { "id": "check_wifi_auth", "type": "solution", "title": "Configure WiFi for New Device", "description": "New devices need WiFi credentials and may need certificate enrollment.\n\n**Steps:**\n1. Connect to correct SSID (corporate, not guest)\n2. Enter credentials or certificates\n3. If MDM managed: Enroll in Intune/JAMF first\n\n**Common issues:** No WiFi profile yet (needs MDM), certificate not enrolled (WPA2-Enterprise), connected to guest instead of corporate." } ] }, { "id": "verify_dhcp_working", "type": "decision", "question": "Is the device now receiving a valid IP address?", "help_text": "Run ipconfig /renew and check for valid (non-169.254) IP", "options": [ {"id": "working", "label": "Yes, DHCP is working", "next_node_id": "solution_dhcp_resolved"}, {"id": "still_failing", "label": "Still not getting an IP", "next_node_id": "check_dhcp_server_health"} ], "children": [ { "id": "solution_dhcp_resolved", "type": "solution", "title": "DHCP Issue Resolved", "description": "Device successfully receiving DHCP lease.\n\n**Verify:**\n```\nipconfig /all\nping \nnslookup google.com\n```\n\n**Document:** Root cause, steps taken, config changes, scope for impact." } ] } ] } } def get_site_to_site_vpn_tree() -> dict[str, Any]: """ Site-to-Site VPN Tunnel Down - Networking tree. Covers IPSec tunnel failures, IKE negotiation, routing, and ISP outages. """ return { "name": "Site-to-Site VPN Tunnel Down", "description": "Troubleshoot site-to-site VPN tunnel failures including IPSec/IKE negotiation issues, routing problems, ISP outages, and configuration mismatches. Covers common firewall vendors and diagnostic approaches.", "category": "Networking", "tree_structure": { "id": "root", "type": "decision", "question": "What are the symptoms of the VPN tunnel issue?", "help_text": "Determine whether the tunnel is completely down or partially working. Can users at the remote site access ANY resources at the main site?", "options": [ {"id": "completely_down", "label": "No connectivity between sites", "next_node_id": "check_tunnel_status"}, {"id": "partial", "label": "Some traffic works, some doesn't", "next_node_id": "check_partial_connectivity"}, {"id": "intermittent", "label": "VPN keeps dropping and reconnecting", "next_node_id": "check_vpn_stability"}, {"id": "slow", "label": "VPN is up but extremely slow", "next_node_id": "check_vpn_performance"} ], "children": [ { "id": "check_tunnel_status", "type": "action", "title": "Verify VPN Tunnel Status on Firewall", "description": "Log into the firewall and check tunnel status.\n\n**FortiGate:**\n```\nget vpn ipsec tunnel summary\ndiagnose vpn ike gateway list name \n```\n\n**SonicWall:** Network > IPSec VPN > Settings\n\n**Meraki:** Security & SD-WAN > Site-to-Site VPN\n\n**Palo Alto:**\n```\nshow vpn ipsec-sa\nshow vpn ike-sa\n```\n\n**pfSense:** Status > IPsec\n\n**Check:** Is Phase 1 (IKE) established? Is Phase 2 (IPSec SA) established?", "next_node_id": "tunnel_status_result" }, { "id": "tunnel_status_result", "type": "decision", "question": "What does the tunnel status show?", "help_text": "Phase 1 (IKE/ISAKMP) establishes identity, Phase 2 (IPSec) carries traffic", "options": [ {"id": "phase1_down", "label": "Phase 1 (IKE) is down", "next_node_id": "troubleshoot_phase1"}, {"id": "phase1_up_phase2_down", "label": "Phase 1 up, Phase 2 down", "next_node_id": "troubleshoot_phase2"}, {"id": "both_up", "label": "Both phases show up", "next_node_id": "check_routing"}, {"id": "cant_check", "label": "Can't log into firewall", "next_node_id": "check_firewall_access"} ], "children": [ { "id": "troubleshoot_phase1", "type": "decision", "question": "What is the Phase 1 failure reason?", "help_text": "Check VPN log on the firewall for specific errors", "options": [ {"id": "timeout", "label": "Timeout / no response from peer", "next_node_id": "check_peer_reachability"}, {"id": "auth_fail", "label": "Authentication / PSK mismatch", "next_node_id": "fix_psk_mismatch"}, {"id": "proposal_mismatch", "label": "No proposal chosen / mismatch", "next_node_id": "fix_phase1_proposal"}, {"id": "id_mismatch", "label": "ID payload mismatch", "next_node_id": "fix_peer_id"} ], "children": [ { "id": "check_peer_reachability", "type": "action", "title": "Check Connectivity to Remote VPN Endpoint", "description": "Phase 1 timing out — remote endpoint may be unreachable.\n\n**From the local firewall:**\n```\nping \n```\n\n**Check in order:**\n1. Can you ping remote peer's public IP?\n2. Has remote site's public IP changed? (ISP change, DHCP WAN)\n3. Is remote firewall online?\n4. Is your ISP having issues?\n\n**Contact remote site:** Can they access internet normally?", "next_node_id": "peer_reach_result" }, { "id": "peer_reach_result", "type": "decision", "question": "Can you reach the remote VPN endpoint?", "help_text": "If ping fails, the issue is upstream of VPN configuration", "options": [ {"id": "peer_unreachable", "label": "Cannot reach remote peer", "next_node_id": "check_isp_wan"}, {"id": "peer_reachable", "label": "Can ping but VPN won't connect", "next_node_id": "check_ike_ports"}, {"id": "ip_changed", "label": "Remote peer IP changed", "next_node_id": "update_peer_ip"} ], "children": [ { "id": "check_isp_wan", "type": "decision", "question": "Is internet working at both sites?", "help_text": "Check if both sites can browse normally", "options": [ {"id": "local_down", "label": "Local internet is down", "next_node_id": "escalate_local_isp"}, {"id": "remote_down", "label": "Remote internet is down", "next_node_id": "escalate_remote_isp"}, {"id": "both_ok", "label": "Both have internet", "next_node_id": "check_ike_ports"} ], "children": [ { "id": "escalate_local_isp", "type": "solution", "title": "Local Internet / ISP Outage", "description": "VPN down due to local internet outage.\n\n**Actions:**\n1. Check ISP status page\n2. Power cycle modem/ONT and edge router\n3. Contact ISP support\n4. Check if WAN IP changed (if dynamic)\n\n**Ticket Notes:** VPN down due to local ISP outage." }, { "id": "escalate_remote_isp", "type": "solution", "title": "Remote Site Internet / ISP Outage", "description": "VPN down due to remote site internet loss.\n\n**Actions:**\n1. Contact someone at remote site\n2. Have them check ISP and power cycle equipment\n3. If dynamic WAN IP, may need VPN update when service returns\n\n**Ticket Notes:** VPN down due to remote site ISP outage." } ] }, { "id": "check_ike_ports", "type": "solution", "title": "Check IKE/NAT-T Ports (UDP 500/4500)", "description": "Can reach peer but VPN won't negotiate. IKE ports may be blocked.\n\n**Required ports:**\n- UDP 500 (IKE/ISAKMP)\n- UDP 4500 (NAT Traversal)\n- Protocol 50 (ESP) if not using NAT-T\n\n**Common causes:** ISP blocking VPN, upstream NAT interference, firewall rules blocking outbound 500/4500.\n\n**Escalate to:** Network team to verify firewall rules and ISP blocking." }, { "id": "update_peer_ip", "type": "solution", "title": "Update Remote Peer IP Address", "description": "Remote site's public IP changed (common with dynamic IP ISPs).\n\n**Actions:**\n1. Get new IP from remote site\n2. Update VPN peer config on local firewall\n3. Update remote firewall if needed\n4. Re-establish tunnel\n\n**Prevention:** Use DDNS, configure VPN with FQDN, or get static IP from ISP.\n\n**Escalate to:** Network admin to update firewall config." } ] }, { "id": "fix_psk_mismatch", "type": "solution", "title": "Fix Pre-Shared Key Mismatch", "description": "Authentication failing — PSKs don't match.\n\n**Common causes:** Key changed on one side, trailing spaces, copy/paste error.\n\n**Resolution:**\n1. Verify PSK on both firewalls independently\n2. If mismatched, update one to match the other\n3. Clear VPN SA and force re-negotiation\n\n**Security:** Don't share PSKs over unencrypted channels.\n\n**Escalate to:** Network admin (requires firewall access on both sides)." }, { "id": "fix_phase1_proposal", "type": "solution", "title": "Fix Phase 1 Proposal Mismatch", "description": "Endpoints can't agree on encryption settings.\n\n**Must match on BOTH sides:**\n- IKE Version (IKEv1 or IKEv2)\n- Encryption (AES-256, AES-128)\n- Hash (SHA-256, SHA-1)\n- DH Group (14, 19, 20)\n- Lifetime (usually 28800 sec / 8 hours)\n\n**Common causes:** Firmware update changed defaults, one side reconfigured.\n\n**Best practice:** IKEv2, AES-256, SHA-256, DH Group 14+\n\n**Escalate to:** Network admin to align proposals." }, { "id": "fix_peer_id", "type": "solution", "title": "Fix Peer ID Mismatch", "description": "IKE Peer ID doesn't match expectations.\n\n**Peer ID can be:** IP address, FQDN, User FQDN (email), or DN (certificate).\n\n**Common causes:** NAT changing source IP, FQDN not resolving, certificate CN mismatch.\n\n**Escalate to:** Network admin to verify and align peer ID on both endpoints." } ] }, { "id": "troubleshoot_phase2", "type": "solution", "title": "Troubleshoot Phase 2 (IPSec SA) Failure", "description": "Phase 1 up but Phase 2 won't establish.\n\n**Must match on BOTH sides:**\n- Encryption (AES-256, AES-128)\n- Hash/integrity (SHA-256, SHA-1)\n- PFS DH Group (must match or both disabled)\n- Lifetime (usually 3600 sec / 1 hour)\n\n**Also check Proxy IDs / Traffic Selectors:**\nLocal and remote subnet definitions must mirror each other.\n\n**Most common Phase 2 issue:** Mismatched proxy IDs.\n\n**Escalate to:** Network admin to compare Phase 2 settings on both firewalls." }, { "id": "check_routing", "type": "action", "title": "Check VPN Routing", "description": "Both phases up but traffic isn't flowing. This is a routing issue.\n\n**From a workstation:**\n```\ntracert \nTest-NetConnection -ComputerName -TraceRoute\n```\n\n**On the firewall:** Check for a route to the remote subnet via the VPN tunnel interface.\n\n**If traffic goes out the internet gateway** instead of VPN, there's a missing or incorrect route.", "next_node_id": "routing_result" }, { "id": "routing_result", "type": "decision", "question": "Is traffic being routed through the VPN tunnel?", "help_text": "Traceroute should show traffic going through VPN interface, not internet gateway", "options": [ {"id": "wrong_route", "label": "Traffic going out internet (wrong route)", "next_node_id": "fix_vpn_routing"}, {"id": "correct_route_blocked", "label": "Routing correct but traffic blocked", "next_node_id": "check_firewall_policy"}, {"id": "route_ok_both", "label": "Routing correct from both sides", "next_node_id": "check_nat_overlap"} ], "children": [ { "id": "fix_vpn_routing", "type": "solution", "title": "Fix VPN Routing", "description": "Traffic not routed through VPN tunnel.\n\n**Common causes:** Missing static route, route overridden by more specific route, policy-based VPN needs firewall policy.\n\n**Escalate to:** Network admin to add/fix routes on firewall." }, { "id": "check_firewall_policy", "type": "solution", "title": "Check Firewall Policy for VPN Traffic", "description": "Routing correct but firewall blocking traffic.\n\nCheck on BOTH firewalls: Is there a policy allowing traffic between local and remote subnets via VPN interface? Are correct ports allowed? Check deny rule logs.\n\n**Escalate to:** Network/Security admin to review policies." }, { "id": "check_nat_overlap", "type": "solution", "title": "Check NAT or Subnet Overlap Issues", "description": "If both sites use the same subnet (e.g., 192.168.1.0/24), VPN traffic fails.\n\n**Check:** Overlapping IP ranges? NAT applied to VPN traffic incorrectly? VPN exempted from outbound NAT?\n\n**If subnets overlap:** Requires NAT on tunnel or re-addressing.\n**If NAT issue:** Add NAT exemption rule for VPN subnets.\n\n**Escalate to:** Network admin for firewall changes." } ] }, { "id": "check_firewall_access", "type": "solution", "title": "Cannot Access Firewall Management", "description": "Unable to log into the firewall.\n\n**Try:** Web GUI, SSH/console, different workstation, check management VLAN.\n\n**If completely unresponsive:** Check power, try console access (serial cable for physical appliances).\n\n**Escalate to:** Network admin or vendor support." } ] }, { "id": "check_partial_connectivity", "type": "decision", "question": "What specifically works and what doesn't across the VPN?", "help_text": "Test: ping (ICMP), file shares (SMB/445), RDP (3389), web (80/443)", "options": [ {"id": "ping_works_apps_dont", "label": "Ping works but apps don't (RDP, file shares)", "next_node_id": "check_mtu"}, {"id": "some_hosts", "label": "Can reach some hosts but not others", "next_node_id": "check_subnet_selectors"}, {"id": "one_direction", "label": "Works one direction but not the other", "next_node_id": "check_asymmetric"} ], "children": [ { "id": "check_mtu", "type": "solution", "title": "MTU / Fragmentation Issue", "description": "Ping works but larger packets fail. Classic MTU issue.\n\n**Test:**\n```\nping -f -l 1500\nping -f -l 1400\n```\n\n**The -f flag prevents fragmentation.** Find the largest working size.\n\n**Fix:** Enable MSS clamping on the firewall VPN interface (set TCP MSS to 1360-1400).\n\n**Escalate to:** Network admin for MSS clamping configuration." }, { "id": "check_subnet_selectors", "type": "solution", "title": "Check VPN Phase 2 Subnet Selectors", "description": "Some hosts reachable, others not. VPN may not cover all subnets.\n\nPhase 2 selectors define which subnets traverse the VPN. If a subnet isn't listed, that traffic won't use the tunnel.\n\n**Fix:** Add Phase 2 entries for missing subnets on both firewalls.\n\n**Escalate to:** Network admin to review Phase 2 selectors." }, { "id": "check_asymmetric", "type": "solution", "title": "Investigate Asymmetric Routing", "description": "Traffic works one direction only.\n\n**Common causes:**\n1. Missing return route on one side\n2. Firewall policy only on one side\n3. NAT interference on one side\n\nRun traceroute from BOTH directions and compare.\n\n**Escalate to:** Network admin to check routes and policies on both endpoints." } ] }, { "id": "check_vpn_stability", "type": "decision", "question": "How frequently is the VPN dropping?", "help_text": "Check VPN logs for reconnection frequency and patterns", "options": [ {"id": "every_few_hours", "label": "Drops regularly (every few hours)", "next_node_id": "check_lifetime_mismatch"}, {"id": "random_drops", "label": "Random drops throughout the day", "next_node_id": "check_dpd_keepalive"}, {"id": "daily_pattern", "label": "Drops at same time daily", "next_node_id": "check_scheduled_tasks"} ], "children": [ { "id": "check_lifetime_mismatch", "type": "solution", "title": "Check Phase 1/Phase 2 Lifetime Mismatch", "description": "Regular drops suggest a lifetime/rekey issue.\n\n**Phase 2 should ALWAYS be shorter than Phase 1.**\n- Phase 1: 28800 seconds (8 hours)\n- Phase 2: 3600 seconds (1 hour)\n\n**Common problem:** Phase 2 longer than Phase 1 causes failure during rekey.\n\n**Escalate to:** Network admin to align lifetime settings." }, { "id": "check_dpd_keepalive", "type": "solution", "title": "Check Dead Peer Detection (DPD) Settings", "description": "Random drops may be aggressive DPD killing the tunnel.\n\n**Recommended DPD:** Interval 10-30s, Retry 3-5 attempts, Action: Restart.\n\n**Also check WAN stability:**\n```\nTest-Connection -ComputerName -Count 100 | Measure-Object -Property ResponseTime -Average -Maximum\n```\n\n**Escalate to:** Network admin to adjust DPD and check WAN stability." }, { "id": "check_scheduled_tasks", "type": "solution", "title": "Investigate Daily Drop Pattern", "description": "Same-time drops suggest a scheduled event.\n\n**Common causes:** Backup jobs saturating WAN, scheduled firewall changes, ISP maintenance, auto-update/reboot schedule, DHCP WAN lease renewal.\n\nCorrelate drop time with firewall system logs and scheduled events." } ] }, { "id": "check_vpn_performance", "type": "solution", "title": "VPN Performance Issues", "description": "VPN up but slow.\n\n**Test:**\n```\nTest-Connection -ComputerName -Count 20 | Measure-Object -Property ResponseTime -Average -Maximum\n```\n\n**Common causes and fixes:**\n1. Slow WAN at one/both sites — upgrade internet\n2. Underpowered firewall — check CPU during VPN traffic\n3. MTU/fragmentation — enable MSS clamping\n4. Too much traffic — add QoS/traffic shaping\n5. High latency (distance) — normal for distant sites\n\n**Escalate to:** Network admin with throughput test results and baseline speeds." } ] } } # ============================================================================= # SEEDING INFRASTRUCTURE # ============================================================================= async def get_admin_token(client: httpx.AsyncClient) -> str: """Authenticate with admin credentials.""" if not ADMIN_EMAIL or not ADMIN_PASSWORD: raise Exception("Admin credentials not provided. Use --email and --password.") login_response = await client.post( f"{API_BASE_URL}/auth/login", data={"username": ADMIN_EMAIL, "password": ADMIN_PASSWORD} ) if login_response.status_code != 200: raise Exception(f"Failed to login: {login_response.text}") return login_response.json()["access_token"] async def create_tree(client: httpx.AsyncClient, token: str, tree_data: dict, category_id: str | None = None) -> dict | None: """Create a tree via the API. Returns None if tree already exists.""" headers = {"Authorization": f"Bearer {token}"} tree_data["is_default"] = True tree_data["is_public"] = True # Normalize description -> action/solution fields normalize_tree_structure(tree_data) # Set category_id if available (future-proof global categories) if category_id: tree_data["category_id"] = category_id list_response = await client.get(f"{API_BASE_URL}/trees", headers=headers) if list_response.status_code == 200: existing_trees = list_response.json() for tree in existing_trees: if tree["name"] == tree_data["name"]: if not tree.get("is_public") or not tree.get("is_default"): await client.put( f"{API_BASE_URL}/trees/{tree['id']}", json={"is_public": True, "is_default": True}, headers=headers ) print(f" [UPDATE] '{tree_data['name']}' visibility updated") return None print(f" [SKIP] '{tree_data['name']}' already exists") return None response = await client.post( f"{API_BASE_URL}/trees", json=tree_data, headers=headers ) if response.status_code not in (200, 201): raise Exception(f"Failed to create '{tree_data['name']}': {response.text}") tree = response.json() print(f" [OK] Created '{tree_data['name']}' (ID: {tree['id']})") return tree async def seed_database(): """Main seeding function.""" print("\n" + "=" * 60) print(" RESOLUTIONFLOW - Batch 2 Trees Seeder") print(" Networking | Active Directory | Microsoft 365") print("=" * 60) async with httpx.AsyncClient(timeout=60.0) as client: try: health_check = await client.get(f"{API_BASE_URL.replace('/api/v1', '')}/health") if health_check.status_code != 200: print(f"\n[ERROR] API health check failed: {health_check.status_code}") return False except httpx.ConnectError: print("\n[ERROR] Cannot connect to API server") print(f" Make sure the server is running at {API_BASE_URL}") return False print("\n[1/3] Authenticating...") try: token = await get_admin_token(client) print(f" Logged in as {ADMIN_EMAIL}") except Exception as e: print(f" [ERROR] {e}") return False print("\n[2/5] Setting up global categories...") all_category_names = ["Networking", "Active Directory / Entra ID", "Microsoft 365"] try: category_map = await ensure_global_categories(client, token, all_category_names) print(f" {len(category_map)} categories ready") except Exception as e: print(f" [WARN] Category setup failed: {e}") print(f" Falling back to legacy text categories") category_map = {} print("\n[3/5] Preparing decision trees...") trees_to_create = [ ("Networking", get_dns_resolution_tree()), ("Networking", get_dhcp_issues_tree()), ("Networking", get_site_to_site_vpn_tree()), # Active Directory / Entra ID ("Active Directory / Entra ID", get_repeated_lockout_tree()), ("Active Directory / Entra ID", get_ad_replication_tree()), ("Active Directory / Entra ID", get_gpo_not_applying_tree()), ("Active Directory / Entra ID", get_entra_id_sync_tree()), ("Active Directory / Entra ID", get_domain_join_tree()), ("Active Directory / Entra ID", get_kerberos_auth_tree()), # Microsoft 365 (Batch 3) ("Microsoft 365", get_teams_call_quality_tree()), ("Microsoft 365", get_onedrive_sync_tree()), ("Microsoft 365", get_mail_flow_tree()), ("Microsoft 365", get_sharepoint_permissions_tree()), ("Microsoft 365", get_mfa_lockout_tree()), ("Microsoft 365", get_license_assignment_tree()), # Additional Networking (Batch 4) ("Networking", get_bandwidth_slow_internet_tree()), ("Networking", get_wireless_connectivity_tree()), ("Networking", get_firewall_blocking_tree()), ] print(f" Found {len(trees_to_create)} trees to seed\n") print("[4/5] Creating decision trees...") created_count = 0 skipped_count = 0 current_category = None for category, tree_data in trees_to_create: if category != current_category: print(f"\n {category}:") current_category = category try: cat_id = category_map.get(category) if category_map else None result = await create_tree(client, token, tree_data, category_id=cat_id) if result: created_count += 1 else: skipped_count += 1 except Exception as e: print(f" [FAIL] '{tree_data['name']}': {e}") print("\n[5/5] Summary") print("=" * 60) print(" SEEDING COMPLETE") print("=" * 60) print(f" Global categories: {len(category_map)}") print(f" Trees created: {created_count}") print(f" Trees skipped: {skipped_count}") print(f" Total: {created_count + skipped_count}") print() return True def main(): parser = argparse.ArgumentParser( description="Seed ResolutionFlow with Batch 2 trees" ) parser.add_argument("--api-url", default="http://localhost:8000/api/v1") parser.add_argument("--email", required=True, help="Admin email") parser.add_argument("--password", required=True, help="Admin password") args = parser.parse_args() global API_BASE_URL, ADMIN_EMAIL, ADMIN_PASSWORD API_BASE_URL = args.api_url ADMIN_EMAIL = args.email ADMIN_PASSWORD = args.password success = asyncio.run(seed_database()) exit(0 if success else 1) if __name__ == "__main__": main()