diff --git a/backend/scripts/seed_trees_ad.py b/backend/scripts/seed_trees_ad.py new file mode 100644 index 00000000..133efacb --- /dev/null +++ b/backend/scripts/seed_trees_ad.py @@ -0,0 +1,761 @@ +#!/usr/bin/env python3 +""" +ResolutionFlow Decision Trees - Batch 2b: Active Directory / Entra ID + +Six AD/Entra ID troubleshooting trees for MSP engineers. +Imported by seed_trees_v2.py for seeding. +""" + +from typing import Any + + +def get_repeated_lockout_tree() -> dict[str, Any]: + """User Account Locked Out (Repeated) - AD tree.""" + return { + "name": "User Account Locked Out (Repeated)", + "description": "Investigate and resolve repeated Active Directory account lockouts. Covers lockout source identification, common causes like stale credentials, service accounts, and mobile devices, with PowerShell diagnostics.", + "category": "Active Directory", + "tree_structure": { + "id": "root", + "type": "decision", + "question": "Is this a one-time lockout or has the user been locked out multiple times recently?", + "help_text": "Check AD account properties and recent lockout history. 
A single lockout is usually a forgotten password; repeated lockouts indicate a deeper issue.", + "options": [ + {"id": "one_time", "label": "First or one-time lockout", "next_node_id": "simple_unlock"}, + {"id": "repeated", "label": "Multiple lockouts (keeps happening)", "next_node_id": "find_lockout_source"}, + {"id": "many_users", "label": "Multiple users getting locked out", "next_node_id": "check_brute_force"} + ], + "children": [ + { + "id": "simple_unlock", + "type": "action", + "title": "Unlock Account and Verify", + "description": "Simple lockout — unlock and confirm.\n\n**PowerShell:**\n```\nUnlock-ADAccount -Identity \"username\"\nGet-ADUser -Identity \"username\" -Properties LockedOut,PasswordLastSet,PasswordExpired\n```\n\n**Ask the user:**\n- Did you recently change your password?\n- Are you typing the right password?\n- Is Caps Lock on?\n\n**If password expired:** Reset it.\n**If user forgot password:** Reset and have them set a new one at next login.", + "next_node_id": "verify_simple_unlock" + }, + { + "id": "verify_simple_unlock", + "type": "decision", + "question": "Can the user log in successfully now?", + "help_text": "Have the user try logging in after the unlock", + "options": [ + {"id": "success", "label": "Yes, user is logged in", "next_node_id": "solution_simple_unlock"}, + {"id": "locked_again", "label": "User got locked out again within minutes", "next_node_id": "find_lockout_source"}, + {"id": "wrong_password", "label": "User says password is wrong (but it's correct in AD)", "next_node_id": "check_password_sync"} + ], + "children": [ + { + "id": "solution_simple_unlock", + "type": "solution", + "title": "Resolved: Account Unlocked", + "description": "Simple lockout resolved by unlocking the account.\n\n**Ticket Notes:** Account was locked due to failed login attempts. Unlocked via PowerShell. User confirmed successful login.\n\n**If this recurs:** Use the 'repeated lockout' path to investigate the source." 
+ }, + { + "id": "check_password_sync", + "type": "action", + "title": "Check Password Sync Status", + "description": "User's password works in AD but not at the login prompt. This may be a sync/replication issue.\n\n**Check AD replication:**\n```\nrepadmin /replsummary\nrepadmin /showrepl\n```\n\n**Check which DC the user is authenticating against:**\n```\nnltest /dsgetdc:yourdomain.local\necho %LOGONSERVER%\n```\n\n**If using Entra ID / M365:** Check if password hash sync is current in Entra Connect.\n\n**Common cause:** Password was reset on DC1 but DC2 hasn't replicated yet. User's workstation is authenticating against DC2.", + "next_node_id": "find_lockout_source" + } + ] + }, + { + "id": "find_lockout_source", + "type": "action", + "title": "Identify Lockout Source Computer", + "description": "Find which computer or device is causing the lockouts.\n\n**Step 1: Find the PDC Emulator** (lockout events are forwarded here):\n```\nGet-ADDomain | Select PDCEmulator\n```\n\n**Step 2: Query lockout events on the PDC:**\n```\nGet-WinEvent -ComputerName pdcemulator.yourdomain.local -FilterHashtable @{\n LogName='Security'\n Id=4740\n} -MaxEvents 20 | Where-Object {\n $_.Properties[0].Value -eq 'username'\n} | Select TimeCreated,\n @{N='User';E={$_.Properties[0].Value}},\n @{N='SourceComputer';E={$_.Properties[1].Value}}\n```\n\n**Alternative:** Use Microsoft Account Lockout Status Tool (LockoutStatus.exe) for a GUI approach.\n\n**Document:** The source computer name and timestamps.", + "next_node_id": "lockout_source_result" + }, + { + "id": "lockout_source_result", + "type": "decision", + "question": "What is the lockout source?", + "help_text": "The SourceComputer field in Event 4740 tells you where the bad attempts come from", + "options": [ + {"id": "user_workstation", "label": "User's own workstation", "next_node_id": "check_cached_creds_workstation"}, + {"id": "mobile_device", "label": "Mobile device or Exchange/ActiveSync", "next_node_id": "check_mobile_device"}, + {"id": "server", "label": 
"A server (file server, app server, etc.)", "next_node_id": "check_service_account"}, + {"id": "multiple_sources", "label": "Multiple different source computers", "next_node_id": "check_brute_force"}, + {"id": "cant_determine", "label": "Source is blank or can't determine", "next_node_id": "enable_netlogon_logging"} + ], + "children": [ + { + "id": "check_cached_creds_workstation", + "type": "action", + "title": "Check for Cached/Saved Credentials on Workstation", + "description": "The user's own workstation is sending bad credentials.\n\n**Check on the user's workstation:**\n\n**1. Windows Credential Manager:**\n```\nrundll32.exe keymgr.dll, KRShowKeyMgr\n# Or: Control Panel > Credential Manager\n```\nLook for saved credentials with old passwords.\n\n**2. Mapped drives with saved credentials:**\n```\nnet use\n```\nCheck for drives mapped with explicit credentials.\n\n**3. Scheduled tasks running as the user:**\n```\nGet-ScheduledTask | Where-Object {$_.Principal.UserId -like '*username*'}\n```\n\n**4. Browser saved passwords** — check Edge, Chrome for saved domain passwords.\n\n**5. 
RDP saved connections** — check for .rdp files with saved credentials.", + "next_node_id": "cached_cred_result" + }, + { + "id": "cached_cred_result", + "type": "decision", + "question": "Did you find stale credentials?", + "help_text": "Any saved password that doesn't match the current AD password will cause lockouts", + "options": [ + {"id": "found_cred_manager", "label": "Found old entries in Credential Manager", "next_node_id": "fix_credential_manager"}, + {"id": "found_mapped_drive", "label": "Found mapped drive with saved creds", "next_node_id": "fix_mapped_drives"}, + {"id": "found_scheduled_task", "label": "Found scheduled task running as user", "next_node_id": "fix_scheduled_task"}, + {"id": "nothing_found", "label": "Nothing obvious found", "next_node_id": "check_deeper_sources"} + ], + "children": [ + { + "id": "fix_credential_manager", + "type": "solution", + "title": "Resolved: Remove Stale Credential Manager Entries", + "description": "Old passwords saved in Credential Manager were causing lockouts.\n\n**Fix:**\n1. Open Credential Manager (Control Panel)\n2. Under 'Windows Credentials', find entries for your domain\n3. Remove or update entries with the correct password\n4. Restart the workstation\n5. Unlock the AD account: `Unlock-ADAccount -Identity \"username\"`\n\n**Prevention:** Educate user that after password changes, they should update saved credentials.\n\n**Ticket Notes:** Stale credentials in Credential Manager causing lockouts. Entries removed/updated." 
+ }, + { + "id": "fix_mapped_drives", + "type": "solution", + "title": "Resolved: Fix Mapped Drive Credentials", + "description": "A mapped network drive was using old credentials.\n\n**Fix:**\n```\n# Remove the problematic mapping\nnet use Z: /delete\n\n# Remap without saved credentials (will use current login)\nnet use Z: \\\\server\\share /persistent:yes\n```\n\n**Or use Group Policy** to manage drive mappings (preferred for enterprise).\n\n**After fixing:** Unlock the account and monitor for recurrence." + }, + { + "id": "fix_scheduled_task", + "type": "solution", + "title": "Resolved: Fix Scheduled Task Credentials", + "description": "A scheduled task was running with the user's old password.\n\n**Fix:**\n1. Open Task Scheduler on the affected machine\n2. Find the task running as the user\n3. Update the password in the task properties\n\n**PowerShell:**\n```\nGet-ScheduledTask | Where-Object {$_.Principal.UserId -like '*username*'} | Select TaskName,TaskPath\n```\n\n**Best practice:** Scheduled tasks should use service accounts, not user accounts.\n\n**After fixing:** Unlock the account." + }, + { + "id": "check_deeper_sources", + "type": "action", + "title": "Check Less Obvious Lockout Sources", + "description": "Common sources cleared. Check these less obvious causes:\n\n**1. Outlook/Teams on another device:**\nIs the user logged into Outlook or Teams on a second computer, tablet, or phone with old password?\n\n**2. WiFi authentication (802.1X):**\nIf your WiFi uses domain credentials, the saved WiFi password may be old.\n\n**3. VPN client:**\nSaved VPN credentials with old password.\n\n**4. Applications with saved logins:**\nLOB apps, web portals using Windows auth.\n\n**5. Another user's machine:**\nIs someone else trying to access a share using this person's credentials?\n\n**Ask the user:** Have you logged into any other devices recently? Changed your password recently? 
Using any company apps on your phone?", + "next_node_id": "escalate_persistent_lockout" + }, + { + "id": "escalate_persistent_lockout", + "type": "solution", + "title": "Escalate: Persistent Lockout - Source Unknown", + "description": "Unable to identify the lockout source through standard methods.\n\n**Advanced investigation needed:**\n1. Enable detailed Netlogon logging on DCs\n2. Use network packet capture to find authentication attempts\n3. Review RADIUS/NPS logs if using 802.1X\n4. Check Entra ID sign-in logs for cloud auth attempts\n\n**Temporary workaround:**\n- Increase account lockout threshold temporarily\n- Or add user to a 'lockout exempt' fine-grained password policy (if available)\n\n**Escalate to:** Senior Systems Administrator\n**Include:** Event 4740 logs, source computers found, items already checked." + } + ] + }, + { + "id": "check_mobile_device", + "type": "solution", + "title": "Fix Mobile Device / Exchange ActiveSync", + "description": "A mobile device (phone/tablet) is sending old credentials via ActiveSync or Outlook mobile.\n\n**Fix:**\n1. Have the user update their password on their mobile device:\n - iPhone: Settings > Passwords & Accounts > Exchange > re-enter password\n - Android: Settings > Accounts > Exchange > update password\n - Outlook Mobile: Profile > Account > re-enter password\n2. If that doesn't work, remove and re-add the email account on the device\n\n**To confirm it's ActiveSync:**\nCheck Exchange/M365 ActiveSync logs for the user:\n```\nGet-MobileDeviceStatistics -Mailbox user@domain.com | Select DeviceFriendlyName,LastSyncAttemptTime,Status\n```\n\n**After fixing:** Unlock the AD account.\n\n**Prevention:** Consider using Intune or MDM to manage device password policies." 
+ }, + { + "id": "check_service_account", + "type": "solution", + "title": "Fix Service or Application Using User Credentials", + "description": "A server or application is using this user's credentials (usually incorrectly).\n\n**Check on the source server:**\n```\n# Services running as this user\nGet-WmiObject Win32_Service | Where-Object {$_.StartName -like '*username*'} | Select Name,StartName,State\n\n# IIS App Pools\nGet-IISAppPool | Where-Object {$_.ProcessModel.UserName -like '*username*'}\n\n# Scheduled Tasks\nGet-ScheduledTask | Where-Object {$_.Principal.UserId -like '*username*'}\n\n# COM+ Applications\n# Check via Component Services (dcomcnfg)\n```\n\n**Best practice:** Services should use dedicated service accounts (preferably Managed Service Accounts), never personal user accounts.\n\n**Fix:** Update the password in the service/app or migrate to a proper service account.\n\n**After fixing:** Unlock the AD account." + }, + { + "id": "check_brute_force", + "type": "action", + "title": "Investigate Potential Brute Force Attack", + "description": "Multiple users getting locked out or lockouts from many different sources could indicate an attack.\n\n**Check Security Event Log for patterns:**\n```\n# Failed logon attempts (Event 4625)\nGet-WinEvent -FilterHashtable @{LogName='Security';Id=4625} -MaxEvents 100 |\n Group-Object {$_.Properties[5].Value} | Sort Count -Descending |\n Select Count,Name -First 20\n```\n\n**Red flags:**\n- Lockouts from unknown/external IPs\n- Lockouts happening at unusual hours\n- Many accounts targeted simultaneously\n- Attempts from multiple geographic locations\n\n**If this looks like an attack:**\n1. Do NOT just unlock accounts — investigate first\n2. Check if any accounts were actually compromised\n3. 
Review VPN and external-facing authentication logs", + "next_node_id": "brute_force_result" + }, + { + "id": "brute_force_result", + "type": "decision", + "question": "Does this appear to be a security incident?", + "help_text": "Look at the pattern of lockouts, source IPs, and timing", + "options": [ + {"id": "likely_attack", "label": "Yes, appears to be an attack / security incident", "next_node_id": "escalate_security"}, + {"id": "not_attack", "label": "No, appears to be a system/config issue", "next_node_id": "check_common_mass_lockout"} + ], + "children": [ + { + "id": "escalate_security", + "type": "solution", + "title": "SECURITY INCIDENT: Escalate Immediately", + "description": "**Priority: CRITICAL — Potential security incident.**\n\n**Do NOT just unlock accounts.**\n\n**Immediate actions:**\n1. Document all affected accounts and lockout sources\n2. Check if any accounts show successful logins from suspicious IPs\n3. Preserve event logs for forensics\n4. Check if MFA was bypassed\n\n**Escalate to:** Security team / CISO immediately\n**Include:** Event log exports, list of affected accounts, source IPs, timeline\n\n**Consider:**\n- Blocking suspicious source IPs at the firewall\n- Forcing password resets for affected accounts\n- Enabling enhanced logging\n\n**Communication:** Follow your incident response plan." + }, + { + "id": "check_common_mass_lockout", + "type": "solution", + "title": "Investigate Mass Lockout (Non-Security)", + "description": "Multiple users locked out but doesn't appear to be an attack.\n\n**Common causes of mass lockouts:**\n\n1. **Password policy change:** New policy locked accounts that don't comply\n2. **Application with hardcoded credentials:** An app using a shared credential that was changed\n3. **GPO change:** New GPO tightened lockout thresholds\n4. **Service account cascade:** A service account got locked, causing dependent services to fail and retry\n5. 
**Kerberos ticket issues:** Time sync problem between DCs and clients\n\n**Check:**\n```\n# Recent GPO changes\nGet-GPO -All | Sort ModificationTime -Descending | Select DisplayName,ModificationTime -First 10\n\n# Time sync\nw32tm /query /status\n```\n\n**Escalate to:** Senior AD Administrator with pattern analysis." + } + ] + }, + { + "id": "enable_netlogon_logging", + "type": "solution", + "title": "Enable Netlogon Logging for Detailed Tracking", + "description": "Event 4740 doesn't show the source. Enable Netlogon debug logging.\n\n**On the PDC Emulator:**\n```\n# Enable Netlogon debug logging\nnltest /dbflag:0x2080ffff\n\n# Log location\n# C:\\Windows\\debug\\netlogon.log\n```\n\n**Wait for the next lockout**, then search the log:\n```\nSelect-String -Path C:\\Windows\\debug\\netlogon.log -Pattern 'username'\n```\n\n**IMPORTANT:** Disable logging after troubleshooting:\n```\nnltest /dbflag:0x0\n```\n\nNetlogon logging is verbose and can fill disk space if left on.\n\n**Escalate to:** Senior AD admin if you need help interpreting the logs." + } + ] + } + ] + } + } + + +def get_ad_replication_tree() -> dict[str, Any]: + """AD Replication Failures - Systems administration tree.""" + return { + "name": "AD Replication Failures", + "description": "Diagnose and resolve Active Directory replication issues between domain controllers. 
Covers repadmin diagnostics, common error codes, DNS dependencies, and RPC connectivity troubleshooting.", + "category": "Active Directory", + "tree_structure": { + "id": "root", + "type": "decision", + "question": "How was the AD replication issue discovered?", + "help_text": "Replication failures can cause inconsistent data across DCs — different users see different results for passwords, group memberships, GPOs, and DNS.", + "options": [ + {"id": "monitoring_alert", "label": "Monitoring alert / repadmin check", "next_node_id": "run_repl_diagnostics"}, + {"id": "user_symptoms", "label": "User-reported symptoms (password not working on some PCs, etc.)", "next_node_id": "confirm_repl_issue"}, + {"id": "dcdiag_failure", "label": "DCDiag reported failures", "next_node_id": "run_repl_diagnostics"}, + {"id": "new_dc", "label": "New DC not replicating", "next_node_id": "check_new_dc"} + ], + "children": [ + { + "id": "confirm_repl_issue", + "type": "action", + "title": "Confirm This Is a Replication Issue", + "description": "User symptoms may or may not be replication. 
Quick check:\n\n```\nrepadmin /replsummary\n```\n\nIf you see failures or large 'number of failures' counts, replication is broken.\n\n**Also try:**\n```\nrepadmin /showrepl\ndcdiag /test:replications\n```\n\n**If replication looks healthy:** The user's issue is likely something else (password reset needed, group membership change, etc.)", + "next_node_id": "repl_confirmed" + }, + { + "id": "repl_confirmed", + "type": "decision", + "question": "Does repadmin /replsummary show failures?", + "help_text": "Look for non-zero failure counts and error codes", + "options": [ + {"id": "yes_failures", "label": "Yes, replication failures shown", "next_node_id": "run_repl_diagnostics"}, + {"id": "no_failures", "label": "No, replication looks healthy", "next_node_id": "solution_repl_healthy"} + ], + "children": [ + { + "id": "solution_repl_healthy", + "type": "solution", + "title": "AD Replication is Healthy", + "description": "Replication is working correctly. The user's issue has a different root cause.\n\n**Common alternative causes for 'replication-like' symptoms:**\n- Password was recently changed and user hit a DC that hasn't processed it yet (wait 15 min, normal delay)\n- Group membership change (Kerberos ticket needs renewal — user must log out/in)\n- DNS stale record (different from AD replication)\n\n**Ticket Notes:** AD replication verified healthy. User issue has different root cause." 
+ } + ] + }, + { + "id": "run_repl_diagnostics", + "type": "action", + "title": "Run Detailed Replication Diagnostics", + "description": "Gather comprehensive replication status.\n\n```\n# Summary of all replication partnerships\nrepadmin /replsummary\n\n# Detailed per-DC replication status\nrepadmin /showrepl * /csv > C:\\temp\\replstatus.csv\n\n# Check for lingering objects (advisory mode only reports, it removes nothing)\nrepadmin /removelingeringobjects destdc.yourdomain.local source-dc-guid DC=yourdomain,DC=local /advisory_mode\n\n# Full DC health check\ndcdiag /v /c /d /e /s:dcname\n```\n\n**Key things to note:**\n- Which DCs are failing?\n- What error codes are shown?\n- How long has replication been failing?\n- Is it one-way or both directions?", + "next_node_id": "repl_error_type" + }, + { + "id": "repl_error_type", + "type": "decision", + "question": "What replication error code or message do you see?", + "help_text": "Check the error code in repadmin /showrepl output", + "options": [ + {"id": "rpc_error", "label": "RPC server unavailable (Error 1722)", "next_node_id": "fix_rpc"}, + {"id": "dns_error", "label": "DNS lookup failure (Error 8524)", "next_node_id": "fix_repl_dns"}, + {"id": "access_denied", "label": "Access denied (Error 8453/5)", "next_node_id": "fix_repl_access"}, + {"id": "schema_mismatch", "label": "Schema mismatch / version error", "next_node_id": "fix_schema"}, + {"id": "other_error", "label": "Different error or not sure", "next_node_id": "general_repl_troubleshooting"} + ], + "children": [ + { + "id": "fix_rpc", + "type": "action", + "title": "Fix RPC Connectivity (Error 1722)", + "description": "AD replication uses RPC. 
Error 1722 means DCs can't communicate.\n\n**Test RPC connectivity:**\n```\n# Test from source DC to destination DC\nTest-NetConnection -ComputerName targetdc.yourdomain.local -Port 135\nTest-NetConnection -ComputerName targetdc.yourdomain.local -Port 445\n\n# Test RPC endpoint mapper\nportqry -n targetdc.yourdomain.local -e 135\n```\n\n**Common causes:**\n- Firewall blocking RPC ports (135 + dynamic range 49152-65535)\n- DC is offline or unreachable\n- DNS returning wrong IP for the DC\n- Windows Firewall enabled with wrong rules\n\n**Check DNS resolution for the DC:**\n```\nnslookup targetdc.yourdomain.local\nnslookup targetdc-ip-address\n```", + "next_node_id": "rpc_result" + }, + { + "id": "rpc_result", + "type": "decision", + "question": "Can you reach the target DC on port 135?", + "help_text": "Test-NetConnection result", + "options": [ + {"id": "port_blocked", "label": "Port 135 blocked", "next_node_id": "escalate_rpc_firewall"}, + {"id": "dc_offline", "label": "DC is completely unreachable", "next_node_id": "escalate_dc_offline"}, + {"id": "port_open_still_fails", "label": "Port open but replication still fails", "next_node_id": "check_rpc_dynamic_ports"} + ], + "children": [ + { + "id": "escalate_rpc_firewall", + "type": "solution", + "title": "Escalate: Firewall Blocking AD Replication", + "description": "A firewall is blocking RPC between DCs.\n\n**Required ports for AD replication:**\n- TCP 135 (RPC Endpoint Mapper)\n- TCP 389 (LDAP)\n- TCP 636 (LDAP SSL)\n- TCP 3268 (Global Catalog)\n- TCP 88 (Kerberos)\n- TCP 445 (SMB)\n- TCP 49152-65535 (RPC dynamic ports)\n - Or restrict to a fixed port range via registry\n\n**Escalate to:** Network team to open required ports between DCs.\n**Priority:** High — AD replication is critical infrastructure." + }, + { + "id": "escalate_dc_offline", + "type": "solution", + "title": "Escalate: Domain Controller Offline", + "description": "The target DC is unreachable.\n\n**Check:**\n1. Is the server powered on? (hypervisor, iLO/iDRAC)\n2. Is the OS running? (try RDP, ping)\n3. 
Was it recently decommissioned?\n\n**If permanently offline:** The DC metadata needs to be cleaned from AD:\n```\nntdsutil\n metadata cleanup\n connections\n connect to server workingdc.yourdomain.local\n quit\n select operation target\n list domains\n ...\n```\n\n**Escalate to:** Senior AD Administrator\n**Priority:** High" + }, + { + "id": "check_rpc_dynamic_ports", + "type": "solution", + "title": "Check RPC Dynamic Port Range", + "description": "Port 135 is open but RPC dynamic ports may be blocked.\n\nAD replication uses dynamic RPC ports (49152-65535 by default).\n\n**To pin AD replication to a single static port** (makes firewall rules easier):\n```\n# On each DC - set a fixed AD replication RPC port\nreg add HKLM\\SYSTEM\\CurrentControlSet\\Services\\NTDS\\Parameters /v \"TCP/IP Port\" /t REG_DWORD /d 50000\n```\nRestart the NTDS service after.\n\n**Escalate to:** Network team with the dynamic port range information." + } + ] + }, + { + "id": "fix_repl_dns", + "type": "solution", + "title": "Fix DNS Issues Blocking Replication", + "description": "AD replication depends heavily on DNS. DCs find each other via SRV records.\n\n**Check DNS health:**\n```\n# Verify DC SRV records exist\nnslookup -type=srv _ldap._tcp.dc._msdcs.yourdomain.local\n\n# Re-register DC DNS records\nipconfig /registerdns\nnet stop netlogon && net start netlogon\n\n# Verify DNS on the DC\ndcdiag /test:dns /v\n```\n\n**Common causes:**\n- DC's DNS records missing or stale\n- DC pointing to wrong DNS server\n- DNS zone not replicating\n\n**Each DC should point to:** Itself and at least one other DC for DNS.\n\n**Escalate to:** DNS/AD Administrator if records are missing and won't re-register." 
+ }, + { + "id": "fix_repl_access", + "type": "solution", + "title": "Fix Access Denied Errors in Replication", + "description": "Replication is being denied — authentication or permission issue.\n\n**Common causes:**\n- Time skew between DCs (Kerberos requires <5 min difference)\n- Computer account password expired\n- Permissions removed from DC object in AD\n\n**Check time sync:**\n```\nw32tm /query /status\nw32tm /query /peers\n\n# Force time resync\nw32tm /resync /force\n```\n\n**If time is more than 5 minutes off:** Kerberos will fail. Fix time sync first.\n\n**Check secure channel:**\n```\nTest-ComputerSecureChannel -Verbose\nTest-ComputerSecureChannel -Repair\n```\n\n**Escalate to:** Senior AD Administrator if permissions or secure channel repair fails." + }, + { + "id": "fix_schema", + "type": "solution", + "title": "Escalate: Schema Version Mismatch", + "description": "Schema versions don't match between DCs.\n\n**Check schema version:**\n```\nGet-ADObject (Get-ADRootDSE).schemaNamingContext -Properties objectVersion | Select objectVersion\n```\n\n**This usually happens when:** A DC was promoted or demoted improperly, or an AD upgrade (schema extension) partially completed.\n\n**This requires:** Senior AD administrator intervention. Do not attempt schema repairs without expertise.\n\n**Escalate to:** Senior AD Administrator / Directory Services specialist\n**Priority:** High — schema issues can corrupt the directory." + }, + { + "id": "general_repl_troubleshooting", + "type": "solution", + "title": "General Replication Troubleshooting", + "description": "For errors not covered above, try these general steps:\n\n**1. Force replication:**\n```\nrepadmin /syncall /APed\n```\n\n**2. Check DC health:**\n```\ndcdiag /v /c\n```\n\n**3. Check event logs:**\n```\nGet-WinEvent -FilterHashtable @{LogName='Directory Service';Level=2,3} -MaxEvents 20\n```\n\n**4. Verify AD sites and subnets:**\nAD Sites and Services — are DCs in the correct sites? 
Are site links configured?\n\n**5. Check USN rollback:**\nIf a DC was restored from snapshot incorrectly, USN rollback can break replication permanently for that DC.\n\n**Escalate to:** Senior AD Administrator with dcdiag output and event logs." + } + ] + }, + { + "id": "check_new_dc", + "type": "solution", + "title": "Troubleshoot New DC Not Replicating", + "description": "A newly promoted DC isn't replicating.\n\n**Check in order:**\n\n1. **DNS:** Is the new DC registered in DNS? Can it resolve other DCs?\n```\nnslookup newdc.yourdomain.local\ndcdiag /test:dns\n```\n\n2. **Site assignment:** Is the new DC in the correct AD site?\n Open AD Sites and Services and verify.\n\n3. **Replication partners:** Does it have replication partners?\n```\nrepadmin /showrepl newdc.yourdomain.local\n```\n\n4. **Initial replication:** After promotion, initial replication can take time. Wait 15-30 minutes.\n\n5. **Network:** Can the new DC reach other DCs on required ports?\n\n**If still not replicating after 30 minutes:** Run `dcdiag /v` and `repadmin /showrepl` and escalate with the output." + } + ] + } + } + + +def get_gpo_not_applying_tree() -> dict[str, Any]: + """Group Policy Not Applying - AD tree.""" + return { + "name": "Group Policy Not Applying", + "description": "Troubleshoot Group Policy Objects that aren't applying to users or computers. 
Covers GPResult diagnostics, scope filtering, WMI filters, inheritance, and common GPO processing issues.", + "category": "Active Directory", + "tree_structure": { + "id": "root", + "type": "decision", + "question": "Is the GPO not applying to a single user/computer or multiple?", + "help_text": "This determines whether it's a scoping/targeting issue or a broader GPO infrastructure problem.", + "options": [ + {"id": "single_target", "label": "Single user or computer", "next_node_id": "run_gpresult"}, + {"id": "multiple_targets", "label": "Multiple users/computers", "next_node_id": "check_gpo_config"}, + {"id": "new_gpo", "label": "Newly created GPO not working", "next_node_id": "check_new_gpo"}, + {"id": "gpo_stopped", "label": "GPO was working but stopped", "next_node_id": "check_gpo_changes"} + ], + "children": [ + { + "id": "run_gpresult", + "type": "action", + "title": "Run GPResult on the Affected Machine", + "description": "GPResult shows exactly which GPOs are applied and which are filtered out.\n\n**PowerShell (as Administrator on the affected machine):**\n```\n# Full HTML report (most useful)\ngpresult /h C:\\temp\\gpresult.html\nstart C:\\temp\\gpresult.html\n\n# Quick console output\ngpresult /r\n\n# For a specific user\ngpresult /user domain\\username /r\n```\n\n**Look for your GPO in the report:**\n- Is it listed under 'Applied GPOs'?\n- Is it listed under 'Denied GPOs' or 'Filtered GPOs'?\n- Is it missing entirely?", + "next_node_id": "gpresult_result" + }, + { + "id": "gpresult_result", + "type": "decision", + "question": "Where does your GPO appear in the GPResult report?", + "help_text": "Check both Computer Configuration and User Configuration sections", + "options": [ + {"id": "applied", "label": "GPO shows as Applied but settings not working", "next_node_id": "check_conflicting_gpo"}, + {"id": "filtered_security", "label": "GPO shows as Filtered (Security)", "next_node_id": "fix_security_filtering"}, + {"id": "filtered_wmi", "label": "GPO shows 
as Filtered (WMI)", "next_node_id": "fix_wmi_filter"}, + {"id": "not_listed", "label": "GPO not listed at all", "next_node_id": "check_gpo_link"}, + {"id": "denied", "label": "GPO shows as Denied", "next_node_id": "check_block_inheritance"} + ], + "children": [ + { + "id": "check_conflicting_gpo", + "type": "solution", + "title": "Check for Conflicting GPO / Precedence", + "description": "GPO is applied but settings aren't taking effect. Another GPO may be overriding it.\n\n**GPO processing order (applied first to last; lowest to highest precedence):**\n1. Local GPO\n2. Site GPOs\n3. Domain GPOs\n4. OU GPOs (child OU overrides parent OU)\n\n**Later-applied GPOs win** when settings conflict.\n\n**In the GPResult report:** Look for other GPOs that configure the same setting. The last one applied wins.\n\n**Also check:**\n- Is the setting under Computer or User configuration? It must match what you configured.\n- Are Preferences vs Policies confused? (Preferences can be overridden by users)\n\n**Fix:** Adjust GPO link order, use Enforced on the important GPO, or remove conflicting settings." + }, + { + "id": "fix_security_filtering", + "type": "solution", + "title": "Fix Security Filtering", + "description": "GPO is filtered out by security permissions.\n\n**Check in Group Policy Management Console:**\n1. Select the GPO\n2. Check 'Security Filtering' section\n3. By default, 'Authenticated Users' should be listed\n\n**Common issues:**\n- Removed 'Authenticated Users' and added a specific group, but target isn't in that group\n- Missing 'Domain Computers' read permission (required since MS16-072 patch)\n\n**Fix for MS16-072:**\nThe GPO needs 'Domain Computers' (for computer policies) or 'Authenticated Users' with Read permission in the Delegation tab, even if security filtering targets a specific group.\n\n**GPMC:** GPO > Delegation tab > Add 'Domain Computers' with Read permission." 
+ }, + { + "id": "fix_wmi_filter", + "type": "solution", + "title": "Fix WMI Filter", + "description": "A WMI filter is preventing the GPO from applying.\n\n**Check the WMI filter query:**\nGPMC > Select GPO > WMI Filtering section — note the filter name.\nThen check: GPMC > WMI Filters > open the filter to see the query.\n\n**Test the WMI filter on the target machine:**\n```\n# Run the WMI query directly\nGet-WmiObject -Query \"SELECT * FROM Win32_OperatingSystem WHERE Version LIKE '10%'\"\n```\nIf it returns nothing, the filter is excluding this machine.\n\n**Common WMI filter issues:**\n- OS version filter excludes newer Windows versions\n- Hardware filter doesn't match (laptop vs desktop)\n- WMI repository corruption on client\n\n**Fix WMI on client:** `winmgmt /salvagerepository`" + }, + { + "id": "check_gpo_link", + "type": "solution", + "title": "GPO Not Linked or Wrong OU", + "description": "GPO doesn't appear in GPResult at all — it's likely not linked to the correct OU or the object is in the wrong OU.\n\n**Check:**\n1. **Where is the user/computer in AD?**\n```\nGet-ADUser -Identity username | Select DistinguishedName\nGet-ADComputer -Identity computername | Select DistinguishedName\n```\n\n2. **Where is the GPO linked?**\nGPMC > Select GPO > check 'Scope' tab > 'Links' section\n\n3. **Does the OU match?** The GPO link OU must be the same OU (or a parent OU) where the user/computer object lives.\n\n**Common issues:**\n- Computer/user in wrong OU\n- GPO linked to wrong OU\n- GPO link is disabled (check the link status)\n\n**Fix:** Move the object to correct OU or link GPO to correct OU." + }, + { + "id": "check_block_inheritance", + "type": "solution", + "title": "Check Block Inheritance / Enforced", + "description": "GPO is being denied — likely by Block Inheritance on the OU.\n\n**In GPMC:** Check the OU where the target object resides. If it has a blue exclamation mark, 'Block Inheritance' is enabled.\n\n**Options to fix:**\n1. 
Remove Block Inheritance on the OU (affects all GPOs)\n2. Set the GPO to 'Enforced' — this overrides Block Inheritance\n3. Link the GPO directly to the blocking OU\n\n**Use Enforced sparingly** — it overrides normal precedence and can cause unexpected behavior." + } + ] + }, + { + "id": "check_gpo_config", + "type": "solution", + "title": "Check GPO Configuration for Multiple Targets", + "description": "GPO not applying to multiple targets. Check the GPO itself.\n\n**In GPMC:**\n1. Is the GPO link enabled? (Not disabled or unenforced)\n2. Is the GPO status correct? (Not 'All settings disabled')\n - GPO > Details tab > GPO Status\n3. Are the settings in the correct section?\n - Computer settings only apply to computer objects\n - User settings only apply to user objects\n\n**Force GP update on a test machine:**\n```\ngpupdate /force\n```\n\n**Check SYSVOL replication:**\n```\n# Compare GPO version on different DCs\nGet-GPO -Name \"Your GPO Name\" -Server DC1 | Select DisplayName,Computer,User\nGet-GPO -Name \"Your GPO Name\" -Server DC2 | Select DisplayName,Computer,User\n```\n\n**If versions differ:** SYSVOL replication (DFS-R or FRS) may be broken." + }, + { + "id": "check_new_gpo", + "type": "solution", + "title": "New GPO Checklist", + "description": "Newly created GPO not working. Verify these common mistakes:\n\n**1. Is it linked?** Creating a GPO doesn't link it automatically.\n**2. Is the link enabled?** Check for the green checkmark on the link.\n**3. Security filtering:** Default is 'Authenticated Users' (correct).\n**4. Computer vs User settings:** Make sure settings are in the right section.\n**5. Loopback processing:** If applying user settings based on computer location, you need loopback processing enabled.\n**6. Replication time:** New GPO needs to replicate to all DCs. Wait 15-30 minutes.\n\n**Force update:**\n```\ngpupdate /force\ngpresult /r\n```\n\n**Still not working:** Check the GPResult report for why it's filtered." 
def get_entra_id_sync_tree() -> dict[str, Any]:
    """Build the "Entra ID Sync Issues (AD Connect)" decision tree.

    Returns:
        A dict with ``name``, ``description``, ``category`` and
        ``tree_structure``. ``tree_structure`` is the root node; every node
        has an ``id`` and a ``type`` of ``decision`` (question + options),
        ``action`` (instructions + ``next_node_id``) or ``solution``
        (terminal). Every ``next_node_id`` refers to the ``id`` of another
        node somewhere in the same tree.
    """
    return {
        "name": "Entra ID Sync Issues (AD Connect)",
        "description": "Troubleshoot Microsoft Entra Connect (formerly Azure AD Connect) synchronization failures. Covers sync cycle errors, password hash sync, attribute conflicts, and connector space issues.",
        "category": "Active Directory",
        "tree_structure": {
            "id": "root",
            "type": "decision",
            "question": "What type of Entra ID sync issue are you experiencing?",
            "help_text": "Entra Connect syncs on-premises AD objects to Entra ID (Azure AD). Issues affect M365 services, SSO, and cloud app access.",
            "options": [
                {"id": "sync_stopped", "label": "Sync has completely stopped", "next_node_id": "check_sync_service"},
                {"id": "specific_user", "label": "Specific user/group not syncing", "next_node_id": "check_user_sync"},
                {"id": "password_sync", "label": "Password changes not syncing to cloud", "next_node_id": "check_password_hash_sync"},
                {"id": "export_errors", "label": "Sync errors / export failures", "next_node_id": "check_sync_errors"},
            ],
            "children": [
                {
                    "id": "check_sync_service",
                    "type": "action",
                    "title": "Check Entra Connect Sync Service",
                    "description": "Verify the sync service is running.\n\n**On the Entra Connect server:**\n```\n# Check sync service status\nGet-Service ADSync\n\n# Check last sync time\nGet-ADSyncScheduler\n\n# Check sync cycle status\nGet-ADSyncScheduler | Select SyncCycleEnabled,NextSyncCycleStartTimeInUTC,CurrentlyEffectiveSyncCycleInterval\n```\n\n**Also check:** Entra admin center > Entra Connect > Sync status\n\n**If the service is stopped:** Start it: `Start-Service ADSync`",
                    "next_node_id": "sync_service_result",
                },
                {
                    "id": "sync_service_result",
                    "type": "decision",
                    "question": "What is the sync service status?",
                    "help_text": "Check service state and scheduler status",
                    "options": [
                        {"id": "service_stopped", "label": "ADSync service is stopped", "next_node_id": "fix_sync_service"},
                        {"id": "scheduler_disabled", "label": "Service running but scheduler disabled", "next_node_id": "enable_scheduler"},
                        {"id": "service_running", "label": "Service running, scheduler active", "next_node_id": "check_sync_errors"},
                        {"id": "server_unreachable", "label": "Entra Connect server is down", "next_node_id": "escalate_connect_server"},
                    ],
                    "children": [
                        {
                            "id": "fix_sync_service",
                            "type": "action",
                            "title": "Start ADSync Service",
                            "description": "```\nStart-Service ADSync\nGet-Service ADSync\n\n# If it won't start, check event logs\nGet-WinEvent -FilterHashtable @{LogName='Application';ProviderName='ADSync'} -MaxEvents 20\n```\n\n**Common causes of service failure:**\n- SQL Server Express instance is down (ADSync uses a local SQL)\n- Disk space full on the Entra Connect server\n- Service account password changed\n- Windows update broke something\n\n**Check SQL:** `Get-Service 'ADSync' ; Get-Service MSSQL*`",
                            "next_node_id": "check_sync_errors",
                        },
                        {
                            "id": "enable_scheduler",
                            "type": "solution",
                            "title": "Re-enable Sync Scheduler",
                            "description": "Scheduler was disabled (commonly done during maintenance).\n\n```\n# Re-enable the scheduler\nSet-ADSyncScheduler -SyncCycleEnabled $true\n\n# Trigger an immediate sync\nStart-ADSyncSyncCycle -PolicyType Delta\n\n# Verify\nGet-ADSyncScheduler\n```\n\n**Note:** Scheduler is sometimes disabled during maintenance or troubleshooting. If someone disabled it, check if there's ongoing work before re-enabling.\n\n**Ticket Notes:** Sync scheduler was disabled. Re-enabled and triggered delta sync.",
                        },
                        {
                            "id": "escalate_connect_server",
                            "type": "solution",
                            "title": "CRITICAL: Entra Connect Server Down",
                            "description": "**Priority: HIGH** — Sync will stop but existing cloud accounts continue working.\n\n**Impact:** Password changes, new users, and group changes won't sync to M365.\n\n**Immediate actions:**\n1. Check VM/server status in hypervisor\n2. Existing users can still log into M365 (cached auth)\n3. Password changes won't sync until server is back\n\n**Escalate to:** Infrastructure team to restore the server\n**Note:** If server can't be recovered, Entra Connect can be reinstalled on another server (requires config backup or reconfiguration).",
                        },
                    ],
                },
                {
                    "id": "check_user_sync",
                    "type": "action",
                    "title": "Check Why Specific User Isn't Syncing",
                    "description": "Use the Entra Connect Synchronization Service Manager or PowerShell.\n\n```\n# Search for the user in connector space\n$csUser = Get-ADSyncCSObject -ConnectorName \"yourdomain.local\" -DistinguishedName \"CN=User Name,OU=Users,DC=yourdomain,DC=local\"\n\n# Check if user is in sync scope\n# (Simpler approach - check if user exists in Entra)\nGet-AzureADUser -SearchString \"username\" | Select DisplayName,UserPrincipalName,DirSyncEnabled\n```\n\n**Common reasons a user doesn't sync:**\n- User is in an OU not selected for sync (OU filtering)\n- User is filtered by attribute-based sync rule\n- Duplicate or conflicting attribute (UPN, proxyAddress)\n- User was soft-deleted in Entra and conflicts",
                    "next_node_id": "user_sync_result",
                },
                {
                    "id": "user_sync_result",
                    "type": "decision",
                    "question": "Why is the user not syncing?",
                    "help_text": "Check Entra Connect OU filtering and sync rules",
                    "options": [
                        {"id": "wrong_ou", "label": "User is in an OU not selected for sync", "next_node_id": "fix_ou_filtering"},
                        {"id": "attribute_conflict", "label": "Duplicate attribute conflict (UPN, email)", "next_node_id": "fix_attribute_conflict"},
                        {"id": "filtered_rule", "label": "Filtered by a sync rule", "next_node_id": "fix_sync_rule"},
                        {"id": "unclear", "label": "Not sure why", "next_node_id": "check_sync_errors"},
                    ],
                    "children": [
                        {
                            "id": "fix_ou_filtering",
                            "type": "solution",
                            "title": "Fix OU Filtering",
                            "description": "The user's OU is not included in the sync scope.\n\n**Options:**\n1. **Move the user** to an OU that's in sync scope\n2. **Add the OU** to the sync configuration:\n - Run the Entra Connect wizard\n - Choose 'Customize synchronization options'\n - Select the additional OU\n - Complete the wizard\n\n**After changing:** Run a delta sync:\n```\nStart-ADSyncSyncCycle -PolicyType Delta\n```\n\n**Caution:** Adding a large OU may sync many objects — verify your Entra ID license count.",
                        },
                        {
                            "id": "fix_attribute_conflict",
                            "type": "solution",
                            "title": "Fix Duplicate Attribute Conflict",
                            "description": "Another object already has the same UPN or proxyAddress in Entra ID.\n\n**Check Entra admin center:**\nEntra ID > Users > search for the conflicting UPN or email.\n\n**Common conflicts:**\n- User was deleted and recreated with same UPN (soft-deleted copy still in Entra recycle bin)\n- Two AD users have the same proxyAddress/email\n- A cloud-only user exists with the same UPN\n\n**Fixes:**\n1. If soft-deleted: Permanently delete the old object in Entra recycle bin\n2. If duplicate email: Fix the duplicate in AD\n3. If cloud-only conflict: Delete the cloud user or change its UPN\n\n**After fixing:** Run delta sync: `Start-ADSyncSyncCycle -PolicyType Delta`",
                        },
                        {
                            "id": "fix_sync_rule",
                            "type": "solution",
                            "title": "Escalate: Custom Sync Rule Filtering",
                            "description": "A custom sync rule is filtering out this user.\n\n**Check sync rules:**\nOpen 'Synchronization Rules Editor' on the Entra Connect server.\n\nCustom rules are risky to modify without understanding the full sync configuration.\n\n**Escalate to:** Identity/Cloud Administrator who manages Entra Connect\n**Include:** User's DN, the sync rule name, and why the user needs to sync.",
                        },
                    ],
                },
                {
                    "id": "check_password_hash_sync",
                    "type": "solution",
                    "title": "Troubleshoot Password Hash Sync",
                    # Events 656/657 are the request/result pair, not
                    # success/failure; the outcome is inside each 657 entry.
                    # Invoke-ADSyncDiagnostics only reports status, so it is
                    # no longer presented as a way to force a sync.
                    "description": "Password changes in AD aren't reflecting in M365/Entra ID.\n\n**Check PHS status:**\n```\nInvoke-ADSyncDiagnostics -PasswordSync\n```\n\n**Check Event Log:**\n```\nGet-WinEvent -FilterHashtable @{LogName='Application';ProviderName='Directory Synchronization';Id=656,657} -MaxEvents 10\n```\n\n**Event 656:** Password change request (changes picked up from AD)\n**Event 657:** Password change result — check each entry for Success or Failed\n\n**Common causes:**\n- Password hash sync feature disabled in Entra Connect config\n- Connector account permissions changed in AD\n- Recent password change hasn't synced yet (wait for next cycle, usually 2 min)\n\n**Re-run diagnostics after the next sync cycle to confirm:**\n```\nInvoke-ADSyncDiagnostics -PasswordSync\n```\n\n**If PHS is disabled:** Re-run the Entra Connect wizard and enable it.\n\n**Escalate to:** Identity Administrator if the connector account needs permission fixes.",
                },
                {
                    "id": "check_sync_errors",
                    "type": "solution",
                    "title": "Review Sync Errors",
                    "description": "Check for export errors and sync failures.\n\n**On the Entra Connect server:**\n1. Open **Synchronization Service Manager**\n2. Check the **Operations** tab for recent sync cycles\n3. Look for 'export' operations with errors\n4. Click on the error count for details\n\n**PowerShell:**\n```\n# Get recent sync results\nGet-ADSyncRunProfileResult | Sort StartDate -Descending | Select -First 5\n\n# Check Entra portal\n# Entra admin center > Entra Connect > Sync errors\n```\n\n**Common export errors:**\n- InvalidSoftMatch: Attribute conflict in cloud\n- DataValidationFailed: Invalid characters in attributes\n- LargeObject: Object exceeds attribute size limits\n\n**Escalate to:** Identity/Cloud Administrator with the specific error details.",
                },
            ],
        },
    }
def get_domain_join_tree() -> dict[str, Any]:
    """Build the "Computer Cannot Join Domain" decision tree.

    Command snippets use ``<DC_IP>``, ``<DC_NAME>``, ``<DNS1_IP>`` and
    ``<DNS2_IP>`` as placeholders the engineer must replace with real values.

    Returns:
        A dict with ``name``, ``description``, ``category`` and
        ``tree_structure``. ``tree_structure`` is the root node; every node
        has an ``id`` and a ``type`` of ``decision`` (question + options),
        ``action`` (instructions + ``next_node_id``) or ``solution``
        (terminal). Every ``next_node_id`` refers to the ``id`` of another
        node somewhere in the same tree.
    """
    return {
        "name": "Computer Cannot Join Domain",
        "description": "Troubleshoot domain join failures for new or reimaged computers. Covers DNS requirements, authentication issues, computer account limits, and common error codes.",
        "category": "Active Directory",
        "tree_structure": {
            "id": "root",
            "type": "decision",
            "question": "What error occurs when trying to join the domain?",
            "help_text": "Try joining: System Properties > Computer Name > Change > Domain. Note the exact error message.",
            "options": [
                {"id": "domain_not_found", "label": "Domain could not be contacted / not found", "next_node_id": "check_dns_for_domain"},
                {"id": "access_denied", "label": "Access denied / insufficient permissions", "next_node_id": "check_join_permissions"},
                {"id": "account_exists", "label": "Computer account already exists", "next_node_id": "fix_existing_account"},
                {"id": "other_error", "label": "Different error message", "next_node_id": "check_general_join"},
            ],
            "children": [
                {
                    "id": "check_dns_for_domain",
                    "type": "action",
                    "title": "Verify DNS Can Resolve Domain Controllers",
                    "description": "Domain join requires DNS to find DCs. This is the most common failure.\n\n**On the computer being joined:**\n```\n# Check DNS settings\nipconfig /all\n\n# Can you resolve the domain?\nnslookup yourdomain.local\n\n# Can you find DC SRV records?\nnslookup -type=srv _ldap._tcp.dc._msdcs.yourdomain.local\n\n# Can you ping a DC?\nping <DC_IP>\n```\n\n**The computer's DNS MUST point to an internal DNS server** that has the AD DNS zones. Public DNS (8.8.8.8) won't work for domain join.",
                    "next_node_id": "dns_join_result",
                },
                {
                    "id": "dns_join_result",
                    "type": "decision",
                    "question": "Can the computer resolve the domain name?",
                    "help_text": "nslookup should return DC IP addresses",
                    "options": [
                        {"id": "wrong_dns", "label": "DNS is pointing to wrong server (public DNS, etc.)", "next_node_id": "fix_dns_for_join"},
                        {"id": "dns_ok_cant_reach", "label": "DNS resolves but can't reach the DC", "next_node_id": "check_network_to_dc"},
                        {"id": "dns_resolves_ok", "label": "DNS resolves and can ping DC", "next_node_id": "check_join_permissions"},
                    ],
                    "children": [
                        {
                            "id": "fix_dns_for_join",
                            "type": "action",
                            "title": "Set DNS to Internal DNS Servers",
                            "description": "The computer must use your AD DNS servers.\n\n```\n# Set DNS to your domain controllers/DNS servers\nSet-DnsClientServerAddress -InterfaceAlias 'Ethernet' -ServerAddresses '<DNS1_IP>','<DNS2_IP>'\n\n# Verify\nnslookup yourdomain.local\n```\n\n**If using DHCP:** The DHCP scope should be assigning internal DNS. If not, fix the DHCP scope options.\n\n**After setting DNS:** Retry the domain join.",
                            # Points at a root-level sibling, not a child of this decision.
                            "next_node_id": "retry_join",
                        },
                        {
                            "id": "check_network_to_dc",
                            "type": "solution",
                            "title": "Check Network Connectivity to Domain Controller",
                            "description": "DNS resolves but can't reach the DC. Check network path.\n\n```\nTest-NetConnection -ComputerName <DC_IP> -Port 389\nTest-NetConnection -ComputerName <DC_IP> -Port 445\ntracert <DC_IP>\n```\n\n**Required ports for domain join:**\n- TCP/UDP 389 (LDAP)\n- TCP 445 (SMB)\n- TCP/UDP 88 (Kerberos)\n- TCP 135 + dynamic RPC\n- TCP/UDP 53 (DNS)\n\n**Common causes:** VLAN isolation, firewall blocking, VPN not connected.\n\n**Escalate to:** Network team if ports are blocked.",
                        },
                    ],
                },
                {
                    "id": "check_join_permissions",
                    "type": "decision",
                    "question": "What credentials are being used to join the domain?",
                    "help_text": "Domain join requires specific permissions in AD",
                    "options": [
                        {"id": "regular_user", "label": "Regular domain user account", "next_node_id": "check_join_quota"},
                        {"id": "admin_account", "label": "Domain admin or delegated join account", "next_node_id": "check_admin_join_issue"},
                        {"id": "wrong_creds", "label": "Credentials might be wrong / expired", "next_node_id": "verify_credentials"},
                    ],
                    "children": [
                        {
                            "id": "check_join_quota",
                            "type": "solution",
                            "title": "Check Domain Join Quota",
                            "description": "Regular users can join up to **10 computers** by default (ms-DS-MachineAccountQuota).\n\n**Check current quota:**\n```\nGet-ADObject -Identity (Get-ADDomain).DistinguishedName -Properties ms-DS-MachineAccountQuota | Select ms-DS-MachineAccountQuota\n```\n\n**Check how many the user has joined:**\n```\nGet-ADComputer -Filter {ms-DS-CreatorSID -eq $((Get-ADUser username).SID)} | Measure-Object\n```\n\n**If quota exceeded:**\n1. Use a domain admin account to join instead\n2. Or pre-stage the computer account in AD (allows the user to join that specific computer)\n3. Or increase the quota (not recommended for security)\n\n**Best practice:** Pre-stage computer accounts or use a dedicated join account with delegated permissions.",
                        },
                        {
                            "id": "check_admin_join_issue",
                            "type": "solution",
                            "title": "Admin Account Can't Join - Check OU Permissions",
                            "description": "Even admin accounts can fail if OU permissions are restricted.\n\n**Check:**\n1. Does a computer account already exist with this name? `Get-ADComputer -Identity \"COMPUTERNAME\"`\n2. If pre-staged, does the joining user have 'Reset Password' and 'Write Account Restrictions' on that computer object?\n3. Is the target OU restricted via delegation?\n\n**Try joining to default Computers container first:** If that works, it's an OU permissions issue.\n\n**If admin account is locked or expired:**\n```\nGet-ADUser -Identity adminaccount -Properties LockedOut,Enabled,PasswordExpired\n```",
                        },
                        {
                            "id": "verify_credentials",
                            "type": "solution",
                            "title": "Verify Domain Credentials",
                            "description": "Make sure the credentials are correct.\n\n**Use the full domain format:**\n- `DOMAIN\\username` or `username@domain.local`\n\n**Verify the account works:**\n- Try logging into another domain-joined PC\n- Or test: `runas /user:DOMAIN\\username cmd`\n\n**Check if account is locked/disabled:**\n```\nGet-ADUser -Identity username -Properties LockedOut,Enabled,PasswordExpired\n```",
                        },
                    ],
                },
                {
                    "id": "fix_existing_account",
                    "type": "solution",
                    "title": "Fix Existing Computer Account Conflict",
                    "description": "A computer account with this name already exists in AD.\n\n**Options:**\n1. **Delete the old account** (if the old computer is decommissioned):\n```\nRemove-ADComputer -Identity \"COMPUTERNAME\"\n```\n\n2. **Reset the old account** (allows rejoin):\n```\nReset-ComputerMachinePassword -Server <DC_NAME> -Credential (Get-Credential)\n```\n\n3. **Use a different computer name**\n\n4. **Pre-stage:** If the account was pre-staged, the joining user needs permission on that specific object.\n\n**After fixing:** Retry the domain join.",
                },
                {
                    "id": "check_general_join",
                    "type": "solution",
                    "title": "General Domain Join Troubleshooting",
                    "description": "For other domain join errors:\n\n**Check the basics:**\n1. Time sync: Is the computer within 5 minutes of the DC?\n ```\n w32tm /query /status\n net time \\\\<DC_NAME>\n ```\n2. Network: Can you access `\\\\<DC_NAME>\\SYSVOL`?\n3. Firewall: Is Windows Firewall blocking domain traffic?\n4. Secure channel: For rejoins, try: `Test-ComputerSecureChannel -Repair`\n\n**Common error codes:**\n- 53: Network path not found (connectivity issue)\n- 1355: Domain not found (DNS issue)\n- 2224: Account already exists\n- 2691: Already joined to a domain (unjoin first)\n\n**Escalate to:** AD Administrator with the exact error code and message.",
                },
                {
                    "id": "retry_join",
                    "type": "decision",
                    "question": "Did the domain join succeed after fixing DNS?",
                    "help_text": "Retry: System Properties > Computer Name > Change > Domain",
                    "options": [
                        {"id": "success", "label": "Yes, joined successfully", "next_node_id": "solution_joined"},
                        {"id": "different_error", "label": "Different error now", "next_node_id": "check_general_join"},
                    ],
                    "children": [
                        {
                            "id": "solution_joined",
                            "type": "solution",
                            "title": "Resolved: Computer Joined Domain",
                            "description": "Computer successfully joined the domain.\n\n**Post-join steps:**\n1. Restart the computer (required)\n2. Log in with domain credentials\n3. Verify Group Policy: `gpupdate /force`\n4. Move computer to correct OU if needed\n\n**Ticket Notes:** Domain join completed. Root cause was [DNS/permissions/etc].",
                        },
                    ],
                },
            ],
        },
    }
def get_kerberos_auth_tree() -> dict[str, Any]:
    """Build the "Kerberos / NTLM Authentication Failures" decision tree.

    Command snippets use ``<DC_IP>``, ``<DC_NAME>``, ``<service_account>``,
    ``<SPN>`` and ``<account>`` as placeholders the engineer must replace
    with real values.

    Returns:
        A dict with ``name``, ``description``, ``category`` and
        ``tree_structure``. ``tree_structure`` is the root node; every node
        has an ``id`` and a ``type`` of ``decision`` (question + options),
        ``action`` (instructions + ``next_node_id``) or ``solution``
        (terminal). Every ``next_node_id`` refers to the ``id`` of another
        node somewhere in the same tree.
    """
    return {
        "name": "Kerberos / NTLM Authentication Failures",
        "description": "Troubleshoot authentication failures including Kerberos ticket issues, NTLM fallback problems, SPN misconfigurations, and time sync issues that affect logins, file shares, and web applications.",
        "category": "Active Directory",
        "tree_structure": {
            "id": "root",
            "type": "decision",
            "question": "What authentication symptom is the user experiencing?",
            "help_text": "Authentication issues can manifest as login failures, access denied to resources, or double-prompts for credentials.",
            "options": [
                {"id": "login_failure", "label": "Can't log into Windows at all", "next_node_id": "check_dc_connectivity"},
                {"id": "resource_access", "label": "Logged in but can't access file shares/apps", "next_node_id": "check_kerberos_tickets"},
                {"id": "double_prompt", "label": "Gets prompted for credentials repeatedly (SSO not working)", "next_node_id": "check_spn_issues"},
                {"id": "intermittent", "label": "Authentication works sometimes, fails other times", "next_node_id": "check_time_sync"},
            ],
            "children": [
                {
                    "id": "check_dc_connectivity",
                    "type": "action",
                    "title": "Check Domain Controller Connectivity",
                    "description": "Windows login requires DC access for Kerberos authentication.\n\n**On the affected machine:**\n```\n# Which DC is being used?\necho %LOGONSERVER%\nnltest /dsgetdc:yourdomain.local\n\n# Can you reach a DC?\nTest-NetConnection -ComputerName <DC_IP> -Port 88\nTest-NetConnection -ComputerName <DC_IP> -Port 389\n```\n\n**If offline:** Windows will use cached credentials for login (if previously logged in). First-time logins require DC connectivity.\n\n**No DC available:** Check network, VPN, DNS settings.",
                    "next_node_id": "dc_connect_result",
                },
                {
                    "id": "dc_connect_result",
                    "type": "decision",
                    "question": "Can the machine reach a domain controller?",
                    "help_text": "Kerberos uses port 88, LDAP uses port 389",
                    "options": [
                        {"id": "no_dc", "label": "Can't reach any DC", "next_node_id": "fix_dc_connectivity"},
                        {"id": "dc_reachable", "label": "DC is reachable but login still fails", "next_node_id": "check_account_status"},
                        {"id": "cached_login", "label": "Can log in with cached creds only", "next_node_id": "fix_dc_connectivity"},
                    ],
                    "children": [
                        {
                            "id": "fix_dc_connectivity",
                            "type": "solution",
                            "title": "Restore Domain Controller Connectivity",
                            "description": "Machine can't reach a DC. Check:\n\n1. **Network:** Is the machine connected? `ipconfig /all`\n2. **DNS:** Pointing to internal DNS? `nslookup yourdomain.local`\n3. **VPN:** If remote, is VPN connected?\n4. **Firewall:** Ports 88, 389, 445, 135 open to DC?\n5. **DC status:** Are DCs actually online?\n\n**If VPN user:** Connect VPN first, then Ctrl+Alt+Del > Switch User > log in with domain creds (forces DC authentication).\n\n**If all DCs are down:** This is a major outage. Users can only use cached logins.\n\n**Escalate to:** Network team (if routing issue) or Infrastructure (if DC issue).",
                        },
                        {
                            "id": "check_account_status",
                            "type": "solution",
                            "title": "Check AD Account Status",
                            "description": "DC is reachable but auth fails. Check the account.\n\n```\nGet-ADUser -Identity username -Properties LockedOut,Enabled,PasswordExpired,PasswordLastSet,AccountExpirationDate\n```\n\n**Possible issues:**\n- Account locked out → Unlock it\n- Account disabled → Enable or investigate why\n- Password expired → Reset password\n- Account expired → Extend expiration date\n\n**Also check:** Is the computer's secure channel healthy?\n```\nTest-ComputerSecureChannel -Verbose\n```\nIf broken: `Test-ComputerSecureChannel -Repair -Credential (Get-Credential)`",
                        },
                    ],
                },
                {
                    "id": "check_kerberos_tickets",
                    "type": "action",
                    "title": "Check Kerberos Tickets",
                    "description": "User is logged in but can't access resources. Check Kerberos tickets.\n\n```\n# List current Kerberos tickets\nklist\n\n# Purge and get new tickets (forces re-authentication)\nklist purge\n\n# Then access the resource again — new tickets will be requested\n```\n\n**Look for:**\n- Are there valid TGT (krbtgt) tickets?\n- Are there service tickets for the resource you're accessing?\n- Have tickets expired?\n\n**If no tickets at all:** The machine may not be properly domain-joined or DC unreachable.",
                    "next_node_id": "ticket_result",
                },
                {
                    "id": "ticket_result",
                    "type": "decision",
                    "question": "Did purging and refreshing tickets fix the issue?",
                    "help_text": "After klist purge, try accessing the resource again",
                    "options": [
                        {"id": "fixed", "label": "Yes, resource access works now", "next_node_id": "solution_ticket_refresh"},
                        {"id": "still_fails", "label": "Still can't access the resource", "next_node_id": "check_spn_issues"},
                        {"id": "ntlm_fallback", "label": "Works but with credential prompt (NTLM fallback)", "next_node_id": "check_spn_issues"},
                    ],
                    "children": [
                        {
                            "id": "solution_ticket_refresh",
                            "type": "solution",
                            "title": "Resolved: Stale Kerberos Tickets",
                            "description": "Old Kerberos tickets were cached with outdated information.\n\n**Common causes:** Group membership change, password change, DC switchover.\n\n**Resolution:** Purged ticket cache with `klist purge`.\n\n**If this happens frequently:** The user may need to log out and back in after permission changes, or there may be a time sync issue.\n\n**Ticket Notes:** Stale Kerberos tickets cleared. User can access resources normally.",
                        },
                    ],
                },
                {
                    "id": "check_spn_issues",
                    "type": "solution",
                    "title": "Check SPN Configuration",
                    "description": "Kerberos requires correct Service Principal Names (SPNs) on the target service.\n\n**Check SPNs for a service account:**\n```\nsetspn -L <service_account>\n\n# Check for duplicate SPNs (common problem)\nsetspn -X\n```\n\n**Common SPN issues:**\n- Missing SPN: Kerberos can't find the service, falls back to NTLM\n- Duplicate SPN: Two accounts claim the same service — Kerberos fails\n- Wrong SPN format: Must match how clients access the service\n\n**Example SPNs:**\n- File share: `HOST/servername`\n- Web app: `HTTP/webapp.domain.local`\n- SQL: `MSSQLSvc/sqlserver.domain.local:1433`\n\n**Fix duplicate SPNs:** Remove the incorrect one: `setspn -D <SPN> <account>`\n\n**Escalate to:** Senior AD admin for SPN changes — incorrect SPNs can break other services.",
                },
                {
                    "id": "check_time_sync",
                    "type": "action",
                    "title": "Check Time Synchronization",
                    "description": "Kerberos requires clocks to be within 5 minutes of each other.\n\n```\n# Check current time vs DC time\nw32tm /query /status\nnet time \\\\<DC_NAME>\n\n# Check time source\nw32tm /query /source\n\n# Force time resync\nw32tm /resync /force\n\n# Check time offset\nw32tm /stripchart /computer:<DC_NAME> /samples:5\n```\n\n**If time is off by more than 5 minutes:** Kerberos authentication will fail completely.\n\n**Common causes of time drift:**\n- VM time sync disabled\n- Laptop was offline for extended period\n- NTP source unreachable\n- Hyper-V time sync conflicting with domain time",
                    "next_node_id": "time_result",
                },
                {
                    "id": "time_result",
                    "type": "decision",
                    "question": "Was the time more than 5 minutes off?",
                    "help_text": "Compare client time to DC time",
                    "options": [
                        {"id": "time_fixed", "label": "Yes, fixed time sync — auth works now", "next_node_id": "solution_time_sync"},
                        {"id": "time_ok", "label": "Time was fine, issue is something else", "next_node_id": "check_kerberos_tickets"},
                    ],
                    "children": [
                        {
                            "id": "solution_time_sync",
                            "type": "solution",
                            "title": "Resolved: Time Sync Issue",
                            "description": "Kerberos was failing due to clock skew greater than 5 minutes.\n\n**Prevention:**\n- Ensure all domain members sync time from the DC\n- PDC Emulator should sync from an external NTP source\n- For VMs: Disable hypervisor time sync (use domain time hierarchy)\n\n**Verify domain time hierarchy:**\n```\nw32tm /query /source\n```\nDomain members should show a DC. PDC should show an NTP server.\n\n**Ticket Notes:** Authentication failure due to clock skew. Resynced time.",
                        },
                    ],
                },
            ],
        },
    }
+ } + ] + } + ] + } + } diff --git a/backend/scripts/seed_trees_m365.py b/backend/scripts/seed_trees_m365.py new file mode 100644 index 00000000..7f955bf8 --- /dev/null +++ b/backend/scripts/seed_trees_m365.py @@ -0,0 +1,1269 @@ +#!/usr/bin/env python3 +""" +ResolutionFlow Decision Trees - Batch 3: Microsoft 365 + +Six M365 troubleshooting trees for MSP engineers. +Imported by seed_trees_v2.py for seeding. + +Trees: +1. Teams Call Quality Issues +2. OneDrive Sync Problems +3. Mail Flow Issues (Exchange Online) +4. SharePoint Permissions Problems +5. MFA / Conditional Access Lockout +6. License Assignment Problems +""" + +from typing import Any + + +# ============================================================================= +# Tree 1: Teams Call Quality Issues +# ============================================================================= +def get_teams_call_quality_tree() -> dict[str, Any]: + """Teams Call Quality Issues - M365 tree.""" + return { + "name": "Teams Call Quality Issues", + "description": "Diagnose and resolve Microsoft Teams call quality problems including choppy audio, dropped calls, video freezing, and echo. Covers network diagnostics, client troubleshooting, and Teams admin center analysis.", + "category": "Microsoft 365", + "tree_structure": { + "id": "root", + "type": "decision", + "question": "What type of call quality issue is the user experiencing?", + "help_text": "Get specifics from the user. 
Different symptoms point to different root causes.", + "options": [ + {"id": "audio_choppy", "label": "Choppy/robotic audio or audio cutting out", "next_node_id": "check_network_basics"}, + {"id": "calls_dropping", "label": "Calls disconnecting / dropping entirely", "next_node_id": "check_call_drop_scope"}, + {"id": "video_issues", "label": "Video freezing, pixelated, or not loading", "next_node_id": "check_video_bandwidth"}, + {"id": "echo_feedback", "label": "Echo, feedback, or background noise", "next_node_id": "check_audio_device"}, + {"id": "cant_join", "label": "Can't join calls at all", "next_node_id": "check_join_failure"} + ], + "children": [ + { + "id": "check_network_basics", + "type": "action", + "title": "Run Network Quality Checks", + "description": "Choppy audio is almost always network-related. Run these checks:\n\n**Step 1: Speed test**\nGo to https://www.speedtest.net — Teams needs:\n- 1.5 Mbps up/down minimum for calls\n- 4 Mbps+ for video calls\n- Jitter under 30ms, latency under 50ms\n\n**Step 2: Check Teams Network Assessment Tool**\n```\n# Download from Microsoft, run:\nNetworkAssessmentTool.exe\n```\n\n**Step 3: Quick checks:**\n- Is the user on Wi-Fi or Ethernet?\n- Is anyone else on the network having issues?\n- Is the user on VPN? 
(common cause)\n\n**Document:** Speed test results, Wi-Fi vs wired, VPN status.", + "next_node_id": "network_result" + }, + { + "id": "network_result", + "type": "decision", + "question": "What did the network check reveal?", + "help_text": "Compare results against Teams requirements", + "options": [ + {"id": "low_bandwidth", "label": "Bandwidth is low (under 4 Mbps)", "next_node_id": "fix_bandwidth"}, + {"id": "high_jitter", "label": "High jitter (>30ms) or packet loss", "next_node_id": "fix_jitter"}, + {"id": "vpn_issue", "label": "User is on VPN — likely the cause", "next_node_id": "fix_vpn_teams"}, + {"id": "network_ok", "label": "Network looks fine", "next_node_id": "check_teams_client"} + ], + "children": [ + { + "id": "fix_bandwidth", + "type": "action", + "title": "Address Low Bandwidth", + "description": "Bandwidth is insufficient for quality Teams calls.\n\n**Immediate fixes:**\n- Switch to wired Ethernet if on Wi-Fi\n- Close bandwidth-heavy apps (streaming, large downloads)\n- Turn off incoming video in the call to save bandwidth\n- Ask others on the network to pause heavy usage during calls\n\n**If on Wi-Fi:**\n- Move closer to the access point\n- Switch to 5GHz band if available\n- Check for interference (microwaves, Bluetooth devices nearby)\n\n**If bandwidth is consistently low:** This is an ISP or network infrastructure issue. 
Escalate to the client's network admin or ISP.", + "next_node_id": "bandwidth_resolved" + }, + { + "id": "bandwidth_resolved", + "type": "decision", + "question": "Did the bandwidth fixes help?", + "help_text": "Have the user try another call after making changes", + "options": [ + {"id": "yes", "label": "Yes, call quality improved", "next_node_id": "solution_bandwidth"}, + {"id": "no", "label": "No, still having issues", "next_node_id": "check_teams_client"} + ], + "children": [ + { + "id": "solution_bandwidth", + "type": "solution", + "title": "Resolved: Bandwidth Issue", + "description": "Call quality improved after addressing bandwidth.\n\n**Ticket Notes:** Teams call quality issue caused by insufficient bandwidth. Resolved by [switching to Ethernet / closing competing apps / adjusting Wi-Fi].\n\n**Recommendations for client:**\n- Prioritize Teams traffic via QoS if they have a managed network\n- Consider dedicated bandwidth for voice/video if this is recurring\n- Teams audio ports: UDP 3478-3481" + } + ] + }, + { + "id": "fix_jitter", + "type": "action", + "title": "Address High Jitter / Packet Loss", + "description": "High jitter or packet loss causes choppy, robotic audio.\n\n**Check for the cause:**\n```\n# Continuous ping to check for loss\nping -t teams.microsoft.com\n\n# Trace route to find where loss occurs\ntracert teams.microsoft.com\n```\n\n**Common causes:**\n- Congested Wi-Fi (too many devices on same AP)\n- Bad Ethernet cable or port\n- ISP congestion (especially cable internet during peak hours)\n- Old/overloaded router or switch\n- Network loop or broadcast storm\n\n**If packet loss is at the first hop:** Local network issue (switch, cable, or Wi-Fi AP).\n**If packet loss starts mid-route:** ISP or upstream issue.", + "next_node_id": "jitter_resolved" + }, + { + "id": "jitter_resolved", + "type": "decision", + "question": "Were you able to reduce jitter/packet loss?", + "help_text": "Rerun speed test to verify improvement", + "options": [ + 
{"id": "yes", "label": "Yes, jitter/loss improved", "next_node_id": "solution_jitter"}, + {"id": "no", "label": "No, network still unstable", "next_node_id": "solution_escalate_network"} + ], + "children": [ + { + "id": "solution_jitter", + "type": "solution", + "title": "Resolved: Network Jitter / Packet Loss", + "description": "Call quality improved after fixing network jitter.\n\n**Ticket Notes:** Teams call quality issue caused by network jitter/packet loss. Root cause: [Wi-Fi congestion / bad cable / ISP issue]. Resolved by [specific fix].\n\n**Prevention:** Implement QoS policies to prioritize real-time media traffic. Teams uses UDP 3478-3481 for media." + }, + { + "id": "solution_escalate_network", + "type": "solution", + "title": "Escalate: Network Infrastructure Issue", + "description": "Network issues persist — escalate to network team or ISP.\n\n**Ticket Notes:** Teams call quality degraded due to persistent jitter/packet loss. Local troubleshooting performed. Traceroute shows loss at [hop]. Escalating to [network team / ISP].\n\n**Include in escalation:**\n- Speed test results\n- Traceroute output\n- Times when issues are worst\n- Number of affected users" + } + ] + }, + { + "id": "fix_vpn_teams", + "type": "action", + "title": "Resolve VPN Impact on Teams", + "description": "VPN is a very common cause of Teams call quality issues. VPN tunnels add latency and can't handle real-time media well.\n\n**Best fix: Split tunnel Teams traffic**\nTeams should bypass the VPN. Most modern VPN clients support split tunneling.\n\n**Microsoft's M365 optimize endpoints that should bypass VPN:**\n- Teams media: UDP 3478-3481 to 13.107.64.0/18, 52.112.0.0/14\n- Check: https://aka.ms/o365endpoints\n\n**If split tunneling isn't an option:**\n- Disconnect VPN for the duration of the call\n- Use Teams on a phone (off VPN) as a fallback\n\n**Verify improvement:** Have the user make a test call with VPN disconnected. 
If quality improves, VPN is confirmed as the cause.", + "next_node_id": "solution_vpn_teams" + }, + { + "id": "solution_vpn_teams", + "type": "solution", + "title": "Resolved: VPN Causing Teams Quality Issues", + "description": "VPN tunnel was degrading Teams call quality.\n\n**Ticket Notes:** Teams call quality issue caused by VPN tunnel routing real-time media traffic. Recommended split tunneling for M365 optimize endpoints.\n\n**Recommendation to client:** Implement split tunneling for Microsoft 365 'Optimize' category endpoints. This is Microsoft's official recommendation and significantly improves Teams performance." + }, + { + "id": "check_teams_client", + "type": "action", + "title": "Check Teams Client and Device", + "description": "Network looks fine — check the Teams client itself.\n\n**Step 1: Check Teams version**\nClick the three dots (···) > About > Check for updates\nMake sure it's the new Teams (not classic Teams).\n\n**Step 2: Clear Teams cache**\n```\n# Close Teams first, then:\nrd /s /q \"%APPDATA%\\Microsoft\\Teams\"\n# For new Teams:\nrd /s /q \"%LOCALAPPDATA%\\Packages\\MSTeams_8wekyb3d8bbwe\\LocalCache\"\n```\n\n**Step 3: Check audio device**\n- Settings > Devices — is the correct mic/speaker selected?\n- Make a test call: Settings > Devices > Make a test call\n\n**Step 4: Check for GPU/driver issues**\n- Settings > General > disable GPU hardware acceleration\n- Update audio and GPU drivers", + "next_node_id": "client_fix_result" + }, + { + "id": "client_fix_result", + "type": "decision", + "question": "Did client-side fixes help?", + "help_text": "Have the user make a test call after each change", + "options": [ + {"id": "yes", "label": "Yes, quality improved", "next_node_id": "solution_client_fix"}, + {"id": "no", "label": "No, still having issues", "next_node_id": "check_admin_center"} + ], + "children": [ + { + "id": "solution_client_fix", + "type": "solution", + "title": "Resolved: Teams Client Issue", + "description": "Call quality 
improved after Teams client troubleshooting.\n\n**Ticket Notes:** Teams call quality issue resolved by [updating Teams / clearing cache / fixing audio device / disabling GPU acceleration].\n\n**Follow-up:** Monitor for recurrence. If the issue was a corrupt cache, it's unlikely to recur." + }, + { + "id": "check_admin_center", + "type": "action", + "title": "Check Teams Admin Center Call Analytics", + "description": "Use the Teams Admin Center for detailed call diagnostics.\n\n**Teams Admin Center > Users > [Select User] > Call history**\n- Look at recent calls for quality scores\n- Check the 'Advanced' tab for packet loss, jitter, and round-trip time\n\n**Call Quality Dashboard (CQD):**\nhttps://cqd.teams.microsoft.com\n- Shows trends across the organization\n- Filter by user, building, subnet\n\n**What to look for:**\n- Poor streams marked in red\n- High packet loss (>5%)\n- High jitter (>30ms)\n- Audio device issues flagged\n\n**If issue is org-wide:** Likely a network/ISP issue at the office. Escalate.", + "next_node_id": "solution_escalate_teams" + }, + { + "id": "solution_escalate_teams", + "type": "solution", + "title": "Escalate: Teams Call Quality — Admin Investigation", + "description": "Individual troubleshooting didn't resolve the issue. Escalation needed.\n\n**Ticket Notes:** Teams call quality issues persist after network and client troubleshooting. Call analytics reviewed in Teams Admin Center. 
[Include CQD findings.]\n\n**Possible next steps:**\n- Open a Microsoft support ticket with call IDs\n- Review network architecture for QoS/traffic prioritization\n- Consider deploying Teams Network Assessment Tool org-wide\n- Check if issue correlates with specific ISP, building, or subnet" + } + ] + } + ] + }, + { + "id": "check_call_drop_scope", + "type": "decision", + "question": "Is it just this one user or multiple users dropping calls?", + "help_text": "Scope determines whether this is a local issue or org-wide", + "options": [ + {"id": "one_user", "label": "Just this one user", "next_node_id": "check_network_basics"}, + {"id": "multiple", "label": "Multiple users at the same location", "next_node_id": "solution_escalate_network"}, + {"id": "org_wide", "label": "Happening across the org / multiple locations", "next_node_id": "check_m365_health"} + ], + "children": [ + { + "id": "check_m365_health", + "type": "action", + "title": "Check Microsoft 365 Service Health", + "description": "Org-wide call drops may be a Microsoft service issue.\n\n**Check service health:**\n1. Microsoft 365 Admin Center > Health > Service health\n2. Look for advisories on Microsoft Teams\n3. Check https://status.office365.com\n4. Check @MSABORSKY on Twitter/X for outage reports\n\n**Also check:** https://downdetector.com/status/ms-teams/\n\n**If there's an active incident:** Microsoft is aware. Document the incident ID and wait for resolution. 
Set expectations with users.", + "next_node_id": "service_health_result" + }, + { + "id": "service_health_result", + "type": "decision", + "question": "Is there a Microsoft service issue?", + "help_text": "Check M365 Admin Center service health", + "options": [ + {"id": "yes_outage", "label": "Yes, active Teams incident", "next_node_id": "solution_m365_outage"}, + {"id": "no_outage", "label": "No active incidents", "next_node_id": "solution_escalate_teams"} + ], + "children": [ + { + "id": "solution_m365_outage", + "type": "solution", + "title": "Microsoft 365 Service Incident", + "description": "Active Microsoft service incident is causing Teams call drops.\n\n**Ticket Notes:** Teams call drops affecting multiple users are caused by Microsoft service incident [Incident ID]. Microsoft is investigating. No action required on our end.\n\n**Communication to users:**\n- Microsoft is aware of the issue and working on a fix\n- Workaround: Use phone dial-in for critical meetings\n- Monitor M365 Admin Center for updates\n\n**Follow-up:** Verify resolution once Microsoft marks the incident as resolved." + } + ] + } + ] + }, + { + "id": "check_video_bandwidth", + "type": "action", + "title": "Check Video Bandwidth and Hardware", + "description": "Video issues need more bandwidth and GPU than audio.\n\n**Teams video requirements:**\n- 1:1 video: 1.5 Mbps up/down\n- Group video: 2.5 Mbps up/down\n- HD video: 4+ Mbps\n\n**Quick checks:**\n1. Run speed test — is bandwidth sufficient?\n2. Is the user on Wi-Fi? (5GHz recommended for video)\n3. Check CPU usage during a call — is it over 80%?\n4. Is the laptop plugged in? (throttles on battery)\n\n**Try:** Have the user turn off incoming video to see if their own video stabilizes. 
If it does, it's a bandwidth issue.\n\n**GPU acceleration:** Settings > General — toggle hardware acceleration.", + "next_node_id": "network_result" + }, + { + "id": "check_audio_device", + "type": "action", + "title": "Troubleshoot Echo / Audio Feedback", + "description": "Echo and feedback are device issues, not network issues.\n\n**Common causes:**\n- Speaker audio being picked up by the microphone\n- Using laptop speakers + laptop mic (not headset)\n- Two devices in the same room both joined to the call\n- Bluetooth headset with poor echo cancellation\n\n**Fixes:**\n1. **Use a headset.** This is the #1 fix for echo.\n2. Check if user has multiple devices in the call (phone + laptop)\n3. In Teams: Settings > Devices — enable noise suppression (High)\n4. Lower speaker volume to reduce mic pickup\n5. If using a conference room speaker: check for echo cancellation settings\n\n**Test:** Settings > Devices > Make a test call", + "next_node_id": "echo_resolved" + }, + { + "id": "echo_resolved", + "type": "decision", + "question": "Did the echo/feedback improve?", + "help_text": "Make a test call after changes", + "options": [ + {"id": "yes", "label": "Yes, audio is clean now", "next_node_id": "solution_echo_fix"}, + {"id": "no", "label": "No, echo persists", "next_node_id": "check_teams_client"} + ], + "children": [ + { + "id": "solution_echo_fix", + "type": "solution", + "title": "Resolved: Audio Echo / Feedback", + "description": "Echo resolved by fixing the audio device setup.\n\n**Ticket Notes:** Teams echo/feedback caused by [open speakers + mic / duplicate devices in call / no headset]. Resolved by [using headset / removing duplicate device / enabling noise suppression].\n\n**Recommendation:** Encourage headset use for all Teams calls, especially in shared office spaces." 
+ } + ] + }, + { + "id": "check_join_failure", + "type": "action", + "title": "Diagnose Call Join Failure", + "description": "User can't join Teams calls at all.\n\n**Step 1: Can they join from the web?**\nTry joining at https://teams.microsoft.com — this rules out client issues.\n\n**Step 2: Check firewall/proxy**\nTeams requires these to be open:\n- TCP 443 (HTTPS)\n- UDP 3478-3481 (media — CRITICAL)\n- IP ranges: 13.107.64.0/18, 52.112.0.0/14\n\n**Step 3: Check Teams permissions**\n- Is the user licensed for Teams?\n- Teams Admin Center > Users > check calling policies\n- Is Teams enabled in their M365 license?\n\n**Step 4: Is it a meeting-specific issue?**\n- Can they start their own meeting?\n- Can they join other meetings?\n- Is the meeting from an external org? (external access may be blocked)", + "next_node_id": "join_failure_result" + }, + { + "id": "join_failure_result", + "type": "decision", + "question": "What resolved the join failure?", + "help_text": "Work through the steps above", + "options": [ + {"id": "web_works", "label": "Web app works — client issue", "next_node_id": "check_teams_client"}, + {"id": "firewall", "label": "Firewall/proxy was blocking Teams", "next_node_id": "solution_firewall_teams"}, + {"id": "license", "label": "User wasn't licensed for Teams", "next_node_id": "solution_teams_license"}, + {"id": "still_broken", "label": "Nothing works — can't join anywhere", "next_node_id": "check_m365_health"} + ], + "children": [ + { + "id": "solution_firewall_teams", + "type": "solution", + "title": "Resolved: Firewall Blocking Teams", + "description": "Firewall or proxy was blocking Teams media traffic.\n\n**Ticket Notes:** Teams calls failing due to firewall blocking required ports. 
# =============================================================================
# Tree 2: OneDrive Sync Problems
# =============================================================================
def get_onedrive_sync_tree() -> dict[str, Any]:
    """Return the seed definition of the "OneDrive Sync Problems" tree.

    The result is a fresh dict on every call with four keys: ``name``,
    ``description``, ``category`` and ``tree_structure``.

    ``tree_structure`` is the root node. Nodes come in three ``type``s:

    * ``decision`` - has a ``question``, ``help_text`` and a list of
      ``options``; each option names its target through ``next_node_id``.
    * ``action`` - has a ``title``, a Markdown ``description`` and a single
      ``next_node_id``.
    * ``solution`` - terminal node with a ``title`` and ``description`` only.

    Nodes are nested under ``children``, but ``next_node_id`` targets are
    plain ids and may point into another branch (``onedrive_started`` routes
    to ``check_sync_error`` and ``fix_onedrive_reinstall``, which live under
    different parents).

    Returns:
        The tree definition as a plain ``dict`` of JSON-compatible values.
    """
    return {
        "name": "OneDrive Sync Problems",
        "description": "Troubleshoot OneDrive sync failures, stuck syncs, conflict files, and storage issues. Covers the OneDrive sync client, Known Folder Move (KFM), SharePoint library sync, and common error codes.",
        "category": "Microsoft 365",
        "tree_structure": {
            "id": "root",
            "type": "decision",
            "question": "What is the OneDrive sync status icon showing?",
            "help_text": "Look at the OneDrive cloud icon in the system tray (bottom right). Hover over it for status. The icon color and symbol indicate the issue type.",
            "options": [
                {"id": "red_x", "label": "Red X or red circle — sync error", "next_node_id": "check_sync_error"},
                {"id": "paused", "label": "Paused icon — sync is paused", "next_node_id": "check_paused_reason"},
                {"id": "stuck_processing", "label": "Sync arrows spinning but not completing (stuck)", "next_node_id": "check_stuck_sync"},
                {"id": "conflicts", "label": "Conflict files appearing (duplicates with names like 'file-username')", "next_node_id": "check_conflicts"},
                {"id": "no_icon", "label": "No OneDrive icon at all", "next_node_id": "check_onedrive_running"}
            ],
            "children": [
                # --- Branch: sync error (red X) ---
                {
                    "id": "check_sync_error",
                    "type": "action",
                    "title": "Identify the Sync Error",
                    # Only the characters OneDrive / SharePoint Online actually
                    # reject are listed; '#', '%', '&' etc. are valid there.
                    "description": "Click the OneDrive icon > click the error message to see details.\n\n**Common errors and what they mean:**\n- **\"You're out of storage\"** → OneDrive is full (check quota)\n- **\"File is in use\"** → Close the file in the app that has it open\n- **\"File name contains invalid characters\"** → Rename the file (remove \" * : < > ? / \\ |)\n- **\"Path is too long\"** → Total path must be under 400 characters\n- **\"This item might not exist or is no longer available\"** → File was deleted from server\n- **\"We can't sync this library\"** → SharePoint library issue\n\n**Get the specific error code:** OneDrive icon > Help & Settings > Settings > look for error codes.\n\n**Document:** The exact error message and code.",
                    "next_node_id": "sync_error_type"
                },
                {
                    "id": "sync_error_type",
                    "type": "decision",
                    "question": "What type of sync error is it?",
                    "help_text": "Based on the error message you found",
                    "options": [
                        {"id": "storage_full", "label": "Storage full / quota exceeded", "next_node_id": "fix_storage"},
                        {"id": "file_issue", "label": "Specific file(s) can't sync (name, size, locked)", "next_node_id": "fix_file_issues"},
                        {"id": "auth_error", "label": "Sign-in or authentication error", "next_node_id": "fix_onedrive_auth"},
                        {"id": "library_error", "label": "Can't sync SharePoint library", "next_node_id": "fix_sharepoint_sync"}
                    ],
                    "children": [
                        {
                            "id": "fix_storage",
                            "type": "action",
                            "title": "Resolve Storage Full Issue",
                            "description": "OneDrive storage is full.\n\n**Check current usage:**\n1. OneDrive icon > Help & Settings > Settings > Account tab\n2. Or go to https://onedrive.live.com > Settings > Storage\n\n**Default quotas:**\n- Business: 1 TB per user (can be increased to 5 TB by admin)\n- E3/E5: 5 TB+\n\n**Free up space:**\n1. Empty the OneDrive recycle bin (items count toward quota for 93 days)\n2. Use Files On-Demand: right-click large folders > Free up space\n3. Move large files that don't need to sync to a local drive\n\n**If user legitimately needs more space:** Admin can increase quota in M365 Admin Center > Users > OneDrive settings.",
                            "next_node_id": "solution_storage_fixed"
                        },
                        {
                            "id": "solution_storage_fixed",
                            "type": "solution",
                            "title": "Resolved: OneDrive Storage Issue",
                            "description": "OneDrive sync restored after resolving storage quota.\n\n**Ticket Notes:** OneDrive sync failed due to storage quota exceeded. Resolved by [emptying recycle bin / enabling Files On-Demand / increasing admin quota]. Current usage: [X] of [Y].\n\n**Prevention:** Enable Files On-Demand to keep storage usage low. Consider admin-side quota increases for power users."
                        },
                        {
                            "id": "fix_file_issues",
                            "type": "action",
                            "title": "Fix File-Specific Sync Errors",
                            # Same invalid-character list as check_sync_error.
                            "description": "Specific files can't sync. Fix based on the error:\n\n**Invalid characters:** Rename to remove: \" * : < > ? / \\ |\n\n**Path too long (400 char limit):**\n- Shorten folder names in the path\n- Move the file to a less deeply nested location\n\n**File too large:** Max file size is 250 GB. Compress if possible.\n\n**File in use:** Close the file in whatever app has it open.\n```\n# Find what's locking a file:\nHandle.exe \"filename\" (from Sysinternals)\n```\n\n**Unsupported files:** OneNote notebooks sync separately. PST files and database files can't sync.\n\n**After fixing:** OneDrive should auto-retry. If not, pause and resume sync.",
                            "next_node_id": "solution_file_fix"
                        },
                        {
                            "id": "solution_file_fix",
                            "type": "solution",
                            "title": "Resolved: File Sync Error",
                            "description": "OneDrive sync error resolved by fixing the problematic file.\n\n**Ticket Notes:** OneDrive sync error on file [filename]. Cause: [invalid characters / path too long / file locked / file too large]. Resolved by [renaming / moving / closing / compressing].\n\n**Prevention:** Educate users on OneDrive file name and path limitations. Share the Microsoft article on restrictions."
                        },
                        {
                            "id": "fix_onedrive_auth",
                            "type": "action",
                            "title": "Fix OneDrive Authentication Error",
                            "description": "OneDrive can't authenticate to M365.\n\n**Step 1: Unlink and relink account**\nOneDrive icon > Settings > Account > Unlink this PC\nThen sign back in.\n\n**Step 2: Check credentials**\n- Can the user sign in at https://portal.office.com ?\n- Was their password recently changed?\n- Is their account enabled in Entra ID?\n\n**Step 3: Check Conditional Access**\n- Is the device compliant? (Intune)\n- Is OneDrive blocked by a Conditional Access policy?\n- Check sign-in logs in Entra ID > User > Sign-in logs\n\n**Step 4: Clear cached credentials**\n```\ncontrol keymgr.dll\n```\nRemove any entries for Office or Microsoft.",
                            "next_node_id": "solution_auth_fix"
                        },
                        {
                            "id": "solution_auth_fix",
                            "type": "solution",
                            "title": "Resolved: OneDrive Authentication Error",
                            "description": "OneDrive authentication restored.\n\n**Ticket Notes:** OneDrive sync failing due to authentication error. Resolved by [unlinking/relinking / clearing credentials / fixing Conditional Access compliance / password reset].\n\n**If Conditional Access was the issue:** Document the policy that blocked the sign-in and whether the device was non-compliant."
                        },
                        {
                            "id": "fix_sharepoint_sync",
                            "type": "action",
                            "title": "Fix SharePoint Library Sync Error",
                            "description": "SharePoint library won't sync to OneDrive.\n\n**Common causes:**\n- Library has more than 300,000 items (sync limit)\n- Sync was started by another user and permissions changed\n- Library uses checkout/approval workflows incompatible with sync\n\n**Fixes:**\n1. Stop syncing the library: OneDrive icon > Settings > Account > Stop sync\n2. Delete the local sync folder\n3. Re-sync from SharePoint: Go to the library in browser > Sync button\n\n**If \"library too large\":**\n- Sync specific folders instead of the entire library\n- SharePoint > Library > Sync > choose folders\n\n**If permissions changed:** Verify the user still has access to the library in SharePoint.",
                            "next_node_id": "solution_sp_sync_fix"
                        },
                        {
                            "id": "solution_sp_sync_fix",
                            "type": "solution",
                            "title": "Resolved: SharePoint Library Sync",
                            "description": "SharePoint library sync restored.\n\n**Ticket Notes:** SharePoint library sync failing. Resolved by [re-syncing / syncing specific folders / fixing permissions].\n\n**Best practice:** For large libraries, sync only the folders users actively need rather than the entire library."
                        }
                    ]
                },
                # --- Branch: sync paused ---
                {
                    "id": "check_paused_reason",
                    "type": "action",
                    "title": "Check Why Sync is Paused",
                    "description": "OneDrive sync is paused. This can be manual or automatic.\n\n**Click the OneDrive icon** to see why:\n\n**Common reasons:**\n- User manually paused it (Resume button will be visible)\n- Battery saver mode paused sync (laptop on battery)\n- Metered network detected (mobile hotspot or flagged Wi-Fi)\n- Upload bandwidth limit set (Settings > Network)\n\n**To resume:**\n1. Click OneDrive icon > Resume syncing\n2. If on battery: plug in or change setting in OneDrive > Settings > General\n3. If metered: OneDrive > Settings > General > uncheck 'Pause when on metered'\n\n**If it keeps pausing itself:** Check if Group Policy is enforcing pause rules.",
                    "next_node_id": "solution_paused_fixed"
                },
                {
                    "id": "solution_paused_fixed",
                    "type": "solution",
                    "title": "Resolved: OneDrive Sync Paused",
                    "description": "OneDrive sync resumed.\n\n**Ticket Notes:** OneDrive sync was paused due to [manual pause / battery saver / metered network / bandwidth limit]. Resumed sync and adjusted settings as needed.\n\n**If battery saver is the cause:** Consider adjusting the OneDrive power settings so sync isn't paused every time the user unplugs."
                },
                # --- Branch: sync stuck ---
                {
                    "id": "check_stuck_sync",
                    "type": "action",
                    "title": "Fix Stuck OneDrive Sync",
                    "description": "Sync arrows are spinning but files aren't actually syncing.\n\n**Step 1: Check what's stuck**\nOneDrive icon > View sync problems — shows which files are stuck.\n\n**Step 2: Quick fixes (try in order):**\n1. Pause sync for 2 minutes, then resume\n2. Close and reopen OneDrive\n3. Reset OneDrive:\n```\n%localappdata%\\Microsoft\\OneDrive\\onedrive.exe /reset\n```\nIf OneDrive doesn't restart after 2 minutes:\n```\n%localappdata%\\Microsoft\\OneDrive\\onedrive.exe\n```\n\n**Step 3: If reset doesn't work:**\n- Check for a large file that's holding up the queue\n- Move the stuck file out of the OneDrive folder, let sync complete, then move it back\n- Check Event Viewer > Application log for OneDrive errors",
                    "next_node_id": "stuck_resolved"
                },
                {
                    "id": "stuck_resolved",
                    "type": "decision",
                    "question": "Did the sync resume?",
                    "help_text": "Check if files are actually uploading/downloading now",
                    "options": [
                        {"id": "yes", "label": "Yes, sync is working now", "next_node_id": "solution_stuck_fixed"},
                        {"id": "no", "label": "No, still stuck after reset", "next_node_id": "fix_onedrive_reinstall"}
                    ],
                    "children": [
                        {
                            "id": "solution_stuck_fixed",
                            "type": "solution",
                            "title": "Resolved: OneDrive Sync Unstuck",
                            # A reset does not keep sync connections intact, so
                            # the note asks the engineer to verify re-sync.
                            "description": "OneDrive sync resumed after troubleshooting.\n\n**Ticket Notes:** OneDrive sync was stuck/not completing. Resolved by [pause-resume / OneDrive reset / removing stuck file]. All files now syncing normally.\n\n**If /reset was used:** A reset triggers a full re-sync and can disconnect existing sync connections — confirm each previously synced library is syncing again. No data was lost."
                        },
                        {
                            "id": "fix_onedrive_reinstall",
                            "type": "action",
                            "title": "Reinstall OneDrive",
                            "description": "Reset didn't work — reinstall the OneDrive client.\n\n**Step 1: Uninstall**\nSettings > Apps > Microsoft OneDrive > Uninstall\n\n**Step 2: Delete leftover config**\n```\nrd /s /q \"%localappdata%\\Microsoft\\OneDrive\"\n```\n\n**Step 3: Reinstall**\nDownload from: https://www.microsoft.com/en-us/microsoft-365/onedrive/download\n\n**Step 4: Sign in and configure**\n- Sign in with the user's M365 account\n- Choose folders to sync\n- Enable Files On-Demand\n\n**Important:** No files are lost — everything is still in the cloud. The reinstall just rebuilds the local sync engine.",
                            "next_node_id": "solution_reinstall"
                        },
                        {
                            "id": "solution_reinstall",
                            "type": "solution",
                            "title": "Resolved: OneDrive Reinstalled",
                            "description": "OneDrive sync restored after reinstallation.\n\n**Ticket Notes:** OneDrive sync stuck and unresponsive to reset. Reinstalled OneDrive client. Sync restored. No data loss.\n\n**Post-install:** Verify Known Folder Move (KFM) is re-enabled if the org uses it (Desktop, Documents, Pictures backup)."
                        }
                    ]
                },
                # --- Branch: conflict copies ---
                {
                    "id": "check_conflicts",
                    "type": "action",
                    "title": "Resolve OneDrive Conflict Files",
                    "description": "Conflict files appear when two people (or two devices) edit the same file simultaneously and OneDrive can't merge the changes.\n\n**How to identify conflicts:**\n- Files named like: \"Report-JohnSmith.docx\" or \"Report (1).docx\"\n- OneDrive icon > View sync problems may list conflicts\n\n**Resolution steps:**\n1. Open both versions (original and conflict copy)\n2. Determine which version is correct (or merge changes manually)\n3. Keep the correct version, delete the conflict copy\n4. If it's an Office file: the original may have version history — check File > Info > Version History\n\n**Prevention:**\n- Use co-authoring (Office Online or desktop Office) instead of local editing\n- If editing offline frequently, conflicts are inevitable\n- For shared files: encourage users to communicate when editing",
                    "next_node_id": "solution_conflicts"
                },
                {
                    "id": "solution_conflicts",
                    "type": "solution",
                    "title": "Resolved: OneDrive File Conflicts",
                    "description": "Conflict files resolved.\n\n**Ticket Notes:** OneDrive sync created conflict copies due to simultaneous edits. Reviewed both versions with user and kept the correct one. Educated user on co-authoring.\n\n**Prevention:** Enable AutoSave in Office apps and use real-time co-authoring to prevent future conflicts. Avoid editing synced files offline unless necessary."
                },
                # --- Branch: OneDrive not running ---
                {
                    "id": "check_onedrive_running",
                    "type": "action",
                    "title": "Get OneDrive Running",
                    "description": "No OneDrive icon in the system tray means it's not running.\n\n**Step 1: Check if it's installed**\nSearch Start menu for \"OneDrive\" — if not found, it needs to be installed.\n\n**Step 2: Start OneDrive**\n```\n%localappdata%\\Microsoft\\OneDrive\\onedrive.exe\n```\n\n**Step 3: Check if it's set to start automatically**\nTask Manager > Startup tab > Look for OneDrive — should be Enabled.\n\n**Step 4: Check if it's hidden**\nClick the ^ arrow in the system tray — OneDrive may be in the overflow area.\n\n**If OneDrive isn't installed:**\nIt should be included with Windows 10/11 and Microsoft 365. If missing:\n- Download from https://www.microsoft.com/en-us/microsoft-365/onedrive/download\n- Or install Microsoft 365 Apps which includes it",
                    "next_node_id": "onedrive_started"
                },
                {
                    "id": "onedrive_started",
                    "type": "decision",
                    "question": "Is OneDrive running now?",
                    "help_text": "Check the system tray after starting it",
                    "options": [
                        {"id": "yes", "label": "Yes, it's running and syncing", "next_node_id": "solution_onedrive_started"},
                        {"id": "error", "label": "It starts but shows an error", "next_node_id": "check_sync_error"},
                        {"id": "not_installed", "label": "It's not installed", "next_node_id": "fix_onedrive_reinstall"}
                    ],
                    "children": [
                        {
                            "id": "solution_onedrive_started",
                            "type": "solution",
                            "title": "Resolved: OneDrive Started",
                            "description": "OneDrive was not running and has been started.\n\n**Ticket Notes:** OneDrive sync client was not running. Started manually and enabled auto-start. Sync is now active.\n\n**Verify:** Task Manager > Startup > OneDrive should be Enabled to prevent this from happening again."
                        }
                    ]
                }
            ]
        }
    }
Covers NDR analysis, mail flow rules, spam filtering, connector issues, DNS records (SPF/DKIM/DMARC), and message tracing.", + "category": "Microsoft 365", + "tree_structure": { + "id": "root", + "type": "decision", + "question": "What is the email issue?", + "help_text": "Identify the specific mail flow problem to narrow down the cause.", + "options": [ + {"id": "not_receiving", "label": "User is NOT receiving emails", "next_node_id": "check_inbound"}, + {"id": "not_sending", "label": "Emails are NOT being sent (bounce/NDR)", "next_node_id": "check_ndr"}, + {"id": "delayed", "label": "Emails are delayed (slow delivery)", "next_node_id": "check_delayed"}, + {"id": "going_to_spam", "label": "Legitimate emails going to junk/spam", "next_node_id": "check_spam_filter"}, + {"id": "external_reject", "label": "External recipients say our emails go to their spam", "next_node_id": "check_outbound_reputation"} + ], + "children": [ + { + "id": "check_inbound", + "type": "action", + "title": "Run a Message Trace for Inbound Mail", + "description": "User isn't receiving emails. First, verify whether mail is reaching Exchange Online.\n\n**Message Trace:**\n1. M365 Admin Center > Exchange Admin Center > Mail Flow > Message Trace\n2. Search for messages TO the user within the last 48 hours\n3. Look at the status: Delivered, Failed, FilteredAsSpam, Quarantined\n\n**PowerShell alternative:**\n```\nConnect-ExchangeOnline\nGet-MessageTrace -RecipientAddress user@domain.com -StartDate (Get-Date).AddDays(-2) -EndDate (Get-Date) | Format-Table Received,SenderAddress,Subject,Status\n```\n\n**Quick checks while trace runs:**\n- Is the user's mailbox full? (Check quota in Exchange Admin > Mailboxes)\n- Is there an Inbox rule moving emails? (OWA > Settings > Mail > Rules)\n- Is Focused Inbox hiding emails? 
(Check 'Other' tab in Outlook)", + "next_node_id": "inbound_trace_result" + }, + { + "id": "inbound_trace_result", + "type": "decision", + "question": "What does the message trace show?", + "help_text": "Look at the Status column in the trace results", + "options": [ + {"id": "delivered", "label": "Status: Delivered (but user doesn't see it)", "next_node_id": "check_inbox_rules"}, + {"id": "quarantined", "label": "Status: Quarantined or FilteredAsSpam", "next_node_id": "fix_quarantine"}, + {"id": "failed", "label": "Status: Failed", "next_node_id": "check_ndr"}, + {"id": "no_results", "label": "No results in trace (mail never reached M365)", "next_node_id": "check_mx_records"} + ], + "children": [ + { + "id": "check_inbox_rules", + "type": "action", + "title": "Check Inbox Rules and Focused Inbox", + "description": "Mail is delivered but user can't see it. Something is hiding or moving it.\n\n**Check Inbox Rules:**\n1. OWA (Outlook on the web) > Settings gear > View all Outlook settings > Mail > Rules\n2. Look for rules that delete, move, or redirect emails\n3. Also check: Sweep rules and Block list\n\n**PowerShell check:**\n```\nGet-InboxRule -Mailbox user@domain.com | Select Name,Description,Enabled\n```\n\n**Check Focused Inbox:**\n- Is the missing email in the 'Other' tab?\n- Right-click > Move to Focused to train it\n\n**Check Deleted Items:**\n- User may have accidentally deleted it\n- Check Deleted Items folder AND Recoverable Items\n\n**Check shared mailbox confusion:**\n- Is the user looking in the right mailbox?", + "next_node_id": "solution_inbox_rules" + }, + { + "id": "solution_inbox_rules", + "type": "solution", + "title": "Resolved: Email Hidden by Inbox Rule", + "description": "Email was delivered but hidden by an inbox rule or Focused Inbox.\n\n**Ticket Notes:** User reported not receiving emails. Message trace confirmed delivery. Found [inbox rule moving to folder / Focused Inbox filtering to Other / accidental deletion]. 
Resolved by [disabling rule / moving to Focused / recovering from Deleted Items].\n\n**Tip:** Review inbox rules periodically — users often create them and forget." + }, + { + "id": "fix_quarantine", + "type": "action", + "title": "Release from Quarantine / Fix Spam Filtering", + "description": "Legitimate email was quarantined by Exchange Online Protection.\n\n**Release the email:**\n1. M365 Defender > Email & Collaboration > Review > Quarantine\n2. Find the message, select it > Release\n3. Choose to release to all recipients and report as not junk\n\n**Prevent future false positives:**\n1. Add the sender to the user's Safe Senders list\n2. Or add to org-wide allow list:\n - Exchange Admin > Mail Flow > Rules > create a rule to bypass spam filtering for trusted senders/domains\n3. Or use a Transport Rule to set SCL to -1 for the sender\n\n**PowerShell:**\n```\nGet-QuarantineMessage -RecipientAddress user@domain.com | Select Subject,SenderAddress,QuarantineTypes,ReceivedTime\n```", + "next_node_id": "solution_quarantine_released" + }, + { + "id": "solution_quarantine_released", + "type": "solution", + "title": "Resolved: Email Released from Quarantine", + "description": "Legitimate email was quarantined by spam filter. Released and allow-listed.\n\n**Ticket Notes:** Email from [sender] quarantined by Exchange Online Protection. Released from quarantine. Added sender to [safe senders / transport rule allow list] to prevent recurrence.\n\n**Caution:** Be selective with allow lists. Only add trusted senders to avoid creating a spam bypass." + }, + { + "id": "check_mx_records", + "type": "action", + "title": "Verify MX Records", + "description": "No mail is reaching Exchange Online. 
Check DNS.\n\n**Verify MX record:**\n```\nnslookup -type=mx domain.com\n```\n\n**Should point to:** `domain-com.mail.protection.outlook.com` (priority 0)\n\n**If MX is wrong:**\n- Was the domain recently migrated?\n- Did someone change DNS records?\n- Check the domain registrar's DNS settings\n\n**If MX is correct but mail still not arriving:**\n- Check if the domain is verified in M365 Admin Center > Domains\n- Check for a third-party mail gateway (Mimecast, Proofpoint, etc.) that might be intercepting\n- Is there a mail flow connector in Exchange Admin that's misconfigured?", + "next_node_id": "mx_result" + }, + { + "id": "mx_result", + "type": "decision", + "question": "What did the MX record check show?", + "help_text": "Compare the MX record to what M365 expects", + "options": [ + {"id": "mx_wrong", "label": "MX record is wrong or missing", "next_node_id": "solution_fix_mx"}, + {"id": "mx_correct", "label": "MX is correct — issue is elsewhere", "next_node_id": "solution_escalate_mail"} + ], + "children": [ + { + "id": "solution_fix_mx", + "type": "solution", + "title": "Resolved: MX Record Corrected", + "description": "MX record was missing or incorrect, preventing inbound mail.\n\n**Ticket Notes:** Inbound mail not reaching M365. MX record was [missing / pointing to old server]. Updated MX to point to [tenant].mail.protection.outlook.com with priority 0.\n\n**DNS propagation:** Changes take up to 48 hours but usually within 1-4 hours. Monitor message trace for new incoming mail.\n\n**Also verify these DNS records:**\n- SPF: `v=spf1 include:spf.protection.outlook.com -all`\n- DKIM: Enabled in M365 Defender\n- DMARC: `v=DMARC1; p=quarantine; rua=mailto:dmarc@domain.com`" + }, + { + "id": "solution_escalate_mail", + "type": "solution", + "title": "Escalate: Mail Flow Issue — Advanced Investigation", + "description": "MX records are correct but mail still isn't arriving. Needs deeper investigation.\n\n**Ticket Notes:** Inbound mail not reaching user. 
MX records verified correct. Message trace shows no inbound messages. Escalating for connector and mail gateway investigation.\n\n**Next steps:**\n- Check Exchange Admin > Mail Flow > Connectors for misconfiguration\n- If third-party gateway (Mimecast, Proofpoint): check their admin console\n- Check M365 Defender for blocked sender patterns\n- Open Microsoft support ticket if needed" + } + ] + } + ] + }, + { + "id": "check_ndr", + "type": "action", + "title": "Analyze the Bounce / NDR Message", + "description": "User got a Non-Delivery Report (NDR) bounce back. The error code tells you why.\n\n**Ask user to forward the NDR to you.** Key info to find:\n- The error code (e.g., 550 5.1.1, 5.7.1, 5.2.2)\n- The remote server's rejection message\n\n**Common NDR codes:**\n- **5.1.1** — Recipient doesn't exist (typo? old email?)\n- **5.2.2** — Recipient mailbox full\n- **5.7.1** — Sender not authorized (permissions or relay denied)\n- **5.7.23** — Sender domain doesn't have valid SPF/DKIM\n- **5.4.1** — Recipient domain doesn't exist (DNS issue)\n- **5.7.606/607** — Blocked by sender reputation\n- **4.4.1** — Connection timeout (remote server unreachable)\n\n**Look up the full code:** https://learn.microsoft.com/en-us/exchange/mail-flow-best-practices/non-delivery-reports-in-exchange-online/non-delivery-reports-in-exchange-online", + "next_node_id": "ndr_type" + }, + { + "id": "ndr_type", + "type": "decision", + "question": "What NDR code did you find?", + "help_text": "Match the code to the most relevant category", + "options": [ + {"id": "recipient_issue", "label": "5.1.1 / 5.4.1 — Recipient doesn't exist", "next_node_id": "solution_recipient_invalid"}, + {"id": "permission", "label": "5.7.x — Permission or authentication denied", "next_node_id": "fix_send_permissions"}, + {"id": "mailbox_full", "label": "5.2.2 — Recipient mailbox full", "next_node_id": "solution_recipient_full"}, + {"id": "reputation", "label": "5.7.606/607 — Blocked by reputation", "next_node_id": 
"check_outbound_reputation"} + ], + "children": [ + { + "id": "solution_recipient_invalid", + "type": "solution", + "title": "Resolved: Invalid Recipient Address", + "description": "Email bounced because the recipient address doesn't exist.\n\n**Ticket Notes:** NDR 5.1.1 — recipient address not found. Verified with user: [typo in address / recipient left the company / domain changed].\n\n**Actions:**\n- Double-check spelling of the recipient address\n- If the address used to work: the recipient's mailbox may have been deleted or the domain changed\n- Contact the recipient through another channel to get their current email" + }, + { + "id": "fix_send_permissions", + "type": "action", + "title": "Fix Send Permission / Relay Issue", + "description": "5.7.x errors mean the sender isn't authorized.\n\n**Common scenarios:**\n\n**Sending as a shared mailbox:**\n- User needs 'Send As' or 'Send on Behalf' permission\n- Exchange Admin > Mailboxes > [shared mailbox] > Delegation\n\n**Sending to a distribution group:**\n- The group may restrict who can send to it\n- Exchange Admin > Groups > [group] > Message approval / Delivery management\n\n**External relay denied:**\n- If using a connector or application to send via M365\n- Check the connector settings in Exchange Admin > Mail Flow > Connectors\n\n**SPF/DKIM rejection (5.7.23):**\n- Your domain's SPF record doesn't include M365\n- Fix: Add `include:spf.protection.outlook.com` to your SPF record", + "next_node_id": "solution_send_perms" + }, + { + "id": "solution_send_perms", + "type": "solution", + "title": "Resolved: Send Permission / Relay Fixed", + "description": "Mail flow restored after fixing send permissions.\n\n**Ticket Notes:** NDR 5.7.x — send permission denied. Root cause: [missing Send As permission / distribution group restriction / SPF record missing M365 / connector misconfiguration]. Resolved by [granting permission / updating restriction / fixing DNS / updating connector]." 
+ }, + { + "id": "solution_recipient_full", + "type": "solution", + "title": "Resolved: Recipient Mailbox Full", + "description": "Email bounced because the recipient's mailbox is full.\n\n**Ticket Notes:** NDR 5.2.2 — recipient mailbox is over quota. This is on the recipient's end, not ours.\n\n**If the recipient is in our org:** Increase their mailbox quota or help them clean up. Exchange Admin > Mailboxes > [user] > Mailbox usage.\n\n**If external recipient:** Inform the sender that the recipient's mailbox is full. They'll need to contact the recipient through another channel." + } + ] + }, + { + "id": "check_delayed", + "type": "action", + "title": "Investigate Delayed Email Delivery", + "description": "Emails are arriving but with significant delays.\n\n**Run message trace to see timing:**\n```\nGet-MessageTrace -SenderAddress user@domain.com -StartDate (Get-Date).AddDays(-1) -EndDate (Get-Date) | Select Received,SenderAddress,RecipientAddress,Subject,Status\n```\n\n**Look at the 'Received' timestamps** — compare send time to delivery time.\n\n**Common delay causes:**\n1. **Mail flow rules** — A transport rule is holding mail (check Exchange Admin > Mail Flow > Rules for rules with 'defer')\n2. **Greylist** — First-time sender to a domain gets delayed (normal, resolves in 1-15 min)\n3. **Large attachments** — Big files take longer\n4. **Connectors** — If routing through a third-party gateway, check their queue\n5. **M365 service issue** — Check Service Health dashboard\n\n**If internal to internal:** Should be near-instant. Delays suggest a processing rule or health issue.", + "next_node_id": "solution_delayed" + }, + { + "id": "solution_delayed", + "type": "solution", + "title": "Resolved: Email Delivery Delay", + "description": "Identified and addressed the cause of email delays.\n\n**Ticket Notes:** Email delivery delayed by [timeframe]. Root cause: [transport rule / greylisting / large attachment / connector queue / M365 service issue]. 
Resolution: [disabled rule / waited for greylist to clear / reduced attachment / checked connector / monitored service health].\n\n**If greylisting:** This is normal behavior for first-time senders and resolves automatically." + }, + { + "id": "check_spam_filter", + "type": "action", + "title": "Fix Legitimate Email Going to Junk", + "description": "Good emails are being incorrectly flagged as spam.\n\n**Immediate fix for the user:**\n1. Right-click the email in Junk > Move to Inbox\n2. Right-click sender > Add to Safe Senders\n\n**Admin-level fixes:**\n1. M365 Defender > Policies > Anti-spam > check policy settings\n2. Create an allow list entry:\n - M365 Defender > Tenant Allow/Block Lists > add sender or domain\n3. Create a transport rule:\n - Exchange Admin > Mail Flow > Rules > Set SCL to -1 for the sender\n\n**Check why it was flagged:**\n- Message trace > click the message > see the spam confidence level (SCL)\n- Check message headers: look for `X-MS-Exchange-Organization-SCL` and `X-Forefront-Antispam-Report`", + "next_node_id": "solution_spam_fixed" + }, + { + "id": "solution_spam_fixed", + "type": "solution", + "title": "Resolved: False Positive Spam Filtering", + "description": "Legitimate email no longer going to junk.\n\n**Ticket Notes:** Legitimate emails from [sender/domain] incorrectly flagged as spam. SCL was [value]. Added to [Safe Senders / Tenant Allow list / transport rule]. Emails now delivering to inbox.\n\n**Caution:** Only allow-list trusted senders. Over-permissive rules can let actual spam through." + }, + { + "id": "check_outbound_reputation", + "type": "action", + "title": "Check Outbound Email Reputation and DNS", + "description": "External recipients say our emails go to their spam. 
This is a sender reputation issue.\n\n**Step 1: Check SPF, DKIM, DMARC records:**\n```\nnslookup -type=txt domain.com # Look for SPF\nnslookup -type=txt selector1._domainkey.domain.com # DKIM\nnslookup -type=txt _dmarc.domain.com # DMARC\n```\n\n**Expected values:**\n- SPF: `v=spf1 include:spf.protection.outlook.com -all`\n- DKIM: Should have CNAME records for selector1 and selector2\n- DMARC: `v=DMARC1; p=quarantine;` (at minimum)\n\n**Step 2: Check blocklists**\nUse https://mxtoolbox.com/blacklists.aspx — enter your domain.\n\n**Step 3: Check in M365 Defender:**\n- Email & Collaboration > Restricted entities — is the user or domain restricted?\n- A compromised account may have been used to send spam, getting the domain blacklisted.", + "next_node_id": "reputation_result" + }, + { + "id": "reputation_result", + "type": "decision", + "question": "What did the reputation check reveal?", + "help_text": "Check DNS records, blocklists, and M365 Defender", + "options": [ + {"id": "dns_issue", "label": "SPF/DKIM/DMARC records missing or wrong", "next_node_id": "solution_fix_email_dns"}, + {"id": "blocklisted", "label": "Domain or IP is on a blocklist", "next_node_id": "solution_blocklist"}, + {"id": "restricted", "label": "User/domain restricted in M365 Defender", "next_node_id": "solution_restricted_sender"}, + {"id": "all_ok", "label": "Everything checks out fine", "next_node_id": "solution_reputation_ok"} + ], + "children": [ + { + "id": "solution_fix_email_dns", + "type": "solution", + "title": "Resolved: Email DNS Records Fixed", + "description": "SPF/DKIM/DMARC records were missing or misconfigured.\n\n**Ticket Notes:** Outbound emails going to recipients' spam due to missing/incorrect email authentication DNS records.\n\n**Records added/fixed:**\n- SPF: `v=spf1 include:spf.protection.outlook.com -all`\n- DKIM: Enabled via M365 Defender > Email authentication > DKIM\n- DMARC: `v=DMARC1; p=quarantine; rua=mailto:dmarc@domain.com`\n\n**Allow 24-48 hours** for DNS 
propagation and reputation improvement.\n\n**Verify with:** https://mxtoolbox.com/emailhealth/" + }, + { + "id": "solution_blocklist", + "type": "solution", + "title": "Action Required: Domain/IP Blocklisted", + "description": "Domain or sending IP is on a third-party blocklist.\n\n**Ticket Notes:** Outbound email reputation issue. Domain/IP found on [blocklist name]. Likely caused by [spam from compromised account / misconfigured relay / new domain with no reputation].\n\n**Remediation:**\n1. Fix the root cause (secure compromised accounts, fix relay)\n2. Request delisting from each blocklist (links on mxtoolbox.com results)\n3. Most blocklists auto-delist in 24-48 hours after spam stops\n4. Check M365 Defender for compromised user accounts\n\n**For Microsoft's own blocklist:** Use the delist portal at https://sender.office.com" + }, + { + "id": "solution_restricted_sender", + "type": "solution", + "title": "Resolved: Restricted Sender Unblocked", + "description": "User was restricted from sending in M365 Defender.\n\n**This usually means the account was compromised and used to send spam.**\n\n**Immediate actions:**\n1. Reset the user's password\n2. Enable MFA if not already on\n3. Revoke active sessions: `Revoke-AzureADUserAllRefreshToken -ObjectId <user-object-id>`\n4. Remove the restriction in M365 Defender > Restricted entities\n5. Check for inbox forwarding rules the attacker may have created\n\n**Ticket Notes:** User restricted from sending due to suspected compromise. Password reset, MFA enabled, sessions revoked, forwarding rules checked. Restriction removed." + }, + { + "id": "solution_reputation_ok", + "type": "solution", + "title": "Reputation Checks OK — Monitor Situation", + "description": "All email authentication records and reputation checks look clean.\n\n**Ticket Notes:** SPF/DKIM/DMARC all correctly configured. Not on any blocklists. 
User not restricted.\n\n**Possible causes:**\n- Recipient's mail system has aggressive filtering\n- Email content is triggering spam filters (links, attachments, formatting)\n- New domain that hasn't built reputation yet\n\n**Recommendations:**\n- Ask the recipient to add us to their safe senders\n- Avoid spammy language in emails\n- Warm up new domains by sending gradually" + } + ] + } + ] + } + } + + +# ============================================================================= +# Tree 4: SharePoint Permissions Problems +# ============================================================================= +def get_sharepoint_permissions_tree() -> dict[str, Any]: + """SharePoint Permissions Problems - M365 tree.""" + return { + "name": "SharePoint Permissions Problems", + "description": "Troubleshoot SharePoint Online access denied errors, broken inheritance, external sharing failures, and permission management. Covers site-level, library-level, and item-level permissions in M365.", + "category": "Microsoft 365", + "tree_structure": { + "id": "root", + "type": "decision", + "question": "What is the SharePoint permissions issue?", + "help_text": "Identify what the user is trying to do and what error they're seeing.", + "options": [ + {"id": "access_denied", "label": "User gets 'Access Denied' or 'You need permission'", "next_node_id": "check_access_denied"}, + {"id": "external_sharing", "label": "Can't share with external users", "next_node_id": "check_external_sharing"}, + {"id": "too_much_access", "label": "Someone has access they shouldn't have", "next_node_id": "check_oversharing"}, + {"id": "request_pending", "label": "Access request sent but not approved", "next_node_id": "check_access_requests"} + ], + "children": [ + { + "id": "check_access_denied", + "type": "action", + "title": "Diagnose Access Denied Error", + "description": "User gets 'Access Denied' when visiting a SharePoint site, library, or file.\n\n**Step 1: Verify the URL is correct**\n- Is the user 
going to the right site? Typos in SharePoint URLs are common.\n\n**Step 2: Check the user's permissions**\n1. Go to the site as an admin\n2. Settings gear > Site permissions\n3. Check if the user is in any permission group (Members, Visitors, Owners)\n\n**Step 3: Check at the right level**\nSharePoint permissions can be set at:\n- **Site level** — applies to everything\n- **Library/list level** — can have unique permissions\n- **Folder level** — can override library permissions\n- **Item/file level** — most granular\n\nIf the user has site access but not file access, the file/folder probably has unique permissions (broken inheritance).\n\n**Step 4: Check Entra ID groups**\n- Permissions are often granted via security groups or M365 groups\n- The user may have been removed from a group", + "next_node_id": "access_denied_cause" + }, + { + "id": "access_denied_cause", + "type": "decision", + "question": "Why does the user lack access?", + "help_text": "Based on the permission check above", + "options": [ + {"id": "not_added", "label": "User was never added to the site", "next_node_id": "fix_add_user"}, + {"id": "unique_perms", "label": "File/folder has unique permissions (broken inheritance)", "next_node_id": "fix_unique_perms"}, + {"id": "group_removed", "label": "User was removed from a security group", "next_node_id": "fix_group_membership"}, + {"id": "conditional_access", "label": "Conditional Access is blocking access", "next_node_id": "fix_sp_conditional_access"} + ], + "children": [ + { + "id": "fix_add_user", + "type": "action", + "title": "Add User to SharePoint Site", + "description": "User needs to be granted access to the site.\n\n**Option 1: Add to a SharePoint group**\n1. Site > Settings gear > Site permissions\n2. Choose the appropriate group:\n - **Visitors** = Read only\n - **Members** = Edit (add/edit/delete content)\n - **Owners** = Full control (manage permissions, settings)\n3. 
Click the group > Add members > enter user's email\n\n**Option 2: Add via M365 group (recommended)**\nIf this is a team site connected to a M365 group:\n- Adding the user to the M365 group gives them Member access automatically\n- M365 Admin > Groups > [group] > Members > Add\n\n**Option 3: Share directly**\n- Click 'Share' on the site > enter user's email > choose permission level\n\n**Best practice:** Use groups over individual permissions for easier management.", + "next_node_id": "solution_user_added" + }, + { + "id": "solution_user_added", + "type": "solution", + "title": "Resolved: User Added to SharePoint Site", + "description": "User granted access to the SharePoint site.\n\n**Ticket Notes:** User could not access [site name]. Added to [group name] with [permission level]. User confirmed access.\n\n**Note:** Permission changes are usually immediate, but can take up to 15-30 minutes in some cases. If user still can't access, have them clear browser cache or try InPrivate/Incognito." + }, + { + "id": "fix_unique_perms", + "type": "action", + "title": "Fix Unique Permissions (Broken Inheritance)", + "description": "The file or folder has its own permissions separate from the parent.\n\n**How to check:**\n1. Go to the file/folder in SharePoint\n2. Click the (i) info pane or ··· > Manage access\n3. See who has access\n\n**Options to fix:**\n\n**Add the user to this specific item:**\n- ··· > Manage access > Grant access > enter user's email\n\n**Restore inheritance (use the parent's permissions instead):**\n1. Go to Library settings > Permissions for this library\n2. Or for a folder: ··· > Manage access > look for 'Stop sharing' or 'Restore inheritance'\n\n**PowerShell (PnP) to check:**\n```\nConnect-PnPOnline -Url https://tenant.sharepoint.com/sites/sitename\nGet-PnPListItem -List 'Documents' -Id <item-id> | Get-PnPProperty -Property RoleAssignments\n```\n\n**Caution:** Restoring inheritance removes all unique permissions. 
Make sure that's intended.", + "next_node_id": "solution_unique_perms" + }, + { + "id": "solution_unique_perms", + "type": "solution", + "title": "Resolved: Unique Permissions Fixed", + "description": "Access issue resolved by fixing item-level permissions.\n\n**Ticket Notes:** User couldn't access [file/folder] due to unique permissions (broken inheritance). Resolved by [granting direct access / restoring inheritance].\n\n**Recommendation:** Minimize unique permissions — they create management complexity. Use folder structure and library-level permissions where possible." + }, + { + "id": "fix_group_membership", + "type": "action", + "title": "Restore Group Membership", + "description": "User lost access because they were removed from a security group or M365 group.\n\n**Identify the group:**\n1. Check site permissions to see which groups have access\n2. Check the user's group memberships in Entra ID\n\n**Re-add the user:**\n- Entra ID > Users > [user] > Groups > Add membership\n- Or M365 Admin > Groups > [group] > Add member\n\n**If it was an automated removal:**\n- Check dynamic group rules in Entra ID\n- The user may not match the membership criteria anymore (department change, license change, etc.)", + "next_node_id": "solution_group_restored" + }, + { + "id": "solution_group_restored", + "type": "solution", + "title": "Resolved: Group Membership Restored", + "description": "Access restored by re-adding user to the group.\n\n**Ticket Notes:** User lost SharePoint access due to removal from [group name]. Re-added to group. If dynamic group: [explain why they were removed and whether the criteria should be updated].\n\n**Prevention:** Review group membership changes that could impact SharePoint access. Document which groups grant access to which sites." 
+ }, + { + "id": "fix_sp_conditional_access", + "type": "action", + "title": "Check Conditional Access Blocking SharePoint", + "description": "Conditional Access policies can block SharePoint access based on conditions.\n\n**Check sign-in logs:**\n1. Entra ID > Users > [user] > Sign-in logs\n2. Filter for SharePoint Online\n3. Look for 'Failure' with Conditional Access as the reason\n\n**Common blocks:**\n- Unmanaged device (not Intune-enrolled)\n- Untrusted network/location\n- Browser-only access when policy requires managed app\n- MFA not completed\n\n**Fixes depend on the policy:**\n- Enroll the device in Intune\n- Connect to the corporate network or VPN\n- Complete MFA enrollment\n- Request a policy exception from the security team", + "next_node_id": "solution_ca_fixed" + }, + { + "id": "solution_ca_fixed", + "type": "solution", + "title": "Resolved: Conditional Access Issue", + "description": "SharePoint access restored after addressing Conditional Access block.\n\n**Ticket Notes:** SharePoint access denied due to Conditional Access policy [policy name]. Cause: [unmanaged device / untrusted location / missing MFA]. Resolved by [enrolling device / connecting to VPN / completing MFA].\n\n**If the policy is too restrictive:** Escalate to the security team for review." + } + ] + }, + { + "id": "check_external_sharing", + "type": "action", + "title": "Diagnose External Sharing Issue", + "description": "User can't share a SharePoint file or site with someone outside the organization.\n\n**External sharing has multiple levels — check all of them:**\n\n**1. Tenant level:**\nSharePoint Admin Center > Policies > Sharing\n- Most permissive → Least permissive: Anyone > New/existing guests > Existing guests > Only org\n\n**2. Site level:**\nSharePoint Admin Center > Sites > Active sites > [site] > Sharing\n- Site sharing can't be more permissive than tenant level\n\n**3. 
Entra ID guest settings:**\nEntra ID > External Identities > External collaboration settings\n- Check if guest invitations are restricted\n\n**4. Sensitivity labels:**\n- If the site or file has a sensitivity label, it may block external sharing\n\n**5. Conditional Access:**\n- Guest access policies may be blocking", + "next_node_id": "external_sharing_fix" + }, + { + "id": "external_sharing_fix", + "type": "decision", + "question": "Where is external sharing being blocked?", + "help_text": "Check tenant, site, Entra ID, and sensitivity label settings", + "options": [ + {"id": "tenant_level", "label": "Tenant sharing policy is too restrictive", "next_node_id": "solution_external_sharing_tenant"}, + {"id": "site_level", "label": "This specific site has sharing disabled", "next_node_id": "solution_external_sharing_site"}, + {"id": "guest_policy", "label": "Entra ID guest settings are blocking", "next_node_id": "solution_external_sharing_guest"}, + {"id": "sensitivity", "label": "Sensitivity label is preventing sharing", "next_node_id": "solution_external_sharing_label"} + ], + "children": [ + { + "id": "solution_external_sharing_tenant", + "type": "solution", + "title": "Escalate: Tenant Sharing Policy", + "description": "The tenant-level sharing policy prevents external sharing.\n\n**Ticket Notes:** External sharing blocked by tenant-level SharePoint policy (currently set to [level]). 
This is an org-wide setting and requires admin approval to change.\n\n**Escalate to:** M365 Global Admin or SharePoint Admin to review the sharing policy.\n\n**Options for admin:**\n- SharePoint Admin Center > Policies > Sharing\n- Can enable for specific sites only via site-level settings\n- Consider 'New and existing guests' as a balanced option" + }, + { + "id": "solution_external_sharing_site", + "type": "solution", + "title": "Resolved: Site Sharing Enabled", + "description": "External sharing enabled for the specific site.\n\n**Ticket Notes:** External sharing was disabled for [site name]. Enabled sharing at the site level per [requester/approval]. Set to [sharing level].\n\n**SharePoint Admin Center > Sites > Active sites > [site] > Sharing**\n\n**Note:** Site sharing level cannot exceed the tenant level." + }, + { + "id": "solution_external_sharing_guest", + "type": "solution", + "title": "Escalate: Entra ID Guest Policy", + "description": "Guest invitations are restricted by Entra ID policies.\n\n**Ticket Notes:** External sharing blocked by Entra ID external collaboration settings. Guest invite settings are restrictive.\n\n**Escalate to:** Entra ID admin to review:\n- Entra ID > External Identities > External collaboration settings\n- Who can invite guests (members, admins only, etc.)\n- Guest user access restrictions\n- Collaboration restrictions (allow/block specific domains)" + }, + { + "id": "solution_external_sharing_label", + "type": "solution", + "title": "Resolved: Sensitivity Label Sharing Restriction", + "description": "Sensitivity label on the file/site prevents external sharing.\n\n**Ticket Notes:** External sharing blocked by sensitivity label [label name] applied to [file/site]. 
This label's policy restricts sharing to internal users only.\n\n**Options:**\n- Remove or change the sensitivity label if appropriate (requires permission)\n- Copy the content to a new file without the label\n- Request an exception from the compliance/security team\n\n**Caution:** Sensitivity labels exist for a reason — verify the content is appropriate for external sharing before removing the label." + } + ] + }, + { + "id": "check_oversharing", + "type": "action", + "title": "Audit and Remove Excess Permissions", + "description": "Someone has access they shouldn't have.\n\n**Check who has access:**\n1. Site: Settings gear > Site permissions > review all groups and direct users\n2. Library: Library settings > Permissions for this document library\n3. File: Click file > (i) pane > Manage access\n\n**Common oversharing causes:**\n- 'Anyone' sharing links still active\n- User shared with wrong people\n- 'Company-wide' link was used instead of specific people\n- Guest user still has access after project ended\n\n**To remove access:**\n- Site level: Remove from the SharePoint group\n- File/folder: Manage access > Remove the user or revoke the sharing link\n- Org-wide links: Manage access > delete the company-wide link\n\n**Audit trail:**\n- M365 Compliance Center > Audit log > search for sharing activities\n- Filter by user and date to see who shared what", + "next_node_id": "solution_oversharing_fixed" + }, + { + "id": "solution_oversharing_fixed", + "type": "solution", + "title": "Resolved: Excess Permissions Removed", + "description": "Removed unauthorized access to SharePoint content.\n\n**Ticket Notes:** [User/group] had unintended access to [resource]. Access was granted via [sharing link / direct permission / group membership]. 
Removed access and [revoked sharing links / removed from group].\n\n**Recommendations:**\n- Regular permissions audits (quarterly)\n- Set sharing link expiration dates\n- Use 'Specific people' links instead of 'Anyone' or 'Company' links\n- Enable sharing notifications for site owners" + }, + { + "id": "check_access_requests", + "type": "action", + "title": "Check Access Request Queue", + "description": "User sent an access request but it hasn't been approved.\n\n**Where access requests go:**\n1. Site owners get an email notification\n2. SharePoint site > Settings gear > Site permissions > Access Requests\n (or Settings > Site information > View all site settings > Access requests and invitations)\n\n**If no one is receiving requests:**\n- Check who is set as the access request recipient:\n Site settings > Access requests and invitations\n- The email might be going to a former employee or unmonitored mailbox\n\n**To approve pending requests:**\n1. Go to the access requests page\n2. Approve or decline each request\n3. Choose the permission level when approving\n\n**If access requests are disabled:**\n- Site settings > Access requests and invitations > Enable", + "next_node_id": "solution_access_request" + }, + { + "id": "solution_access_request", + "type": "solution", + "title": "Resolved: Access Request Processed", + "description": "Pending access request has been approved.\n\n**Ticket Notes:** User's access request for [site/resource] was pending. Approved with [permission level]. Updated access request notification email to [current admin/owner].\n\n**Prevention:** Ensure access request notifications go to an active, monitored mailbox. Consider using a shared mailbox for site ownership." 
# =============================================================================
# Tree 5: MFA / Conditional Access Lockout
# =============================================================================
def get_mfa_lockout_tree() -> dict[str, Any]:
    """Build the "MFA / Conditional Access Lockout" decision tree (M365).

    The tree is plain data. Every node is a dict with a unique ``id`` and a
    ``type`` of:

    * ``"decision"`` - has ``question``, ``help_text`` and ``options``; each
      option names the node to visit next via ``next_node_id``.
    * ``"action"`` - has ``title``, a Markdown ``description`` and a single
      ``next_node_id``.
    * ``"solution"`` - terminal node with ``title`` and ``description``.

    Nodes are nested through ``children``, but ``next_node_id`` references are
    resolved by id across the whole tree (e.g. ``check_new_device`` jumps to
    ``ca_block_cause``, which is not one of its own children).

    Returns:
        A dict with ``name``, ``description``, ``category`` and
        ``tree_structure`` (the root decision node). A fresh dict is built on
        every call, so callers may mutate the result freely.
    """
    return {
        "name": "MFA / Conditional Access Lockout",
        "description": "Troubleshoot users locked out of Microsoft 365 due to MFA issues, Conditional Access policy blocks, device compliance failures, and authentication problems. Covers Authenticator app, FIDO2 keys, phone sign-in, and Entra ID diagnostics.",
        "category": "Microsoft 365",
        "tree_structure": {
            "id": "root",
            "type": "decision",
            "question": "What MFA/authentication issue is the user experiencing?",
            "help_text": "Identify the specific lockout scenario to determine the right fix.",
            "options": [
                {"id": "lost_phone", "label": "Lost phone / can't access Authenticator app", "next_node_id": "handle_lost_mfa"},
                {"id": "mfa_denied", "label": "MFA prompt is being denied or timing out", "next_node_id": "check_mfa_denied"},
                {"id": "ca_blocked", "label": "'You can't access this' / Conditional Access block", "next_node_id": "diagnose_ca_block"},
                {"id": "new_device", "label": "Can't sign in from a new device", "next_node_id": "check_new_device"},
                {"id": "mfa_loop", "label": "Stuck in an MFA loop (keeps asking repeatedly)", "next_node_id": "fix_mfa_loop"}
            ],
            "children": [
                # --- Branch: lost / replaced MFA device -----------------------
                {
                    "id": "handle_lost_mfa",
                    "type": "action",
                    "title": "User Lost MFA Device — Restore Access",
                    "description": "User can't complete MFA because they lost their phone, got a new phone, or deleted Authenticator.\n\n**Step 1: Verify identity** — Confirm the person is who they say they are (call their known number, verify with manager, etc.)\n\n**Step 2: Check for backup methods**\nEntra ID > Users > [user] > Authentication methods\n- Do they have a backup phone number?\n- Do they have a FIDO2 key?\n- Are there other registered methods?\n\n**Step 3: If no backup methods — Admin reset MFA:**\n1. Entra ID > Users > [user] > Authentication methods\n2. Delete all registered methods (or just the lost device)\n3. The user will be prompted to register again at next sign-in\n\n**Step 4: Issue a Temporary Access Pass (recommended):**\nEntra ID > Users > [user] > Authentication methods > Add > Temporary Access Pass\n- Set expiration (e.g., 1 hour)\n- User signs in with this TAP and re-registers MFA\n\n**Important:** Never skip identity verification. MFA reset is a high-value social engineering target.",
                    "next_node_id": "mfa_reset_result"
                },
                {
                    "id": "mfa_reset_result",
                    "type": "decision",
                    "question": "Can the user sign in and re-register MFA?",
                    "help_text": "After resetting methods or issuing a TAP",
                    "options": [
                        {"id": "yes", "label": "Yes, user re-registered MFA successfully", "next_node_id": "solution_mfa_reset"},
                        {"id": "still_blocked", "label": "Still can't sign in — another policy is blocking", "next_node_id": "diagnose_ca_block"}
                    ],
                    "children": [
                        {
                            "id": "solution_mfa_reset",
                            "type": "solution",
                            "title": "Resolved: MFA Re-registered",
                            "description": "User's MFA methods have been reset and re-registered.\n\n**Ticket Notes:** User lost access to MFA device. Identity verified via [method]. Issued Temporary Access Pass. User re-registered Microsoft Authenticator on [new device].\n\n**Strongly recommend:**\n- Register at least 2 MFA methods (Authenticator + phone number)\n- Enable passwordless sign-in for better security and UX\n- Document registered methods for future reference\n\n**Security note:** Always verify identity before resetting MFA. Log this action for audit purposes."
                        }
                    ]
                },
                # --- Branch: MFA prompt denied / timing out -------------------
                {
                    "id": "check_mfa_denied",
                    "type": "action",
                    "title": "Troubleshoot MFA Prompt Failures",
                    "description": "MFA prompt is sent but user can't complete it.\n\n**Common causes:**\n\n**Authenticator push not arriving:**\n- Check phone has internet connection\n- Open Authenticator app manually (sometimes push notifications are delayed)\n- Check that notifications are enabled for Authenticator in phone settings\n- Time sync: Authenticator > three dots > Settings > Time correction for codes\n\n**TOTP code not working (6-digit code):**\n- Check the time on user's phone (must be accurate)\n- Make sure they're using the code for the right account (users with multiple accounts)\n- Codes expire every 30 seconds — don't use a code that's about to expire\n\n**Phone call not working:**\n- Is the phone number correct in Entra ID?\n- Check if the user's phone can receive calls (not in airplane mode, has signal)\n- Some VoIP numbers can't receive MFA calls\n\n**SMS not arriving:**\n- Check for full inbox on the phone\n- Try sending a regular text to verify SMS works",
                    "next_node_id": "mfa_denied_result"
                },
                {
                    "id": "mfa_denied_result",
                    "type": "decision",
                    "question": "What's preventing the MFA prompt from completing?",
                    "help_text": "Based on your investigation above",
                    "options": [
                        {"id": "push_not_arriving", "label": "Push notifications not arriving", "next_node_id": "fix_push_notifications"},
                        {"id": "code_wrong", "label": "TOTP code not accepted", "next_node_id": "fix_totp_code"},
                        {"id": "phone_issue", "label": "Phone call or SMS not working", "next_node_id": "fix_phone_mfa"},
                        {"id": "fraud_alert", "label": "User sees 'fraud alert' or didn't initiate the prompt", "next_node_id": "check_fraud_alert"}
                    ],
                    "children": [
                        {
                            "id": "fix_push_notifications",
                            "type": "action",
                            "title": "Fix Authenticator Push Notifications",
                            "description": "Push notifications aren't arriving on the user's phone.\n\n**Try in order:**\n1. Open the Authenticator app manually and approve from there\n2. Check phone notification settings for Microsoft Authenticator\n3. Force close and reopen Authenticator\n4. Check that the phone has a data/Wi-Fi connection\n5. Re-register the account in Authenticator:\n - Remove the account from Authenticator app\n - Go to https://mysignins.microsoft.com > Security info\n - Delete the old Authenticator registration\n - Add it fresh\n\n**Android-specific:** Check battery optimization settings — Authenticator may be getting killed in the background. Add it to the battery optimization exclusion list.\n\n**iPhone-specific:** Settings > Notifications > Authenticator > ensure everything is enabled.",
                            "next_node_id": "solution_push_fixed"
                        },
                        {
                            "id": "solution_push_fixed",
                            "type": "solution",
                            "title": "Resolved: Push Notifications Fixed",
                            "description": "Authenticator push notifications restored.\n\n**Ticket Notes:** MFA push notifications not arriving. Resolved by [fixing notification settings / re-registering account / fixing battery optimization]. User confirmed MFA working.\n\n**Prevention:** Advise user to test MFA periodically and register a backup method."
                        },
                        {
                            "id": "fix_totp_code",
                            "type": "action",
                            "title": "Fix TOTP Code Not Accepted",
                            "description": "The 6-digit time-based code from Authenticator isn't working.\n\n**Most common cause: Clock drift**\nAuthenticator > ··· menu > Settings > Time correction for codes > Sync now\n\n**Other checks:**\n- Make sure the user is reading the code for the correct account\n- Don't use a code in the last 5 seconds (wait for the next one)\n- If using a third-party authenticator (Google Authenticator, Authy), try Microsoft Authenticator instead\n\n**If nothing works:**\n- Remove and re-add the account in Authenticator\n- Admin: Delete the method in Entra ID and have user re-register",
                            "next_node_id": "solution_totp_fixed"
                        },
                        {
                            "id": "solution_totp_fixed",
                            "type": "solution",
                            "title": "Resolved: TOTP Code Issue Fixed",
                            "description": "TOTP code now working correctly.\n\n**Ticket Notes:** MFA time-based code not accepted. Resolved by [syncing time correction / re-registering account]. User confirmed MFA working."
                        },
                        {
                            "id": "fix_phone_mfa",
                            "type": "action",
                            "title": "Fix Phone Call / SMS MFA",
                            "description": "Phone-based MFA method isn't working.\n\n**Check in Entra ID:**\n1. Users > [user] > Authentication methods\n2. Verify the phone number is correct and current\n3. Update if wrong\n\n**Phone call issues:**\n- VoIP numbers often can't receive MFA calls\n- International numbers may be blocked by M365 tenant settings\n- User may be blocking unknown callers\n\n**SMS issues:**\n- Short code SMS may be blocked by the carrier\n- Phone storage full (can't receive new texts)\n- Some prepaid plans block short codes\n\n**Best fix:** Switch to Microsoft Authenticator push or passwordless. Phone-based MFA is less reliable and less secure.",
                            "next_node_id": "solution_phone_mfa_fixed"
                        },
                        {
                            "id": "solution_phone_mfa_fixed",
                            "type": "solution",
                            "title": "Resolved: Phone MFA Fixed",
                            "description": "Phone-based MFA restored.\n\n**Ticket Notes:** MFA phone call/SMS not working. Resolved by [updating phone number / switching from VoIP / unblocking short codes].\n\n**Recommendation:** Migrate user to Microsoft Authenticator app. It's more reliable than phone/SMS and more secure."
                        },
                        {
                            "id": "check_fraud_alert",
                            "type": "action",
                            "title": "Investigate MFA Fraud Alert",
                            # The revoke command uses Microsoft Graph PowerShell with an
                            # explicit -UserId placeholder so the snippet is runnable
                            # as-is during an incident (the AzureAD module is retired).
                            "description": "User received an MFA prompt they didn't initiate — this could be an attack.\n\n**This is a security incident.** Someone has the user's password and is trying to sign in.\n\n**Immediate actions:**\n1. Tell the user to DENY the MFA prompt (do NOT approve it)\n2. Reset the user's password immediately\n3. Revoke all active sessions:\n```\n# Entra ID > Users > [user] > Revoke sessions\n# Or PowerShell:\nRevoke-MgUserSignInSession -UserId user@domain.com\n```\n4. Check sign-in logs for the unauthorized attempt:\n Entra ID > Users > [user] > Sign-in logs\n - Where did the attempt come from? (IP, location, device)\n\n**If the user already approved a fraudulent prompt:** Treat as a compromised account. Check for inbox forwarding rules, new MFA methods added by the attacker, and data exfiltration.",
                            "next_node_id": "solution_fraud_alert"
                        },
                        {
                            "id": "solution_fraud_alert",
                            "type": "solution",
                            "title": "Security Incident: Unauthorized MFA Prompt",
                            "description": "User received MFA prompts they didn't initiate — password is compromised.\n\n**Ticket Notes:** SECURITY INCIDENT — User received unauthorized MFA prompt. Password has been compromised. Immediate actions taken:\n- Password reset\n- All sessions revoked\n- Sign-in logs reviewed (attempt from [IP/location])\n- Inbox forwarding rules checked\n- MFA methods audited for unauthorized additions\n\n**Follow-up required:**\n- Check for data access during compromised period\n- Review other accounts with the same password\n- Enable number matching in Authenticator (prevents MFA fatigue attacks)\n- Consider Conditional Access policies to block risky sign-ins"
                        }
                    ]
                },
                # --- Branch: Conditional Access block -------------------------
                {
                    "id": "diagnose_ca_block",
                    "type": "action",
                    "title": "Diagnose Conditional Access Block",
                    "description": "User sees 'You cannot access this right now' or 'Access has been blocked.'\n\n**Step 1: Check sign-in logs for the exact reason**\n1. Entra ID > Users > [user] > Sign-in logs\n2. Find the failed sign-in\n3. Click it > Conditional Access tab\n4. It will show which policy blocked and why\n\n**Step 2: Use the 'What If' tool**\nEntra ID > Conditional Access > What If\n- Select the user, app, and conditions\n- It shows which policies would apply and their result\n\n**Common block reasons:**\n- Device not compliant (Intune)\n- Not on a trusted network\n- Risky sign-in detected (Identity Protection)\n- Unsupported platform (e.g., Linux when only Windows/Mac allowed)\n- Browser not supported\n- Legacy authentication attempt",
                    "next_node_id": "ca_block_cause"
                },
                {
                    "id": "ca_block_cause",
                    "type": "decision",
                    "question": "What is the Conditional Access policy blocking on?",
                    "help_text": "Check the sign-in log's Conditional Access tab",
                    "options": [
                        {"id": "device_compliance", "label": "Device not compliant (Intune)", "next_node_id": "fix_device_compliance"},
                        {"id": "location", "label": "Untrusted location / network", "next_node_id": "fix_location_ca"},
                        {"id": "risky_signin", "label": "Risky sign-in detected", "next_node_id": "fix_risky_signin"},
                        {"id": "legacy_auth", "label": "Legacy authentication blocked", "next_node_id": "fix_legacy_auth"}
                    ],
                    "children": [
                        {
                            "id": "fix_device_compliance",
                            "type": "action",
                            "title": "Fix Device Compliance for Conditional Access",
                            "description": "The device doesn't meet Intune compliance requirements.\n\n**Check compliance status:**\n1. On the device: Settings > Accounts > Access work or school > Info > Sync\n2. Intune Admin > Devices > [device] > Device compliance\n\n**Common compliance failures:**\n- Windows updates not current\n- BitLocker not enabled\n- Antivirus not running or out of date\n- OS version too old\n- Device not enrolled in Intune at all\n\n**Fixes:**\n- Install pending Windows updates\n- Enable BitLocker: Control Panel > BitLocker Drive Encryption\n- Verify antivirus is running and updated\n- Re-sync Intune: Settings > Accounts > Access work or school > Info > Sync\n\n**If device isn't enrolled:** Settings > Accounts > Access work or school > Connect > sign in with work account",
                            "next_node_id": "solution_compliance_fixed"
                        },
                        {
                            "id": "solution_compliance_fixed",
                            "type": "solution",
                            "title": "Resolved: Device Compliance Restored",
                            "description": "Device is now compliant and Conditional Access allows sign-in.\n\n**Ticket Notes:** Sign-in blocked by Conditional Access due to device non-compliance. Device was [not enrolled / missing updates / BitLocker disabled / AV outdated]. Fixed compliance issue and synced with Intune. User confirmed access.\n\n**Note:** Compliance check can take up to 8 hours to update. Force a sync to speed it up."
                        },
                        {
                            "id": "fix_location_ca",
                            "type": "action",
                            "title": "Address Location-Based Block",
                            "description": "User is signing in from a location not on the trusted network list.\n\n**Common scenarios:**\n- Working from home (home IP not trusted)\n- Traveling (hotel/airport Wi-Fi)\n- Using mobile data\n- VPN disconnected\n\n**Fixes:**\n1. Connect to the corporate VPN (if available)\n2. If working from home is normal: ask admin to add the policy exception\n3. Check if the policy allows compliant devices from any location\n\n**Admin fix (if appropriate):**\nEntra ID > Conditional Access > Named locations\n- Add trusted IP ranges or countries\n- Modify the policy to allow compliant devices from any location",
                            "next_node_id": "solution_location_fixed"
                        },
                        {
                            "id": "solution_location_fixed",
                            "type": "solution",
                            "title": "Resolved: Location-Based Access",
                            "description": "Access restored after addressing location-based Conditional Access block.\n\n**Ticket Notes:** Sign-in blocked by location-based Conditional Access policy. User was at [location]. Resolved by [connecting to VPN / adding location to trusted list / policy exception].\n\n**If policy change was needed:** Document the change and get approval from the security team."
                        },
                        {
                            "id": "fix_risky_signin",
                            "type": "action",
                            "title": "Handle Risky Sign-In Block",
                            "description": "Entra ID Identity Protection flagged this sign-in as risky.\n\n**Check the risk details:**\nEntra ID > Security > Risky sign-ins > find the event\n- Risk level: Low, Medium, High\n- Risk type: Unfamiliar location, impossible travel, malware-linked IP, etc.\n\n**If it's a legitimate sign-in (false positive):**\n1. Entra ID > Security > Risky sign-ins\n2. Select the sign-in > Confirm safe\n3. This teaches the system and clears the block\n\n**If it might be real:**\n- Reset the user's password\n- Check for compromise indicators\n- Review sign-in history for other suspicious activity\n\n**Admin can also:**\nEntra ID > Security > Risky users > [user] > Dismiss risk (if confirmed safe)",
                            "next_node_id": "solution_risky_signin"
                        },
                        {
                            "id": "solution_risky_signin",
                            "type": "solution",
                            "title": "Resolved: Risky Sign-In Addressed",
                            "description": "Risky sign-in alert resolved.\n\n**Ticket Notes:** Sign-in blocked by Identity Protection risk policy. Risk type: [type]. Risk level: [level]. Determined to be [legitimate / suspicious]. Action taken: [confirmed safe / password reset / investigation].\n\n**If confirmed safe:** Educate user that signing in from new locations or devices may trigger risk alerts. This is expected and protective."
                        },
                        {
                            "id": "fix_legacy_auth",
                            "type": "action",
                            "title": "Fix Legacy Authentication Block",
                            "description": "User is trying to authenticate using a legacy protocol that's been blocked.\n\n**Legacy auth protocols (should be blocked):**\n- POP3, IMAP, SMTP AUTH\n- Older Office clients (Office 2010 and earlier)\n- ActiveSync with basic auth\n\n**How to identify:**\nSign-in logs > Client app column will show 'Other clients' or specific legacy protocol names.\n\n**Fixes:**\n- Update Office to a modern version (2016+ with modern auth, ideally M365 Apps)\n- Update email client to support OAuth/modern auth\n- If using IMAP/POP for a specific need: consider an app password (if allowed by policy)\n\n**If it's a printer or scanner using SMTP:**\n- Configure it to use SMTP AUTH with OAuth or use a relay connector\n- Or use a service account with an exception (security team approval needed)",
                            "next_node_id": "solution_legacy_auth"
                        },
                        {
                            "id": "solution_legacy_auth",
                            "type": "solution",
                            "title": "Resolved: Legacy Auth Updated",
                            "description": "Legacy authentication issue resolved by updating to modern auth.\n\n**Ticket Notes:** Sign-in blocked due to legacy authentication protocol [protocol]. Resolved by [updating Office / switching to modern auth client / configuring SMTP relay].\n\n**Important:** Legacy auth should remain blocked. It's a major security vulnerability (no MFA support). Any exceptions should be temporary and documented."
                        }
                    ]
                },
                # --- Branch: new device sign-in -------------------------------
                # Re-uses the Conditional Access cause decision rather than
                # duplicating it, hence the jump to "ca_block_cause".
                {
                    "id": "check_new_device",
                    "type": "action",
                    "title": "Troubleshoot New Device Sign-In",
                    "description": "User can't sign into M365 from a new computer or phone.\n\n**Common blockers for new devices:**\n\n1. **MFA re-verification required** — Normal. User needs their MFA device.\n2. **Device enrollment required** — Conditional Access may require Intune enrollment.\n3. **Browser not supported** — Some policies block non-Edge/Chrome browsers.\n4. **Risk detection** — New device triggers Identity Protection alert.\n\n**Check sign-in logs** to see exactly what's blocking:\nEntra ID > Users > [user] > Sign-in logs > most recent failed attempt\n\n**If Intune enrollment is required:**\n1. Go to Settings > Accounts > Access work or school > Connect\n2. Sign in with work credentials\n3. Follow the enrollment prompts\n4. Wait for compliance check (can take up to 30 minutes)\n5. Try signing in again",
                    "next_node_id": "ca_block_cause"
                },
                # --- Branch: repeated MFA prompts -----------------------------
                {
                    "id": "fix_mfa_loop",
                    "type": "action",
                    "title": "Fix MFA Authentication Loop",
                    "description": "User keeps getting prompted for MFA repeatedly — completes it but gets asked again.\n\n**Common causes:**\n\n**Browser cookie issues:**\n1. Clear browser cookies and cache\n2. Try InPrivate/Incognito mode\n3. Try a different browser\n\n**Multiple Conditional Access policies conflicting:**\n- Check sign-in logs — are multiple policies triggering?\n- Use the What If tool to see all policies that apply\n\n**Session token issues:**\n1. Close ALL browser tabs\n2. Clear cookies for login.microsoftonline.com and login.windows.net\n3. Sign in fresh\n\n**If using an app (not browser):**\n- Sign out of the app completely\n- Clear app data/cache\n- Sign in again\n\n**Persistent MFA prompt on every sign-in:**\n- Check if the 'remember MFA' setting is enabled for the tenant\n- Entra ID > Conditional Access > [policy] > Session > Sign-in frequency",
                    "next_node_id": "solution_mfa_loop"
                },
                {
                    "id": "solution_mfa_loop",
                    "type": "solution",
                    "title": "Resolved: MFA Loop Fixed",
                    "description": "MFA authentication loop resolved.\n\n**Ticket Notes:** User stuck in MFA re-prompt loop. Resolved by [clearing browser cookies / fixing conflicting CA policies / clearing app cache / adjusting sign-in frequency].\n\n**If sign-in frequency policy is too aggressive:** Discuss with security team about adjusting the policy. Requiring MFA every hour causes user frustration without proportional security benefit for most scenarios."
                }
            ]
        }
    }
tab\n\n**Entra ID:**\nUsers > [user] > Licenses\n\n**PowerShell:**\n```\nConnect-MgGraph -Scopes 'User.Read.All'\nGet-MgUserLicenseDetail -UserId user@domain.com | Select SkuPartNumber\n```\n\n**Check if the license is direct or group-based:**\n- In Entra ID > Users > [user] > Licenses\n- The 'Assignment path' column shows 'Direct' or the group name\n\n**Common M365 license SKUs:**\n- ENTERPRISEPACK = Office 365 E3\n- SPE_E5 = Microsoft 365 E5\n- SPE_E3 = Microsoft 365 E3\n- EXCHANGESTANDARD = Exchange Online Plan 1\n- TEAMS_EXPLORATORY = Teams Exploratory", + "next_node_id": "license_status_result" + }, + { + "id": "license_status_result", + "type": "decision", + "question": "What does the license check show?", + "help_text": "Review the user's assigned licenses", + "options": [ + {"id": "no_license_at_all", "label": "No license assigned at all", "next_node_id": "assign_license"}, + {"id": "wrong_license", "label": "Has a license but it's the wrong one", "next_node_id": "fix_wrong_license"}, + {"id": "has_license", "label": "Has the right license but apps still not working", "next_node_id": "check_service_plans"} + ], + "children": [ + { + "id": "assign_license", + "type": "action", + "title": "Assign a License to the User", + "description": "User needs a license assigned.\n\n**Option 1: Direct assignment**\n1. M365 Admin Center > Users > [user] > Licenses and apps\n2. Check the appropriate license\n3. Expand the license to enable/disable specific service plans if needed\n4. Save\n\n**Option 2: Group-based assignment (recommended for scale)**\n1. Entra ID > Groups > [licensing group]\n2. Add the user to the group\n3. The license auto-assigns based on group membership\n\n**PowerShell:**\n```\nSet-MgUserLicense -UserId user@domain.com -AddLicenses @{SkuId=''} -RemoveLicenses @()\n```\n\n**Important:** The user needs a Usage Location set in their profile before a license can be assigned. 
Check: Entra ID > Users > [user] > Properties > Usage location.", + "next_node_id": "solution_license_assigned" + }, + { + "id": "solution_license_assigned", + "type": "solution", + "title": "Resolved: License Assigned", + "description": "License successfully assigned to the user.\n\n**Ticket Notes:** User had no M365 license. Assigned [license name] via [direct / group-based assignment]. Services should be available within [minutes to 24 hours depending on service].\n\n**Provisioning times:**\n- Exchange mailbox: Usually within 15-30 minutes\n- Teams: Usually within minutes\n- OneDrive: First access provisions the site (up to 24 hours)\n- Office apps: Available to install immediately\n\n**User action needed:** Sign out and back into Office apps to pick up the new license." + }, + { + "id": "fix_wrong_license", + "type": "action", + "title": "Change User's License", + "description": "User has the wrong license type. Need to swap it.\n\n**Important:** Removing a license removes access to its services. Swap carefully.\n\n**Best practice: Add new license FIRST, then remove old one.**\n\n1. M365 Admin Center > Users > [user] > Licenses and apps\n2. Check the new license\n3. Uncheck the old license\n4. Save (both changes happen atomically)\n\n**If group-based:**\n- Add user to the new license group\n- Wait for the new license to apply\n- Remove from the old license group\n\n**Watch for:**\n- Exchange mailbox data — swapping licenses can temporarily disconnect the mailbox\n- OneDrive data — user's files are preserved but access may hiccup\n- Teams — may need to re-sign in\n\n**Data is NOT deleted immediately.** M365 keeps data for 30 days after license removal.", + "next_node_id": "solution_license_swapped" + }, + { + "id": "solution_license_swapped", + "type": "solution", + "title": "Resolved: License Changed", + "description": "User's license has been updated.\n\n**Ticket Notes:** Changed user's license from [old license] to [new license]. 
New license assigned first before removing old to prevent service disruption.\n\n**Follow-up:** Have user sign out and back into all Office apps and Teams to pick up the new license. Verify all services are accessible." + } + ] + }, + { + "id": "check_assignment_error", + "type": "action", + "title": "Diagnose License Assignment Error", + "description": "License assignment is failing. Check for common errors.\n\n**Check in Entra ID:**\nUsers > [user] > Licenses — look for error indicators\n\n**For group-based licensing errors:**\nEntra ID > Groups > [group] > Licenses > check for users in error state\n\n**Common errors:**\n\n1. **\"Usage location is not specified\"**\n - User needs a Usage Location set\n - Entra ID > Users > [user] > Properties > Edit > Usage location\n\n2. **\"Conflicting service plans\"**\n - Two licenses include the same service (e.g., two different Exchange plans)\n - Disable the conflicting service plan in one of the licenses\n\n3. **\"Not enough licenses\"**\n - All purchased seats are assigned\n - Buy more or reclaim unused licenses\n\n4. 
**\"Dependent service plan disabled\"**\n - A required sub-service was disabled (e.g., SharePoint needed for OneDrive)\n - Re-enable the dependent service plan", + "next_node_id": "assignment_error_type" + }, + { + "id": "assignment_error_type", + "type": "decision", + "question": "What is the assignment error?", + "help_text": "Based on the error message in Entra ID", + "options": [ + {"id": "no_location", "label": "Usage location not specified", "next_node_id": "fix_usage_location"}, + {"id": "conflict", "label": "Conflicting service plans", "next_node_id": "fix_service_conflict"}, + {"id": "no_seats", "label": "Not enough licenses available", "next_node_id": "check_license_count"}, + {"id": "dependency", "label": "Dependent service plan disabled", "next_node_id": "fix_dependency"} + ], + "children": [ + { + "id": "fix_usage_location", + "type": "action", + "title": "Set Usage Location", + "description": "M365 requires a Usage Location before licenses can be assigned. This determines which services are available based on regional availability.\n\n**Set the location:**\n1. Entra ID > Users > [user] > Properties > Edit\n2. Set \"Usage location\" to the appropriate country\n3. Save\n\n**PowerShell:**\n```\nUpdate-MgUser -UserId user@domain.com -UsageLocation 'US'\n```\n\n**For bulk users:** Check if your user provisioning (Entra Connect, SCIM, etc.) is setting this automatically. If not, add it to your onboarding process.\n\n**After setting location:** Retry the license assignment.", + "next_node_id": "solution_location_set" + }, + { + "id": "solution_location_set", + "type": "solution", + "title": "Resolved: Usage Location Set", + "description": "Usage location set and license assigned successfully.\n\n**Ticket Notes:** License assignment failed due to missing Usage Location. Set to [country]. License [name] now assigned successfully.\n\n**Prevention:** Ensure Usage Location is set during user onboarding. 
Add it to your new user checklist or automate it in your provisioning workflow." + }, + { + "id": "fix_service_conflict", + "type": "action", + "title": "Resolve Service Plan Conflict", + "description": "Two licenses are trying to assign the same service plan, causing a conflict.\n\n**Example:** User has Exchange Online Plan 2 from one license AND an E3 license that also includes Exchange. These conflict.\n\n**How to fix:**\n1. Entra ID > Users > [user] > Licenses\n2. Click on one of the conflicting licenses\n3. Disable the overlapping service plan (toggle it off)\n4. Save\n5. The other license assignment should now succeed\n\n**Common conflicts:**\n- Exchange Online Plan 1 vs Plan 2\n- Teams Exploratory vs E3/E5 Teams\n- Power BI Free vs Power BI Pro\n- Visio/Project standalone vs E5 included\n\n**For group-based licensing:** You may need to create a different group with specific service plans disabled to avoid conflicts.", + "next_node_id": "solution_conflict_resolved" + }, + { + "id": "solution_conflict_resolved", + "type": "solution", + "title": "Resolved: Service Plan Conflict Fixed", + "description": "License conflict resolved by disabling the overlapping service plan.\n\n**Ticket Notes:** License assignment failed due to conflicting service plans ([plan A] from [license A] conflicting with [plan B] from [license B]). Disabled [plan] from [license]. Both licenses now assigned.\n\n**Prevention:** When adding new license SKUs to the tenant, review which service plans overlap with existing assignments." + }, + { + "id": "fix_dependency", + "type": "action", + "title": "Fix Dependent Service Plan", + "description": "A required service plan was disabled, preventing another from working.\n\n**Common dependencies:**\n- OneDrive requires SharePoint Online\n- Teams requires SharePoint Online and Exchange Online\n- Office apps require various background services\n\n**Fix:**\n1. Entra ID > Users > [user] > Licenses\n2. Click the license\n3. 
Enable the required service plan that was disabled\n4. Save\n\n**The error message usually tells you which plan is needed.** Re-enable it and the dependent service will work.", + "next_node_id": "solution_dependency_fixed" + }, + { + "id": "solution_dependency_fixed", + "type": "solution", + "title": "Resolved: Dependent Service Enabled", + "description": "Required service plan re-enabled, resolving the dependency error.\n\n**Ticket Notes:** License service plan [dependent plan] required [prerequisite plan] to be enabled. Re-enabled [prerequisite]. All services now functional.\n\n**Caution:** Be careful when disabling individual service plans within a license — dependencies aren't always obvious." + } + ] + }, + { + "id": "check_service_plans", + "type": "action", + "title": "Check Individual Service Plans", + "description": "User has the right license but a specific feature or app is unavailable.\n\n**Check which service plans are enabled:**\n1. Entra ID > Users > [user] > Licenses\n2. Click the license > see the list of service plans with on/off toggles\n3. Is the service for the missing feature turned ON?\n\n**Examples:**\n- Teams missing? → Check 'Microsoft Teams' service plan\n- Can't install Office? → Check 'Microsoft 365 Apps for Enterprise'\n- No Power BI? → Check 'Power BI' service plan\n- OneDrive not working? 
→ Check 'SharePoint Online' (OneDrive depends on it)\n\n**If the service plan is ON but feature still missing:**\n- Wait up to 24 hours for provisioning\n- Have user sign out and back in\n- Check for a Conditional Access policy blocking the specific app", + "next_node_id": "service_plan_result" + }, + { + "id": "service_plan_result", + "type": "decision", + "question": "Was the service plan disabled?", + "help_text": "Check the service plan toggles in the license", + "options": [ + {"id": "was_disabled", "label": "Yes, the service plan was toggled off — enabled it", "next_node_id": "solution_service_plan_enabled"}, + {"id": "already_on", "label": "Service plan is on but feature still missing", "next_node_id": "solution_wait_provisioning"} + ], + "children": [ + { + "id": "solution_service_plan_enabled", + "type": "solution", + "title": "Resolved: Service Plan Enabled", + "description": "Missing feature restored by enabling the service plan.\n\n**Ticket Notes:** User missing [feature]. Service plan [plan name] was disabled in [license name]. Enabled the service plan. Feature should be available within [minutes to 24 hours].\n\n**User action:** Sign out and back into all M365 apps to pick up the change." + }, + { + "id": "solution_wait_provisioning", + "type": "solution", + "title": "Service Plan Enabled — Wait for Provisioning", + "description": "Service plan is correctly enabled but the feature isn't available yet.\n\n**Ticket Notes:** User missing [feature]. Service plan verified as enabled. Likely a provisioning delay.\n\n**Provisioning times:**\n- Exchange mailbox: 15-30 minutes\n- Teams: Minutes\n- OneDrive site: Up to 24 hours (first access)\n- Office app downloads: Immediate\n- SharePoint sites: Minutes to hours\n\n**If still not working after 24 hours:** Open a Microsoft support ticket." 
+ } + ] + }, + { + "id": "check_license_count", + "type": "action", + "title": "Check Available License Count", + "description": "Organization may be out of available license seats.\n\n**Check license inventory:**\n1. M365 Admin Center > Billing > Licenses\n2. See total, assigned, and available for each SKU\n\n**PowerShell:**\n```\nGet-MgSubscribedSku | Select SkuPartNumber,\n @{N='Total';E={$_.PrepaidUnits.Enabled}},\n @{N='Assigned';E={$_.ConsumedUnits}},\n @{N='Available';E={$_.PrepaidUnits.Enabled - $_.ConsumedUnits}}\n```\n\n**If no licenses available:**\n1. **Reclaim unused licenses:** Find users who are licensed but inactive\n - M365 Admin Center > Reports > Usage\n - Look for users with no activity in 90+ days\n2. **Buy more licenses:** Billing > Purchase services\n3. **Reassign from a departing user:** Remove their license first\n\n**For group-based licensing:** The group will show users in 'error' state if no seats are available.", + "next_node_id": "license_count_result" + }, + { + "id": "license_count_result", + "type": "decision", + "question": "Are there licenses available to assign?", + "help_text": "Check the available count in M365 Admin", + "options": [ + {"id": "reclaimed", "label": "Found unused licenses to reclaim", "next_node_id": "solution_license_reclaimed"}, + {"id": "need_purchase", "label": "Need to purchase more licenses", "next_node_id": "solution_purchase_licenses"} + ], + "children": [ + { + "id": "solution_license_reclaimed", + "type": "solution", + "title": "Resolved: Licenses Reclaimed", + "description": "Freed up licenses by reclaiming from inactive users.\n\n**Ticket Notes:** No available [license name] seats. Identified [X] inactive users and reclaimed their licenses. Assigned one to [requesting user].\n\n**Recommendation:** Set up a quarterly license review process to reclaim unused seats. M365 usage reports help identify inactive users." 
def get_bandwidth_slow_internet_tree() -> dict[str, Any]:
    """Build the "Bandwidth / Slow Internet" troubleshooting decision tree.

    The returned dict has four top-level keys: ``name``, ``description``,
    ``category`` and ``tree_structure``. ``tree_structure`` is the root node;
    every node carries an ``id`` and a ``type`` that is one of:

    * ``"decision"`` - has ``question``, ``help_text``, ``options`` (each
      option names a ``next_node_id``) and ``children``.
    * ``"action"`` - has ``title``, ``description`` (markdown) and a
      ``next_node_id``.
    * ``"solution"`` - has ``title`` and ``description``; it is a leaf.

    ``next_node_id`` values are looked up by id across the whole tree, not
    only among a node's own children (for example ``single_device_result``
    routes to ``check_lan_saturation``, which is a child of the root).

    NOTE(review): ``lan_result``/``lan_ok`` routes to ``check_wan_isp`` and
    ``wan_result``/``wan_ok`` routes back to ``check_lan_saturation``, so the
    graph contains a cycle - confirm the tree consumer tolerates that.

    Returns:
        A freshly built dict on every call; callers may mutate it freely.
    """
    return {
        "name": "Bandwidth / Slow Internet",
        "description": "Diagnose and resolve slow internet or bandwidth issues at client sites. Covers ISP problems, LAN saturation, QoS misconfiguration, bandwidth hogs, and speed test analysis for MSP-managed environments.",
        "category": "Networking",
        "tree_structure": {
            "id": "root",
            "type": "decision",
            "question": "How many users are affected by the slow internet?",
            "help_text": "Scope determines whether this is a single-device issue, LAN problem, or ISP/WAN issue.",
            "options": [
                {"id": "one_user", "label": "Just one user / one device", "next_node_id": "check_single_device"},
                {"id": "several_users", "label": "Several users at the same location", "next_node_id": "check_lan_saturation"},
                {"id": "everyone", "label": "Everyone at the site is slow", "next_node_id": "check_wan_isp"},
                {"id": "intermittent", "label": "Intermittent — comes and goes throughout the day", "next_node_id": "check_intermittent"}
            ],
            "children": [
                # ---- Branch: a single user / device is slow ----
                {
                    "id": "check_single_device",
                    "type": "action",
                    "title": "Diagnose Single Device Slow Internet",
                    "description": "Only one user is affected — likely a local device or connection issue.\n\n**Step 1: Speed test on the affected device**\nhttps://www.speedtest.net — note download, upload, jitter, and latency.\n\n**Step 2: Compare to another device on the same network**\nRun a speed test from another computer nearby. If the other device is fast, the issue is device-specific.\n\n**Step 3: Check connection type**\n- Wi-Fi or Ethernet? If Wi-Fi, try Ethernet to rule it out.\n- What speed does the NIC show?\n```\n# Check link speed:\nGet-NetAdapter | Select Name, LinkSpeed, Status\n```\n\n**Step 4: Check for bandwidth hogs on the device**\n- Task Manager > Performance > Open Resource Monitor > Network tab\n- Look for processes with high network usage (OneDrive sync, Windows Update, cloud backup, etc.)\n\n**Step 5: Check NIC driver**\n- Device Manager > Network adapters > check for warnings\n- Update or reinstall the NIC driver",
                    "next_node_id": "single_device_result"
                },
                {
                    "id": "single_device_result",
                    "type": "decision",
                    "question": "What did the single-device check reveal?",
                    "help_text": "Compare speed test results and device checks",
                    "options": [
                        {"id": "wifi_issue", "label": "Wi-Fi is the problem — Ethernet is fast", "next_node_id": "fix_single_wifi"},
                        {"id": "hog_found", "label": "Found a bandwidth hog process", "next_node_id": "fix_bandwidth_hog"},
                        {"id": "nic_issue", "label": "NIC showing errors or slow link speed", "next_node_id": "fix_nic_issue"},
                        {"id": "device_ok", "label": "Device seems fine — issue may be network-wide", "next_node_id": "check_lan_saturation"}
                    ],
                    "children": [
                        {
                            "id": "fix_single_wifi",
                            "type": "action",
                            "title": "Fix Single Device Wi-Fi Performance",
                            "description": "Ethernet is fast but Wi-Fi is slow on this device.\n\n**Check Wi-Fi signal strength:**\n```\nnetsh wlan show interfaces\n```\nLook at 'Signal' percentage — below 70% is problematic.\n\n**Check Wi-Fi band:**\n- 2.4GHz = longer range but slower and more congested\n- 5GHz = faster but shorter range\n- Force 5GHz: Network adapter properties > Advanced > Preferred Band\n\n**Try these fixes:**\n1. Move closer to the access point\n2. Forget and reconnect to the network\n3. Reset the Wi-Fi adapter:\n```\nnetsh winsock reset\nnetsh int ip reset\nipconfig /flushdns\n```\n4. Update or reinstall the wireless driver\n5. Check for interference (Bluetooth, USB 3.0 devices near the antenna)\n\n**If the laptop has an old/cheap Wi-Fi adapter:** An external USB Wi-Fi adapter (Wi-Fi 6) can be a quick fix.",
                            "next_node_id": "solution_single_wifi"
                        },
                        {
                            "id": "solution_single_wifi",
                            "type": "solution",
                            "title": "Resolved: Single Device Wi-Fi Issue",
                            "description": "Wi-Fi performance improved on the affected device.\n\n**Ticket Notes:** User experiencing slow internet. Isolated to Wi-Fi on their device — Ethernet was full speed. Resolved by [switching to 5GHz / moving closer to AP / updating driver / resetting adapter].\n\n**If recurring:** Consider a USB Wi-Fi 6 adapter or relocating the user closer to an AP."
                        },
                        {
                            # Also the target of intermittent_result -> "scheduled_job".
                            "id": "fix_bandwidth_hog",
                            "type": "action",
                            "title": "Address Bandwidth Hog Process",
                            "description": "A process on the device is consuming excessive bandwidth.\n\n**Common offenders:**\n- **OneDrive/SharePoint sync** — large initial sync or many changes\n- **Windows Update** — downloading feature updates (can be several GB)\n- **Cloud backup** (Veeam agent, Carbonite, etc.)\n- **Browser tabs** — streaming video, large downloads\n- **Antivirus** — cloud scanning or definition updates\n- **Teams/Zoom** — video call running in background\n\n**Fixes:**\n- OneDrive: Pause sync or set upload bandwidth limit (OneDrive > Settings > Network)\n- Windows Update: Pause for 7 days if it's disrupting work\n- Cloud backup: Schedule outside business hours\n- Browser: Close unnecessary tabs, check for extensions consuming bandwidth\n\n**Long-term:** Implement QoS or traffic shaping at the firewall to protect critical traffic from bulk transfers.",
                            "next_node_id": "solution_bandwidth_hog"
                        },
                        {
                            "id": "solution_bandwidth_hog",
                            "type": "solution",
                            "title": "Resolved: Bandwidth Hog Identified",
                            "description": "Bandwidth restored after addressing the high-usage process.\n\n**Ticket Notes:** Slow internet on user's device caused by [process name] consuming bandwidth. [Paused sync / rescheduled backup / closed streaming tab / paused Windows Update].\n\n**Prevention:** Configure OneDrive bandwidth limits org-wide via Group Policy. Schedule backups and updates outside business hours."
                        },
                        {
                            "id": "fix_nic_issue",
                            "type": "action",
                            "title": "Fix NIC / Cable Issue",
                            # The error-counter command uses the property names that
                            # Get-NetAdapterStatistics actually exposes
                            # (ReceivedPacketErrors / OutboundPacketErrors /
                            # ReceivedDiscardedPackets); shorter made-up names would
                            # select as empty columns.
                            "description": "Network adapter showing slow link speed or errors.\n\n**Check link speed:**\n```\nGet-NetAdapter | Select Name, LinkSpeed, Status, MediaConnectionState\n```\nExpected: 1 Gbps for Ethernet. If showing 100 Mbps or 10 Mbps — cable or port issue.\n\n**Check for errors:**\n```\nGet-NetAdapterStatistics | Select Name, ReceivedPacketErrors, OutboundPacketErrors, ReceivedDiscardedPackets\n```\n\n**Common causes of slow link speed:**\n- Bad Ethernet cable (bent pins, damaged cable) — try a different cable\n- Plugged into a 100Mbps switch port — check the switch\n- NIC auto-negotiation failing — try setting speed manually\n- USB docking station — many docks have 100Mbps NICs\n\n**Fix:**\n1. Try a different Ethernet cable\n2. Try a different switch port\n3. Check dock specs if using a docking station\n4. Update NIC driver\n5. If NIC is failing: replace or use a USB Ethernet adapter",
                            "next_node_id": "solution_nic_fixed"
                        },
                        {
                            "id": "solution_nic_fixed",
                            "type": "solution",
                            "title": "Resolved: NIC / Cable Issue",
                            "description": "Network speed restored after fixing the NIC or cable.\n\n**Ticket Notes:** Slow internet caused by [bad cable / 100Mbps dock NIC / NIC errors / wrong switch port]. Resolved by [replacing cable / using direct Ethernet / updating driver / swapping to gigabit port].\n\n**Check:** Confirm link speed is now 1 Gbps with `Get-NetAdapter`."
                        }
                    ]
                },
                # ---- Branch: several users slow -> LAN saturation ----
                {
                    "id": "check_lan_saturation",
                    "type": "action",
                    "title": "Check LAN for Saturation or Bottleneck",
                    "description": "Multiple users are slow — check if the internal network is the bottleneck.\n\n**Step 1: Speed test at the firewall/router level**\nIf possible, run a speed test from a device directly connected to the firewall. This eliminates LAN issues.\n- If speed is fine at the firewall: LAN bottleneck\n- If speed is slow at the firewall: WAN/ISP issue\n\n**Step 2: Check switch utilization**\n- Log into managed switches and check port utilization\n- Look for ports at 90%+ utilization\n- Check for CRC errors or packet drops on uplink ports\n\n**Step 3: Check for a single device saturating the LAN**\n- Is someone downloading a large file?\n- Is a server doing a backup over the LAN during business hours?\n- Is a NAS replicating?\n\n**Step 4: Check uplinks between switches**\n- Are inter-switch uplinks gigabit or 10G? If only 1G and lots of traffic, they may be saturated.\n\n**Step 5: Look for broadcast storms**\n- High CPU on switches can indicate a loop or broadcast storm\n- Check spanning tree status",
                    "next_node_id": "lan_result"
                },
                {
                    "id": "lan_result",
                    "type": "decision",
                    "question": "Where is the LAN bottleneck?",
                    "help_text": "Based on speed tests and switch checks",
                    "options": [
                        {"id": "device_saturating", "label": "One device is saturating the network", "next_node_id": "fix_lan_hog"},
                        {"id": "uplink_saturated", "label": "Switch uplink is saturated", "next_node_id": "fix_uplink"},
                        {"id": "switch_issue", "label": "Switch errors / spanning tree / loop", "next_node_id": "fix_switch_issue"},
                        {"id": "lan_ok", "label": "LAN is fine — issue is WAN/ISP", "next_node_id": "check_wan_isp"}
                    ],
                    "children": [
                        {
                            "id": "fix_lan_hog",
                            "type": "action",
                            "title": "Address Device Saturating the LAN",
                            "description": "A single device is consuming most of the LAN bandwidth.\n\n**Identify the device:** Check switch port utilization or use a network monitoring tool (PRTG, Auvik, Datto RMM, etc.).\n\n**Common culprits:**\n- Server backup running during business hours\n- NAS replication job\n- Large file copy between servers\n- User downloading/uploading huge files\n- Malware-infected device generating traffic\n\n**Immediate fix:** Rate-limit or pause the offending activity.\n\n**Long-term fixes:**\n- Schedule backups outside business hours\n- Implement QoS on the firewall to prioritize business traffic\n- Segment the network (put backup traffic on its own VLAN)\n- If malware: isolate the device immediately and scan",
                            "next_node_id": "solution_lan_hog"
                        },
                        {
                            "id": "solution_lan_hog",
                            "type": "solution",
                            "title": "Resolved: LAN Bandwidth Hog",
                            "description": "LAN performance restored after addressing the high-traffic device.\n\n**Ticket Notes:** Network slowdown caused by [device/server] consuming excessive LAN bandwidth due to [backup / replication / file transfer / malware]. Resolved by [pausing job / rescheduling / isolating device].\n\n**Recommendations:**\n- Schedule bulk transfers outside 8AM-6PM\n- Implement QoS policies\n- Consider network segmentation (backup VLAN)"
                        },
                        {
                            "id": "fix_uplink",
                            "type": "action",
                            "title": "Fix Saturated Switch Uplink",
                            "description": "The uplink between switches (or switch to firewall) is maxed out.\n\n**Check the uplink:**\n- What speed is it? (1G, 10G?)\n- Is it a single link or LAG (link aggregation)?\n\n**Fixes:**\n- **Upgrade the uplink** to 10G if switches support it\n- **Add a second uplink** and configure Link Aggregation (LACP)\n- **Move heavy-traffic devices** to the switch closest to the firewall\n- **Implement VLANs** to keep local traffic local (e.g., printer traffic shouldn't cross uplinks)\n\n**If the firewall uplink is saturated:**\nThe internet connection itself may be too small for the number of users. See the WAN/ISP troubleshooting path.",
                            "next_node_id": "solution_uplink"
                        },
                        {
                            "id": "solution_uplink",
                            "type": "solution",
                            "title": "Resolved: Switch Uplink Upgraded",
                            "description": "Uplink bottleneck resolved.\n\n**Ticket Notes:** Network slowdown caused by saturated switch uplink (was [speed]). Resolved by [upgrading to 10G / adding LACP / restructuring traffic flow].\n\n**Prevention:** Monitor uplink utilization with network monitoring tools. Set alerts at 70% sustained utilization."
                        },
                        {
                            "id": "fix_switch_issue",
                            "type": "action",
                            "title": "Fix Switch / Spanning Tree Issue",
                            "description": "Switch is showing errors, high CPU, or possible network loop.\n\n**Check for a network loop:**\n- Unmanaged switches or user-plugged patch cables are common loop sources\n- High CPU + broadcast storm symptoms: everything slows, then briefly recovers, then slows again\n- On managed switches: check spanning tree status for 'blocking' ports\n\n**Check for CRC errors:**\n- Log into managed switch\n- Show interface counters for CRC errors, runts, giants\n- Errors usually indicate bad cables, bad SFPs, or failing ports\n\n**Fix:**\n- Loop: Find and remove the offending cable/switch. Enable spanning tree (BPDU guard, loop protection)\n- CRC errors: Replace the cable or SFP on the erroring port\n- High CPU: Check for broadcast storms, ARP floods, or multicast issues\n\n**If unmanaged switches are present:** Replace with managed switches. Unmanaged switches are a major risk for loops.",
                            "next_node_id": "solution_switch_fixed"
                        },
                        {
                            "id": "solution_switch_fixed",
                            "type": "solution",
                            "title": "Resolved: Switch / Network Issue",
                            "description": "Network performance restored after fixing the switch issue.\n\n**Ticket Notes:** Network slowdown caused by [loop / CRC errors / spanning tree issue / broadcast storm]. Resolved by [removing loop / replacing cable / enabling BPDU guard / replacing unmanaged switch].\n\n**Prevention:**\n- Enable BPDU guard and loop protection on all managed switches\n- Replace unmanaged switches with managed\n- Label all patch cables to prevent accidental loops"
                        }
                    ]
                },
                # ---- Branch: whole site slow -> WAN / ISP ----
                {
                    "id": "check_wan_isp",
                    "type": "action",
                    "title": "Check WAN / ISP Connection",
                    "description": "Everyone is slow — likely a WAN or ISP issue.\n\n**Step 1: Speed test from the firewall or a directly-connected device**\nCompare results to the contracted ISP speed.\n\n**Step 2: Check the ISP circuit**\n- Is the modem/ONT showing link lights?\n- Any errors on the WAN interface of the firewall?\n- Check firewall WAN interface stats for errors, drops, CRC\n\n**Step 3: Check if the ISP is having an outage**\n- https://downdetector.com — search for the ISP\n- Check ISP's status page\n- Call the ISP NOC\n\n**Step 4: Run a traceroute**\n```\ntracert 8.8.8.8\n```\nLook for high latency or timeouts at specific hops. If the first hop (firewall) is slow, it's internal. If later hops are slow, it's ISP.\n\n**Step 5: Check firewall throughput**\n- Is UTM/IPS/content filtering maxing out the firewall CPU?\n- Some firewalls slow down significantly with all security features enabled\n- Check firewall CPU and memory utilization",
                    "next_node_id": "wan_result"
                },
                {
                    "id": "wan_result",
                    "type": "decision",
                    "question": "What did the WAN/ISP check reveal?",
                    "help_text": "Compare speed tests to contracted speeds and check firewall stats",
                    "options": [
                        {"id": "isp_issue", "label": "ISP speed is well below contracted rate", "next_node_id": "fix_isp_issue"},
                        {"id": "firewall_bottleneck", "label": "Firewall is the bottleneck (high CPU / UTM)", "next_node_id": "fix_firewall_bottleneck"},
                        {"id": "circuit_too_small", "label": "Speed matches contract but is too slow for the site", "next_node_id": "solution_upgrade_circuit"},
                        {"id": "wan_ok", "label": "WAN speed is fine — issue is elsewhere", "next_node_id": "check_lan_saturation"}
                    ],
                    "children": [
                        {
                            # Also the target of intermittent_result -> "isp_instability".
                            "id": "fix_isp_issue",
                            "type": "action",
                            "title": "Address ISP Performance Issue",
                            "description": "Speed is significantly below the contracted rate.\n\n**Document before calling the ISP:**\n- Speed test results (multiple tests, different times)\n- Traceroute showing where the slowdown is\n- WAN interface stats from the firewall\n- Test from a device directly connected to the modem (bypass firewall) to rule out internal issues\n\n**Call the ISP:**\n- Reference your circuit ID / account number\n- Report the speed discrepancy\n- Ask them to check for errors on their side, check the modem/ONT signal levels\n- Request a tech dispatch if they can't resolve remotely\n\n**If the ISP says everything looks fine on their end:**\n- Ask for the modem/ONT signal levels (SNR, attenuation)\n- Power cycle the modem/ONT\n- Check for damaged cabling from the demarc to the modem",
                            "next_node_id": "solution_isp_issue"
                        },
                        {
                            "id": "solution_isp_issue",
                            "type": "solution",
                            "title": "ISP Issue Reported",
                            "description": "ISP performance issue identified and reported.\n\n**Ticket Notes:** Internet speed at [X] Mbps, contracted for [Y] Mbps. Tested from device directly connected to modem to rule out internal issues. ISP ticket opened: [ISP ticket #]. [ISP is dispatching tech / ISP found issue on their side / awaiting ISP response].\n\n**Follow-up:** Retest after ISP resolves. If this is a recurring issue, consider a secondary ISP for failover."
                        },
                        {
                            "id": "fix_firewall_bottleneck",
                            "type": "action",
                            "title": "Address Firewall Throughput Bottleneck",
                            "description": "The firewall is limiting throughput — CPU is high or UTM features are reducing speed.\n\n**Check firewall CPU and memory:**\nLog into the firewall admin console and check dashboard/system status.\n\n**Common causes:**\n- UTM features (IPS, content filtering, SSL inspection) consuming too much CPU\n- Firewall hardware is undersized for the number of users/throughput\n- Firmware is outdated (newer firmware often has performance improvements)\n- Too many VPN tunnels or NAT sessions\n\n**Quick fixes:**\n- Reduce IPS/UTM logging verbosity\n- Disable SSL deep inspection if not strictly required (major CPU saver)\n- Exclude trusted traffic from UTM scanning (M365, known-good sites)\n- Update firmware\n\n**Long-term:** If the firewall is simply too small, it needs to be right-sized. Check the vendor's throughput specs with UTM enabled (not just raw firewall throughput).",
                            "next_node_id": "solution_firewall_bottleneck"
                        },
                        {
                            "id": "solution_firewall_bottleneck",
                            "type": "solution",
                            "title": "Resolved: Firewall Throughput Issue",
                            "description": "Internet speed improved after addressing firewall bottleneck.\n\n**Ticket Notes:** Internet slow for all users. Firewall CPU at [X]% with UTM enabled. Resolved by [disabling SSL inspection / excluding M365 from UTM / updating firmware / reducing logging]. Speed improved from [X] to [Y] Mbps.\n\n**If firewall is undersized:** Recommend hardware upgrade. Always check vendor specs for 'threat inspection throughput' not just 'firewall throughput' — they can be 5-10x different."
                        },
                        {
                            # Also the target of intermittent_result -> "peak_usage".
                            "id": "solution_upgrade_circuit",
                            "type": "solution",
                            "title": "Recommendation: Upgrade Internet Circuit",
                            "description": "The internet connection is performing at contracted speed but is insufficient for the site.\n\n**Ticket Notes:** Internet speed matches contracted [X] Mbps but is insufficient for [Y] users at this site. Average utilization during business hours: [Z]%.\n\n**Recommendations:**\n- Current bandwidth per user: [X/Y] Mbps — industry recommendation is 25-50 Mbps per user minimum for cloud-heavy environments\n- Upgrade circuit to [recommended speed]\n- Consider adding a secondary ISP for failover and load balancing\n- In the meantime: Implement QoS to prioritize critical applications (VoIP, video conferencing) over bulk traffic\n\n**Escalate to:** Client decision-maker for circuit upgrade approval."
                        }
                    ]
                },
                # ---- Branch: intermittent slowdowns ----
                {
                    "id": "check_intermittent",
                    "type": "action",
                    "title": "Diagnose Intermittent Slowdowns",
                    "description": "Internet speed comes and goes — hard to catch in the moment.\n\n**Step 1: Establish a baseline with continuous monitoring**\n- Set up a continuous ping to 8.8.8.8 and log results:\n```\nping -t 8.8.8.8 > C:\\Temp\\ping_log.txt\n```\n- Use a free monitoring tool: PRTG (100 sensors free) or PingPlotter\n- Let it run for 24-48 hours to catch the pattern\n\n**Step 2: Identify the pattern**\n- Same time every day? → Scheduled job (backup, updates, AV scan)\n- Random but frequent? → ISP instability, bad cable, or overheating equipment\n- Only during heavy usage? → Bandwidth is insufficient for peak demand\n\n**Step 3: Check for scheduled jobs**\n- What time do backups run?\n- When does Windows Update check/install?\n- When does AV push definitions?\n- When do cloud sync tools run full scans?\n\n**Step 4: Check hardware health**\n- Is the modem, switch, or firewall overheating? (check in a hot server room?)\n- Overheating equipment can throttle or restart intermittently",
                    "next_node_id": "intermittent_result"
                },
                {
                    "id": "intermittent_result",
                    "type": "decision",
                    "question": "Did you identify the pattern?",
                    "help_text": "Review monitoring data and scheduled tasks",
                    # Three of the four options reuse nodes defined under other
                    # branches; only the hardware outcome is defined locally.
                    "options": [
                        {"id": "scheduled_job", "label": "Coincides with a scheduled job (backup, updates)", "next_node_id": "fix_bandwidth_hog"},
                        {"id": "isp_instability", "label": "ISP connection is dropping/degrading intermittently", "next_node_id": "fix_isp_issue"},
                        {"id": "hardware_issue", "label": "Equipment overheating or failing", "next_node_id": "solution_hardware_issue"},
                        {"id": "peak_usage", "label": "Happens during peak usage times", "next_node_id": "solution_upgrade_circuit"}
                    ],
                    "children": [
                        {
                            "id": "solution_hardware_issue",
                            "type": "solution",
                            "title": "Resolved: Network Hardware Issue",
                            "description": "Intermittent slowdowns caused by failing or overheating network equipment.\n\n**Ticket Notes:** Intermittent internet slowdowns traced to [modem / switch / firewall] [overheating / failing]. [Moved equipment / improved cooling / replaced device].\n\n**Prevention:**\n- Ensure network equipment has adequate ventilation\n- Monitor equipment temperatures (SNMP sensors)\n- Replace aging equipment proactively (switches, firewalls have ~7-10 year lifespans)\n- Keep firmware updated"
                        }
                    ]
                }
            ]
        }
    }
(check hardware switch, Fn key, airplane mode)\n- Is the correct SSID selected?\n- Is the password correct? (most common issue)\n\n**Step 2: Check if other devices can connect**\n- If no devices can connect: AP or RADIUS issue\n- If only this device fails: device-specific problem\n\n**Step 3: Check the Wi-Fi adapter**\n```\nnetsh wlan show interfaces\nnetsh wlan show drivers\n```\nLook for: Radio state (on/off), supported modes, driver version.\n\n**Step 4: Forget and reconnect**\n1. Settings > Network & Internet > Wi-Fi > Manage known networks\n2. Select the network > Forget\n3. Reconnect and enter the password\n\n**Step 5: For enterprise WPA2-Enterprise / 802.1X:**\n- Is the user's certificate valid?\n- Is the RADIUS server reachable and responding?\n- Check the RADIUS server logs for rejection reasons", + "next_node_id": "cant_connect_result" + }, + { + "id": "cant_connect_result", + "type": "decision", + "question": "What's preventing the connection?", + "help_text": "Based on the checks above", + "options": [ + {"id": "wrong_password", "label": "Wrong password / credential issue", "next_node_id": "solution_wifi_password"}, + {"id": "adapter_issue", "label": "Wi-Fi adapter disabled or driver issue", "next_node_id": "fix_wifi_adapter"}, + {"id": "radius_issue", "label": "802.1X / RADIUS authentication failing", "next_node_id": "fix_radius_auth"}, + {"id": "ap_issue", "label": "No devices can connect — AP issue", "next_node_id": "fix_ap_issue"} + ], + "children": [ + { + "id": "solution_wifi_password", + "type": "solution", + "title": "Resolved: Wi-Fi Password/Credential Issue", + "description": "User connected after fixing credentials.\n\n**Ticket Notes:** Wi-Fi connection failed due to [wrong password / expired credentials / incorrect profile]. Forgot the network, reconnected with correct credentials.\n\n**If WPA2-Enterprise:** Ensure the user's certificate hasn't expired and their domain credentials are current." 
+ }, + { + "id": "fix_wifi_adapter", + "type": "action", + "title": "Fix Wi-Fi Adapter Issue", + "description": "Wi-Fi adapter is disabled, missing, or has a driver problem.\n\n**Re-enable the adapter:**\n```\n# Check adapter status\nGet-NetAdapter -Name \"Wi-Fi\" | Select Status\n\n# Enable if disabled\nEnable-NetAdapter -Name \"Wi-Fi\"\n```\n\n**Check Device Manager:**\n- Right-click Start > Device Manager > Network adapters\n- Look for the wireless adapter — yellow warning icon means driver issue\n- Right-click > Update driver > Search automatically\n- If no wireless adapter listed: check if it's disabled in BIOS/UEFI\n\n**Reset the adapter stack:**\n```\nnetsh winsock reset\nnetsh int ip reset\nipconfig /flushdns\nipconfig /release\nipconfig /renew\n```\nRestart the computer after running these.\n\n**If driver update doesn't help:** Download the latest driver from the laptop manufacturer's website (not Windows Update).", + "next_node_id": "solution_adapter_fixed" + }, + { + "id": "solution_adapter_fixed", + "type": "solution", + "title": "Resolved: Wi-Fi Adapter Fixed", + "description": "Wi-Fi adapter restored and connecting.\n\n**Ticket Notes:** Wi-Fi not connecting. Adapter was [disabled / driver corrupted / missing from Device Manager]. Resolved by [re-enabling / updating driver from manufacturer / resetting network stack].\n\n**If BIOS-disabled:** Document that the wireless was disabled in BIOS settings and re-enabled." + }, + { + "id": "fix_radius_auth", + "type": "action", + "title": "Fix RADIUS / 802.1X Authentication", + "description": "Enterprise Wi-Fi authentication is failing.\n\n**Check the RADIUS server (NPS):**\n1. Event Viewer on the NPS server > Custom Views > Server Roles > Network Policy and Access Services\n2. 
Look for reject events — they show the reason code\n\n**Common RADIUS failures:**\n- **Certificate expired** on the user, computer, or RADIUS server\n- **User not in the allowed group** specified in the NPS policy\n- **Computer not domain-joined** (if policy requires domain membership)\n- **NPS policy mismatch** (wrong auth type, encryption settings)\n- **RADIUS shared secret mismatch** between AP and NPS server\n\n**Quick fixes:**\n1. Verify the user is in the correct security group\n2. Check certificate expiration dates\n3. Delete the Wi-Fi profile on the client and re-create it\n4. If using GPO-deployed Wi-Fi profiles: run `gpupdate /force`\n\n**Test with a known-working account** to isolate whether it's user-specific or systemic.", + "next_node_id": "solution_radius_fixed" + }, + { + "id": "solution_radius_fixed", + "type": "solution", + "title": "Resolved: RADIUS Authentication Fixed", + "description": "802.1X/RADIUS authentication restored.\n\n**Ticket Notes:** Wi-Fi 802.1X authentication failing. NPS logs showed: [reason]. Resolved by [adding user to group / renewing certificate / fixing NPS policy / correcting shared secret].\n\n**If certificate-related:** Check expiration dates for:\n- NPS server certificate\n- Root CA certificate distributed to clients\n- User/computer certificates" + }, + { + "id": "fix_ap_issue", + "type": "action", + "title": "Troubleshoot Access Point", + "description": "No devices can connect — the AP itself may be the problem.\n\n**Step 1: Check AP status**\n- Is the AP powered on? (check LED indicators)\n- Is the AP reachable on the network? (ping its management IP)\n- Log into the wireless controller or AP management console\n\n**Step 2: Check for common AP issues:**\n- **Power cycle the AP** — many issues resolve with a reboot\n- **PoE power** — is the switch providing enough power? 
(check PoE budget)\n- **DHCP pool exhausted** — clients can't get an IP (check DHCP scope)\n- **Channel congestion** — AP is on a congested channel\n- **Firmware** — is the AP firmware up to date?\n\n**Step 3: If managed by a controller:**\n- Check controller for AP status and alerts\n- Check if the AP has lost its connection to the controller\n- Is the AP's VLAN trunk configured correctly on the switch?\n\n**Step 4: Try a different AP** — swap with a known-good AP to isolate hardware failure.", + "next_node_id": "solution_ap_fixed" + }, + { + "id": "solution_ap_fixed", + "type": "solution", + "title": "Resolved: Access Point Issue", + "description": "Wi-Fi connectivity restored after fixing the AP.\n\n**Ticket Notes:** No devices could connect to [SSID]. AP at [location] was [unresponsive / PoE issue / firmware crash / controller disconnect]. Resolved by [power cycle / fixing PoE / updating firmware / re-adopting to controller].\n\n**If AP hardware failure:** Replace the unit and configure the replacement." 
+ } + ] + }, + { + "id": "check_drops", + "type": "action", + "title": "Diagnose Frequent Wi-Fi Disconnections", + "description": "User connects but keeps getting disconnected.\n\n**Step 1: Check event logs for disconnect reasons**\n```\nGet-WinEvent -LogName 'Microsoft-Windows-WLAN-AutoConfig/Operational' -MaxEvents 20 | Select TimeCreated, Message\n```\n\n**Step 2: Check signal strength during a dropout**\n```\nnetsh wlan show interfaces\n```\nSignal below 50% = likely cause of drops.\n\n**Step 3: Common causes:**\n- **Weak signal** — user is too far from the AP\n- **Interference** — microwaves, Bluetooth, cordless phones on 2.4GHz\n- **Driver power management** — Windows is turning off Wi-Fi to save power\n- **AP overloaded** — too many clients on one AP (usually 30+ causes issues)\n- **DHCP lease issues** — very short lease time causing re-auth\n- **DFS channel change** — radar detection causes AP to switch channels, dropping clients\n\n**Step 4: Disable Wi-Fi power saving**\nDevice Manager > Network adapter > Properties > Power Management > Uncheck \"Allow the computer to turn off this device to save power\"\n\nAlso: Adapter properties > Advanced > Power Save Mode > set to Maximum Performance", + "next_node_id": "drops_result" + }, + { + "id": "drops_result", + "type": "decision", + "question": "What's causing the disconnections?", + "help_text": "Based on signal strength, event logs, and environment checks", + "options": [ + {"id": "weak_signal", "label": "Weak signal — too far from AP", "next_node_id": "solution_weak_signal"}, + {"id": "power_mgmt", "label": "Power management turning off Wi-Fi", "next_node_id": "solution_power_mgmt"}, + {"id": "interference", "label": "Interference on the channel", "next_node_id": "fix_interference"}, + {"id": "ap_overloaded", "label": "AP is overloaded with too many clients", "next_node_id": "solution_ap_overloaded"} + ], + "children": [ + { + "id": "solution_weak_signal", + "type": "solution", + "title": "Resolved: Weak 
Wi-Fi Signal", + "description": "Disconnections caused by weak signal in the user's area.\n\n**Ticket Notes:** Wi-Fi disconnecting due to weak signal ([X]% signal strength). User is [location], too far from nearest AP at [AP location].\n\n**Fixes applied:** [Moved user / added AP / replaced AP with higher-power model / switched to 2.4GHz for better range].\n\n**If additional coverage is needed:** Recommend a site survey to identify optimal AP placement." + }, + { + "id": "solution_power_mgmt", + "type": "solution", + "title": "Resolved: Wi-Fi Power Management Disabled", + "description": "Disconnections stopped after disabling Wi-Fi power management.\n\n**Ticket Notes:** Wi-Fi disconnecting intermittently. Windows power management was turning off the wireless adapter. Disabled in Device Manager and set adapter to Maximum Performance.\n\n**To deploy org-wide:** Use Group Policy:\nComputer Config > Admin Templates > System > Power Management > set wireless adapter to Maximum Performance on AC power." + }, + { + "id": "fix_interference", + "type": "action", + "title": "Address Wi-Fi Channel Interference", + "description": "Wi-Fi channel is congested or has interference.\n\n**Step 1: Scan for competing networks**\nUse a Wi-Fi analyzer app (e.g., WiFi Analyzer for Android, or inSSIDer for Windows).\n- How many SSIDs are on the same channel?\n- Are neighboring businesses on overlapping channels?\n\n**Step 2: Choose the best channel**\n- **2.4GHz:** Only use channels 1, 6, or 11 (non-overlapping). Pick the least crowded.\n- **5GHz:** More channels available — switch to a less crowded one. 
Avoid DFS channels if radar is an issue.\n\n**Step 3: Change the channel on the AP**\n- Log into the AP or wireless controller\n- Set the radio to the selected channel (disable auto if it keeps picking a bad one)\n\n**Step 4: Check for non-Wi-Fi interference**\n- Microwaves (2.4GHz interference)\n- Bluetooth devices\n- Cordless phones\n- USB 3.0 hubs (known to cause 2.4GHz interference)", + "next_node_id": "solution_interference_fixed" + }, + { + "id": "solution_interference_fixed", + "type": "solution", + "title": "Resolved: Wi-Fi Interference", + "description": "Wi-Fi stability improved after addressing channel interference.\n\n**Ticket Notes:** Wi-Fi disconnections caused by channel interference. Changed [AP name] from channel [X] to channel [Y] on [2.4/5]GHz band. Also [removed interference source / moved microwave / switched clients to 5GHz].\n\n**Best practice:** For enterprise environments, use a wireless controller with automatic channel management. For small sites, manually set non-overlapping channels." + }, + { + "id": "solution_ap_overloaded", + "type": "solution", + "title": "Resolved: AP Overloaded — Too Many Clients", + "description": "AP had too many connected clients causing instability.\n\n**Ticket Notes:** AP at [location] had [X] connected clients. Performance degrades above ~25-30 clients per AP. 
[Added additional AP / load balanced clients / configured band steering to push clients to 5GHz].\n\n**Recommendations:**\n- Deploy additional APs to distribute the client load\n- Enable band steering to push dual-band devices to 5GHz\n- Consider client load balancing on the wireless controller\n- Target 15-25 clients per AP for reliable performance" + } + ] + }, + { + "id": "check_slow_wifi", + "type": "action", + "title": "Diagnose Slow Wi-Fi Speeds", + "description": "User is connected but Wi-Fi is very slow.\n\n**Step 1: Check connection speed and signal**\n```\nnetsh wlan show interfaces\n```\nLook at: Receive/Transmit rate, Signal, Radio type, Channel\n\n**Step 2: Speed test on Wi-Fi vs Ethernet**\nRun speedtest.net on Wi-Fi, then on Ethernet. This shows how much the Wi-Fi is limiting speed.\n\n**Step 3: Check which band/standard the client is on**\n- 802.11n on 2.4GHz = max ~70Mbps real-world\n- 802.11ac on 5GHz = max ~400Mbps real-world\n- 802.11ax (Wi-Fi 6) on 5GHz = max ~600Mbps+ real-world\n\n**If connected at low rates (e.g., 54Mbps, 72Mbps):**\n- Client may be forcing an older standard\n- Adapter > Properties > Advanced > Wireless Mode > enable all standards\n- Or the AP is configured for legacy compatibility mode (slows everyone down)\n\n**Step 4: Check AP client count**\nMany clients on one AP = everyone gets slower. 
Over 25 clients is a concern.", + "next_node_id": "slow_wifi_result" + }, + { + "id": "slow_wifi_result", + "type": "decision", + "question": "What's causing slow Wi-Fi?", + "help_text": "Compare Wi-Fi speed to Ethernet and check connection parameters", + "options": [ + {"id": "old_standard", "label": "Client connected on old/slow standard (11n, 11g)", "next_node_id": "solution_upgrade_wifi_standard"}, + {"id": "poor_signal", "label": "Signal is weak — degrading speed", "next_node_id": "solution_weak_signal"}, + {"id": "congested_channel", "label": "Channel is congested", "next_node_id": "fix_interference"}, + {"id": "too_many_clients", "label": "Too many clients on the AP", "next_node_id": "solution_ap_overloaded"} + ], + "children": [ + { + "id": "solution_upgrade_wifi_standard", + "type": "solution", + "title": "Resolved: Wi-Fi Standard Upgrade Needed", + "description": "Client is connecting on an older, slower Wi-Fi standard.\n\n**Ticket Notes:** User's Wi-Fi slow due to connection on [802.11n/g] instead of [802.11ac/ax]. [Updated adapter settings / replaced adapter / switched to 5GHz band].\n\n**If the AP only supports 802.11n:** Recommend upgrading to Wi-Fi 5 (802.11ac) or Wi-Fi 6 (802.11ax) APs.\n**If the client only supports 802.11n:** A USB Wi-Fi 6 adapter is an inexpensive upgrade." 
+ } + ] + }, + { + "id": "check_ssid_missing", + "type": "action", + "title": "Troubleshoot Missing SSID", + "description": "The Wi-Fi network name isn't appearing in the available networks list.\n\n**Step 1: Can other devices see the SSID?**\n- If no devices see it: AP issue or SSID is disabled\n- If only this device can't see it: client-side issue\n\n**Step 2: Check if SSID is hidden**\n- Some networks are configured as hidden (SSID broadcast disabled)\n- To connect to a hidden SSID: Network & Internet > Wi-Fi > Add a network > enter the SSID manually\n\n**Step 3: Check if the correct band is supported**\n- If the SSID is only on 5GHz and the client only has 2.4GHz, it won't appear\n- Check: `netsh wlan show drivers` — look for 'Supported bands'\n\n**Step 4: Check the AP**\n- Is the SSID still configured and enabled on the AP/controller?\n- Is the AP's radio turned on?\n- Did someone accidentally delete or disable the SSID?\n\n**Step 5: Scan for networks**\n```\nnetsh wlan show networks mode=bssid\n```\nThis shows all detected networks with their channel and signal strength.", + "next_node_id": "ssid_result" + }, + { + "id": "ssid_result", + "type": "decision", + "question": "Why is the SSID not visible?", + "help_text": "Based on the checks above", + "options": [ + {"id": "hidden", "label": "SSID is hidden — need to connect manually", "next_node_id": "solution_hidden_ssid"}, + {"id": "band_mismatch", "label": "SSID is on 5GHz, client only has 2.4GHz", "next_node_id": "solution_band_mismatch"}, + {"id": "ap_ssid_down", "label": "SSID was disabled or AP radio is off", "next_node_id": "fix_ap_issue"}, + {"id": "client_driver", "label": "Client Wi-Fi driver issue — can't scan", "next_node_id": "fix_wifi_adapter"} + ], + "children": [ + { + "id": "solution_hidden_ssid", + "type": "solution", + "title": "Resolved: Connected to Hidden SSID", + "description": "Network was configured as a hidden SSID. 
Connected manually.\n\n**Ticket Notes:** Wi-Fi SSID [name] not appearing because SSID broadcast is disabled. Connected manually by adding the network profile.\n\n**Note:** Hidden SSIDs are not more secure — they actually cause the client to broadcast the SSID name while probing. Consider enabling SSID broadcast and using proper WPA2/3 Enterprise for security." + }, + { + "id": "solution_band_mismatch", + "type": "solution", + "title": "Resolved: Band Mismatch", + "description": "Client doesn't support the frequency band the SSID is on.\n\n**Ticket Notes:** SSID [name] is configured on 5GHz only. User's device only supports 2.4GHz. [Added 2.4GHz SSID / provided USB dual-band adapter / user connected to alternate SSID].\n\n**Recommendation:** Most enterprise environments should have both 2.4GHz and 5GHz SSIDs available, or a single SSID on both bands with band steering." + } + ] + }, + { + "id": "check_roaming", + "type": "action", + "title": "Troubleshoot Wi-Fi Roaming Issues", + "description": "User drops connection when moving between APs (different floors, areas).\n\n**What should happen:** Client seamlessly roams from one AP to the next without disconnecting.\n\n**Step 1: Check roaming configuration**\n- Are all APs on the same SSID and security settings? (Must match exactly)\n- Are all APs on the same VLAN? (Or is there L3 roaming configured?)\n- Is fast roaming enabled? 
(802.11r, OKC, or PMKSA caching)\n\n**Step 2: Check AP overlap**\n- Adjacent APs should have 15-20% signal overlap\n- If there's a dead zone between APs, the client drops before finding the next AP\n- Use a Wi-Fi survey tool to check coverage\n\n**Step 3: Check client-side roaming aggressiveness**\n- Adapter > Properties > Advanced > Roaming Aggressiveness\n- Set to 'Medium' or 'High' — low aggressiveness means the client clings to a weak AP too long\n\n**Step 4: Check for 'sticky client' behavior**\n- Client stays connected to a distant AP instead of roaming to a closer one\n- Fix: Enable minimum RSSI on the AP (disconnect clients below -75dBm threshold)\n- Enable band steering and fast roaming on the controller", + "next_node_id": "roaming_result" + }, + { + "id": "roaming_result", + "type": "decision", + "question": "What's causing the roaming issue?", + "help_text": "Based on coverage analysis and configuration checks", + "options": [ + {"id": "dead_zone", "label": "Dead zone between APs — no overlap", "next_node_id": "solution_dead_zone"}, + {"id": "sticky_client", "label": "Client is sticky — won't roam", "next_node_id": "solution_sticky_client"}, + {"id": "config_mismatch", "label": "SSID or security mismatch between APs", "next_node_id": "solution_ssid_mismatch"}, + {"id": "no_fast_roaming", "label": "Fast roaming (802.11r) not enabled", "next_node_id": "solution_fast_roaming"} + ], + "children": [ + { + "id": "solution_dead_zone", + "type": "solution", + "title": "Resolved: Wi-Fi Dead Zone", + "description": "Coverage gap between APs causing disconnections.\n\n**Ticket Notes:** Wi-Fi drops when user moves between [area A] and [area B]. Coverage survey confirmed dead zone. [Repositioned AP / added additional AP / increased AP transmit power].\n\n**Recommendation:** Conduct a professional Wi-Fi site survey to identify all dead zones. APs should have 15-20% signal overlap at -67dBm or better for seamless roaming." 
+ }, + { + "id": "solution_sticky_client", + "type": "solution", + "title": "Resolved: Sticky Client Issue", + "description": "Client was holding onto a distant AP instead of roaming.\n\n**Ticket Notes:** User's device staying connected to distant AP ([AP name], signal [X]dBm) instead of roaming to closer AP. Resolved by [increasing roaming aggressiveness on client / enabling minimum RSSI on AP / configuring band steering].\n\n**AP-side fixes:**\n- Set minimum RSSI threshold to -75dBm (disconnect weak clients)\n- Enable client load balancing on the controller" + }, + { + "id": "solution_ssid_mismatch", + "type": "solution", + "title": "Resolved: SSID/Security Mismatch", + "description": "APs had different SSID or security configurations preventing roaming.\n\n**Ticket Notes:** Roaming failure between APs. [AP at location A] and [AP at location B] had mismatched [SSID / security type / VLAN / WPA settings]. Corrected to match across all APs.\n\n**Prevention:** Use a wireless controller to manage all APs centrally — this prevents configuration drift." + }, + { + "id": "solution_fast_roaming", + "type": "solution", + "title": "Resolved: Fast Roaming Enabled", + "description": "Enabled fast roaming protocols to speed up transitions between APs.\n\n**Ticket Notes:** Wi-Fi drops during roaming due to slow re-authentication. Enabled [802.11r (Fast BSS Transition) / OKC (Opportunistic Key Caching) / PMKSA caching] on the wireless controller.\n\n**Note:** 802.11r can cause issues with some older devices. Test before deploying broadly. OKC is usually a safer first option.\n\n**Result:** Roaming transitions now take <50ms instead of 1-3 seconds." 
+ } + ] + } + ] + } + } + + +# ============================================================================= +# Tree 3: Firewall Blocking Issues +# ============================================================================= +def get_firewall_blocking_tree() -> dict[str, Any]: + """Firewall Blocking Issues - Networking tree.""" + return { + "name": "Firewall Blocking Issues", + "description": "Troubleshoot firewall-related blocking of applications, websites, ports, and services. Covers both Windows Firewall and network firewalls (UTM/NGFW). Includes common port requirements, rule creation, and log analysis.", + "category": "Networking", + "tree_structure": { + "id": "root", + "type": "decision", + "question": "What is being blocked?", + "help_text": "Identify what the user can't access or what application isn't working.", + "options": [ + {"id": "website", "label": "A specific website or web application", "next_node_id": "check_website_block"}, + {"id": "application", "label": "A desktop application can't connect", "next_node_id": "check_app_block"}, + {"id": "port_service", "label": "A specific port or service is blocked", "next_node_id": "check_port_block"}, + {"id": "vpn_blocked", "label": "VPN can't connect through the firewall", "next_node_id": "check_vpn_block"}, + {"id": "not_sure", "label": "Something isn't working but not sure if it's the firewall", "next_node_id": "diagnose_firewall_vs_other"} + ], + "children": [ + { + "id": "check_website_block", + "type": "action", + "title": "Diagnose Website/URL Blocking", + "description": "User can't access a specific website.\n\n**Step 1: Verify the block**\n- Can other users at the same site access it?\n- Can the user access it from their phone (on cellular, not Wi-Fi)?\n- What error message do they see? 
(timeout, block page, SSL error, etc.)\n\n**Step 2: Check for a firewall block page**\n- Many UTM firewalls show a branded block page (SonicWall, Fortinet, Sophos, etc.)\n- The block page usually tells you the category (e.g., 'Social Media', 'Uncategorized', 'Security Risk')\n- This confirms it's the firewall content filter\n\n**Step 3: Check the firewall content filter logs**\n- Log into the firewall admin console\n- Check the web filter or content filter log\n- Search for the URL/domain\n- Note the category and policy that blocked it\n\n**Step 4: Check SSL/TLS inspection**\n- If SSL inspection is enabled, it may be causing certificate errors on some sites\n- Some sites use certificate pinning and break with SSL inspection\n- Check if excluding the site from SSL inspection fixes it", + "next_node_id": "website_block_cause" + }, + { + "id": "website_block_cause", + "type": "decision", + "question": "Why is the website blocked?", + "help_text": "Based on the block page and firewall logs", + "options": [ + {"id": "content_filter", "label": "Content filter category block (intended)", "next_node_id": "fix_content_filter"}, + {"id": "wrong_category", "label": "Website is miscategorized by the filter", "next_node_id": "fix_miscategorized"}, + {"id": "ssl_inspection", "label": "SSL inspection causing certificate errors", "next_node_id": "fix_ssl_inspection"}, + {"id": "dns_filter", "label": "DNS-level filtering (DNS Security, Umbrella, etc.)", "next_node_id": "fix_dns_filter"} + ], + "children": [ + { + "id": "fix_content_filter", + "type": "action", + "title": "Handle Content Filter Block", + "description": "Website is blocked by the content filter policy — this is working as designed.\n\n**If the user needs access for work:**\n1. Verify the business justification\n2. Get approval from the client's manager or IT decision-maker\n3. 
Options to allow access:\n\n**Option A: Allow the specific URL/domain** (recommended)\n- Firewall > Content Filter > Allow List\n- Add just the specific domain (not the entire category)\n\n**Option B: Allow for specific user/group only**\n- If the firewall supports user-based policies (most NGFWs do)\n- Create a policy for the user/group that allows the category\n\n**Option C: Allow the category** (least recommended)\n- Unblocking an entire category opens it for everyone\n- Only do this if the policy needs to change org-wide\n\n**Document:** Who approved the exception and the business justification.", + "next_node_id": "solution_content_filter" + }, + { + "id": "solution_content_filter", + "type": "solution", + "title": "Resolved: Content Filter Exception Added", + "description": "Website access granted via content filter exception.\n\n**Ticket Notes:** [URL] blocked by content filter (category: [category]). Business justification: [reason]. Approved by: [approver]. Added domain to [allow list / user-specific policy]. Access confirmed.\n\n**Important:** Document all exceptions for compliance and audit purposes. Review exceptions periodically." 
+ }, + { + "id": "fix_miscategorized", + "type": "action", + "title": "Fix Miscategorized Website", + "description": "The website is in the wrong content filter category.\n\n**Immediate fix:** Add the domain to the allow list so the user can work.\n\n**Submit a recategorization request:**\n- Most firewall vendors let you request a category change:\n - **Fortinet:** https://www.fortiguard.com/faq/wfrating\n - **SonicWall:** https://cfssupport.sonicwall.com/\n - **Sophos:** Submit through Sophos Central\n - **Palo Alto:** https://urlfiltering.paloaltonetworks.com/\n - **Cisco/OpenDNS:** https://community.opendns.com/domaintagging/\n\n- Recategorization usually takes 1-3 business days\n\n**After recategorization:** You can remove the manual allow list entry once the category is corrected.", + "next_node_id": "solution_recategorized" + }, + { + "id": "solution_recategorized", + "type": "solution", + "title": "Resolved: Website Recategorization Submitted", + "description": "Website miscategorized — added to allow list and submitted recategorization.\n\n**Ticket Notes:** [URL] miscategorized as [wrong category] instead of [correct category]. Added to allow list for immediate access. Recategorization request submitted to [vendor]. Will remove allow list entry once category is corrected.\n\n**Follow-up:** Check categorization in 3-5 business days." + }, + { + "id": "fix_ssl_inspection", + "type": "action", + "title": "Fix SSL Inspection Certificate Issues", + "description": "SSL deep inspection is causing certificate errors on certain sites.\n\n**Why this happens:** The firewall intercepts HTTPS, re-signs the certificate with its own CA. Sites that use certificate pinning (banking, government, some apps) will reject the firewall's certificate.\n\n**Fix: Exclude the site from SSL inspection**\n1. Firewall > SSL Inspection policy\n2. 
Add the domain to the SSL inspection bypass/exclusion list\n\n**Common sites that need SSL inspection bypass:**\n- Banking and financial sites\n- Government sites\n- Microsoft 365 (Microsoft recommends bypassing)\n- Video conferencing (Teams, Zoom, WebEx)\n- Healthcare portals\n\n**If the issue is that the firewall's CA cert isn't trusted:**\n- Deploy the firewall's root CA certificate to all domain computers via GPO\n- Non-domain devices will show certificate warnings unless the CA is manually trusted", + "next_node_id": "solution_ssl_fixed" + }, + { + "id": "solution_ssl_fixed", + "type": "solution", + "title": "Resolved: SSL Inspection Bypass Added", + "description": "Certificate errors resolved by excluding the site from SSL inspection.\n\n**Ticket Notes:** [URL] showing certificate errors due to SSL deep inspection. Added to SSL inspection bypass list. Site now loads correctly.\n\n**If CA deployment is needed:** Deploy firewall root CA to all endpoints via GPO:\nComputer Config > Windows Settings > Security Settings > Public Key Policies > Trusted Root Certification Authorities" + }, + { + "id": "fix_dns_filter", + "type": "action", + "title": "Fix DNS-Level Filtering Block", + "description": "Website is blocked at the DNS level (Cisco Umbrella, DNSFilter, Cloudflare Gateway, etc.).\n\n**How to identify DNS filtering:**\n- User gets a block page but it's from the DNS service, not the firewall\n- `nslookup` for the domain returns the DNS filter's block IP instead of the real IP\n```\nnslookup blocked-site.com\n```\n\n**To fix:**\n1. Log into the DNS filtering console (Umbrella, DNSFilter, etc.)\n2. Check the logs for the blocked domain\n3. Add to the allow list if it should be permitted\n\n**If you can't access the DNS filter console:** The DNS filter may be managed by a different team or MSP. Escalate.\n\n**Quick test:** Temporarily change the client's DNS to 8.8.8.8 to bypass DNS filtering and confirm the site works. 
(Change it back afterward!)", + "next_node_id": "solution_dns_filter" + }, + { + "id": "solution_dns_filter", + "type": "solution", + "title": "Resolved: DNS Filter Exception Added", + "description": "Website unblocked in DNS filtering service.\n\n**Ticket Notes:** [URL] blocked by [DNS filter service]. Added to allow list in [service name]. Access confirmed.\n\n**Note:** DNS filtering and firewall content filtering are separate layers. A site may need to be allowed in both if the org uses both." + } + ] + }, + { + "id": "check_app_block", + "type": "action", + "title": "Diagnose Application Connection Block", + "description": "A desktop application can't connect to its server or service.\n\n**Step 1: Identify what the app needs**\n- What server/IP does it connect to?\n- What port(s) does it use?\n- Check the vendor's documentation for required ports and IPs\n\n**Step 2: Test connectivity**\n```\n# Test if the port is reachable\nTest-NetConnection -ComputerName server.example.com -Port 443\nTest-NetConnection -ComputerName server.example.com -Port 8080\n\n# Check if Windows Firewall is blocking\nGet-NetFirewallRule | Where-Object {$_.DisplayName -like '*AppName*'} | Select DisplayName, Enabled, Direction, Action\n```\n\n**Step 3: Check Windows Firewall first**\n- Windows Defender Firewall may be blocking the app independently from the network firewall\n- Check: Control Panel > Windows Defender Firewall > Allow an app\n- Temporarily disable Windows Firewall to test (re-enable immediately after)\n\n**Step 4: Check network firewall logs**\n- Search for the source IP (user's computer) in the firewall deny logs\n- Look at what destination IP and port is being blocked", + "next_node_id": "app_block_source" + }, + { + "id": "app_block_source", + "type": "decision", + "question": "What is blocking the application?", + "help_text": "Based on connectivity tests and firewall log analysis", + "options": [ + {"id": "windows_fw", "label": "Windows Firewall is blocking it", 
"next_node_id": "fix_windows_firewall"}, + {"id": "network_fw", "label": "Network firewall is blocking the port/IP", "next_node_id": "fix_network_firewall_rule"}, + {"id": "both", "label": "Both firewalls need rules", "next_node_id": "fix_windows_firewall"}, + {"id": "not_firewall", "label": "Connectivity works — issue isn't firewall", "next_node_id": "solution_not_firewall"} + ], + "children": [ + { + "id": "fix_windows_firewall", + "type": "action", + "title": "Create Windows Firewall Rule", + "description": "Windows Firewall is blocking the application.\n\n**Option 1: Allow the app through Windows Firewall**\n1. Control Panel > Windows Defender Firewall > Allow an app\n2. Click 'Change settings' > 'Allow another app'\n3. Browse to the application's .exe file\n4. Check Private and/or Domain as appropriate\n\n**Option 2: Create a port-based rule**\n```\n# Allow inbound on specific port\nNew-NetFirewallRule -DisplayName 'Allow MyApp' -Direction Inbound -Protocol TCP -LocalPort 8080 -Action Allow\n\n# Allow outbound on specific port\nNew-NetFirewallRule -DisplayName 'Allow MyApp Outbound' -Direction Outbound -Protocol TCP -RemotePort 443 -Action Allow\n```\n\n**Option 3: Deploy via Group Policy (for org-wide apps)**\nComputer Config > Windows Settings > Security Settings > Windows Defender Firewall with Advanced Security > Inbound/Outbound Rules\n\n**After adding the rule:** Test the application. 
If it still doesn't work, also check the network firewall.", + "next_node_id": "windows_fw_result" + }, + { + "id": "windows_fw_result", + "type": "decision", + "question": "Did the Windows Firewall rule fix it?", + "help_text": "Test the application after adding the rule", + "options": [ + {"id": "yes", "label": "Yes, application works now", "next_node_id": "solution_windows_fw"}, + {"id": "no", "label": "No, still blocked — network firewall too", "next_node_id": "fix_network_firewall_rule"} + ], + "children": [ + { + "id": "solution_windows_fw", + "type": "solution", + "title": "Resolved: Windows Firewall Rule Added", + "description": "Application connectivity restored after adding Windows Firewall rule.\n\n**Ticket Notes:** [Application] blocked by Windows Defender Firewall. Created [inbound/outbound] rule for [app/port]. Application confirmed working.\n\n**If this needs to be deployed org-wide:** Create the rule via Group Policy to push to all domain computers." + } + ] + }, + { + "id": "fix_network_firewall_rule", + "type": "action", + "title": "Create Network Firewall Rule", + "description": "The network firewall needs a rule to allow the application's traffic.\n\n**Step 1: Gather the requirements**\n- Source: User's subnet or specific IP\n- Destination: Application server IP or FQDN\n- Port(s): TCP/UDP port numbers the app uses\n- Protocol: TCP, UDP, or both\n\n**Step 2: Check vendor documentation**\nAlways check the app vendor's docs for the complete list of required ports and IPs. 
Common apps:\n- RDP: TCP 3389\n- SQL Server: TCP 1433\n- HTTPS: TCP 443\n- SSH: TCP 22\n- FTP: TCP 20-21, passive ports\n- SIP/VoIP: UDP 5060-5061, RTP 10000-20000\n\n**Step 3: Create the rule**\nLog into the firewall and create an allow rule with the specific source, destination, ports, and protocol.\n\n**Step 4: Test and verify**\n```\nTest-NetConnection -ComputerName destination -Port port_number\n```\n\n**Best practice:** Use the most specific rule possible (exact IPs and ports). Avoid broad 'allow all' rules.", + "next_node_id": "solution_network_fw_rule" + }, + { + "id": "solution_network_fw_rule", + "type": "solution", + "title": "Resolved: Network Firewall Rule Created", + "description": "Application connectivity restored after creating firewall rule.\n\n**Ticket Notes:** [Application] blocked by network firewall. Created rule: Source [IP/subnet] → Destination [IP/FQDN] Port [ports] [TCP/UDP]. Application confirmed working.\n\n**Documentation:** Record the rule in the client's firewall change log with business justification and approval." + }, + { + "id": "solution_not_firewall", + "type": "solution", + "title": "Not a Firewall Issue", + "description": "Connectivity test succeeded — the firewall is not blocking the traffic.\n\n**Ticket Notes:** Application [name] not connecting. Firewall ruled out — port test to [destination:port] succeeds. Issue is likely:\n- Application configuration (wrong server address, credentials)\n- Server-side issue (service down, certificate expired)\n- DNS resolution (app resolving to wrong IP)\n- Application-level authentication failure\n\n**Next steps:** Troubleshoot at the application level." 
+ } + ] + }, + { + "id": "check_port_block", + "type": "action", + "title": "Test and Fix Specific Port Block", + "description": "A specific port or service needs to be opened.\n\n**Step 1: Confirm the port is actually blocked**\n```\n# Test TCP port\nTest-NetConnection -ComputerName target_ip -Port port_number\n\n# If the above isn't available, use telnet:\ntelnet target_ip port_number\n\n# Check what's listening locally\nnetstat -an | findstr :port_number\n```\n\n**Step 2: Determine WHERE the block is**\n1. Test from the server itself (is the service even listening?)\n2. Test from the same subnet (is it a Windows Firewall issue?)\n3. Test from a different subnet (is it the network firewall?)\n4. Test from outside the network (is it the edge firewall?)\n\n**Step 3: Common port requirements by service**\n- HTTP/HTTPS: 80, 443\n- RDP: 3389\n- SSH: 22\n- DNS: 53 (TCP+UDP)\n- SMTP: 25, 587\n- IMAP: 143, 993\n- FTP: 20-21 + passive range\n- SMB: 445\n- SQL: 1433\n- MySQL: 3306\n- PostgreSQL: 5432", + "next_node_id": "app_block_source" + }, + { + "id": "check_vpn_block", + "type": "action", + "title": "Troubleshoot VPN Blocked by Firewall", + "description": "VPN connection can't establish through the firewall.\n\n**Identify the VPN type and required ports:**\n\n**IPSec VPN:**\n- UDP 500 (IKE)\n- UDP 4500 (NAT Traversal)\n- Protocol 50 (ESP) — note: this is an IP protocol, not a port\n\n**SSL VPN / OpenVPN:**\n- TCP or UDP 443 (most common)\n- Or custom port (check VPN server config)\n\n**WireGuard:**\n- UDP 51820 (default)\n\n**L2TP/IPSec:**\n- UDP 500, UDP 4500, UDP 1701, Protocol 50\n\n**PPTP (legacy, avoid):**\n- TCP 1723, Protocol 47 (GRE)\n\n**Step 1: Check which ports are needed** based on the VPN type above.\n\n**Step 2: Test if the port is reachable**\n```\nTest-NetConnection -ComputerName vpn_server -Port 443\n```\n\n**Step 3: Check both directions**\n- Outbound: Is the user's firewall allowing outbound VPN traffic?\n- Inbound: Is the VPN server's firewall 
allowing inbound connections?\n- NAT: Is port forwarding configured correctly for the VPN server?\n\n**Step 4: Check for ISP blocking**\nSome ISPs and hotel/public Wi-Fi block VPN protocols. Try port 443 (usually open everywhere).", + "next_node_id": "vpn_block_result" + }, + { + "id": "vpn_block_result", + "type": "decision", + "question": "Where is the VPN being blocked?", + "help_text": "Based on port tests and firewall log analysis", + "options": [ + {"id": "outbound_fw", "label": "User's network firewall blocking outbound VPN", "next_node_id": "fix_network_firewall_rule"}, + {"id": "inbound_fw", "label": "VPN server firewall blocking inbound", "next_node_id": "fix_vpn_inbound"}, + {"id": "nat_issue", "label": "NAT or port forwarding not configured", "next_node_id": "fix_vpn_nat"}, + {"id": "isp_block", "label": "ISP or public Wi-Fi blocking VPN protocols", "next_node_id": "solution_isp_vpn_block"} + ], + "children": [ + { + "id": "fix_vpn_inbound", + "type": "action", + "title": "Fix VPN Server Inbound Firewall", + "description": "The firewall in front of the VPN server is blocking incoming VPN connections.\n\n**Create the inbound rule:**\nBased on VPN type, allow the required ports/protocols inbound to the VPN server's internal IP.\n\n**For IPSec:** Allow UDP 500, UDP 4500, and IP Protocol 50 to the VPN server.\n\n**For SSL VPN:** Allow TCP 443 (or the custom port) to the VPN server.\n\n**Check NAT:** If the VPN server is behind NAT, port forwarding must be configured (see NAT fix).\n\n**After creating the rule:** Test the VPN connection from outside the network.", + "next_node_id": "solution_vpn_inbound" + }, + { + "id": "solution_vpn_inbound", + "type": "solution", + "title": "Resolved: VPN Server Firewall Rule Added", + "description": "VPN connections now working after adding inbound firewall rule.\n\n**Ticket Notes:** VPN connections blocked by firewall in front of VPN server. Created inbound rules for [ports/protocols] to [VPN server IP]. 
VPN confirmed working from external network." + }, + { + "id": "fix_vpn_nat", + "type": "action", + "title": "Fix VPN NAT / Port Forwarding", + "description": "VPN server is behind NAT and port forwarding isn't configured.\n\n**Configure port forwarding:**\n1. Log into the edge firewall/router\n2. Create port forwarding rules:\n - External port → Internal VPN server IP : Internal port\n\n**For IPSec behind NAT:**\n- Forward UDP 500 and UDP 4500 to the VPN server\n- NAT-Traversal (NAT-T) must be enabled on both ends\n- Note: Multiple IPSec VPNs behind the same NAT can cause issues\n\n**For SSL VPN behind NAT:**\n- Forward TCP 443 to the VPN server\n- If port 443 is already used by something else, use a different port and update the VPN client config\n\n**Important:** Only ONE device can receive forwarded traffic for a given port. If 443 is forwarded to a web server, the SSL VPN needs a different port.", + "next_node_id": "solution_vpn_nat" + }, + { + "id": "solution_vpn_nat", + "type": "solution", + "title": "Resolved: VPN Port Forwarding Configured", + "description": "VPN connectivity restored after configuring NAT/port forwarding.\n\n**Ticket Notes:** VPN server behind NAT at [public IP]. Configured port forwarding: [external port] → [internal IP:port]. VPN confirmed working.\n\n**Document:** Record the port forwarding rule in the client's network documentation." + }, + { + "id": "solution_isp_vpn_block", + "type": "solution", + "title": "ISP or Public Wi-Fi Blocking VPN", + "description": "The user's ISP or public Wi-Fi is blocking VPN protocols.\n\n**Ticket Notes:** VPN blocked by [ISP / hotel Wi-Fi / public network]. Standard VPN ports are filtered.\n\n**Workarounds:**\n1. Switch VPN to port 443 (TCP) — almost never blocked because it looks like HTTPS\n2. Use SSL VPN instead of IPSec if available\n3. Use a mobile hotspot instead of the public Wi-Fi\n4. 
Some VPN clients support stealth/obfuscation modes\n\n**If the VPN server supports it:** Configure an alternative listener on TCP 443 for users in restrictive networks." + } + ] + }, + { + "id": "diagnose_firewall_vs_other", + "type": "action", + "title": "Determine If the Firewall Is the Problem", + "description": "Not sure if the firewall is causing the issue. Let's find out.\n\n**Quick test: Is it the firewall?**\n\n**Test 1: Check firewall deny logs**\nSearch the firewall's deny/drop log for the user's IP address in the last hour. If you see blocked traffic, the firewall is involved.\n\n**Test 2: Test from inside vs outside the firewall**\n- Can the user reach the resource from the same subnet? (bypasses the firewall)\n- If it works from the same subnet, the firewall is likely involved\n\n**Test 3: Temporarily create a broad allow rule** (for testing ONLY)\n- Allow all traffic from the user's IP to the destination\n- If it works: firewall is the issue — now narrow down which specific port/protocol is needed\n- **Remove the broad rule immediately after testing**\n\n**Test 4: Check Windows Firewall too**\n```\n# Temporarily disable Windows Firewall to test\nSet-NetFirewallProfile -Profile Domain,Public,Private -Enabled False\n# TEST NOW — then immediately re-enable:\nSet-NetFirewallProfile -Profile Domain,Public,Private -Enabled True\n```\n\n**If none of these point to the firewall:** The issue is likely DNS, application configuration, server-side, or authentication.", + "next_node_id": "firewall_diagnosis_result" + }, + { + "id": "firewall_diagnosis_result", + "type": "decision", + "question": "Is the firewall causing the problem?", + "help_text": "Based on the tests above", + "options": [ + {"id": "windows_fw", "label": "Yes — Windows Firewall is blocking", "next_node_id": "fix_windows_firewall"}, + {"id": "network_fw", "label": "Yes — Network firewall is blocking", "next_node_id": "check_app_block"}, + {"id": "not_fw", "label": "No — Firewall isn't the issue", 
"next_node_id": "solution_not_firewall"} + ] + } + ] + } + } diff --git a/backend/scripts/seed_trees_v2.py b/backend/scripts/seed_trees_v2.py new file mode 100644 index 00000000..cf9912da --- /dev/null +++ b/backend/scripts/seed_trees_v2.py @@ -0,0 +1,1113 @@ +#!/usr/bin/env python3 +""" +ResolutionFlow Decision Trees - Batch 2: Networking, Active Directory, Microsoft 365. + +This script adds new troubleshooting decision trees to complement the original +seed_trees.py trees. Covers common MSP scenarios across three categories: +- Networking (DNS, DHCP, VPN, Bandwidth, Wireless, Firewall) +- Active Directory / Entra ID (Lockouts, Replication, GPO, Sync, Domain Join, Auth) +- Microsoft 365 (Teams, OneDrive, Mail Flow, SharePoint, MFA, Licensing) + +Run from the backend directory: + python -m scripts.seed_trees_v2 --email admin@example.com --password YourPass123 + +Requirements: +- Backend server must be running (uvicorn app.main:app) +""" + +import asyncio +import argparse +import httpx +from typing import Any + +# Import AD trees from separate module (file is too large for one file) +from scripts.seed_trees_ad import ( + get_repeated_lockout_tree, + get_ad_replication_tree, + get_gpo_not_applying_tree, + get_entra_id_sync_tree, + get_domain_join_tree, + get_kerberos_auth_tree, +) + +# Import additional networking trees from separate module +from scripts.seed_trees_networking import ( + get_bandwidth_slow_internet_tree, + get_wireless_connectivity_tree, + get_firewall_blocking_tree, +) + +# Import M365 trees from separate module +from scripts.seed_trees_m365 import ( + get_teams_call_quality_tree, + get_onedrive_sync_tree, + get_mail_flow_tree, + get_sharepoint_permissions_tree, + get_mfa_lockout_tree, + get_license_assignment_tree, +) + + +# API Configuration +API_BASE_URL = "http://localhost:8000/api/v1" +ADMIN_EMAIL = None +ADMIN_PASSWORD = None + + +# ============================================================================= +# NETWORKING TREES +# 
# =============================================================================


def get_dns_resolution_tree() -> dict[str, Any]:
    """
    Build the "DNS Resolution Failures" networking decision tree.

    Covers client-side DNS issues, server-side DNS problems, forwarders,
    the hosts file, suffix search lists, and intermittent failures.

    Returns:
        A dict with the keys ``name``, ``description``, ``category`` and
        ``tree_structure``. ``tree_structure`` is the root node; every node
        has an ``id`` and a ``type`` which is one of:

        - ``decision``: ``question``, ``help_text``, ``options`` (each option
          carries a ``next_node_id``) and optionally ``children``.
        - ``action``: ``title``, ``description`` and a ``next_node_id``.
        - ``solution``: ``title`` and ``description`` (terminal node).

    Notes:
        ``next_node_id`` values refer to node ids anywhere in the tree, not
        only to direct children (e.g. ``check_dns_zones`` is nested under
        ``dns_server_result`` but is also targeted from
        ``specific_record_result``).

        Values the engineer must substitute in commands are written as
        bare-word placeholders (``dns_server_ip``, ``internal_dns_server_ip``,
        ``forwarder_ip``, ``scope_id``, ``problematic-hostname``) so that every
        command parameter has an argument.
    """
    return {
        "name": "DNS Resolution Failures",
        "description": "Troubleshoot DNS resolution issues including failed lookups, slow resolution, wrong answers, and DNS server problems. Covers both client-side and server-side diagnostics with PowerShell and nslookup commands.",
        "category": "Networking",
        "tree_structure": {
            "id": "root",
            "type": "decision",
            "question": "What type of DNS issue is the user experiencing?",
            "help_text": "Identify whether this affects a single user, multiple users, or a specific resource. This determines if it's a client-side or server-side issue.",
            "options": [
                {"id": "single_user", "label": "Single user can't resolve names", "next_node_id": "check_single_client"},
                {"id": "multiple_users", "label": "Multiple users affected", "next_node_id": "check_dns_server"},
                {"id": "specific_resource", "label": "One specific hostname won't resolve", "next_node_id": "check_specific_record"},
                {"id": "intermittent", "label": "DNS works sometimes, fails other times", "next_node_id": "check_intermittent"}
            ],
            "children": [
                # --- Single-user (client-side) path ---
                {
                    "id": "check_single_client",
                    "type": "action",
                    "title": "Check Client DNS Configuration",
                    "description": "Verify the client's DNS settings and basic connectivity.\n\n**PowerShell:**\n```\nGet-DnsClientServerAddress -AddressFamily IPv4 | Format-Table InterfaceAlias,ServerAddresses\n\nGet-Service -Name Dnscache\n\nResolve-DnsName google.com\n```\n\n**Quick check:** Can the user ping the DNS server IP directly?\n```\nTest-Connection -ComputerName dns_server_ip -Count 2\n```",
                    "next_node_id": "client_dns_result"
                },
                {
                    "id": "client_dns_result",
                    "type": "decision",
                    "question": "What did the client DNS check reveal?",
                    "help_text": "Compare the DNS servers configured against your environment's expected values",
                    "options": [
                        {"id": "wrong_dns", "label": "DNS servers are wrong / DHCP-assigned are incorrect", "next_node_id": "fix_client_dns"},
                        {"id": "cant_reach_dns", "label": "Can't ping the DNS server", "next_node_id": "check_network_path"},
                        {"id": "dns_ok_no_resolve", "label": "DNS config looks correct but still can't resolve", "next_node_id": "flush_dns_cache"},
                        {"id": "dns_service_stopped", "label": "DNS Client service is stopped", "next_node_id": "restart_dns_client"}
                    ],
                    "children": [
                        {
                            "id": "fix_client_dns",
                            "type": "action",
                            "title": "Correct Client DNS Configuration",
                            "description": "The client has incorrect DNS servers configured.\n\n**If using DHCP (preferred):**\n```\nipconfig /release\nipconfig /renew\n```\n\n**If DHCP is assigning wrong DNS**, check the DHCP server scope options.\n\n**If static DNS is needed:**\n```\nSet-DnsClientServerAddress -InterfaceAlias \"Ethernet\" -ServerAddresses \"10.0.0.10\",\"10.0.0.11\"\n```\n\n**Verify after change:**\n```\nResolve-DnsName google.com\nResolve-DnsName your-internal-server.domain.local\n```",
                            "next_node_id": "verify_dns_resolution"
                        },
                        {
                            "id": "check_network_path",
                            "type": "action",
                            "title": "Diagnose Network Path to DNS Server",
                            "description": "The client can't reach the DNS server. This is a network connectivity issue.\n\n**Commands:**\n```\nTest-NetConnection -ComputerName dns_server_ip -Port 53 -InformationLevel Detailed\n\ntracert dns_server_ip\n```\n\n**Common causes:**\n- VLAN misconfiguration\n- Firewall blocking port 53 (UDP/TCP)\n- Network cable / WiFi disconnect\n- VPN tunnel down",
                            "next_node_id": "network_path_result"
                        },
                        {
                            "id": "network_path_result",
                            "type": "decision",
                            "question": "Can the client reach the DNS server on port 53?",
                            "help_text": "Test-NetConnection will show TcpTestSucceeded: True/False",
                            "options": [
                                {"id": "port_blocked", "label": "Port 53 is blocked (TcpTestSucceeded: False)", "next_node_id": "escalate_firewall"},
                                {"id": "no_route", "label": "No route / request times out completely", "next_node_id": "escalate_network"},
                                {"id": "port_open", "label": "Port 53 is open but DNS still fails", "next_node_id": "check_dns_server"}
                            ],
                            "children": [
                                {
                                    "id": "escalate_firewall",
                                    "type": "solution",
                                    "title": "Escalate: Firewall Blocking DNS Traffic",
                                    "description": "Port 53 (DNS) is being blocked between the client and DNS server.\n\n**Likely causes:**\n- Host-based firewall on client or server\n- Network firewall / ACL blocking UDP 53 and/or TCP 53\n- New firewall rule change\n\n**Actions:**\n1. Check Windows Firewall: `Get-NetFirewallRule | Where-Object {$_.DisplayName -like '*DNS*'}`\n2. Escalate to network team with source IP, destination IP, port 53, traceroute output\n\n**Ticket Notes:** Include traceroute output and Test-NetConnection results."
                                },
                                {
                                    "id": "escalate_network",
                                    "type": "solution",
                                    "title": "Escalate: Network Routing Issue",
                                    "description": "The client cannot reach the DNS server at all.\n\n**Check before escalating:**\n1. Is the client on the correct VLAN? `Get-NetAdapter | Select Name,Status,LinkSpeed`\n2. Does the client have a valid IP? `ipconfig /all`\n3. Can the client ping its default gateway?\n\n**If no gateway connectivity:** Local network issue (cable, switch port, WiFi)\n**If gateway works but DNS unreachable:** Routing issue between subnets\n\n**Escalate to:** Network Engineering team"
                                }
                            ]
                        },
                        {
                            "id": "flush_dns_cache",
                            "type": "action",
                            "title": "Flush DNS Cache and Re-register",
                            "description": "DNS config looks correct. The issue may be a stale cache.\n\n**PowerShell (run as Administrator):**\n```\nClear-DnsClientCache\nipconfig /registerdns\nGet-DnsClientCache | Measure-Object\n```\n\n**Also check the hosts file for overrides:**\n```\nGet-Content C:\\Windows\\System32\\drivers\\etc\\hosts | Where-Object {$_ -notmatch '^#' -and $_ -ne ''}\n```\n\nA stale hosts file entry will override DNS every time.",
                            "next_node_id": "post_flush_check"
                        },
                        {
                            "id": "post_flush_check",
                            "type": "decision",
                            "question": "Did flushing DNS and checking the hosts file resolve the issue?",
                            "help_text": "Test: Resolve-DnsName problematic-hostname",
                            "options": [
                                {"id": "resolved", "label": "Yes, DNS is working now", "next_node_id": "solution_cache_flush"},
                                {"id": "hosts_entry", "label": "Found a bad hosts file entry", "next_node_id": "fix_hosts_file"},
                                {"id": "still_failing", "label": "Still not resolving", "next_node_id": "nslookup_deep_dive"}
                            ],
                            "children": [
                                {
                                    "id": "solution_cache_flush",
                                    "type": "solution",
                                    "title": "Resolved: Stale DNS Cache",
                                    "description": "The issue was caused by a stale DNS cache entry.\n\n**Root cause:** DNS cache held an outdated or incorrect record. Common after server IP changes, DNS record updates, or VPN cycling.\n\n**Resolution:** Flushed DNS client cache with `Clear-DnsClientCache`.\n\n**Prevention:** If recurring for many users, consider lowering TTL values on frequently-changed records."
                                },
                                {
                                    "id": "fix_hosts_file",
                                    "type": "action",
                                    "title": "Remove Bad Hosts File Entry",
                                    "description": "A static entry in the hosts file is overriding DNS.\n\n**Edit (run Notepad as Administrator):**\n```\nnotepad C:\\Windows\\System32\\drivers\\etc\\hosts\n```\n\nRemove or comment out the offending line by adding `#` at the beginning.\n\n**Common culprits:** Old dev/test entries, malware-added entries, legacy workarounds.\n\n**After editing:** `Clear-DnsClientCache`\n\n**Security note:** If the hosts file has entries you don't recognize, run a malware scan.",
                                    "next_node_id": "verify_dns_resolution"
                                },
                                {
                                    "id": "nslookup_deep_dive",
                                    "type": "action",
                                    "title": "Deep DNS Diagnostics with nslookup",
                                    "description": "Standard troubleshooting hasn't resolved it. Compare DNS responses.\n\n**Test against different DNS servers:**\n```\nnslookup problematic-hostname internal_dns_server_ip\nnslookup problematic-hostname 8.8.8.8\n\nResolve-DnsName -Name problematic-hostname -Type A -Server internal_dns_server_ip\n```\n\n**Compare results:** If public DNS resolves it but internal doesn't, the record is missing from your internal DNS.",
                                    "next_node_id": "nslookup_result"
                                },
                                {
                                    "id": "nslookup_result",
                                    "type": "decision",
                                    "question": "What do the nslookup comparisons show?",
                                    "help_text": "Compare internal DNS server response vs public DNS (8.8.8.8)",
                                    "options": [
                                        {"id": "internal_missing", "label": "Internal DNS can't resolve, public DNS can", "next_node_id": "check_forwarders"},
                                        {"id": "both_fail", "label": "Neither internal nor public DNS can resolve", "next_node_id": "check_name_validity"},
                                        {"id": "wrong_ip", "label": "DNS returns wrong IP address", "next_node_id": "check_stale_record"},
                                        {"id": "internal_only", "label": "It's an internal name (no public record expected)", "next_node_id": "check_dns_zone"}
                                    ],
                                    "children": [
                                        {
                                            "id": "check_forwarders",
                                            "type": "solution",
                                            "title": "Check DNS Forwarders Configuration",
                                            "description": "Internal DNS can't resolve external names — likely a forwarder issue.\n\n**On the DNS Server:**\n```\nGet-DnsServerForwarder\nTest-NetConnection -ComputerName 8.8.8.8 -Port 53\n```\n\n**Common causes:** Forwarders unreachable, root hints disabled with no forwarders, conditional forwarder misconfigured.\n\n**Fix:** Update forwarders to reliable public DNS (8.8.8.8, 1.1.1.1).\n\n**Escalate to:** Systems Administration if DNS server changes are needed."
                                        },
                                        {
                                            "id": "check_name_validity",
                                            "type": "solution",
                                            "title": "Verify Hostname is Valid",
                                            "description": "Neither internal nor public DNS can resolve this name.\n\n**Check:**\n1. Is the hostname spelled correctly?\n2. Does the DNS record actually exist?\n3. Has the record had time to propagate? (up to 48 hours for new records)\n4. Use: https://mxtoolbox.com/DNSLookup.aspx\n\n**Ticket Notes:** Document the exact hostname and test results."
                                        },
                                        {
                                            "id": "check_stale_record",
                                            "type": "solution",
                                            "title": "Escalate: Stale or Incorrect DNS Record",
                                            "description": "DNS is returning the wrong IP address.\n\n**Possible causes:** Server migrated but DNS not updated, DNS scavenging disabled, dynamic DNS registration from wrong host.\n\n**Gather:**\n```\nResolve-DnsName -Name hostname -Type A | Select Name,IPAddress,TTL\n```\n\n**Escalate to:** DNS Administrator with current wrong IP and expected correct IP."
                                        },
                                        {
                                            "id": "check_dns_zone",
                                            "type": "solution",
                                            "title": "Escalate: Missing Internal DNS Record",
                                            "description": "Internal hostname doesn't have a DNS record.\n\n**For the DNS admin:**\n```\nGet-DnsServerZone | Where-Object {$_.ZoneName -like '*yourdomain*'}\nGet-DnsServerResourceRecord -ZoneName 'yourdomain.local' -Name 'hostname'\n```\n\n**If dynamic DNS:** Re-register from the target machine: `ipconfig /registerdns`\n\n**Escalate to:** DNS Administrator to create the missing record."
                                        }
                                    ]
                                }
                            ]
                        },
                        {
                            "id": "restart_dns_client",
                            "type": "action",
                            "title": "Restart DNS Client Service",
                            "description": "The DNS Client service (Dnscache) is stopped.\n\n**PowerShell (Administrator):**\n```\nStart-Service -Name Dnscache\nSet-Service -Name Dnscache -StartupType Automatic\nGet-Service -Name Dnscache\n```\n\n**Note:** This service should ALWAYS be running. Investigate why it was stopped.",
                            "next_node_id": "verify_dns_resolution"
                        }
                    ]
                },
                # --- Multiple-user (server-side) path ---
                {
                    "id": "check_dns_server",
                    "type": "action",
                    "title": "Check DNS Server Health",
                    "description": "Multiple users affected — check the DNS server itself.\n\n**On the DNS Server (PowerShell):**\n```\nGet-Service -Name DNS\n\nTest-DnsServer -IPAddress dns_server_ip -ZoneName yourdomain.local\n\nGet-WinEvent -FilterHashtable @{LogName='DNS Server';Level=2} -MaxEvents 10\n```",
                    "next_node_id": "dns_server_result"
                },
                {
                    "id": "dns_server_result",
                    "type": "decision",
                    "question": "What is the DNS server status?",
                    "help_text": "Check the service, overall server health, and event logs",
                    "options": [
                        {"id": "service_stopped", "label": "DNS Server service is stopped/crashed", "next_node_id": "restart_dns_server"},
                        {"id": "server_unreachable", "label": "DNS server is unreachable / down", "next_node_id": "dns_server_down"},
                        {"id": "service_running_errors", "label": "Service running but event log shows errors", "next_node_id": "dns_event_errors"},
                        {"id": "server_looks_ok", "label": "Server and service look healthy", "next_node_id": "check_dns_zones"}
                    ],
                    "children": [
                        {
                            "id": "restart_dns_server",
                            "type": "action",
                            "title": "Restart DNS Server Service",
                            "description": "**CAUTION:** Restarting DNS affects all users relying on this server.\n\n```\nRestart-Service -Name DNS -Force\nGet-Service -Name DNS\nResolve-DnsName google.com -Server localhost\n```\n\n**If it won't start:**\n```\nGet-NetTCPConnection -LocalPort 53\nGet-NetUDPEndpoint -LocalPort 53\n```\n\n**Important:** If a secondary DNS server exists, verify clients can failover.",
                            "next_node_id": "dns_restart_result"
                        },
                        {
                            "id": "dns_restart_result",
                            "type": "decision",
                            "question": "Did the DNS Server service restart successfully?",
                            "help_text": "Verify with: Get-Service -Name DNS",
                            "options": [
                                {"id": "restart_ok", "label": "Yes, service running and resolving", "next_node_id": "solution_dns_service_restart"},
                                {"id": "restart_fail", "label": "Service won't start", "next_node_id": "escalate_dns_critical"}
                            ],
                            "children": [
                                {
                                    "id": "solution_dns_service_restart",
                                    "type": "solution",
                                    "title": "Resolved: DNS Server Service Restarted",
                                    "description": "The DNS Server service was stopped and has been restarted.\n\n**Post-resolution:**\n1. Monitor for the next few hours\n2. Set up monitoring alerts for DNS service status\n3. Review event logs for recurring errors\n4. Investigate why it stopped\n\n**Ticket Notes:** DNS service was stopped, restarted successfully. Root cause investigation needed."
                                },
                                {
                                    "id": "escalate_dns_critical",
                                    "type": "solution",
                                    "title": "CRITICAL: DNS Server Won't Start",
                                    "description": "**Priority: CRITICAL**\n\n**Immediate actions:**\n1. Ensure secondary DNS is handling queries\n2. If no secondary, consider pointing clients to 8.8.8.8 temporarily\n3. Check disk space, Windows updates, event logs\n\n**Escalate to:** Senior Systems Administrator\n**Communication:** Notify affected users of degraded DNS."
                                }
                            ]
                        },
                        {
                            "id": "dns_server_down",
                            "type": "solution",
                            "title": "CRITICAL: DNS Server Unreachable",
                            "description": "**Priority: CRITICAL**\n\n**Immediate actions:**\n1. Check secondary DNS server\n2. Check via iLO/iDRAC/IPMI or hypervisor\n3. Contact datacenter if hosted\n\n**Temporary workaround** (emergency only):\n```\nSet-DhcpServerv4OptionValue -ScopeId scope_id -DnsServer 8.8.8.8,1.1.1.1\n```\nThis breaks internal name resolution.\n\n**Escalate to:** Infrastructure team immediately"
                        },
                        {
                            "id": "dns_event_errors",
                            "type": "solution",
                            "title": "Investigate DNS Server Event Log Errors",
                            "description": "DNS service is running but logging errors.\n\n**Common errors:**\n- **Event 4015:** Zone transfer failed — check connectivity between DNS servers\n- **Event 4004:** Zone not loaded — corrupt zone file\n- **Event 7062:** No forwarders reachable — check internet connectivity\n- **Event 4512:** LDAP-integrated zone error — check AD replication: `repadmin /replsummary`\n\n**Escalate to:** DNS/Systems Administrator with event log exports."
                        },
                        {
                            "id": "check_dns_zones",
                            "type": "solution",
                            "title": "Check DNS Zones and Records",
                            "description": "DNS server looks healthy. Check the zones.\n\n**On DNS server:**\n```\nGet-DnsServerZone | Format-Table ZoneName,ZoneType,DynamicUpdate\n\nGet-DnsServerResourceRecord -ZoneName 'yourdomain.local' -Name 'problemhost'\n```\n\n**For AD-integrated zones:** Verify AD replication is healthy.\n\n**Escalate to:** DNS Administrator with zone configuration output."
                        }
                    ]
                },
                # --- Single-hostname path ---
                {
                    "id": "check_specific_record",
                    "type": "action",
                    "title": "Test the Specific Hostname",
                    "description": "One specific hostname won't resolve. Identify whether the record exists.\n\n**Commands:**\n```\nResolve-DnsName -Name 'problematic-hostname' -Type A\nnslookup problematic-hostname internal_dns_server_ip\nnslookup problematic-hostname 8.8.8.8\nGet-DnsClientGlobalSetting | Select SuffixSearchList\n```\n\n**Key question:** Is this an internal name (.local / .corp) or external?",
                    "next_node_id": "specific_record_result"
                },
                {
                    "id": "specific_record_result",
                    "type": "decision",
                    "question": "Is this an internal or external hostname?",
                    "help_text": "Internal names typically end in .local, .corp, .internal or your AD domain suffix",
                    "options": [
                        {"id": "internal_name", "label": "Internal hostname (.local / AD domain)", "next_node_id": "check_dns_zones"},
                        {"id": "external_name", "label": "External / public hostname", "next_node_id": "check_external_dns"},
                        {"id": "unsure", "label": "Not sure / no suffix", "next_node_id": "check_suffix_search"}
                    ],
                    "children": [
                        {
                            "id": "check_external_dns",
                            "type": "solution",
                            "title": "Test External DNS Resolution Path",
                            "description": "Public hostname failing. Compare resolution paths.\n\n```\nResolve-DnsName -Name 'example.com' -Server internal_dns_server_ip\nResolve-DnsName -Name 'example.com' -Server 8.8.8.8\n```\n\n**If public DNS works but internal doesn't:** Check content filter/DNS filter, conditional forwarders, or split-brain DNS configuration.\n\n**Escalate to:** DNS/Network admin with comparison results."
                        },
                        {
                            "id": "check_suffix_search",
                            "type": "solution",
                            "title": "Check DNS Suffix Search List",
                            "description": "Short names may not resolve if the DNS suffix search list is incomplete.\n\n```\nGet-DnsClientGlobalSetting | Select SuffixSearchList\nipconfig /all | findstr 'Search'\n```\n\n**Common issue:** Suffix search list doesn't include the right domain.\n\n**Fix:** Set via DHCP (scope option 015) or GPO (Computer Config > Admin Templates > Network > DNS Client)."
                        }
                    ]
                },
                # --- Intermittent-failure path ---
                {
                    "id": "check_intermittent",
                    "type": "decision",
                    "question": "Is the intermittent failure affecting internal names, external names, or both?",
                    "help_text": "Test both an internal hostname and google.com multiple times",
                    "options": [
                        {"id": "internal_intermittent", "label": "Only internal names fail intermittently", "next_node_id": "check_multiple_dns_servers"},
                        {"id": "external_intermittent", "label": "Only external names fail intermittently", "next_node_id": "check_forwarder_health"},
                        {"id": "both_intermittent", "label": "Both fail intermittently", "next_node_id": "check_dns_load"}
                    ],
                    "children": [
                        {
                            "id": "check_multiple_dns_servers",
                            "type": "solution",
                            "title": "Check Multiple DNS Server Consistency",
                            "description": "If you have multiple DNS servers, one may be out of sync.\n\n```\nResolve-DnsName -Name 'testhost.yourdomain.local' -Server 10.0.0.10\nResolve-DnsName -Name 'testhost.yourdomain.local' -Server 10.0.0.11\n```\n\n**If results differ:** Check AD replication:\n```\nrepadmin /replsummary\nrepadmin /showrepl\n```\n\n**Temporary workaround:** Point affected clients to the working DNS server only.\n\n**Escalate to:** Senior Systems Admin with replication data."
                        },
                        {
                            "id": "check_forwarder_health",
                            "type": "solution",
                            "title": "Check Upstream DNS / Forwarder Health",
                            "description": "External resolution is intermittent — likely a forwarder or upstream issue.\n\n```\nGet-DnsServerForwarder\nResolve-DnsName google.com -Server forwarder_ip\nTest-Connection -ComputerName forwarder_ip -Count 20 | Measure-Object -Property ResponseTime -Average\n```\n\n**If using ISP DNS:** Try switching to 8.8.8.8, 1.1.1.1.\n**If packet loss to forwarders:** Network/ISP issue."
                        },
                        {
                            "id": "check_dns_load",
                            "type": "solution",
                            "title": "Investigate DNS Server Performance",
                            "description": "Both internal and external resolution intermittent — server may be overloaded.\n\n```\nGet-Counter '\\Processor(_Total)\\% Processor Time','\\Memory\\Available MBytes'\nGet-Counter '\\DNS\\Total Query Received/sec','\\DNS\\Recursive Queries/sec'\n```\n\n**If overloaded:** Add another DNS server, check for amplification attacks, review recursive settings.\n\n**Escalate to:** Systems Administration with performance data."
                        }
                    ]
                },
                # --- Shared verification step reached from several fix actions ---
                {
                    "id": "verify_dns_resolution",
                    "type": "decision",
                    "question": "Is DNS resolving correctly now?",
                    "help_text": "Test: Resolve-DnsName google.com AND Resolve-DnsName your-internal-server.domain.local",
                    "options": [
                        {"id": "all_resolved", "label": "Yes, both internal and external resolve", "next_node_id": "solution_resolved"},
                        {"id": "still_issues", "label": "Still having issues", "next_node_id": "check_dns_server"}
                    ],
                    "children": [
                        {
                            "id": "solution_resolved",
                            "type": "solution",
                            "title": "DNS Resolution Issue Resolved",
                            "description": "DNS is now working correctly.\n\n**Document in ticket:**\n- Root cause identified\n- Steps taken to resolve\n- Configuration changes made\n\n**If widespread issue:** Send communication to affected users confirming resolution."
                        }
                    ]
                }
            ]
        }
    }
Covers both client-side and server-side diagnostics.", + "category": "Networking", + "tree_structure": { + "id": "root", + "type": "decision", + "question": "How many devices are affected by the DHCP issue?", + "help_text": "This determines whether it's a client-specific issue or a DHCP server/infrastructure problem.", + "options": [ + {"id": "single", "label": "Single device can't get an IP", "next_node_id": "check_single_device"}, + {"id": "multiple_same_subnet", "label": "Multiple devices on the same subnet", "next_node_id": "check_dhcp_scope"}, + {"id": "multiple_subnets", "label": "Multiple subnets affected", "next_node_id": "check_dhcp_server_health"}, + {"id": "new_device", "label": "Brand new device / just reimaged", "next_node_id": "check_new_device"} + ], + "children": [ + { + "id": "check_single_device", + "type": "action", + "title": "Check Device Network Configuration", + "description": "Start with basic network adapter diagnostics.\n\n**PowerShell:**\n```\nipconfig /all\nGet-NetIPInterface -AddressFamily IPv4 | Select InterfaceAlias,Dhcp\nGet-NetAdapter | Select Name,Status,LinkSpeed,MediaConnectionState\n```\n\n**Look for:**\n- 169.254.x.x (APIPA) = DHCP request failed\n- Is DHCP enabled on the adapter?\n- Is the adapter showing 'Up' status?", + "next_node_id": "single_device_result" + }, + { + "id": "single_device_result", + "type": "decision", + "question": "What does the device's network configuration show?", + "help_text": "Check ipconfig /all output carefully", + "options": [ + {"id": "apipa", "label": "169.254.x.x address (APIPA)", "next_node_id": "try_dhcp_renew"}, + {"id": "static_ip", "label": "Static IP configured (DHCP not enabled)", "next_node_id": "fix_static_to_dhcp"}, + {"id": "no_adapter", "label": "Network adapter disabled or missing", "next_node_id": "fix_adapter"}, + {"id": "has_ip_not_working", "label": "Has a DHCP IP but network isn't working", "next_node_id": "check_wrong_scope"}, + {"id": "no_link", "label": "No media / 
disconnected", "next_node_id": "check_physical"} + ], + "children": [ + { + "id": "try_dhcp_renew", + "type": "action", + "title": "Release and Renew DHCP Lease", + "description": "APIPA address means DHCP requests are failing.\n\n**PowerShell (Administrator):**\n```\nipconfig /release\nStart-Sleep -Seconds 5\nipconfig /renew\nipconfig /all\n```\n\n**Error meanings:**\n- \"No DHCP server could be contacted\" = Server unreachable\n- \"The semaphore timeout period has expired\" = Network connectivity issue\n\n**While waiting:** Check if other devices on same switch/VLAN have IPs.", + "next_node_id": "dhcp_renew_result" + }, + { + "id": "dhcp_renew_result", + "type": "decision", + "question": "Did the DHCP renew succeed?", + "help_text": "Does the device now have a valid (non-169.254) IP?", + "options": [ + {"id": "got_ip", "label": "Yes, received a valid IP", "next_node_id": "solution_dhcp_renewed"}, + {"id": "still_apipa", "label": "Still showing 169.254.x.x", "next_node_id": "check_dhcp_reachability"}, + {"id": "error_msg", "label": "Got an error message", "next_node_id": "check_dhcp_reachability"} + ], + "children": [ + { + "id": "solution_dhcp_renewed", + "type": "solution", + "title": "Resolved: DHCP Lease Renewed", + "description": "Device successfully obtained an IP address.\n\n**Likely caused by:** Temporary network glitch, expired lease from sleep/hibernation, brief DHCP server unavailability.\n\n**Verify full connectivity:**\n```\nping \nping 8.8.8.8\nnslookup google.com\n```\n\n**Ticket Notes:** Single device DHCP failure, resolved with release/renew." + }, + { + "id": "check_dhcp_reachability", + "type": "action", + "title": "Test DHCP Server Reachability", + "description": "Can't get a lease. 
Check DHCP server connectivity.\n\n```\nTest-Connection -ComputerName -Count 2\narp -a\n```\n\n**Key check:** If the device is on a different subnet from the DHCP server, is there a DHCP relay/IP helper configured on the router?", + "next_node_id": "dhcp_reach_result" + }, + { + "id": "dhcp_reach_result", + "type": "decision", + "question": "Can the device communicate with anything on the network?", + "help_text": "Check ARP table and ping known hosts on same subnet", + "options": [ + {"id": "can_reach_local", "label": "Can reach local devices but not DHCP server", "next_node_id": "check_relay_agent"}, + {"id": "cant_reach_anything", "label": "Can't reach anything", "next_node_id": "check_physical"}, + {"id": "can_reach_dhcp", "label": "Can ping DHCP server but still no lease", "next_node_id": "check_dhcp_scope"} + ], + "children": [ + { + "id": "check_relay_agent", + "type": "solution", + "title": "Check DHCP Relay Agent / IP Helper", + "description": "DHCP broadcasts aren't reaching the server. 
The relay agent may be misconfigured.\n\n**On the router/L3 switch:**\n```\n# Cisco IOS\nshow running-config interface vlan \n# Look for: ip helper-address \n```\n\n**Escalate to:** Network Engineering to verify relay/helper\n**Include:** VLAN ID, subnet, DHCP server IP, affected location" + } + ] + } + ] + }, + { + "id": "fix_static_to_dhcp", + "type": "action", + "title": "Switch from Static IP to DHCP", + "description": "Device has a static IP instead of DHCP.\n\n**PowerShell (Administrator):**\n```\nSet-NetIPInterface -InterfaceAlias 'Ethernet' -Dhcp Enabled\nRemove-NetIPAddress -InterfaceAlias 'Ethernet' -Confirm:$false\nSet-DnsClientServerAddress -InterfaceAlias 'Ethernet' -ResetServerAddresses\nipconfig /renew\n```\n\n**Before changing:** Verify this device doesn't NEED a static IP (servers, printers, etc.)", + "next_node_id": "verify_dhcp_working" + }, + { + "id": "fix_adapter", + "type": "action", + "title": "Enable or Troubleshoot Network Adapter", + "description": "Network adapter is disabled or not detected.\n\n```\nGet-NetAdapter -IncludeHidden | Select Name,Status,InterfaceDescription\nEnable-NetAdapter -Name 'Ethernet'\nGet-PnpDevice -Class Net | Select Status,FriendlyName\n```\n\n**If missing from Device Manager:** Check BIOS NIC setting, try different USB port, or install drivers.\n**If showing error:** Uninstall device > Scan for hardware changes.", + "next_node_id": "verify_dhcp_working" + }, + { + "id": "check_wrong_scope", + "type": "solution", + "title": "Verify IP is from Correct Scope", + "description": "Device has a DHCP IP but it may be from the wrong scope/VLAN.\n\n```\nipconfig /all\nipconfig /all | findstr 'DHCP Server'\n```\n\n**Compare:** Is the IP in the expected range for this location/VLAN?\n\n**Common cause:** Wrong switch port/VLAN or DHCP scope options misconfigured.\n\n**Escalate to:** Network team with device MAC address and current IP info." 
+ }, + { + "id": "check_physical", + "type": "action", + "title": "Check Physical Network Connection", + "description": "No network connectivity at all.\n\n**Check in order:**\n1. Ethernet cable plugged in firmly? Try different cable.\n2. Switch port link light on? Try different port.\n3. WiFi enabled and connected to correct SSID?\n4. NIC link/activity LEDs?\n\n```\nGet-NetAdapter | Select Name,Status,LinkSpeed\nnetsh wlan show interfaces\n```\n\n**If using dock/dongle:** Try connecting directly.", + "next_node_id": "physical_result" + }, + { + "id": "physical_result", + "type": "decision", + "question": "Did fixing the physical connection restore network?", + "help_text": "Does adapter show 'Up' status?", + "options": [ + {"id": "fixed_physical", "label": "Yes, adapter is up", "next_node_id": "try_dhcp_renew"}, + {"id": "still_no_link", "label": "Still no link", "next_node_id": "escalate_hardware"}, + {"id": "link_up_no_dhcp", "label": "Link up but still no IP", "next_node_id": "try_dhcp_renew"} + ], + "children": [ + { + "id": "escalate_hardware", + "type": "solution", + "title": "Escalate: Possible Hardware Failure", + "description": "No link despite checking cables and ports.\n\n**Try before escalating:**\n1. Different cable + different switch port\n2. USB Ethernet adapter as workaround\n3. Uninstall/reinstall NIC driver\n\n**Escalate to:** Desktop Support for hardware assessment." + } + ] + } + ] + }, + { + "id": "check_dhcp_scope", + "type": "action", + "title": "Check DHCP Scope Health", + "description": "Multiple devices affected. 
Check the DHCP scope.\n\n**On DHCP Server:**\n```\nGet-DhcpServerv4ScopeStatistics | Format-Table ScopeId,Free,InUse,PercentageInUse\n\nGet-DhcpServerv4ScopeStatistics | Where-Object {$_.Free -eq 0}\n\nGet-DhcpServerv4Lease -ScopeId | Sort LeaseExpiryTime\n```\n\n**Key metric:** PercentageInUse at 100% = scope exhausted.", + "next_node_id": "scope_health_result" + }, + { + "id": "scope_health_result", + "type": "decision", + "question": "What is the DHCP scope status?", + "help_text": "Check scope statistics output", + "options": [ + {"id": "exhausted", "label": "Scope is 100% full", "next_node_id": "fix_scope_exhaustion"}, + {"id": "scope_disabled", "label": "Scope is deactivated", "next_node_id": "activate_scope"}, + {"id": "scope_ok", "label": "Has free addresses and is active", "next_node_id": "check_dhcp_server_health"}, + {"id": "scope_missing", "label": "No scope for this subnet", "next_node_id": "escalate_missing_scope"} + ], + "children": [ + { + "id": "fix_scope_exhaustion", + "type": "action", + "title": "Address DHCP Scope Exhaustion", + "description": "No more IPs available.\n\n```\nGet-DhcpServerv4Lease -ScopeId | Where-Object {$_.AddressState -eq 'InactiveReservation'}\n\nGet-DhcpServerv4Scope -ScopeId | Select LeaseDuration\n```\n\n**Options:**\n1. Reduce lease duration (8 hours for offices)\n2. Delete stale/inactive leases\n3. Expand the scope range\n4. 
Create exclusions for devices moved to static IPs\n\n**Be careful** expanding scopes — don't overlap with other subnets.", + "next_node_id": "scope_fix_result" + }, + { + "id": "scope_fix_result", + "type": "decision", + "question": "Were you able to free up addresses?", + "help_text": "Check updated free count in scope statistics", + "options": [ + {"id": "freed_addresses", "label": "Yes, scope has free addresses", "next_node_id": "solution_scope_fixed"}, + {"id": "need_expansion", "label": "Need to expand scope", "next_node_id": "escalate_scope_expansion"} + ], + "children": [ + { + "id": "solution_scope_fixed", + "type": "solution", + "title": "Resolved: DHCP Scope Addresses Freed", + "description": "Cleaned up stale leases.\n\n**Post-resolution:**\n1. Have users run `ipconfig /renew`\n2. Monitor scope utilization\n3. Set up threshold alerts (warn at 80%, critical at 90%)\n4. Regular audit of stale leases\n\n**Ticket Notes:** DHCP scope exhaustion, cleaned stale leases." + }, + { + "id": "escalate_scope_expansion", + "type": "solution", + "title": "Escalate: DHCP Scope Needs Expansion", + "description": "More devices than current range supports.\n\n**Options:**\n1. Expand IP range (if subnet allows)\n2. Reduce lease duration\n3. Move to larger subnet (requires IP redesign)\n4. Segment network (separate VLANs)\n\n**Escalate to:** Network Engineering\n**Temporary workaround:** Static IPs for critical devices." + } + ] + }, + { + "id": "activate_scope", + "type": "action", + "title": "Activate DHCP Scope", + "description": "Scope is deactivated. 
Verify it SHOULD be active first.\n\n```\nGet-DhcpServerv4Scope -ScopeId | Select ScopeId,Name,State\nSet-DhcpServerv4Scope -ScopeId -State Active\n```\n\n**After activating:** Have users run `ipconfig /renew`", + "next_node_id": "verify_dhcp_working" + }, + { + "id": "escalate_missing_scope", + "type": "solution", + "title": "Escalate: No DHCP Scope for This Subnet", + "description": "No scope configured for this subnet.\n\n**Possible causes:** New VLAN without scope, DHCP migration missed this scope, accidental deletion.\n\n**Escalate to:** DHCP Administrator / Network Engineering\n**Include:** Subnet, VLAN ID, expected IP range, gateway IP\n**Temporary workaround:** Static IPs for critical devices." + } + ] + }, + { + "id": "check_dhcp_server_health", + "type": "action", + "title": "Check DHCP Server Health", + "description": "Scope has addresses but devices can't get IPs. Check the server.\n\n```\nGet-Service -Name DHCPServer\nGet-DhcpServerInDC\nGet-WinEvent -FilterHashtable @{LogName='Microsoft-Windows-DHCP Server Events/Operational';Level=2,3} -MaxEvents 10\nGet-NetUDPEndpoint -LocalPort 67\n```\n\n**Check if DHCP failover is configured** — partner server too.", + "next_node_id": "dhcp_server_result" + }, + { + "id": "dhcp_server_result", + "type": "decision", + "question": "What is the DHCP server status?", + "help_text": "Check service, authorization, and event logs", + "options": [ + {"id": "service_stopped", "label": "DHCP Server service stopped", "next_node_id": "restart_dhcp_service"}, + {"id": "not_authorized", "label": "Not authorized in AD", "next_node_id": "authorize_dhcp"}, + {"id": "server_healthy", "label": "Server healthy, service running", "next_node_id": "check_dhcp_conflicts"}, + {"id": "server_down", "label": "Server completely unreachable", "next_node_id": "escalate_dhcp_server_down"} + ], + "children": [ + { + "id": "restart_dhcp_service", + "type": "action", + "title": "Restart DHCP Server Service", + "description": "```\nRestart-Service 
-Name DHCPServer -Force\nGet-Service -Name DHCPServer\n```\n\n**After restart:** Have clients run `ipconfig /renew`\n\n**If failover partner exists:** It should have been handling leases.", + "next_node_id": "verify_dhcp_working" + }, + { + "id": "authorize_dhcp", + "type": "solution", + "title": "Escalate: DHCP Server Authorization", + "description": "DHCP server not authorized in AD. Unauthorized servers cannot issue leases.\n\n**Requires Domain Admin:**\n```\nAdd-DhcpServerInDC -DnsName dhcpserver.yourdomain.local -IPAddress \n```\n\n**Escalate to:** Domain Administrator\n**Priority:** High" + }, + { + "id": "check_dhcp_conflicts", + "type": "solution", + "title": "Check for IP Conflicts or Rogue DHCP", + "description": "Server healthy with available addresses but clients can't get leases.\n\n**Check for rogue DHCP:**\n```\nipconfig /all | findstr 'DHCP Server'\n```\nIf unexpected IP, there's a rogue DHCP server.\n\n**Check for IP conflicts:**\n```\nGet-DhcpServerv4Lease -ScopeId | Where-Object {$_.AddressState -like '*Decline*'}\n```\n\n**Other possibilities:** MAC address filtering, DHCP policies restricting leases.\n\n**Escalate to:** Network/Systems Admin with audit log findings." + }, + { + "id": "escalate_dhcp_server_down", + "type": "solution", + "title": "CRITICAL: DHCP Server Down", + "description": "**Priority: CRITICAL**\n\n**Immediate actions:**\n1. Check failover DHCP server\n2. Check hypervisor or iLO/iDRAC for server access\n\n**Mitigation:** Existing devices keep leases until expiration. New devices get APIPA.\n\n**Escalate to:** Infrastructure team immediately\n**Communication:** New network connections may fail." 
+ } + ] + }, + { + "id": "check_new_device", + "type": "decision", + "question": "Is the new device connected via Ethernet or WiFi?", + "help_text": "New/reimaged devices may need specific network access configuration", + "options": [ + {"id": "new_ethernet", "label": "Ethernet (wired)", "next_node_id": "check_port_security"}, + {"id": "new_wifi", "label": "WiFi (wireless)", "next_node_id": "check_wifi_auth"} + ], + "children": [ + { + "id": "check_port_security", + "type": "decision", + "question": "Does your environment use 802.1X or MAC filtering?", + "help_text": "Many organizations require device authentication before network access", + "options": [ + {"id": "has_nac", "label": "Yes, 802.1X / NAC", "next_node_id": "register_device_nac"}, + {"id": "has_mac_filter", "label": "Yes, MAC whitelist", "next_node_id": "register_mac"}, + {"id": "no_security", "label": "No, open wired access", "next_node_id": "check_single_device"} + ], + "children": [ + { + "id": "register_device_nac", + "type": "solution", + "title": "Register Device in NAC / 802.1X", + "description": "New devices must be registered in NAC.\n\n1. Get MAC: `Get-NetAdapter | Select Name,MacAddress`\n2. Register in NAC platform (Cisco ISE, ClearPass, etc.)\n3. Assign correct policy/group\n4. Reconnect cable or restart NIC\n\n**Escalate to:** Network Security if you don't have NAC access." + }, + { + "id": "register_mac", + "type": "solution", + "title": "Add MAC Address to Whitelist", + "description": "Switch port or DHCP requires MAC registration.\n\n```\nGet-NetAdapter | Select Name,MacAddress\n```\n\nRegister in DHCP MAC filtering, switch port security, or network management platform.\n\n**After registration:** Disconnect/reconnect cable, then `ipconfig /renew`" + } + ] + }, + { + "id": "check_wifi_auth", + "type": "solution", + "title": "Configure WiFi for New Device", + "description": "New devices need WiFi credentials and may need certificate enrollment.\n\n**Steps:**\n1. 
Connect to correct SSID (corporate, not guest)\n2. Enter credentials or certificates\n3. If MDM managed: Enroll in Intune/JAMF first\n\n**Common issues:** No WiFi profile yet (needs MDM), certificate not enrolled (WPA2-Enterprise), connected to guest instead of corporate." + } + ] + }, + { + "id": "verify_dhcp_working", + "type": "decision", + "question": "Is the device now receiving a valid IP address?", + "help_text": "Run ipconfig /renew and check for valid (non-169.254) IP", + "options": [ + {"id": "working", "label": "Yes, DHCP is working", "next_node_id": "solution_dhcp_resolved"}, + {"id": "still_failing", "label": "Still not getting an IP", "next_node_id": "check_dhcp_server_health"} + ], + "children": [ + { + "id": "solution_dhcp_resolved", + "type": "solution", + "title": "DHCP Issue Resolved", + "description": "Device successfully receiving DHCP lease.\n\n**Verify:**\n```\nipconfig /all\nping \nnslookup google.com\n```\n\n**Document:** Root cause, steps taken, config changes, scope for impact." + } + ] + } + ] + } + } + + +def get_site_to_site_vpn_tree() -> dict[str, Any]: + """ + Site-to-Site VPN Tunnel Down - Networking tree. + Covers IPSec tunnel failures, IKE negotiation, routing, and ISP outages. + """ + return { + "name": "Site-to-Site VPN Tunnel Down", + "description": "Troubleshoot site-to-site VPN tunnel failures including IPSec/IKE negotiation issues, routing problems, ISP outages, and configuration mismatches. Covers common firewall vendors and diagnostic approaches.", + "category": "Networking", + "tree_structure": { + "id": "root", + "type": "decision", + "question": "What are the symptoms of the VPN tunnel issue?", + "help_text": "Determine whether the tunnel is completely down or partially working. 
Can users at the remote site access ANY resources at the main site?", + "options": [ + {"id": "completely_down", "label": "No connectivity between sites", "next_node_id": "check_tunnel_status"}, + {"id": "partial", "label": "Some traffic works, some doesn't", "next_node_id": "check_partial_connectivity"}, + {"id": "intermittent", "label": "VPN keeps dropping and reconnecting", "next_node_id": "check_vpn_stability"}, + {"id": "slow", "label": "VPN is up but extremely slow", "next_node_id": "check_vpn_performance"} + ], + "children": [ + { + "id": "check_tunnel_status", + "type": "action", + "title": "Verify VPN Tunnel Status on Firewall", + "description": "Log into the firewall and check tunnel status.\n\n**FortiGate:**\n```\nget vpn ipsec tunnel summary\ndiagnose vpn ike gateway list name \n```\n\n**SonicWall:** Network > IPSec VPN > Settings\n\n**Meraki:** Security & SD-WAN > Site-to-Site VPN\n\n**Palo Alto:**\n```\nshow vpn ipsec-sa\nshow vpn ike-sa\n```\n\n**pfSense:** Status > IPsec\n\n**Check:** Is Phase 1 (IKE) established? 
Is Phase 2 (IPSec SA) established?", + "next_node_id": "tunnel_status_result" + }, + { + "id": "tunnel_status_result", + "type": "decision", + "question": "What does the tunnel status show?", + "help_text": "Phase 1 (IKE/ISAKMP) establishes identity, Phase 2 (IPSec) carries traffic", + "options": [ + {"id": "phase1_down", "label": "Phase 1 (IKE) is down", "next_node_id": "troubleshoot_phase1"}, + {"id": "phase1_up_phase2_down", "label": "Phase 1 up, Phase 2 down", "next_node_id": "troubleshoot_phase2"}, + {"id": "both_up", "label": "Both phases show up", "next_node_id": "check_routing"}, + {"id": "cant_check", "label": "Can't log into firewall", "next_node_id": "check_firewall_access"} + ], + "children": [ + { + "id": "troubleshoot_phase1", + "type": "decision", + "question": "What is the Phase 1 failure reason?", + "help_text": "Check VPN log on the firewall for specific errors", + "options": [ + {"id": "timeout", "label": "Timeout / no response from peer", "next_node_id": "check_peer_reachability"}, + {"id": "auth_fail", "label": "Authentication / PSK mismatch", "next_node_id": "fix_psk_mismatch"}, + {"id": "proposal_mismatch", "label": "No proposal chosen / mismatch", "next_node_id": "fix_phase1_proposal"}, + {"id": "id_mismatch", "label": "ID payload mismatch", "next_node_id": "fix_peer_id"} + ], + "children": [ + { + "id": "check_peer_reachability", + "type": "action", + "title": "Check Connectivity to Remote VPN Endpoint", + "description": "Phase 1 timing out — remote endpoint may be unreachable.\n\n**From the local firewall:**\n```\nping \n```\n\n**Check in order:**\n1. Can you ping remote peer's public IP?\n2. Has remote site's public IP changed? (ISP change, DHCP WAN)\n3. Is remote firewall online?\n4. 
Is your ISP having issues?\n\n**Contact remote site:** Can they access internet normally?", + "next_node_id": "peer_reach_result" + }, + { + "id": "peer_reach_result", + "type": "decision", + "question": "Can you reach the remote VPN endpoint?", + "help_text": "If ping fails, the issue is upstream of VPN configuration", + "options": [ + {"id": "peer_unreachable", "label": "Cannot reach remote peer", "next_node_id": "check_isp_wan"}, + {"id": "peer_reachable", "label": "Can ping but VPN won't connect", "next_node_id": "check_ike_ports"}, + {"id": "ip_changed", "label": "Remote peer IP changed", "next_node_id": "update_peer_ip"} + ], + "children": [ + { + "id": "check_isp_wan", + "type": "decision", + "question": "Is internet working at both sites?", + "help_text": "Check if both sites can browse normally", + "options": [ + {"id": "local_down", "label": "Local internet is down", "next_node_id": "escalate_local_isp"}, + {"id": "remote_down", "label": "Remote internet is down", "next_node_id": "escalate_remote_isp"}, + {"id": "both_ok", "label": "Both have internet", "next_node_id": "check_ike_ports"} + ], + "children": [ + { + "id": "escalate_local_isp", + "type": "solution", + "title": "Local Internet / ISP Outage", + "description": "VPN down due to local internet outage.\n\n**Actions:**\n1. Check ISP status page\n2. Power cycle modem/ONT and edge router\n3. Contact ISP support\n4. Check if WAN IP changed (if dynamic)\n\n**Ticket Notes:** VPN down due to local ISP outage." + }, + { + "id": "escalate_remote_isp", + "type": "solution", + "title": "Remote Site Internet / ISP Outage", + "description": "VPN down due to remote site internet loss.\n\n**Actions:**\n1. Contact someone at remote site\n2. Have them check ISP and power cycle equipment\n3. If dynamic WAN IP, may need VPN update when service returns\n\n**Ticket Notes:** VPN down due to remote site ISP outage." 
+ } + ] + }, + { + "id": "check_ike_ports", + "type": "solution", + "title": "Check IKE/NAT-T Ports (UDP 500/4500)", + "description": "Can reach peer but VPN won't negotiate. IKE ports may be blocked.\n\n**Required ports:**\n- UDP 500 (IKE/ISAKMP)\n- UDP 4500 (NAT Traversal)\n- Protocol 50 (ESP) if not using NAT-T\n\n**Common causes:** ISP blocking VPN, upstream NAT interference, firewall rules blocking outbound 500/4500.\n\n**Escalate to:** Network team to verify firewall rules and ISP blocking." + }, + { + "id": "update_peer_ip", + "type": "solution", + "title": "Update Remote Peer IP Address", + "description": "Remote site's public IP changed (common with dynamic IP ISPs).\n\n**Actions:**\n1. Get new IP from remote site\n2. Update VPN peer config on local firewall\n3. Update remote firewall if needed\n4. Re-establish tunnel\n\n**Prevention:** Use DDNS, configure VPN with FQDN, or get static IP from ISP.\n\n**Escalate to:** Network admin to update firewall config." + } + ] + }, + { + "id": "fix_psk_mismatch", + "type": "solution", + "title": "Fix Pre-Shared Key Mismatch", + "description": "Authentication failing — PSKs don't match.\n\n**Common causes:** Key changed on one side, trailing spaces, copy/paste error.\n\n**Resolution:**\n1. Verify PSK on both firewalls independently\n2. If mismatched, update one to match the other\n3. Clear VPN SA and force re-negotiation\n\n**Security:** Don't share PSKs over unencrypted channels.\n\n**Escalate to:** Network admin (requires firewall access on both sides)." 
+ }, + { + "id": "fix_phase1_proposal", + "type": "solution", + "title": "Fix Phase 1 Proposal Mismatch", + "description": "Endpoints can't agree on encryption settings.\n\n**Must match on BOTH sides:**\n- IKE Version (IKEv1 or IKEv2)\n- Encryption (AES-256, AES-128)\n- Hash (SHA-256, SHA-1)\n- DH Group (14, 19, 20)\n- Lifetime (usually 28800 sec / 8 hours)\n\n**Common causes:** Firmware update changed defaults, one side reconfigured.\n\n**Best practice:** IKEv2, AES-256, SHA-256, DH Group 14+\n\n**Escalate to:** Network admin to align proposals." + }, + { + "id": "fix_peer_id", + "type": "solution", + "title": "Fix Peer ID Mismatch", + "description": "IKE Peer ID doesn't match expectations.\n\n**Peer ID can be:** IP address, FQDN, User FQDN (email), or DN (certificate).\n\n**Common causes:** NAT changing source IP, FQDN not resolving, certificate CN mismatch.\n\n**Escalate to:** Network admin to verify and align peer ID on both endpoints." + } + ] + }, + { + "id": "troubleshoot_phase2", + "type": "solution", + "title": "Troubleshoot Phase 2 (IPSec SA) Failure", + "description": "Phase 1 up but Phase 2 won't establish.\n\n**Must match on BOTH sides:**\n- Encryption (AES-256, AES-128)\n- Hash/integrity (SHA-256, SHA-1)\n- PFS DH Group (must match or both disabled)\n- Lifetime (usually 3600 sec / 1 hour)\n\n**Also check Proxy IDs / Traffic Selectors:**\nLocal and remote subnet definitions must mirror each other.\n\n**Most common Phase 2 issue:** Mismatched proxy IDs.\n\n**Escalate to:** Network admin to compare Phase 2 settings on both firewalls." + }, + { + "id": "check_routing", + "type": "action", + "title": "Check VPN Routing", + "description": "Both phases up but traffic isn't flowing. 
This is a routing issue.\n\n**From a workstation:**\n```\ntracert \nTest-NetConnection -ComputerName -TraceRoute\n```\n\n**On the firewall:** Check for a route to the remote subnet via the VPN tunnel interface.\n\n**If traffic goes out the internet gateway** instead of VPN, there's a missing or incorrect route.", + "next_node_id": "routing_result" + }, + { + "id": "routing_result", + "type": "decision", + "question": "Is traffic being routed through the VPN tunnel?", + "help_text": "Traceroute should show traffic going through VPN interface, not internet gateway", + "options": [ + {"id": "wrong_route", "label": "Traffic going out internet (wrong route)", "next_node_id": "fix_vpn_routing"}, + {"id": "correct_route_blocked", "label": "Routing correct but traffic blocked", "next_node_id": "check_firewall_policy"}, + {"id": "route_ok_both", "label": "Routing correct from both sides", "next_node_id": "check_nat_overlap"} + ], + "children": [ + { + "id": "fix_vpn_routing", + "type": "solution", + "title": "Fix VPN Routing", + "description": "Traffic not routed through VPN tunnel.\n\n**Common causes:** Missing static route, route overridden by more specific route, policy-based VPN needs firewall policy.\n\n**Escalate to:** Network admin to add/fix routes on firewall." + }, + { + "id": "check_firewall_policy", + "type": "solution", + "title": "Check Firewall Policy for VPN Traffic", + "description": "Routing correct but firewall blocking traffic.\n\nCheck on BOTH firewalls: Is there a policy allowing traffic between local and remote subnets via VPN interface? Are correct ports allowed? Check deny rule logs.\n\n**Escalate to:** Network/Security admin to review policies." + }, + { + "id": "check_nat_overlap", + "type": "solution", + "title": "Check NAT or Subnet Overlap Issues", + "description": "If both sites use the same subnet (e.g., 192.168.1.0/24), VPN traffic fails.\n\n**Check:** Overlapping IP ranges? NAT applied to VPN traffic incorrectly? 
VPN exempted from outbound NAT?\n\n**If subnets overlap:** Requires NAT on tunnel or re-addressing.\n**If NAT issue:** Add NAT exemption rule for VPN subnets.\n\n**Escalate to:** Network admin for firewall changes." + } + ] + }, + { + "id": "check_firewall_access", + "type": "solution", + "title": "Cannot Access Firewall Management", + "description": "Unable to log into the firewall.\n\n**Try:** Web GUI, SSH/console, different workstation, check management VLAN.\n\n**If completely unresponsive:** Check power, try console access (serial cable for physical appliances).\n\n**Escalate to:** Network admin or vendor support." + } + ] + }, + { + "id": "check_partial_connectivity", + "type": "decision", + "question": "What specifically works and what doesn't across the VPN?", + "help_text": "Test: ping (ICMP), file shares (SMB/445), RDP (3389), web (80/443)", + "options": [ + {"id": "ping_works_apps_dont", "label": "Ping works but apps don't (RDP, file shares)", "next_node_id": "check_mtu"}, + {"id": "some_hosts", "label": "Can reach some hosts but not others", "next_node_id": "check_subnet_selectors"}, + {"id": "one_direction", "label": "Works one direction but not the other", "next_node_id": "check_asymmetric"} + ], + "children": [ + { + "id": "check_mtu", + "type": "solution", + "title": "MTU / Fragmentation Issue", + "description": "Ping works but larger packets fail. Classic MTU issue.\n\n**Test:**\n```\nping -f -l 1500\nping -f -l 1400\n```\n\n**The -f flag prevents fragmentation.** Find the largest working size.\n\n**Fix:** Enable MSS clamping on the firewall VPN interface (set TCP MSS to 1360-1400).\n\n**Escalate to:** Network admin for MSS clamping configuration." + }, + { + "id": "check_subnet_selectors", + "type": "solution", + "title": "Check VPN Phase 2 Subnet Selectors", + "description": "Some hosts reachable, others not. VPN may not cover all subnets.\n\nPhase 2 selectors define which subnets traverse the VPN. 
If a subnet isn't listed, that traffic won't use the tunnel.\n\n**Fix:** Add Phase 2 entries for missing subnets on both firewalls.\n\n**Escalate to:** Network admin to review Phase 2 selectors." + }, + { + "id": "check_asymmetric", + "type": "solution", + "title": "Investigate Asymmetric Routing", + "description": "Traffic works one direction only.\n\n**Common causes:**\n1. Missing return route on one side\n2. Firewall policy only on one side\n3. NAT interference on one side\n\nRun traceroute from BOTH directions and compare.\n\n**Escalate to:** Network admin to check routes and policies on both endpoints." + } + ] + }, + { + "id": "check_vpn_stability", + "type": "decision", + "question": "How frequently is the VPN dropping?", + "help_text": "Check VPN logs for reconnection frequency and patterns", + "options": [ + {"id": "every_few_hours", "label": "Drops regularly (every few hours)", "next_node_id": "check_lifetime_mismatch"}, + {"id": "random_drops", "label": "Random drops throughout the day", "next_node_id": "check_dpd_keepalive"}, + {"id": "daily_pattern", "label": "Drops at same time daily", "next_node_id": "check_scheduled_tasks"} + ], + "children": [ + { + "id": "check_lifetime_mismatch", + "type": "solution", + "title": "Check Phase 1/Phase 2 Lifetime Mismatch", + "description": "Regular drops suggest a lifetime/rekey issue.\n\n**Phase 2 should ALWAYS be shorter than Phase 1.**\n- Phase 1: 28800 seconds (8 hours)\n- Phase 2: 3600 seconds (1 hour)\n\n**Common problem:** Phase 2 longer than Phase 1 causes failure during rekey.\n\n**Escalate to:** Network admin to align lifetime settings." 
async def get_admin_token(client: httpx.AsyncClient) -> str:
    """Log in with the configured admin credentials and return the bearer token.

    Reads the module-level ``ADMIN_EMAIL``, ``ADMIN_PASSWORD`` and
    ``API_BASE_URL`` values and posts the credentials as form data to
    ``{API_BASE_URL}/auth/login``.

    Args:
        client: HTTP client used to send the login request.

    Returns:
        The ``access_token`` value taken from the login response body.

    Raises:
        Exception: If the credentials are not set, the login request does not
            answer with HTTP 200, the body is not valid JSON, or the body
            carries no ``access_token``.
    """
    if not ADMIN_EMAIL or not ADMIN_PASSWORD:
        raise Exception("Admin credentials not provided. Use --email and --password.")

    login_response = await client.post(
        f"{API_BASE_URL}/auth/login",
        data={"username": ADMIN_EMAIL, "password": ADMIN_PASSWORD},
    )

    if login_response.status_code != 200:
        raise Exception(f"Failed to login: {login_response.text}")

    # A 200 with an unexpected body used to surface as a bare
    # KeyError('access_token') or a JSON decode error; report it explicitly so
    # the caller's "[ERROR] ..." line is actionable.
    try:
        payload = login_response.json()
    except ValueError as err:
        raise Exception(
            f"Login response was not valid JSON: {login_response.text}"
        ) from err

    token = payload.get("access_token") if isinstance(payload, dict) else None
    if not token:
        raise Exception("Login succeeded but the response contained no access_token.")
    return token
async def create_tree(client: httpx.AsyncClient, token: str, tree_data: dict) -> dict | None:
    """Create a default, public tree via the API unless the name already exists.

    The existing trees are listed first. If one has the same ``name``:

    * when it is already public and default, it is skipped;
    * otherwise its ``is_public`` / ``is_default`` flags are set via ``PUT``.

    Only when no tree with that name is found is a new one created via
    ``POST``. ``tree_data`` itself is never modified; the two flags are added
    to a copy that is sent as the request body.

    Args:
        client: HTTP client used for the list / update / create requests.
        token: Bearer token placed in the ``Authorization`` header.
        tree_data: Tree definition; must contain a ``"name"`` key.

    Returns:
        The created tree as returned by the API, or ``None`` when a tree with
        the same name already existed (whether skipped or updated).

    Raises:
        Exception: If the create request does not answer 200/201, or the
            visibility update of an existing tree does not answer with a 2xx
            status.
    """
    headers = {"Authorization": f"Bearer {token}"}
    name = tree_data["name"]

    # Send a copy so the caller's dict is not silently given extra keys.
    payload = {**tree_data, "is_default": True, "is_public": True}

    list_response = await client.get(f"{API_BASE_URL}/trees", headers=headers)
    # Best effort: when the listing call fails we still attempt the create,
    # exactly as before.
    if list_response.status_code == 200:
        existing = next(
            (tree for tree in list_response.json() if tree["name"] == name),
            None,
        )
        if existing is not None:
            if existing.get("is_public") and existing.get("is_default"):
                print(f" [SKIP] '{name}' already exists")
                return None

            update_response = await client.put(
                f"{API_BASE_URL}/trees/{existing['id']}",
                json={"is_public": True, "is_default": True},
                headers=headers,
            )
            # Previously the result of the PUT was ignored and success was
            # reported unconditionally.
            if not 200 <= update_response.status_code < 300:
                raise Exception(f"Failed to update '{name}': {update_response.text}")
            print(f" [UPDATE] '{name}' visibility updated")
            return None

    response = await client.post(
        f"{API_BASE_URL}/trees",
        json=payload,
        headers=headers,
    )

    if response.status_code not in (200, 201):
        raise Exception(f"Failed to create '{name}': {response.text}")

    tree = response.json()
    print(f" [OK] Created '{name}' (ID: {tree['id']})")
    return tree
async def seed_database():
    """Seed every batch tree through the API and print a progress report.

    Steps: check the API health endpoint, log in as the admin user, then
    create (or skip) each tree in turn. A failure on one tree is reported
    and counted but does not stop the remaining trees from being processed.

    Returns:
        True when the run finished and no tree failed; False when the API
        is unreachable or unhealthy, authentication fails, or at least one
        tree could not be created.
    """
    print("\n" + "=" * 60)
    print(" RESOLUTIONFLOW - Batch 2 Trees Seeder")
    print(" Networking | Active Directory | Microsoft 365")
    print("=" * 60)

    async with httpx.AsyncClient(timeout=60.0) as client:
        try:
            # The health URL is the API base URL with '/api/v1' removed.
            health_check = await client.get(f"{API_BASE_URL.replace('/api/v1', '')}/health")
            if health_check.status_code != 200:
                print(f"\n[ERROR] API health check failed: {health_check.status_code}")
                return False
        except httpx.ConnectError:
            print("\n[ERROR] Cannot connect to API server")
            print(f" Make sure the server is running at {API_BASE_URL}")
            return False

        print("\n[1/3] Authenticating...")
        try:
            token = await get_admin_token(client)
            print(f" Logged in as {ADMIN_EMAIL}")
        except Exception as e:
            print(f" [ERROR] {e}")
            return False

        print("\n[2/3] Preparing decision trees...")
        trees_to_create = [
            # Networking
            ("Networking", get_dns_resolution_tree()),
            ("Networking", get_dhcp_issues_tree()),
            ("Networking", get_site_to_site_vpn_tree()),
            # Active Directory / Entra ID
            ("Active Directory / Entra ID", get_repeated_lockout_tree()),
            ("Active Directory / Entra ID", get_ad_replication_tree()),
            ("Active Directory / Entra ID", get_gpo_not_applying_tree()),
            ("Active Directory / Entra ID", get_entra_id_sync_tree()),
            ("Active Directory / Entra ID", get_domain_join_tree()),
            ("Active Directory / Entra ID", get_kerberos_auth_tree()),
            # Microsoft 365 (Batch 3)
            ("Microsoft 365", get_teams_call_quality_tree()),
            ("Microsoft 365", get_onedrive_sync_tree()),
            ("Microsoft 365", get_mail_flow_tree()),
            ("Microsoft 365", get_sharepoint_permissions_tree()),
            ("Microsoft 365", get_mfa_lockout_tree()),
            ("Microsoft 365", get_license_assignment_tree()),
            # Additional Networking (Batch 4)
            ("Networking", get_bandwidth_slow_internet_tree()),
            ("Networking", get_wireless_connectivity_tree()),
            ("Networking", get_firewall_blocking_tree()),
        ]

        print(f" Found {len(trees_to_create)} trees to seed\n")

        print("[3/3] Creating decision trees...")
        created_count = 0
        skipped_count = 0
        failed_count = 0
        current_category = None

        for category, tree_data in trees_to_create:
            # Print a heading each time the category changes.
            if category != current_category:
                print(f"\n {category}:")
                current_category = category
            try:
                result = await create_tree(client, token, tree_data)
            except Exception as e:
                # Keep going, but remember the failure so the summary and
                # the return value reflect it.
                failed_count += 1
                print(f" [FAIL] '{tree_data['name']}': {e}")
            else:
                if result:
                    created_count += 1
                else:
                    skipped_count += 1

        print("\n" + "=" * 60)
        print(" SEEDING COMPLETE")
        print("=" * 60)
        print(f" Trees created: {created_count}")
        print(f" Trees skipped: {skipped_count}")
        if failed_count:
            print(f" Trees failed: {failed_count}")
        print(f" Total: {created_count + skipped_count}")
        print()
        return failed_count == 0
def main():
    """Parse command-line arguments and run the seeder.

    The parsed ``--api-url``, ``--email`` and ``--password`` values are
    stored in the module globals API_BASE_URL, ADMIN_EMAIL and
    ADMIN_PASSWORD before seed_database() is run.

    Raises:
        SystemExit: Always; the exit status is 0 when seed_database()
            returns a truthy value and 1 otherwise.
    """
    global API_BASE_URL, ADMIN_EMAIL, ADMIN_PASSWORD

    parser = argparse.ArgumentParser(
        description="Seed ResolutionFlow with Batch 2 trees"
    )
    parser.add_argument("--api-url", default="http://localhost:8000/api/v1")
    parser.add_argument("--email", required=True, help="Admin email")
    parser.add_argument("--password", required=True, help="Admin password")
    args = parser.parse_args()

    API_BASE_URL = args.api_url
    ADMIN_EMAIL = args.email
    ADMIN_PASSWORD = args.password

    success = asyncio.run(seed_database())
    # The bare exit() builtin is injected by the site module for interactive
    # use and is missing under `python -S` or in frozen builds; raising
    # SystemExit directly works everywhere.
    raise SystemExit(0 if success else 1)


if __name__ == "__main__":
    main()