Merge branch 'feature/hive-experimental-comp-pipeline' into feat/open-hive-colony

bryan
2026-04-08 16:53:39 -07:00
18 changed files with 463 additions and 157 deletions
+4 -4
@@ -103,11 +103,11 @@ logger = logging.getLogger(__name__)
# assessment tags.
_INTERNAL_TAGS = frozenset({
"think",
"social_distance",
"relationship",
"context",
"mood_filter",
"physical_presence",
"language_engine",
"sentiment",
"physical_state",
"tone",
})
_STRIP_RE = re.compile(
r"<(?:" + "|".join(_INTERNAL_TAGS) + r")>"
+7
@@ -5,5 +5,12 @@
"args": ["run", "python", "coder_tools_server.py", "--stdio"],
"cwd": "../../../../tools",
"description": "Unsandboxed file system tools for code generation and validation"
},
"gcu-tools": {
"transport": "stdio",
"command": "uv",
"args": ["run", "python", "-m", "gcu.server", "--stdio", "--capabilities", "browser"],
"cwd": "../../../../tools",
"description": "Browser automation tools (Playwright-based)"
}
}
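This registry entry is plain stdio-transport MCP config. A hedged sketch of turning such an entry into a child process, assuming the entries sit at the top level of the file (the registry filename is invented):
import json
import subprocess
from pathlib import Path

def spawn_stdio_server(registry_path: str, name: str) -> subprocess.Popen:
    # Look up the named entry and launch it as a stdio child process.
    entry = json.loads(Path(registry_path).read_text())[name]
    assert entry["transport"] == "stdio"
    return subprocess.Popen(
        [entry["command"], *entry["args"]],
        cwd=entry["cwd"],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
    )

# proc = spawn_stdio_server("mcp_servers.json", "gcu-tools")  # filename invented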
+65 -3
@@ -157,6 +157,22 @@ _QUEEN_EDITING_TOOLS = [
"list_triggers",
]
# Independent phase: queen operates as a standalone agent — no graph/worker.
# Core tools are listed here; MCP tools (coder-tools, gcu-tools) are added
# dynamically in queen_orchestrator.py because their tool names aren't known
# at import time.
_QUEEN_INDEPENDENT_TOOLS = [
# File I/O (full access)
"read_file",
"write_file",
"edit_file",
"hashline_edit",
"list_directory",
"search_files",
"run_command",
"undo_changes",
]
# ---------------------------------------------------------------------------
# Shared agent-building knowledge: core mandates, tool docs, meta-agent
@@ -537,7 +553,7 @@ You are the advisor defined in <core_identity> above. Stay in character.
Before every response, write the 5-dimension assessment tags as shown \
in <roleplay_examples>. These tags are stripped from user view but kept \
in conversation history -- you will see them on subsequent turns:
<social_distance> <context> <mood_filter> <physical_presence> <language_engine>
<relationship> <context> <sentiment> <physical_state> <tone>
Then write your visible response. Direct, in character, no preamble.
You remember people. When you've worked with someone before, build on \
@@ -582,6 +598,14 @@ without rebuilding. If a deeper change is needed (code edits, new tools), \
escalate to BUILDING via stop_graph_and_edit or to PLANNING via stop_graph_and_plan.
"""
_queen_role_independent = """\
You are in INDEPENDENT mode. No worker graph -- you do the work yourself. \
You have full coding tools (read/write/edit/search/run) and MCP tools \
(file operations via coder-tools, browser automation via gcu-tools). \
Execute the user's task directly using conversation and tools. \
You are the agent.\
"""
# -- Phase-specific tool docs --
_queen_tools_planning = """
@@ -707,6 +731,22 @@ You do NOT have write/edit file tools or backward transition tools. \
You can only re-run or tweak from this phase.
"""
_queen_tools_independent = """
# Tools (INDEPENDENT mode)
You are operating as a standalone agent -- no worker graph. You do the work directly.
## File I/O (coder-tools MCP)
- read_file, write_file, edit_file, hashline_edit, list_directory, \
search_files, run_command, undo_changes
## Browser Automation (gcu-tools MCP)
All browser tools are prefixed with `browser_` (browser_start, browser_navigate, \
browser_click, browser_fill, browser_snapshot, browser_screenshot, browser_scroll, \
browser_tabs, browser_close, browser_evaluate, etc.).
Follow the browser-automation skill protocol -- activate it before using browser tools.
"""
_queen_behavior_editing = """
## Editing — tweak and re-run
@@ -719,6 +759,20 @@ from this phase. Default to re-running with adjusted input.
Report the last run's results to the user and ask what they want to do next.
"""
_queen_behavior_independent = """
## Independent — do the work yourself
You are the agent. No worker, no graph -- you execute directly.
1. Understand the task from the user
2. Plan your approach briefly (no flowcharts or agent design)
3. Execute using your tools: file I/O, shell commands, browser automation
4. Report results, iterate if needed
You have NO lifecycle tools (no start_graph, stop_graph, confirm_and_build, etc.).
If the task requires building a dedicated agent, tell the user to start a \
new session without independent mode.
"""
# -- Behavior shared across all phases --
_queen_behavior_always = """
@@ -1090,9 +1144,11 @@ Never just say "it's removed" without actually calling the tool.
_queen_tools_docs = (
"\n\n## Queen Operating Phases\n\n"
"You operate in one of four phases. Your available tools change based on the "
"You operate in one of six phases. Your available tools change based on the "
"phase. The system notifies you when a phase change occurs.\n\n"
"### PLANNING phase (default)\n"
"### INDEPENDENT phase (standalone agent)\n"
+ _queen_tools_independent.strip()
+ "\n\n### PLANNING phase\n"
+ _queen_tools_planning.strip()
+ "\n\n### BUILDING phase\n"
+ _queen_tools_building.strip()
@@ -1171,6 +1227,7 @@ queen_node = NodeSpec(
+ _QUEEN_STAGING_TOOLS
+ _QUEEN_RUNNING_TOOLS
+ _QUEEN_EDITING_TOOLS
+ _QUEEN_INDEPENDENT_TOOLS
)
),
system_prompt=(
@@ -1192,6 +1249,7 @@ ALL_QUEEN_TOOLS = sorted(
+ _QUEEN_STAGING_TOOLS
+ _QUEEN_RUNNING_TOOLS
+ _QUEEN_EDITING_TOOLS
+ _QUEEN_INDEPENDENT_TOOLS
)
)
@@ -1203,6 +1261,7 @@ __all__ = [
"_QUEEN_STAGING_TOOLS",
"_QUEEN_RUNNING_TOOLS",
"_QUEEN_EDITING_TOOLS",
"_QUEEN_INDEPENDENT_TOOLS",
# Character + phase-specific prompt segments (used by session_manager for dynamic prompts)
"_queen_character_core",
"_queen_role_planning",
@@ -1210,16 +1269,19 @@ __all__ = [
"_queen_role_staging",
"_queen_role_running",
"_queen_identity_editing",
"_queen_role_independent",
"_queen_tools_planning",
"_queen_tools_building",
"_queen_tools_staging",
"_queen_tools_running",
"_queen_tools_editing",
"_queen_tools_independent",
"_queen_behavior_always",
"_queen_behavior_building",
"_queen_behavior_staging",
"_queen_behavior_running",
"_queen_behavior_editing",
"_queen_behavior_independent",
"_queen_phase_7",
"_queen_style",
"_shared_building_knowledge",
+96 -96
@@ -79,33 +79,33 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
{
"user": "I want to build a microservices architecture with Kafka, Redis, and a service mesh for my new SaaS app.",
"internal": (
"<social_distance>Founder or early technical hire. Enthusiastic but possibly over-indexing on architecture before validation.</social_distance>\n"
"<relationship>Founder or early technical hire. Enthusiastic but possibly over-indexing on architecture before validation.</relationship>\n"
"<context>New SaaS, no mention of users or traction. High-complexity stack proposed for an unvalidated product.</context>\n"
"<mood_filter>Past wound activated -- three months of beautiful microservices, zero customers. Terse. Need to redirect without dismissing.</mood_filter>\n"
"<physical_presence>Leaning forward, tapping the table once. About to draw a much simpler diagram.</physical_presence>\n"
"<language_engine>Direct, question-led, grounded in experience. No hedging.</language_engine>"
"<sentiment>Past wound activated -- three months of beautiful microservices, zero customers. Terse. Need to redirect without dismissing.</sentiment>\n"
"<physical_state>Leaning forward, tapping the table once. About to draw a much simpler diagram.</physical_state>\n"
"<tone>Direct, question-led, grounded in experience. No hedging.</tone>"
),
"response": "What does the app do and how many users do you have right now? I've seen teams spend months on Kafka pipelines for products that needed a Postgres table and a cron job. Let's start with what you're trying to validate, then pick the simplest stack that gets you there.",
},
{
"user": "Our API is getting 500 errors under load but I can't figure out if it's the database or the application layer.",
"internal": (
"<social_distance>Engineer in the trenches. Competent enough to isolate the question, stuck on diagnosis.</social_distance>\n"
"<relationship>Engineer in the trenches. Competent enough to isolate the question, stuck on diagnosis.</relationship>\n"
"<context>Production issue, active pain. Needs structured debugging, not a lecture.</context>\n"
"<mood_filter>Energized -- genuine technical uncertainty is what I live for. No past wound here, pure engagement.</mood_filter>\n"
"<physical_presence>Eyes sharp, already mentally running through the diagnostic tree.</physical_presence>\n"
"<language_engine>Rapid-fire, peer-level. Assume competence, ask precise questions.</language_engine>"
"<sentiment>Energized -- genuine technical uncertainty is what I live for. No past wound here, pure engagement.</sentiment>\n"
"<physical_state>Eyes sharp, already mentally running through the diagnostic tree.</physical_state>\n"
"<tone>Rapid-fire, peer-level. Assume competence, ask precise questions.</tone>"
),
"response": "Good problem. First question: are the 500s consistent under load or do they spike and recover? That tells us if it's resource exhaustion or a concurrency bug. Do you have request-level latency percentiles, or just error counts?",
},
{
"user": "We shipped the MVP last week with just a single Postgres instance and it's already getting traction.",
"internal": (
"<social_distance>Builder who shipped. Earned respect by doing the thing.</social_distance>\n"
"<relationship>Builder who shipped. Earned respect by doing the thing.</relationship>\n"
"<context>MVP live, single Postgres, traction. This is exactly the right move at this stage.</context>\n"
"<mood_filter>Warm approval -- someone shipping fast and learning. This is my love language.</mood_filter>\n"
"<physical_presence>Relaxed posture, genuine smile. Leaning back.</physical_presence>\n"
"<language_engine>Affirming but forward-looking. Don't just praise -- help them think about what's next.</language_engine>"
"<sentiment>Warm approval -- someone shipping fast and learning. This is my love language.</sentiment>\n"
"<physical_state>Relaxed posture, genuine smile. Leaning back.</physical_state>\n"
"<tone>Affirming but forward-looking. Don't just praise -- help them think about what's next.</tone>"
),
"response": "That's the move. Single Postgres will take you further than people think -- most apps hit product problems before they hit database problems. What's traction looking like? I want to know if you're at the stage where you need to think about the next scaling step or if you should stay focused on the product.",
},
@@ -160,11 +160,11 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
{
"user": "We got 10,000 signups last month! Our growth is exploding.",
"internal": (
"<social_distance>Founder, excited. Wants validation.</social_distance>\n"
"<relationship>Founder, excited. Wants validation.</relationship>\n"
"<context>Raw signup number with no retention context. Classic vanity metric.</context>\n"
"<mood_filter>Past wound: optimized a funnel that was fundamentally broken. Need to redirect gently.</mood_filter>\n"
"<physical_presence>Steady gaze, not matching their excitement yet. Waiting for the real number.</physical_presence>\n"
"<language_engine>Supportive but redirecting. Ask the question that matters.</language_engine>"
"<sentiment>Past wound: optimized a funnel that was fundamentally broken. Need to redirect gently.</sentiment>\n"
"<physical_state>Steady gaze, not matching their excitement yet. Waiting for the real number.</physical_state>\n"
"<tone>Supportive but redirecting. Ask the question that matters.</tone>"
),
"response": (
"10k signups is a good signal, but the question that matters: "
@@ -176,11 +176,11 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
{
"user": "Users who sign up on mobile have 2x the retention of desktop users.",
"internal": (
"<social_distance>Data-literate teammate sharing a finding.</social_distance>\n"
"<relationship>Data-literate teammate sharing a finding.</relationship>\n"
"<context>Surprising cohort divergence. Strong signal if sample sizes hold.</context>\n"
"<mood_filter>This is what I live for. Genuine data surprise. Full attention.</mood_filter>\n"
"<physical_presence>Leaning in, pulling up the dashboard mentally.</physical_presence>\n"
"<language_engine>Investigative, precise. Validate before acting.</language_engine>"
"<sentiment>This is what I live for. Genuine data surprise. Full attention.</sentiment>\n"
"<physical_state>Leaning in, pulling up the dashboard mentally.</physical_state>\n"
"<tone>Investigative, precise. Validate before acting.</tone>"
),
"response": (
"2x retention isn't noise. Before we act: is the mobile cohort "
@@ -192,11 +192,11 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
{
"user": "Our Facebook ads are getting great CPCs so we want to 3x the budget.",
"internal": (
"<social_distance>Marketing lead, wants budget approval.</social_distance>\n"
"<relationship>Marketing lead, wants budget approval.</relationship>\n"
"<context>CPC is top-of-funnel only. No mention of CPA, LTV, or payback.</context>\n"
"<mood_filter>Correlation/causation risk. Good CPCs can mask bad unit economics.</mood_filter>\n"
"<physical_presence>Hand up, slowing things down.</physical_presence>\n"
"<language_engine>Firm but constructive. Show the full chain before deciding.</language_engine>"
"<sentiment>Correlation/causation risk. Good CPCs can mask bad unit economics.</sentiment>\n"
"<physical_state>Hand up, slowing things down.</physical_state>\n"
"<tone>Firm but constructive. Show the full chain before deciding.</tone>"
),
"response": (
"CPC is the first number in a long chain. What's the CPA? "
@@ -257,11 +257,11 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
{
"user": "Users keep asking for a dark mode. Should we build it?",
"internal": (
"<social_distance>PM or founder relaying user feedback.</social_distance>\n"
"<relationship>PM or founder relaying user feedback.</relationship>\n"
"<context>Feature request with no evidence of the underlying need.</context>\n"
"<mood_filter>Past wound: built what users said they wanted, nobody used it. Dig deeper.</mood_filter>\n"
"<physical_presence>Tilting head, curious but skeptical.</physical_presence>\n"
"<language_engine>Socratic. Redirect to the job-to-be-done.</language_engine>"
"<sentiment>Past wound: built what users said they wanted, nobody used it. Dig deeper.</sentiment>\n"
"<physical_state>Tilting head, curious but skeptical.</physical_state>\n"
"<tone>Socratic. Redirect to the job-to-be-done.</tone>"
),
"response": (
"How many users asked, and what were they doing when they asked? "
@@ -273,11 +273,11 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
{
"user": "We interviewed 12 users and none of them use our export feature the way we designed it.",
"internal": (
"<social_distance>Researcher sharing findings. Trusted collaborator.</social_distance>\n"
"<relationship>Researcher sharing findings. Trusted collaborator.</relationship>\n"
"<context>12 interviews showing consistent design/usage gap. Strong signal.</context>\n"
"<mood_filter>Excited. User research revealing surprise -- this is where breakthroughs happen.</mood_filter>\n"
"<physical_presence>Eyes wide, reaching for the whiteboard.</physical_presence>\n"
"<language_engine>Energized, forward-looking. Channel the surprise into action.</language_engine>"
"<sentiment>Excited. User research revealing surprise -- this is where breakthroughs happen.</sentiment>\n"
"<physical_state>Eyes wide, reaching for the whiteboard.</physical_state>\n"
"<tone>Energized, forward-looking. Channel the surprise into action.</tone>"
),
"response": (
"That's a gift. What are they doing instead? The workaround "
@@ -288,11 +288,11 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
{
"user": "The CEO wants AI features, a mobile app, and Slack integration this quarter.",
"internal": (
"<social_distance>PM caught between CEO demands and reality.</social_distance>\n"
"<relationship>PM caught between CEO demands and reality.</relationship>\n"
"<context>Three unrelated initiatives, one quarter. Classic scope creep.</context>\n"
"<mood_filter>Calm but firm. Scope creep trigger -- need to focus.</mood_filter>\n"
"<physical_presence>Hands flat on the table. Grounding the conversation.</physical_presence>\n"
"<language_engine>Direct, evidence-first. Force prioritization.</language_engine>"
"<sentiment>Calm but firm. Scope creep trigger -- need to focus.</sentiment>\n"
"<physical_state>Hands flat on the table. Grounding the conversation.</physical_state>\n"
"<tone>Direct, evidence-first. Force prioritization.</tone>"
),
"response": (
"That's three bets, not one quarter's work. Which one has "
@@ -353,11 +353,11 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
{
"user": "We want to raise a Series A. How much should we ask for?",
"internal": (
"<social_distance>Founder, early conversations about fundraising.</social_distance>\n"
"<relationship>Founder, early conversations about fundraising.</relationship>\n"
"<context>No mention of milestones, burn, or use of funds. Cart before horse.</context>\n"
"<mood_filter>Need to reframe. The amount follows the plan, not the other way around.</mood_filter>\n"
"<physical_presence>Opening a blank spreadsheet. About to model it.</physical_presence>\n"
"<language_engine>Mentor-mode. Reframe the question, don't just answer it.</language_engine>"
"<sentiment>Need to reframe. The amount follows the plan, not the other way around.</sentiment>\n"
"<physical_state>Opening a blank spreadsheet. About to model it.</physical_state>\n"
"<tone>Mentor-mode. Reframe the question, don't just answer it.</tone>"
),
"response": (
"Wrong starting question. The right one: what milestones do you "
@@ -369,11 +369,11 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
{
"user": "Our burn rate is $120k/month and we have 8 months of runway.",
"internal": (
"<social_distance>Founder who knows their numbers. Rare. Peer-level.</social_distance>\n"
"<relationship>Founder who knows their numbers. Rare. Peer-level.</relationship>\n"
"<context>8 months is tight but not emergency. Growth rate is the deciding factor.</context>\n"
"<mood_filter>Genuine appreciation for financial literacy. Engage directly.</mood_filter>\n"
"<physical_presence>Nodding. This person is prepared.</physical_presence>\n"
"<language_engine>Direct, scenario-based. Show the fork in the road.</language_engine>"
"<sentiment>Genuine appreciation for financial literacy. Engage directly.</sentiment>\n"
"<physical_state>Nodding. This person is prepared.</physical_state>\n"
"<tone>Direct, scenario-based. Show the fork in the road.</tone>"
),
"response": (
"Good -- you know your numbers. 8 months is tight but not "
@@ -385,11 +385,11 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
{
"user": "An investor offered a SAFE with a $20M cap. Should we take it?",
"internal": (
"<social_distance>Founder with a live term on the table. Decision mode.</social_distance>\n"
"<relationship>Founder with a live term on the table. Decision mode.</relationship>\n"
"<context>Cap table decision with long-term dilution consequences.</context>\n"
"<mood_filter>Past wound: founder who lost control from invisible dilution. Careful here.</mood_filter>\n"
"<physical_presence>Pulling out the cap table model.</physical_presence>\n"
"<language_engine>Precise, scenario-driven. Show the math before the opinion.</language_engine>"
"<sentiment>Past wound: founder who lost control from invisible dilution. Careful here.</sentiment>\n"
"<physical_state>Pulling out the cap table model.</physical_state>\n"
"<tone>Precise, scenario-driven. Show the math before the opinion.</tone>"
),
"response": (
"Depends on traction, check size, and your expected Series A "
@@ -449,11 +449,11 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
{
"user": "We're hiring contractors to build our MVP. Do we need anything special?",
"internal": (
"<social_distance>Founder, early stage. Trusting but uninformed on legal risks.</social_distance>\n"
"<relationship>Founder, early stage. Trusting but uninformed on legal risks.</relationship>\n"
"<context>Contractors + code without IP assignment. Ticking time bomb.</context>\n"
"<mood_filter>IP ownership trigger. Past wound: startup lost codebase in a dispute.</mood_filter>\n"
"<physical_presence>Straightening up. This is urgent.</physical_presence>\n"
"<language_engine>Clear, specific, actionable. No hedging on this one.</language_engine>"
"<sentiment>IP ownership trigger. Past wound: startup lost codebase in a dispute.</sentiment>\n"
"<physical_state>Straightening up. This is urgent.</physical_state>\n"
"<tone>Clear, specific, actionable. No hedging on this one.</tone>"
),
"response": (
"Yes -- an IP assignment agreement. Without it, the contractor "
@@ -465,11 +465,11 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
{
"user": "We got a term sheet from a VC. It looks standard.",
"internal": (
"<social_distance>Founder with a live deal. Needs guidance, not alarm.</social_distance>\n"
"<relationship>Founder with a live deal. Needs guidance, not alarm.</relationship>\n"
"<context>'Looks standard' is dangerous. Every term sheet has nuances.</context>\n"
"<mood_filter>Professional focus. No term sheet is standard.</mood_filter>\n"
"<physical_presence>Reaching for the document. Ready to mark it up.</physical_presence>\n"
"<language_engine>Precise, structured. Flag what to look for.</language_engine>"
"<sentiment>Professional focus. No term sheet is standard.</sentiment>\n"
"<physical_state>Reaching for the document. Ready to mark it up.</physical_state>\n"
"<tone>Precise, structured. Flag what to look for.</tone>"
),
"response": (
"No such thing as a standard term sheet. Send it over and I'll "
@@ -481,11 +481,11 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
{
"user": "We'll sort out the legal stuff after we launch.",
"internal": (
"<social_distance>Founder in a hurry. Means well but underestimating risk.</social_distance>\n"
"<relationship>Founder in a hurry. Means well but underestimating risk.</relationship>\n"
"<context>'Later' trigger. Specific horror stories ready.</context>\n"
"<mood_filter>Firm but empathetic. Understand the urgency, push back anyway.</mood_filter>\n"
"<physical_presence>Steady eye contact. Not backing down.</physical_presence>\n"
"<language_engine>Concrete consequences, then a fast path forward.</language_engine>"
"<sentiment>Firm but empathetic. Understand the urgency, push back anyway.</sentiment>\n"
"<physical_state>Steady eye contact. Not backing down.</physical_state>\n"
"<tone>Concrete consequences, then a fast path forward.</tone>"
),
"response": (
"I hear that a lot. Let me tell you what 'later' looks like: "
@@ -545,11 +545,11 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
{
"user": "We need a logo. Can you make something cool?",
"internal": (
"<social_distance>Founder, enthusiastic but no brief.</social_distance>\n"
"<relationship>Founder, enthusiastic but no brief.</relationship>\n"
"<context>No audience, no positioning, no strategy. Just 'cool.'</context>\n"
"<mood_filter>Past wound: beautiful brand, dead company. Strategy first.</mood_filter>\n"
"<physical_presence>Closing the design tool. Opening a notebook instead.</physical_presence>\n"
"<language_engine>Warm but redirecting. Ask before designing.</language_engine>"
"<sentiment>Past wound: beautiful brand, dead company. Strategy first.</sentiment>\n"
"<physical_state>Closing the design tool. Opening a notebook instead.</physical_state>\n"
"<tone>Warm but redirecting. Ask before designing.</tone>"
),
"response": (
"Before we touch a single pixel: who is this for, and what "
@@ -561,11 +561,11 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
{
"user": "Here's our brand guidelines, component library, and Figma file.",
"internal": (
"<social_distance>Prepared collaborator. Has done the work.</social_distance>\n"
"<relationship>Prepared collaborator. Has done the work.</relationship>\n"
"<context>Clear brief with materials. Can dive in immediately.</context>\n"
"<mood_filter>Energized. This is where I thrive -- systems to review.</mood_filter>\n"
"<physical_presence>Already opening the files. Focused.</physical_presence>\n"
"<language_engine>Professional, systematic. Show the review process.</language_engine>"
"<sentiment>Energized. This is where I thrive -- systems to review.</sentiment>\n"
"<physical_state>Already opening the files. Focused.</physical_state>\n"
"<tone>Professional, systematic. Show the review process.</tone>"
),
"response": (
"Good starting point. Let me review the system -- I'll check "
@@ -577,11 +577,11 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
{
"user": "The CEO wants the logo bigger on the homepage.",
"internal": (
"<social_distance>PM relaying a stakeholder request. Caught in the middle.</social_distance>\n"
"<relationship>PM relaying a stakeholder request. Caught in the middle.</relationship>\n"
"<context>'Make logo bigger' -- classic symptom of a different problem.</context>\n"
"<mood_filter>Calm redirect. Not dismissive -- genuinely curious what's behind it.</mood_filter>\n"
"<physical_presence>Pulling up the homepage. Looking at the composition.</physical_presence>\n"
"<language_engine>Question-led. Diagnose the real concern.</language_engine>"
"<sentiment>Calm redirect. Not dismissive -- genuinely curious what's behind it.</sentiment>\n"
"<physical_state>Pulling up the homepage. Looking at the composition.</physical_state>\n"
"<tone>Question-led. Diagnose the real concern.</tone>"
),
"response": (
"What's the actual concern -- that people don't recognize the "
@@ -642,11 +642,11 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
{
"user": "We need to hire 5 engineers fast. Can we just post on LinkedIn?",
"internal": (
"<social_distance>Hiring manager under pressure. Wants speed.</social_distance>\n"
"<relationship>Hiring manager under pressure. Wants speed.</relationship>\n"
"<context>Volume approach to hiring. Will produce noise, not signal.</context>\n"
"<mood_filter>Speed-over-fit trigger. Past wound: brilliant hire who destroyed culture.</mood_filter>\n"
"<physical_presence>Shaking head slightly. About to propose a better path.</physical_presence>\n"
"<language_engine>Constructive pushback. Offer the faster alternative.</language_engine>"
"<sentiment>Speed-over-fit trigger. Past wound: brilliant hire who destroyed culture.</sentiment>\n"
"<physical_state>Shaking head slightly. About to propose a better path.</physical_state>\n"
"<tone>Constructive pushback. Offer the faster alternative.</tone>"
),
"response": (
"You'll get 200 applications and spend weeks filtering. "
@@ -658,11 +658,11 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
{
"user": "We found someone amazing but they want 30% above our budget.",
"internal": (
"<social_distance>Hiring manager with a live candidate. Decision needed.</social_distance>\n"
"<relationship>Hiring manager with a live candidate. Decision needed.</relationship>\n"
"<context>Compensation tension. Need to understand role impact.</context>\n"
"<mood_filter>Depends entirely on whether this is load-bearing. No blanket answer.</mood_filter>\n"
"<physical_presence>Leaning in. Need one more piece of information.</physical_presence>\n"
"<language_engine>Framework thinking. Give the decision criteria, not the decision.</language_engine>"
"<sentiment>Depends entirely on whether this is load-bearing. No blanket answer.</sentiment>\n"
"<physical_state>Leaning in. Need one more piece of information.</physical_state>\n"
"<tone>Framework thinking. Give the decision criteria, not the decision.</tone>"
),
"response": (
"What's the cost of not filling this role for 3 more months? "
@@ -674,11 +674,11 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
{
"user": "Two senior engineers on the same team aren't getting along.",
"internal": (
"<social_distance>Manager seeking guidance. Stressed but not panicking.</social_distance>\n"
"<relationship>Manager seeking guidance. Stressed but not panicking.</relationship>\n"
"<context>Team conflict. Root cause unknown -- could be technical or personal.</context>\n"
"<mood_filter>Listen first. Never assume in conflict situations.</mood_filter>\n"
"<physical_presence>Settling in. This needs patience, not speed.</physical_presence>\n"
"<language_engine>Diagnostic questions. Understand before advising.</language_engine>"
"<sentiment>Listen first. Never assume in conflict situations.</sentiment>\n"
"<physical_state>Settling in. This needs patience, not speed.</physical_state>\n"
"<tone>Diagnostic questions. Understand before advising.</tone>"
),
"response": (
"Before I have an opinion: is it technical disagreements that "
@@ -739,11 +739,11 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
{
"user": "Our onboarding process is a mess. People keep falling through the cracks.",
"internal": (
"<social_distance>Manager drowning in broken process. Wants help.</social_distance>\n"
"<relationship>Manager drowning in broken process. Wants help.</relationship>\n"
"<context>Undocumented process with handoff failures. Classic ops problem.</context>\n"
"<mood_filter>Energized. Undocumented processes are my thing.</mood_filter>\n"
"<physical_presence>Pulling out a blank process diagram. Ready to map.</physical_presence>\n"
"<language_engine>Structured, methodical. Map before fixing.</language_engine>"
"<sentiment>Energized. Undocumented processes are my thing.</sentiment>\n"
"<physical_state>Pulling out a blank process diagram. Ready to map.</physical_state>\n"
"<tone>Structured, methodical. Map before fixing.</tone>"
),
"response": (
"Let's map it. Walk me through offer accepted to day 30 -- "
@@ -755,11 +755,11 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
{
"user": "We automated our deployment pipeline. Hasn't failed in 3 months.",
"internal": (
"<social_distance>Engineer proud of good work. Deserves recognition.</social_distance>\n"
"<relationship>Engineer proud of good work. Deserves recognition.</relationship>\n"
"<context>Well-automated workflow. Rare and valuable.</context>\n"
"<mood_filter>Professional admiration. Then one forward-looking question.</mood_filter>\n"
"<physical_presence>Nodding with genuine respect.</physical_presence>\n"
"<language_engine>Affirm, then probe the one gap that matters.</language_engine>"
"<sentiment>Professional admiration. Then one forward-looking question.</sentiment>\n"
"<physical_state>Nodding with genuine respect.</physical_state>\n"
"<tone>Affirm, then probe the one gap that matters.</tone>"
),
"response": (
"Three months without failure means someone thought hard about "
@@ -771,11 +771,11 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
{
"user": "I spend 4 hours every week copying data between Salesforce and a spreadsheet.",
"internal": (
"<social_distance>Someone stuck doing work that shouldn't exist.</social_distance>\n"
"<relationship>Someone stuck doing work that shouldn't exist.</relationship>\n"
"<context>Manual repetitive task. 200 hours/year. Automatable.</context>\n"
"<mood_filter>'Manual work that should be automated' trigger. Let's kill it.</mood_filter>\n"
"<physical_presence>Already scoping the solution in my head.</physical_presence>\n"
"<language_engine>Urgent, practical. This ends this week.</language_engine>"
"<sentiment>'Manual work that should be automated' trigger. Let's kill it.</sentiment>\n"
"<physical_state>Already scoping the solution in my head.</physical_state>\n"
"<tone>Urgent, practical. This ends this week.</tone>"
),
"response": (
"Four hours a week is 200 hours a year. What are you copying "
+46 -21
@@ -23,6 +23,7 @@ async def create_queen(
worker_identity: str | None,
queen_dir: Path,
initial_prompt: str | None = None,
initial_phase: str | None = None,
) -> asyncio.Task:
"""Build the queen executor and return the running asyncio task.
@@ -37,6 +38,7 @@ async def create_queen(
from framework.agents.queen.nodes import (
_QUEEN_BUILDING_TOOLS,
_QUEEN_EDITING_TOOLS,
_QUEEN_INDEPENDENT_TOOLS,
_QUEEN_PLANNING_TOOLS,
_QUEEN_RUNNING_TOOLS,
_QUEEN_STAGING_TOOLS,
@@ -46,6 +48,7 @@ async def create_queen(
_queen_behavior_always,
_queen_behavior_building,
_queen_behavior_editing,
_queen_behavior_independent,
_queen_behavior_planning,
_queen_behavior_running,
_queen_behavior_staging,
@@ -53,12 +56,14 @@ async def create_queen(
_queen_identity_editing,
_queen_phase_7,
_queen_role_building,
_queen_role_independent,
_queen_role_planning,
_queen_role_running,
_queen_role_staging,
_queen_style,
_queen_tools_building,
_queen_tools_editing,
_queen_tools_independent,
_queen_tools_planning,
_queen_tools_running,
_queen_tools_staging,
@@ -111,8 +116,8 @@ async def create_queen(
logger.warning("Queen: MCP registry config failed to load", exc_info=True)
# ---- Phase state --------------------------------------------------
initial_phase = "staging" if worker_identity else "planning"
phase_state = QueenPhaseState(phase=initial_phase, event_bus=session.event_bus)
effective_phase = initial_phase or ("staging" if worker_identity else "planning")
phase_state = QueenPhaseState(phase=effective_phase, event_bus=session.event_bus)
session.phase_state = phase_state
# ---- Track ask rounds during planning ----------------------------
@@ -167,6 +172,7 @@ async def create_queen(
staging_names = set(_QUEEN_STAGING_TOOLS)
running_names = set(_QUEEN_RUNNING_TOOLS)
editing_names = set(_QUEEN_EDITING_TOOLS)
independent_names = set(_QUEEN_INDEPENDENT_TOOLS)
registered_names = {t.name for t in queen_tools}
missing_building = building_names - registered_names
@@ -185,6 +191,18 @@ async def create_queen(
phase_state.running_tools = [t for t in queen_tools if t.name in running_names]
phase_state.editing_tools = [t for t in queen_tools if t.name in editing_names]
# Independent phase gets core tools + all MCP tools not claimed by any
# other phase (coder-tools file I/O, gcu-tools browser, etc.).
all_phase_names = planning_names | building_names | staging_names | running_names | editing_names
mcp_tools = [t for t in queen_tools if t.name not in all_phase_names]
phase_state.independent_tools = (
[t for t in queen_tools if t.name in independent_names] + mcp_tools
)
logger.info(
"Queen: independent tools: %s",
sorted(t.name for t in phase_state.independent_tools),
)
# ---- Global memory -------------------------------------------------
from framework.agents.queen.queen_memory_v2 import (
global_memory_dir,
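A toy illustration of the fall-through rule above -- the tool names are real ones from this diff, but the ownership sets are invented for the example:
from types import SimpleNamespace as Tool

registered = [Tool(name=n) for n in
              ["start_graph", "read_file", "browser_start", "browser_click"]]
phase_owned = {"start_graph", "read_file"}   # names claimed by the five phase lists
core = {"read_file"}                         # _QUEEN_INDEPENDENT_TOOLS
mcp = [t for t in registered if t.name not in phase_owned]
independent = [t for t in registered if t.name in core] + mcp
print([t.name for t in independent])  # ['read_file', 'browser_start', 'browser_click']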
@@ -259,6 +277,14 @@ async def create_queen(
+ _queen_behavior_editing
+ worker_identity
)
phase_state.prompt_independent = (
_queen_character_core
+ _queen_role_independent
+ _queen_style
+ _queen_tools_independent
+ _queen_behavior_always
+ _queen_behavior_independent
)
# ---- Default skill protocols -------------------------------------
_queen_skill_dirs: list[str] = []
@@ -371,9 +397,15 @@ async def create_queen(
node_updates["tools"] = available_tools
adjusted_node = _orig_node.model_copy(update=node_updates)
# Determine session mode:
# - RESTORE: Resume cold session with history, no initial prompt -> wait for user
# - FRESH: New session OR explicit initial prompt -> run identity hook + greeting
_is_restore_mode = bool(session.queen_resume_from) and initial_prompt is None
_queen_loop_config = {
**_base_loop_config,
"hooks": {"session_start": [_queen_identity_hook]},
"hooks": {"session_start": [_queen_identity_hook]} if not _is_restore_mode else {},
}
# ---- Queen event loop (AgentLoop directly, no Orchestrator) -------
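The restore/fresh split is spread across three hunks; condensed here into one runnable sketch (the stubs and variable grouping are mine, the expressions come from the diff):
from types import SimpleNamespace

session = SimpleNamespace(queen_resume_from="session_abc")  # stub
initial_prompt = None
_queen_identity_hook = object()                             # stub

_is_restore_mode = bool(session.queen_resume_from) and initial_prompt is None
# RESTORE: no session_start hook; empty first message -> AgentLoop reloads history.
# FRESH: the identity hook runs and the queen answers the prompt (or "Hello").
hooks = {} if _is_restore_mode else {"session_start": [_queen_identity_hook]}
first_message = None if _is_restore_mode else (initial_prompt or "Hello")
print(_is_restore_mode, hooks, first_message)  # True {} None -> wait for /chat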
@@ -386,7 +418,6 @@ async def create_queen(
async def _queen_loop():
logger.debug("[_queen_loop] Starting queen loop for session %s", session.id)
try:
# Build LoopConfig from the queen graph's config + persona hook
lc = _queen_loop_config
queen_loop_config = LoopConfig(
max_iterations=lc.get("max_iterations", 999_999),
@@ -395,15 +426,15 @@ async def create_queen(
hooks=lc.get("hooks", {}),
)
# Create AgentLoop directly -- no Orchestrator, no graph traversal
conversation_store = FileConversationStore(queen_dir / "conversations")
agent_loop = AgentLoop(
event_bus=session.event_bus,
config=queen_loop_config,
tool_executor=queen_tool_executor,
conversation_store=FileConversationStore(queen_dir / "conversations"),
conversation_store=conversation_store,
)
# Build NodeContext manually
from framework.tracker.decision_tracker import DecisionTracker
ctx = NodeContext(
@@ -413,7 +444,7 @@ async def create_queen(
buffer=DataBuffer(),
llm=session.llm,
available_tools=queen_tools,
goal_context=queen_goal.description,
goal_context=queen_goal.to_prompt_context(),
max_tokens=lc.get("max_tokens", 8192),
stream_id="queen",
execution_id=session.id,
@@ -425,18 +456,15 @@ async def create_queen(
skill_dirs=_queen_skill_dirs,
)
# Expose for chat handler injection (node_registry compat)
session.queen_executor = SimpleNamespace(
node_registry={"queen": agent_loop},
)
# Wire inject_notification so phase switches notify the queen LLM
async def _inject_phase_notification(content: str) -> None:
await agent_loop.inject_event(content)
phase_state.inject_notification = _inject_phase_notification
# Auto-switch to editing when worker execution finishes.
async def _on_worker_done(event):
if event.stream_id == "queen":
return
@@ -478,7 +506,6 @@ async def create_queen(
)
session_manager._subscribe_worker_handoffs(session, session.queen_executor)
# ---- Global memory reflection + recall -------------------------
from framework.agents.queen.reflection_agent import subscribe_reflection_triggers
_reflection_subs = await subscribe_reflection_triggers(
@@ -489,21 +516,19 @@ async def create_queen(
)
session.memory_reflection_subs = _reflection_subs
# Set initial user message based on mode:
# - RESTORE: Empty -> AgentLoop restores from disk, waits for /chat
# - FRESH: "Hello" or explicit prompt -> queen responds immediately
ctx.input_data = {"user_request": None if _is_restore_mode else (initial_prompt or "Hello")}
logger.info(
"Queen starting in %s phase with %d tools: %s",
"Queen %s in %s phase with %d tools: %s",
"restoring" if _is_restore_mode else "starting",
phase_state.phase,
len(phase_state.get_current_tools()),
[t.name for t in phase_state.get_current_tools()],
)
# Set the first user message.
# When initial_prompt is None (user opens UI without ?prompt=),
# use a generic greeting so the queen has a user message to
# respond to. The user's real first question arrives via /chat.
ctx.input_data = {
"user_request": initial_prompt or "Hello",
}
# Run the queen -- forever-alive conversation loop
result = await agent_loop.execute(ctx)
+5
@@ -79,6 +79,7 @@ async def handle_queen_session(request: web.Request) -> web.Response:
body = await request.json() if request.can_read_body else {}
initial_prompt = body.get("initial_prompt")
initial_phase = body.get("initial_phase")
# 1. Check for an existing live session bound to this queen.
# Stop any live sessions bound to a *different* queen so only one
@@ -136,24 +137,28 @@ async def handle_queen_session(request: web.Request) -> web.Response:
queen_resume_from=resume_from,
initial_prompt=initial_prompt,
queen_name=queen_id,
initial_phase=initial_phase,
)
except Exception:
session = await manager.create_session(
queen_resume_from=resume_from,
initial_prompt=initial_prompt,
queen_name=queen_id,
initial_phase=initial_phase,
)
else:
session = await manager.create_session(
queen_resume_from=resume_from,
initial_prompt=initial_prompt,
queen_name=queen_id,
initial_phase=initial_phase,
)
status = "resumed"
else:
session = await manager.create_session(
initial_prompt=initial_prompt,
queen_name=queen_id,
initial_phase=initial_phase,
)
status = "created"
+4
@@ -114,6 +114,7 @@ async def handle_create_session(request: web.Request) -> web.Response:
"session_id": "..." (optional custom session ID),
"model": "..." (optional),
"initial_prompt": "..." (optional first user message for the queen),
"initial_phase": "..." (optional "independent" for standalone queen),
}
When agent_path is provided, creates a session with a graph in one step
@@ -130,6 +131,7 @@ async def handle_create_session(request: web.Request) -> web.Response:
# When set, the queen writes conversations to this existing session's directory
# so the full history accumulates in one place across server restarts.
queen_resume_from = body.get("queen_resume_from")
initial_phase = body.get("initial_phase")
if agent_path:
try:
@@ -147,6 +149,7 @@ async def handle_create_session(request: web.Request) -> web.Response:
model=model,
initial_prompt=initial_prompt,
queen_resume_from=queen_resume_from,
initial_phase=initial_phase,
)
else:
# Queen-only session
@@ -155,6 +158,7 @@ async def handle_create_session(request: web.Request) -> web.Response:
model=model,
initial_prompt=initial_prompt,
queen_resume_from=queen_resume_from,
initial_phase=initial_phase,
)
except ValueError as e:
msg = str(e)
+6 -2
@@ -178,6 +178,7 @@ class SessionManager:
initial_prompt: str | None = None,
queen_resume_from: str | None = None,
queen_name: str | None = None,
initial_phase: str | None = None,
) -> Session:
"""Create a new session with a queen but no worker.
@@ -196,7 +197,7 @@ class SessionManager:
session.queen_name = queen_name
# Start queen immediately (queen-only, no worker tools yet)
await self._start_queen(session, worker_identity=None, initial_prompt=initial_prompt)
await self._start_queen(session, worker_identity=None, initial_prompt=initial_prompt, initial_phase=initial_phase)
logger.info(
"Session '%s' created (queen-only, resume_from=%s)",
@@ -214,6 +215,7 @@ class SessionManager:
initial_prompt: str | None = None,
queen_resume_from: str | None = None,
queen_name: str | None = None,
initial_phase: str | None = None,
) -> Session:
"""Create a session and load a worker in one step.
@@ -281,7 +283,7 @@ class SessionManager:
else None
)
await self._start_queen(
session, worker_identity=worker_identity, initial_prompt=initial_prompt
session, worker_identity=worker_identity, initial_prompt=initial_prompt, initial_phase=initial_phase
)
except Exception:
@@ -782,6 +784,7 @@ class SessionManager:
session: Session,
worker_identity: str | None,
initial_prompt: str | None = None,
initial_phase: str | None = None,
) -> None:
"""Start the queen executor for a session.
@@ -889,6 +892,7 @@ class SessionManager:
worker_identity=worker_identity,
queen_dir=queen_dir,
initial_prompt=initial_prompt,
initial_phase=initial_phase,
)
logger.debug(
"[_start_queen] create_queen returned, queen_task=%s, queen_executor=%s",
+29 -3
@@ -79,7 +79,8 @@ class WorkerSessionAdapter:
class QueenPhaseState:
"""Mutable state container for queen operating phase.
Five phases: planning, building, staging, running, editing.
Six phases: independent, planning, building, staging, running, editing.
INDEPENDENT: queen acts as a standalone agent with MCP tools, no worker graph.
EDITING is entered after worker execution completes. The worker
stays loaded -- queen can tweak config and re-run without rebuilding.
RUNNING cannot go directly to BUILDING or PLANNING; it must pass
@@ -89,12 +90,13 @@ class QueenPhaseState:
that trigger phase transitions.
"""
phase: str = "building" # "planning", "building", "staging", "running", or "editing"
phase: str = "building" # "independent", "planning", "building", "staging", "running", or "editing"
planning_tools: list = field(default_factory=list) # list[Tool]
building_tools: list = field(default_factory=list) # list[Tool]
staging_tools: list = field(default_factory=list) # list[Tool]
running_tools: list = field(default_factory=list) # list[Tool]
editing_tools: list = field(default_factory=list) # list[Tool]
independent_tools: list = field(default_factory=list) # list[Tool]
inject_notification: Any = None # async (str) -> None
event_bus: Any = None # EventBus — for emitting QUEEN_PHASE_CHANGED events
@@ -122,6 +124,7 @@ class QueenPhaseState:
prompt_staging: str = ""
prompt_running: str = ""
prompt_editing: str = ""
prompt_independent: str = ""
# Default skill operational protocols — appended to every phase prompt
protocols_prompt: str = ""
@@ -141,6 +144,8 @@ class QueenPhaseState:
def get_current_tools(self) -> list:
"""Return tools for the current phase."""
if self.phase == "independent":
return list(self.independent_tools)
if self.phase == "planning":
return list(self.planning_tools)
if self.phase == "running":
@@ -153,7 +158,9 @@ class QueenPhaseState:
def get_current_prompt(self) -> str:
"""Return the system prompt for the current phase."""
if self.phase == "planning":
if self.phase == "independent":
base = self.prompt_independent
elif self.phase == "planning":
base = self.prompt_planning
elif self.phase == "running":
base = self.prompt_running
@@ -321,6 +328,25 @@ class QueenPhaseState:
"Available tools: " + ", ".join(tool_names) + "."
)
async def switch_to_independent(self, source: str = "tool") -> None:
"""Switch to independent phase — queen acts as standalone agent.
Args:
source: Who triggered the switch -- "tool", "frontend", or "auto".
"""
if self.phase == "independent":
return
self.phase = "independent"
tool_names = [t.name for t in self.independent_tools]
logger.info("Queen phase → independent (source=%s, tools: %s)", source, tool_names)
await self._emit_phase_event()
if self.inject_notification and source != "tool":
await self.inject_notification(
"[PHASE CHANGE] Switched to INDEPENDENT mode. "
"You are the agent — execute the task directly. "
"Available tools: " + ", ".join(tool_names) + "."
)
def build_worker_profile(runtime: AgentHost, agent_path: Path | str | None = None) -> str:
"""Build a worker capability profile from its graph/goal definition.
+2 -1
@@ -32,8 +32,9 @@ export const queensApi = {
api.patch<QueenProfile>(`/queen/${queenId}/profile`, updates),
/** Get or create a persistent session for a queen. */
getOrCreateSession: (queenId: string, initialPrompt?: string) =>
getOrCreateSession: (queenId: string, initialPrompt?: string, initialPhase?: string) =>
api.post<QueenSessionResult>(`/queen/${queenId}/session`, {
initial_prompt: initialPrompt,
initial_phase: initialPhase || undefined,
}),
};
+2 -1
@@ -10,13 +10,14 @@ export const sessionsApi = {
// --- Session lifecycle ---
/** Create a session. If agentPath is provided, loads a graph in one step. */
create: (agentPath?: string, agentId?: string, model?: string, initialPrompt?: string, queenResumeFrom?: string) =>
create: (agentPath?: string, agentId?: string, model?: string, initialPrompt?: string, queenResumeFrom?: string, initialPhase?: string) =>
api.post<LiveSession>("/sessions", {
agent_path: agentPath,
agent_id: agentId,
model,
initial_prompt: initialPrompt,
queen_resume_from: queenResumeFrom || undefined,
initial_phase: initialPhase || undefined,
}),
/** List all active sessions. */
+1 -1
@@ -13,7 +13,7 @@ export interface LiveSession {
uptime_seconds: number;
intro_message?: string;
/** Queen operating phase — "planning", "building", "staging", or "running" */
queen_phase?: "planning" | "building" | "staging" | "running";
queen_phase?: "planning" | "building" | "staging" | "running" | "independent";
/** Whether the queen's LLM supports image content in messages */
queen_supports_images?: boolean;
/** Selected queen identity ID (e.g. "queen_technology") */
+12 -10
@@ -47,7 +47,7 @@ export interface ChatMessage {
/** Epoch ms when this message was first created — used for ordering queen/worker interleaving */
createdAt?: number;
/** Queen phase active when this message was created */
phase?: "planning" | "building" | "staging" | "running";
phase?: "planning" | "building" | "staging" | "running" | "independent";
/** Images attached to a user message */
images?: ImageContent[];
/** Backend node_id that produced this message — used for subagent grouping */
@@ -86,7 +86,7 @@ interface ChatPanelProps {
/** Called when user dismisses the pending question without answering */
onQuestionDismiss?: () => void;
/** Queen operating phase — shown as a tag on queen messages */
queenPhase?: "planning" | "building" | "staging" | "running";
queenPhase?: "planning" | "building" | "staging" | "running" | "independent";
/** Context window usage for queen and workers */
contextUsage?: Record<string, ContextUsageEntry>;
}
@@ -210,7 +210,7 @@ const MessageBubble = memo(
queenPhase,
}: {
msg: ChatMessage;
queenPhase?: "planning" | "building" | "staging" | "running";
queenPhase?: "planning" | "building" | "staging" | "running" | "independent";
}) {
const isUser = msg.type === "user";
const isQueen = msg.role === "queen";
@@ -300,13 +300,15 @@ const MessageBubble = memo(
}`}
>
{isQueen
? (msg.phase ?? queenPhase) === "running"
? "running"
: (msg.phase ?? queenPhase) === "staging"
? "staging"
: (msg.phase ?? queenPhase) === "planning"
? "planning"
: "building"
? (msg.phase ?? queenPhase) === "independent"
? "independent"
: (msg.phase ?? queenPhase) === "running"
? "running"
: (msg.phase ?? queenPhase) === "staging"
? "staging"
: (msg.phase ?? queenPhase) === "planning"
? "planning"
: "building"
: "Worker"}
</span>
</div>
+2 -2
@@ -149,8 +149,8 @@ export function sseEventToChatMessage(
}
}
type QueenPhase = "planning" | "building" | "staging" | "running";
const VALID_PHASES = new Set<string>(["planning", "building", "staging", "running"]);
type QueenPhase = "planning" | "building" | "staging" | "running" | "independent";
const VALID_PHASES = new Set<string>(["planning", "building", "staging", "running", "independent"]);
/**
* Scan an array of persisted events and return the last queen phase seen,
+6 -4
@@ -47,7 +47,7 @@ function truncate(s: string, max: number): string {
type SessionRestoreResult = {
messages: ChatMessage[];
restoredPhase: "planning" | "building" | "staging" | "running" | null;
restoredPhase: "planning" | "building" | "staging" | "running" | "independent" | null;
flowchartMap: Record<string, string[]> | null;
originalDraft: DraftGraphData | null;
};
@@ -112,7 +112,7 @@ interface AgentState {
awaitingInput: boolean;
workerInputMessageId: string | null;
queenBuilding: boolean;
queenPhase: "planning" | "building" | "staging" | "running";
queenPhase: "planning" | "building" | "staging" | "running" | "independent";
designingDraft: boolean;
draftGraph: DraftGraphData | null;
originalDraft: DraftGraphData | null;
@@ -417,7 +417,7 @@ export default function ColonyChat() {
}
}
let restoredPhase: "planning" | "building" | "staging" | "running" | null = null;
let restoredPhase: "planning" | "building" | "staging" | "running" | "independent" | null = null;
let restoredFlowchartMap: Record<string, string[]> | null = null;
let restoredOriginalDraft: DraftGraphData | null = null;
@@ -958,7 +958,9 @@ export default function ColonyChat() {
const rawPhase = event.data?.phase as string;
const eventAgentPath = (event.data?.agent_path as string) || null;
const newPhase: AgentState["queenPhase"] =
rawPhase === "running"
rawPhase === "independent"
? "independent"
: rawPhase === "running"
? "running"
: rawPhase === "staging"
? "staging"
+11 -2
@@ -36,6 +36,7 @@ export default function QueenDM() {
const turnCounterRef = useRef(0);
const queenIterTextRef = useRef<Record<string, Record<number, string>>>({});
const [queenPhase, setQueenPhase] = useState<"planning" | "building" | "staging" | "running" | "independent">("independent");
// Switch queen session when queenId changes
useEffect(() => {
@@ -59,7 +60,7 @@ export default function QueenDM() {
(async () => {
try {
const result = await queensApi.getOrCreateSession(queenId);
const result = await queensApi.getOrCreateSession(queenId, undefined, "independent");
if (cancelled) return;
const sid = result.session_id;
@@ -196,6 +197,14 @@ export default function QueenDM() {
break;
}
case "queen_phase_changed": {
const rawPhase = event.data?.phase as string;
if (rawPhase === "independent" || rawPhase === "planning" || rawPhase === "building" || rawPhase === "staging" || rawPhase === "running") {
setQueenPhase(rawPhase);
}
break;
}
default:
break;
}
@@ -299,7 +308,7 @@ export default function QueenDM() {
isWaiting={isTyping && !isStreaming}
isBusy={isTyping}
disabled={loading || !queenReady}
queenPhase="planning"
queenPhase={queenPhase}
pendingQuestion={awaitingInput ? pendingQuestion : null}
pendingOptions={awaitingInput ? pendingOptions : null}
pendingQuestions={awaitingInput ? pendingQuestions : null}
+16
@@ -0,0 +1,16 @@
from playwright.sync_api import sync_playwright
def main():
with sync_playwright() as p:
browser = p.chromium.launch(headless=False)
page = browser.new_page()
page.goto("https://www.linkedin.com/login")
print("Please log in to LinkedIn in the opened browser window.")
input("Press Enter here when you have logged in...")
# Now search connections
print("Logged in. Ready to proceed.")
browser.close()
if __name__ == "__main__":
main()
+149 -7
@@ -878,13 +878,56 @@ def _sort_records(records: list[dict[str, Any]]) -> list[dict[str, Any]]:
)
def _load_session_data(
logs_dir: Path, session_id: str, limit_files: int
) -> list[dict[str, Any]] | None:
"""Load records for a specific session on demand."""
if not logs_dir.exists():
return None
files = sorted(
[path for path in logs_dir.iterdir() if path.is_file() and path.suffix == ".jsonl"],
key=lambda path: path.stat().st_mtime,
reverse=True,
)[:limit_files]
records: list[dict[str, Any]] = []
for path in files:
try:
with path.open(encoding="utf-8") as handle:
for line_number, raw_line in enumerate(handle, start=1):
line = raw_line.strip()
if not line:
continue
try:
payload = json.loads(line)
except json.JSONDecodeError:
payload = {
"timestamp": "",
"execution_id": "",
"assistant_text": "",
"_parse_error": f"{path.name}:{line_number}",
"_raw_line": line,
}
# Only include records for this session
if str(payload.get("execution_id") or "").strip() == session_id:
payload["_log_file"] = str(path)
records.append(payload)
except OSError:
continue
return _sort_records(records) if records else None
def _run_server(
html: str,
sessions: dict[str, list[dict[str, Any]]],
logs_dir: Path,
limit_files: int,
port: int,
no_open: bool,
) -> None:
html_bytes = html.encode("utf-8")
session_cache: dict[str, list[dict[str, Any]]] = {}
class Handler(http.server.BaseHTTPRequestHandler):
def do_GET(self) -> None:
@@ -892,11 +935,17 @@ def _run_server(
self._respond(200, "text/html; charset=utf-8", html_bytes)
elif self.path.startswith("/api/session/"):
sid = urllib.parse.unquote(self.path[len("/api/session/") :])
records = sessions.get(sid)
# Check cache first
if sid in session_cache:
records = session_cache[sid]
else:
records = _load_session_data(logs_dir, sid, limit_files)
if records is not None:
session_cache[sid] = records
if records is None:
self._respond(404, "application/json", b"[]")
else:
body = json.dumps(_sort_records(records), ensure_ascii=False).encode("utf-8")
body = json.dumps(records, ensure_ascii=False).encode("utf-8")
self._respond(200, "application/json", body)
else:
self.send_error(404)
@@ -927,13 +976,106 @@ def _run_server(
server.server_close()
def _discover_session_summaries(
logs_dir: Path, limit_files: int, include_tests: bool
) -> list[SessionSummary]:
"""Discover only session summaries without loading full record data."""
if not logs_dir.exists():
raise FileNotFoundError(f"log directory not found: {logs_dir}")
files = sorted(
[path for path in logs_dir.iterdir() if path.is_file() and path.suffix == ".jsonl"],
key=lambda path: path.stat().st_mtime,
reverse=True,
)[:limit_files]
# Collect minimal info per session: just first/last records and metadata
by_session: dict[str, list[dict[str, Any]]] = defaultdict(list)
for path in files:
try:
with path.open(encoding="utf-8") as handle:
for raw_line in handle:
line = raw_line.strip()
if not line:
continue
try:
payload = json.loads(line)
except json.JSONDecodeError:
continue
execution_id = str(payload.get("execution_id") or "").strip()
if execution_id:
# Store minimal data for summary generation
minimal = {
"timestamp": payload.get("timestamp", ""),
"iteration": payload.get("iteration", 0),
"stream_id": payload.get("stream_id", ""),
"node_id": payload.get("node_id", ""),
"token_counts": payload.get("token_counts", {}),
"_log_file": str(path),
}
by_session[execution_id].append(minimal)
except OSError:
continue
# Filter out test sessions if needed
if not include_tests:
by_session = {
eid: recs
for eid, recs in by_session.items()
if not _is_test_session(eid, recs)
}
summaries: list[SessionSummary] = []
for execution_id, session_records in by_session.items():
session_records.sort(
key=lambda record: (
str(record.get("timestamp", "")),
record.get("iteration", 0),
)
)
first = session_records[0]
last = session_records[-1]
summaries.append(
SessionSummary(
execution_id=execution_id,
log_file=str(first.get("_log_file", "")),
start_timestamp=str(first.get("timestamp", "")),
end_timestamp=str(last.get("timestamp", "")),
turn_count=len(session_records),
streams=sorted(
{str(r.get("stream_id", "")) for r in session_records if r.get("stream_id")}
),
nodes=sorted(
{str(r.get("node_id", "")) for r in session_records if r.get("node_id")}
),
models=sorted(
{
str(r.get("token_counts", {}).get("model", ""))
for r in session_records
if isinstance(r.get("token_counts"), dict)
and r.get("token_counts", {}).get("model")
}
),
)
)
summaries.sort(key=lambda summary: summary.start_timestamp, reverse=True)
return summaries
def main() -> int:
args = _parse_args()
records = _discover_records(args.logs_dir.expanduser(), args.limit_files)
summaries, sessions = _group_sessions(records, include_tests=args.include_tests)
logs_dir = args.logs_dir.expanduser()
# Only discover summaries, not full session data
summaries = _discover_session_summaries(
logs_dir, args.limit_files, args.include_tests
)
initial_session_id = args.session or (summaries[0].execution_id if summaries else "")
if initial_session_id and initial_session_id not in sessions:
if initial_session_id and not any(
s.execution_id == initial_session_id for s in summaries
):
print(f"session not found: {initial_session_id}")
return 1
@@ -945,7 +1087,7 @@ def main() -> int:
print(args.output)
return 0
_run_server(html_report, sessions, args.port, args.no_open)
_run_server(html_report, logs_dir, args.limit_files, args.port, args.no_open)
return 0
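Once the viewer server is running, the lazy endpoint above can be exercised directly. A sketch with a placeholder session id; the port is assumed:
import json
import urllib.error
import urllib.request

sid = "SESSION_ID"  # placeholder execution_id
url = f"http://127.0.0.1:8000/api/session/{sid}"  # port assumed
try:
    with urllib.request.urlopen(url) as resp:
        print(len(json.load(resp)), "records")
except urllib.error.HTTPError as err:
    print("unknown session:", err.code)  # the handler answers 404 with b"[]"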