feat: token consumption usage

Richard Tang
2026-04-23 15:05:30 -07:00
parent 44cb0c0f4c
commit 8c6428f445
11 changed files with 522 additions and 38 deletions
+22 -7
@@ -151,8 +151,11 @@ interface ChatPanelProps {
   onStartNewSession?: () => void;
   /** When true, disable the start-new-session button (request in flight). */
   startingNewSession?: boolean;
-  /** Cumulative LLM token usage for this session */
-  tokenUsage?: { input: number; output: number };
+  /** Cumulative LLM token usage for this session.
+   * `cached` (cache reads) and `cacheCreated` (cache writes) are subsets of
+   * `input` — providers count both inside prompt_tokens. Display them
+   * separately; do not add them to a total. */
+  tokenUsage?: { input: number; output: number; cached?: number; cacheCreated?: number };
   /** Optional action element rendered on the right side of the "Conversation" header */
   headerAction?: React.ReactNode;
 }
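Note: the "subset" semantics matter for any total shown to the user. A short worked example under OpenAI-style accounting, where cached tokens are reported inside `prompt_tokens` (the variable names here are illustrative, not from the codebase):

```ts
// One turn where most of the prompt was served from the provider cache.
// `input` already contains `cached`, so a correct session total is input + output.
const turn = { input: 1200, output: 300, cached: 1000, cacheCreated: 0 };

const total = turn.input + turn.output;                       // 1500 (correct)
const doubleCounted = turn.input + turn.cached + turn.output; // 2500 (double-counts cache reads)
const freshInput = turn.input - turn.cached;                  // 200 uncached prompt tokens
```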
@@ -1482,11 +1485,23 @@ export default function ChatPanel({
           Context: {fmt(queenUsage.estimatedTokens)}/{fmt(queenUsage.maxTokens)}
         </span>
       )}
-      {hasTokens && (
-        <span title="LLM tokens used this session (input + output)">
-          Tokens: {fmt(tokenUsage!.input + tokenUsage!.output)}
-        </span>
-      )}
+      {hasTokens && (() => {
+        const cached = tokenUsage!.cached ?? 0;
+        const created = tokenUsage!.cacheCreated ?? 0;
+        // cached/created are subsets of input — never sum; surface separately.
+        const title = [
+          "LLM tokens used this session",
+          `input ${fmt(tokenUsage!.input)}`,
+          ` cache read ${fmt(cached)}`,
+          ` cache write ${fmt(created)}`,
+          `output ${fmt(tokenUsage!.output)}`,
+        ].join("\n");
+        return (
+          <span title={title}>
+            Tokens: {fmt(tokenUsage!.input + tokenUsage!.output)}
+          </span>
+        );
+      })()}
     </div>
   );
 })()}
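`fmt` is defined outside this diff. A plausible sketch, assuming it is a compact count formatter for the status bar (the real helper may differ):

```ts
// Assumed shape of fmt(): abbreviate large token counts for display.
const fmt = (n: number): string =>
  new Intl.NumberFormat("en", { notation: "compact", maximumFractionDigits: 1 }).format(n);

fmt(1500);    // "1.5K"
fmt(1234567); // "1.2M"
```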
+19 -3
@@ -71,7 +71,14 @@ export default function QueenDM() {
     { id: string; prompt: string; options?: string[] }[] | null
   >(null);
   const [awaitingInput, setAwaitingInput] = useState(false);
-  const [tokenUsage, setTokenUsage] = useState({ input: 0, output: 0 });
+  // `cached` and `cacheCreated` are subsets of `input` (providers count both
+  // inside prompt_tokens already) — display them, never add them to a total.
+  const [tokenUsage, setTokenUsage] = useState({
+    input: 0,
+    output: 0,
+    cached: 0,
+    cacheCreated: 0,
+  });
   const [historySessions, setHistorySessions] = useState<HistorySession[]>([]);
   const [historyLoading, setHistoryLoading] = useState(false);
   const [switchingSessionId, setSwitchingSessionId] = useState<string | null>(
@@ -118,7 +125,7 @@ export default function QueenDM() {
     setPendingQuestions(null);
     setAwaitingInput(false);
     setQueenPhase("independent");
-    setTokenUsage({ input: 0, output: 0 });
+    setTokenUsage({ input: 0, output: 0, cached: 0, cacheCreated: 0 });
     setInitialDraft(null);
     setColonySpawned(false);
     setSpawnedColonyName(null);
@@ -576,7 +583,16 @@ export default function QueenDM() {
       if (event.data) {
         const inp = (event.data.input_tokens as number) || 0;
         const out = (event.data.output_tokens as number) || 0;
-        setTokenUsage((prev) => ({ input: prev.input + inp, output: prev.output + out }));
+        // cached / cache_creation are subsets of input — accumulate
+        // separately for display, do NOT roll into input/total.
+        const cached = (event.data.cached_tokens as number) || 0;
+        const cacheCreated = (event.data.cache_creation_tokens as number) || 0;
+        setTokenUsage((prev) => ({
+          input: prev.input + inp,
+          output: prev.output + out,
+          cached: prev.cached + cached,
+          cacheCreated: prev.cacheCreated + cacheCreated,
+        }));
       }
       // Flush one queued message per LLM turn boundary. This is the
       // real "turn ended" signal in a queen DM — execution_completed
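For reference, the event shape the handler above assumes. Only the `data` field names are confirmed by this diff; the envelope (`type`, etc.) is a guess:

```ts
// Sketch of the incoming usage event. Per the comments above, cached_tokens
// and cache_creation_tokens are treated as already included in input_tokens.
interface TokenUsageEvent {
  type: string; // e.g. "token_usage" (hypothetical)
  data?: {
    input_tokens?: number;
    output_tokens?: number;
    cached_tokens?: number;         // cache reads
    cache_creation_tokens?: number; // cache writes
  };
}
```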