feat: token consumption usage

Richard Tang
2026-04-23 15:05:30 -07:00
parent 44cb0c0f4c
commit 8c6428f445
11 changed files with 522 additions and 38 deletions
+22 -7
@@ -151,8 +151,11 @@ interface ChatPanelProps {
   onStartNewSession?: () => void;
   /** When true, disable the start-new-session button (request in flight). */
   startingNewSession?: boolean;
-  /** Cumulative LLM token usage for this session */
-  tokenUsage?: { input: number; output: number };
+  /** Cumulative LLM token usage for this session.
+   * `cached` (cache reads) and `cacheCreated` (cache writes) are subsets of
+   * `input` — providers count both inside prompt_tokens. Display them
+   * separately; do not add them to a total. */
+  tokenUsage?: { input: number; output: number; cached?: number; cacheCreated?: number };
   /** Optional action element rendered on the right side of the "Conversation" header */
   headerAction?: React.ReactNode;
 }
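Note: the "subset" semantics matter for any total shown to the user. A short worked example under OpenAI-style accounting, where cached tokens are reported inside `prompt_tokens` (the variable names here are illustrative, not from the codebase):

```ts
// One turn where most of the prompt was served from the provider cache.
// `input` already contains `cached`, so a correct session total is input + output.
const turn = { input: 1200, output: 300, cached: 1000, cacheCreated: 0 };

const total = turn.input + turn.output;                       // 1500 (correct)
const doubleCounted = turn.input + turn.cached + turn.output; // 2500 (double-counts cache reads)
const freshInput = turn.input - turn.cached;                  // 200 uncached prompt tokens
```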
@@ -1482,11 +1485,23 @@ export default function ChatPanel({
           Context: {fmt(queenUsage.estimatedTokens)}/{fmt(queenUsage.maxTokens)}
         </span>
       )}
-      {hasTokens && (
-        <span title="LLM tokens used this session (input + output)">
-          Tokens: {fmt(tokenUsage!.input + tokenUsage!.output)}
-        </span>
-      )}
+      {hasTokens && (() => {
+        const cached = tokenUsage!.cached ?? 0;
+        const created = tokenUsage!.cacheCreated ?? 0;
+        // cached/created are subsets of input — never sum; surface separately.
+        const title = [
+          "LLM tokens used this session",
+          `input ${fmt(tokenUsage!.input)}`,
+          ` cache read ${fmt(cached)}`,
+          ` cache write ${fmt(created)}`,
+          `output ${fmt(tokenUsage!.output)}`,
+        ].join("\n");
+        return (
+          <span title={title}>
+            Tokens: {fmt(tokenUsage!.input + tokenUsage!.output)}
+          </span>
+        );
+      })()}
     </div>
   );
 })()}
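`fmt` is defined outside this diff. A plausible sketch, assuming it is a compact count formatter for the status bar (the real helper may differ):

```ts
// Assumed shape of fmt(): abbreviate large token counts for display.
const fmt = (n: number): string =>
  new Intl.NumberFormat("en", { notation: "compact", maximumFractionDigits: 1 }).format(n);

fmt(1500);    // "1.5K"
fmt(1234567); // "1.2M"
```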
+19 -3
@@ -71,7 +71,14 @@ export default function QueenDM() {
     { id: string; prompt: string; options?: string[] }[] | null
   >(null);
   const [awaitingInput, setAwaitingInput] = useState(false);
-  const [tokenUsage, setTokenUsage] = useState({ input: 0, output: 0 });
+  // `cached` and `cacheCreated` are subsets of `input` (providers count both
+  // inside prompt_tokens already) — display them, never add them to a total.
+  const [tokenUsage, setTokenUsage] = useState({
+    input: 0,
+    output: 0,
+    cached: 0,
+    cacheCreated: 0,
+  });
   const [historySessions, setHistorySessions] = useState<HistorySession[]>([]);
   const [historyLoading, setHistoryLoading] = useState(false);
   const [switchingSessionId, setSwitchingSessionId] = useState<string | null>(
@@ -118,7 +125,7 @@ export default function QueenDM() {
     setPendingQuestions(null);
     setAwaitingInput(false);
     setQueenPhase("independent");
-    setTokenUsage({ input: 0, output: 0 });
+    setTokenUsage({ input: 0, output: 0, cached: 0, cacheCreated: 0 });
     setInitialDraft(null);
     setColonySpawned(false);
     setSpawnedColonyName(null);
@@ -576,7 +583,16 @@ export default function QueenDM() {
       if (event.data) {
         const inp = (event.data.input_tokens as number) || 0;
         const out = (event.data.output_tokens as number) || 0;
-        setTokenUsage((prev) => ({ input: prev.input + inp, output: prev.output + out }));
+        // cached / cache_creation are subsets of input — accumulate
+        // separately for display, do NOT roll into input/total.
+        const cached = (event.data.cached_tokens as number) || 0;
+        const cacheCreated = (event.data.cache_creation_tokens as number) || 0;
+        setTokenUsage((prev) => ({
+          input: prev.input + inp,
+          output: prev.output + out,
+          cached: prev.cached + cached,
+          cacheCreated: prev.cacheCreated + cacheCreated,
+        }));
       }
       // Flush one queued message per LLM turn boundary. This is the
       // real "turn ended" signal in a queen DM — execution_completed
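For reference, the event shape the handler above assumes. Only the `data` field names are confirmed by this diff; the envelope (`type`, etc.) is a guess:

```ts
// Sketch of the incoming usage event. Per the comments above, cached_tokens
// and cache_creation_tokens are treated as already included in input_tokens.
interface TokenUsageEvent {
  type: string; // e.g. "token_usage" (hypothetical)
  data?: {
    input_tokens?: number;
    output_tokens?: number;
    cached_tokens?: number;         // cache reads
    cache_creation_tokens?: number; // cache writes
  };
}
```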