diff --git a/src/pages/Chat/index.tsx b/src/pages/Chat/index.tsx
index e15a54b..1325de5 100644
--- a/src/pages/Chat/index.tsx
+++ b/src/pages/Chat/index.tsx
@@ -41,6 +41,18 @@ type UserRunCard = {
   steps: TaskStep[];
   messageStepTexts: string[];
   streamingReplyText: string | null;
+  /**
+   * Whether the trailing "Thinking..." indicator should be hidden for this
+   * card. True only when the run's live stream is currently rendered AS a
+   * streaming step inside the graph (the step itself already signals
+   * liveness, so the extra indicator would be redundant). False in all
+   * other cases — including when the stream is promoted to a bubble
+   * below the graph, or when there is no streaming content at all (the
+   * gap between tool rounds), because the graph has no visible activity
+   * of its own in those windows and the indicator is what tells the user
+   * "work is still in progress".
+   */
+  suppressThinking: boolean;
 };
 
 function getPrimaryMessageStepTexts(steps: TaskStep[]): string[] {
@@ -174,6 +186,15 @@ export function Chat() {
     : 0;
   const streamText = streamMsg ? extractText(streamMsg) : (typeof streamingMessage === 'string' ? streamingMessage : '');
   const hasStreamText = streamText.trim().length > 0;
+  // Whether the streaming chunk currently carries a `thinking` block. Used as
+  // a liveness signal so the run stays "active" (and the ExecutionGraphCard
+  // keeps showing its trailing "Thinking..." indicator) during the brief window
+  // between a tool finishing and the next text/tool chunk arriving — that gap
+  // is normally only filled by streamed thinking. NOT included in
+  // `shouldRenderStreaming`: a thinking-only stream chunk should not produce
+  // a chat bubble (thinking is rendered exclusively inside the ExecutionGraph).
+  const streamThinking = streamMsg ? extractThinking(streamMsg) : null;
+  const hasStreamThinking = !!streamThinking && streamThinking.trim().length > 0;
   const streamTools = streamMsg ? extractToolUse(streamMsg) : [];
   const hasStreamTools = streamTools.length > 0;
   const streamImages = streamMsg ? extractImages(streamMsg) : [];
@@ -181,7 +202,7 @@ export function Chat() {
   const hasStreamToolStatus = streamingTools.length > 0;
   const hasRunningStreamToolStatus = streamingTools.some((tool) => tool.status === 'running');
   const shouldRenderStreaming = sending && (hasStreamText || hasStreamTools || hasStreamImages || hasStreamToolStatus);
-  const hasAnyStreamContent = hasStreamText || hasStreamTools || hasStreamImages || hasStreamToolStatus;
+  const hasAnyStreamContent = hasStreamText || hasStreamThinking || hasStreamTools || hasStreamImages || hasStreamToolStatus;
 
   const isEmpty = messages.length === 0 && !sending;
   const subagentCompletionInfos = messages.map((message) => parseSubagentCompletionInfo(message));
@@ -234,7 +255,22 @@ export function Chat() {
     const hasToolActivity = segmentMessages.some((m) =>
       m.role === 'assistant' && extractToolUse(m).length > 0,
     );
-    const hasFinalReply = segmentMessages.some((m) => {
+    // Locate the last tool-use message so we only count text messages that
+    // come AFTER all tool calls as "final reply".  Intermediate narration
+    // messages (pure text, no tool_use) sit BEFORE tool calls and must not
+    // be misread as the concluding reply — otherwise `runStillExecutingTools`
+    // flips to false between tool rounds, collapsing the trailing
+    // "Thinking..." indicator during the brief gap before the next stream chunk.
+    let lastToolUseOffset = -1;
+    for (let i = segmentMessages.length - 1; i >= 0; i -= 1) {
+      const m = segmentMessages[i];
+      if (m.role === 'assistant' && extractToolUse(m).length > 0) {
+        lastToolUseOffset = i;
+        break;
+      }
+    }
+    const hasFinalReply = segmentMessages.some((m, i) => {
+      if (i <= lastToolUseOffset) return false;
       if (m.role !== 'assistant') return false;
       if (extractText(m).trim().length === 0) return false;
       const content = m.content;
@@ -291,22 +327,36 @@ export function Chat() {
     };
 
     // Show the streaming response as a separate bubble (not inside the
-    // execution graph) once all tool calls have finished.
+    // execution graph) once tool activity has happened and the CURRENT stream
+    // chunk carries no tool_use block.
     //
-    // Three signals indicate "tools finished, now streaming the reply":
-    //   1. `pendingFinal`        — set by tool-result final events
-    //   2. `allToolsCompleted`   — all entries in streamingTools are completed
-    //   3. `hasCompletedToolPhase` — historical messages (loaded by the poll)
-    //      contain tool_use blocks, meaning the Gateway executed tools
-    //      server-side without sending streaming tool events to the client.
-    //      During intermediate narration (before reply), stripProcessMessagePrefix
-    //      will produce an empty trimmedReplyText, so the graph stays active.
+    // We use an optimistic promotion strategy because the distinguishing
+    // signal between "narration-before-next-tool" and "final reply" is not
+    // available during early deltas — both are text-only, both arrive after
+    // `hasToolActivity` has flipped true.  Any of these signals opens the
+    // promotion gate:
+    //   1. `pendingFinal`       — tool-result final just fired; next text is
+    //      (almost always) the final reply.
+    //   2. `allToolsCompleted`  — every client-tracked tool entry reached
+    //      `completed` state.
+    //   3. `hasToolActivity`    — at least one prior tool_use exists in the
+    //      segment, i.e. we're past the first tool round.
+    //
+    // Demotion happens the moment a tool_use block appears in the streaming
+    // message (`streamTools.length > 0`) OR a tool transitions back to
+    // `running`.  When demoted, the stream re-renders inside the graph as a
+    // narration step.  A brief flicker when narration turns into the next
+    // tool round is inherent to optimistic promotion and is accepted.
+    //
+    // Earlier iterations tried restricting this gate to only
+    // `pendingFinal || allToolsCompleted` to protect the trailing
+    // "Thinking..." indicator.  That check is real, but belongs in the
+    // `suppressThinking` coupling below — not here.  With the coupling
+    // fixed, the three-signal gate gives the correct bubble placement for
+    // both narration and final reply.
     const allToolsCompleted = streamingTools.length > 0 && !hasRunningStreamToolStatus;
-    const hasCompletedToolPhase = segmentMessages.some((msg) =>
-      msg.role === 'assistant' && extractToolUse(msg).length > 0,
-    );
     const rawStreamingReplyCandidate = isLatestOpenRun
-      && (pendingFinal || allToolsCompleted || hasCompletedToolPhase)
+      && (pendingFinal || allToolsCompleted || hasToolActivity)
       && (hasStreamText || hasStreamImages)
       && streamTools.length === 0
       && !hasRunningStreamToolStatus;
@@ -339,6 +389,7 @@ export function Chat() {
           steps: [],
           messageStepTexts: [],
           streamingReplyText: null,
+          suppressThinking: false,
         }];
       }
       const cached = graphStepCache[runKey];
@@ -360,6 +411,7 @@ export function Chat() {
         steps: cleanedSteps,
         messageStepTexts: getPrimaryMessageStepTexts(cleanedSteps),
         streamingReplyText: null,
+        suppressThinking: false,
       }];
     }
 
@@ -393,6 +445,23 @@ export function Chat() {
     // uncontrolled path before the controlled `expanded` override could kick in.
     const cardActive = isLatestOpenRun;
 
+    // Suppress the trailing "Thinking..." indicator only when the live stream is
+    // currently rendered AS a streaming step inside this card's graph. In
+    // that case the streaming step itself is the activity signal, and the
+    // separate trailing indicator would be redundant.
+    //   - streamingReplyText != null: stream is promoted to a bubble → graph
+    //     has no live step of its own → DO show the trailing indicator so the
+    //     user still sees progress in the graph (indicator rendered above the
+    //     bubble).
+    //   - no stream content at all (the gap between tool rounds): graph also
+    //     has no live step → DO show the indicator — this is the very case
+    //     the indicator exists for.
+    //   - stream IS in graph (e.g. tool_use is streaming): indicator is
+    //     redundant → suppress.
+    const streamIsInGraph =
+      isLatestOpenRun && streamingReplyText == null && hasAnyStreamContent;
+    const suppressThinking = streamIsInGraph;
+
     return [{
       triggerIndex: idx,
       replyIndex,
@@ -403,6 +472,7 @@ export function Chat() {
       steps,
       messageStepTexts: getPrimaryMessageStepTexts(steps),
       streamingReplyText,
+      suppressThinking,
     }];
   });
   const hasActiveExecutionGraph = userRunCards.some((card) => card.active);
@@ -556,7 +626,7 @@ export function Chat() {
                               agentLabel={card.agentLabel}
                               steps={card.steps}
                               active={card.active}
-                              suppressThinking={card.streamingReplyText != null}
+                              suppressThinking={card.suppressThinking}
                               expanded={expanded}
                               onExpandedChange={(next) =>
                                 setGraphExpandedOverrides((prev) => ({ ...prev, [runKey]: next }))