Deep Dive: Streaming

Build responsive UIs with real-time Copilot responses.

Why Streaming Matters

LLM responses can take seconds to generate. Without streaming:

  • Users stare at a blank screen
  • Long responses feel even longer
  • No feedback that anything is happening

With streaming:

  • Users see an immediate response
  • Content appears progressively
  • App feels faster (even if total time is the same)

Event Stream Anatomy

Event sequence for simple response

Time →

send()

  ├─[0ms]── turnStart

  ├─[100ms]── textDelta: "I"
  ├─[120ms]── textDelta: " can"
  ├─[140ms]── textDelta: " help"
  ├─[160ms]── textDelta: " you"
  ├─[180ms]── textDelta: " with"
  ├─[200ms]── textDelta: " that"
  ├─[220ms]── textDelta: "."

  └─[250ms]── turnEnd

Event sequence with tool call

send()

  ├─── turnStart

  ├─── textDelta: "Let me check..."

  ├─── toolCall (Copilot pauses, waiting for result)
  │         │
  │         └──► Your handler runs
  │         └──► submitToolResult()

  ├─── textDelta: "The answer is..."

  └─── turnEnd

Processing Patterns

Pattern 1: Simple print-as-you-go

// Write each text fragment to stdout the moment it arrives.
for await (const ev of session.send(prompt)) {
  if (ev.type !== "textDelta") continue;
  process.stdout.write(ev.delta);
}

Pattern 2: Accumulate then process

// Running transcript of the turn so far.
let fullResponse = "";

for await (const ev of session.send(prompt)) {
  if (ev.type !== "textDelta") continue;
  fullResponse += ev.delta;
  updateUI(fullResponse); // Re-render with the full accumulated text
}

// Hand the completed response to downstream processing.
parseAndStore(fullResponse);

Pattern 3: Chunked processing

For markdown, code syntax highlighting, or other structured content:

// Holds the trailing partial line between deltas.
let buffer = "";

for await (const ev of session.send(prompt)) {
  if (ev.type !== "textDelta") continue;
  buffer += ev.delta;

  // Split off every finished line; the final piece is the (possibly
  // empty) unfinished remainder, which stays buffered for next time.
  const pieces = buffer.split("\n");
  buffer = pieces.pop() ?? "";
  for (const line of pieces) {
    processLine(line);
  }
}

// Flush whatever never received a trailing newline.
if (buffer) {
  processLine(buffer);
}

Pattern 4: Debounced updates

Prevent UI jank from too-frequent updates:

// Accumulated text not yet painted.
let pending = "";
// True while a frame callback is queued; prevents stacking rAF callbacks.
let updateScheduled = false;

for await (const event of session.send(prompt)) {
  if (event.type === "textDelta") {
    pending += event.delta;

    // Coalesce bursts of deltas into at most one UI update per frame.
    if (!updateScheduled) {
      updateScheduled = true;
      requestAnimationFrame(() => {
        updateUI(pending);
        updateScheduled = false;
      });
    }
  }
}

// Final update
// Flush anything that arrived after the last scheduled frame fired.
updateUI(pending);

Pattern 5: Typed character effect

For a typewriter-like appearance:

/**
 * Append `text` to `element` one character at a time for a
 * typewriter effect.
 *
 * @param text      The full string to reveal.
 * @param element   Target element; each character is appended to textContent.
 * @param msPerChar Delay between characters in milliseconds (default 30).
 */
async function typeText(text: string, element: HTMLElement, msPerChar = 30): Promise<void> {
  for (const char of text) {
    element.textContent += char;
    // Inline delay — the original called an undefined `sleep` helper.
    await new Promise<void>(resolve => setTimeout(resolve, msPerChar));
  }
}

// Characters already shown on screen.
let displayedLength = 0;
// Everything received from the model so far.
let fullText = "";

for await (const event of session.send(prompt)) {
  if (event.type === "textDelta") {
    fullText += event.delta;
    // Drain the typewriter: reveal one character per tick until the
    // display catches up with everything received so far.
    while (displayedLength < fullText.length) {
      outputElement.textContent = fullText.slice(0, ++displayedLength);
      // Inline delay — the original called an undefined `sleep` helper.
      await new Promise<void>(resolve => setTimeout(resolve, 20));
    }
  }
}

Building UIs

Terminal CLI

import * as readline from "readline";

/** Interactive terminal chat loop: read a line, stream the reply, repeat. */
async function chat(session: CopilotSession) {
  const rl = readline.createInterface({ input: process.stdin, output: process.stdout });

  // Promisified wrapper around the callback-style question() API.
  const ask = (promptText: string) =>
    new Promise<string>(resolve => rl.question(promptText, resolve));

  for (;;) {
    const input = await ask("\n> ");
    if (input === "exit") break;

    process.stdout.write("\n");

    // Echo deltas as they stream in.
    for await (const ev of session.send(input)) {
      if (ev.type === "textDelta") process.stdout.write(ev.delta);
    }
  }

  rl.close();
}

React with hooks

import { useState, useCallback } from "react";

// Hook that streams one Copilot response into React state.
// NOTE(review): concurrent send() calls would interleave their deltas
// into the same `text` state — confirm callers serialize sends.
function useCopilotStream(session: CopilotSession) {
  const [text, setText] = useState(""); // accumulated response text
  const [isStreaming, setIsStreaming] = useState(false); // true while a turn is in flight

  const send = useCallback(async (prompt: string) => {
    setText(""); // clear the previous turn's output
    setIsStreaming(true);

    try {
      for await (const event of session.send(prompt)) {
        if (event.type === "textDelta") {
          // Functional update: safe even though deltas arrive asynchronously.
          setText(prev => prev + event.delta);
        }
      }
    } finally {
      setIsStreaming(false); // reset even if the stream throws
    }
  }, [session]);

  return { text, isStreaming, send };
}

// Usage
// NOTE(review): `session` is a free variable in this example — in real
// code it would come from props or context.
function ChatComponent() {
  const { text, isStreaming, send } = useCopilotStream(session);

  return (
    <div>
      <button onClick={() => send("Hello!")}>Send</button>
      <div className="response">
        {text}
        {isStreaming && <span className="cursor"></span>}
      </div>
    </div>
  );
}

Server-Sent Events (SSE)

// Server (Express): stream deltas to the browser as SSE frames.
app.get("/stream", async (req, res) => {
  res.setHeader("Content-Type", "text/event-stream");
  res.setHeader("Cache-Control", "no-cache");
  res.setHeader("Connection", "keep-alive");
  // Send headers immediately so the client sees the stream open.
  res.flushHeaders();

  const prompt = req.query.prompt as string;

  // Stop generating when the client disconnects; otherwise the model
  // keeps producing tokens for a socket nobody is reading.
  const controller = new AbortController();
  req.on("close", () => controller.abort());

  try {
    for await (const event of session.send(prompt, { signal: controller.signal })) {
      if (event.type === "textDelta") {
        res.write(`data: ${JSON.stringify({ type: "delta", text: event.delta })}\n\n`);
      }
    }
    res.write(`data: ${JSON.stringify({ type: "done" })}\n\n`);
  } catch (e) {
    // Expected when the client went away; rethrow anything else.
    if (!(e instanceof Error && e.name === "AbortError")) throw e;
  } finally {
    res.end();
  }
});

// Client
// NOTE(review): no onerror handler — EventSource auto-reconnects on
// error, which would re-issue the prompt; consider closing on error.
const evtSource = new EventSource(`/stream?prompt=${encodeURIComponent(prompt)}`);
evtSource.onmessage = (e) => {
  const data = JSON.parse(e.data);
  if (data.type === "delta") {
    output.textContent += data.text; // append each streamed fragment
  } else if (data.type === "done") {
    evtSource.close(); // stop the browser from auto-reconnecting
  }
};

WebSocket bidirectional

// Server: one Copilot session per WebSocket connection.
wss.on("connection", (ws) => {
  // Created lazily on the client's "init" message.
  let session: CopilotSession | undefined;

  ws.on("message", async (data) => {
    const msg = JSON.parse(data.toString());

    if (msg.type === "init") {
      session = await client.createSession();
      ws.send(JSON.stringify({ type: "ready" }));
    }

    if (msg.type === "send") {
      // Guard: a "send" arriving before "init" would crash on an
      // undefined session — report it instead.
      if (!session) {
        ws.send(JSON.stringify({ type: "error", message: "session not initialized" }));
        return;
      }
      for await (const event of session.send(msg.prompt)) {
        if (event.type === "textDelta") {
          ws.send(JSON.stringify({ type: "delta", text: event.delta }));
        }
      }
      ws.send(JSON.stringify({ type: "done" }));
    }
  });
});

Cancellation

Abort streaming mid-response

const controller = new AbortController();

// Start streaming in the background so a signal handler can abort it.
const streamTask = (async () => {
  for await (const event of session.send(prompt, { signal: controller.signal })) {
    if (event.type === "textDelta") {
      process.stdout.write(event.delta);
    }
  }
})();

// Cancel after user presses Ctrl+C
process.on("SIGINT", () => {
  controller.abort();
});

try {
  await streamTask;
} catch (e) {
  // `e` is `unknown` under strict mode — narrow before reading `.name`,
  // and rethrow anything that is not the expected cancellation
  // (the original silently swallowed all other errors).
  if (e instanceof Error && e.name === "AbortError") {
    console.log("\n[Cancelled]");
  } else {
    throw e;
  }
}

React with cancellation

/** Chat panel whose in-flight stream can be aborted by the user. */
function CancelableChat({ session }: { session: CopilotSession }) {
  // Non-null only while a stream is in flight; doubles as "is streaming".
  const [controller, setController] = useState<AbortController | null>(null);
  const [text, setText] = useState("");

  const send = async (prompt: string) => {
    const ctrl = new AbortController();
    setController(ctrl);
    setText("");

    try {
      for await (const event of session.send(prompt, { signal: ctrl.signal })) {
        if (event.type === "textDelta") {
          setText(prev => prev + event.delta);
        }
      }
    } catch (e) {
      // strict mode: `e` is `unknown` — narrow before touching `.name`.
      if (!(e instanceof Error && e.name === "AbortError")) throw e;
    } finally {
      setController(null);
    }
  };

  const cancel = () => controller?.abort();

  return (
    <div>
      <button onClick={() => send("Write a long story")}>Send</button>
      {controller && <button onClick={cancel}>Cancel</button>}
      <div>{text}</div>
    </div>
  );
}

Performance Optimization

1. Reduce DOM updates

// Bad: Update DOM on every delta
for await (const event of session.send(prompt)) {
  if (event.type === "textDelta") {
    element.textContent += event.delta; // Triggers layout on every delta
  }
}

// Good: Batch updates
// Trade-off: nothing is shown until the stream finishes — this trades
// streaming UX for a single cheap DOM write; use rAF batching (below)
// to keep progressive display.
let buffer = "";
for await (const event of session.send(prompt)) {
  if (event.type === "textDelta") {
    buffer += event.delta;
  }
}
element.textContent = buffer; // Single DOM update

2. Use requestAnimationFrame

// Text accumulated since the last painted frame.
let buffer = "";
// Guards against queueing more than one frame callback at a time.
let rafScheduled = false;

for await (const event of session.send(prompt)) {
  if (event.type === "textDelta") {
    buffer += event.delta;
    // At most one DOM write per animation frame, however fast deltas arrive.
    if (!rafScheduled) {
      rafScheduled = true;
      requestAnimationFrame(() => {
        element.textContent = buffer;
        rafScheduled = false;
      });
    }
  }
}

3. Virtual scrolling for long responses

For very long outputs, render only visible content:

// Use a library like react-window or react-virtualized
import { VariableSizeList } from "react-window";

// Renders only the rows visible in the 400px viewport, so DOM cost
// stays flat no matter how long the streamed response grows.
// NOTE(review): `measureLine` is assumed to return a row height in px
// and should be memoized — it is called per index on every render.
function StreamingOutput({ lines }) {
  return (
    <VariableSizeList
      height={400}
      itemCount={lines.length}
      itemSize={index => measureLine(lines[index])}
    >
      {({ index, style }) => (
        <div style={style}>{lines[index]}</div>
      )}
    </VariableSizeList>
  );
}

Pitfalls

1. Not handling backpressure

If your processing is slower than token generation:

// Problem: awaiting slow work inside the loop stalls iteration, so
// events back up in memory.
for await (const event of session.send(prompt)) {
  await slowProcess(event); // Slows iteration
}

// Solution: decouple consumption from processing with a local queue.
// The consumer below exits once the stream is finished and the queue
// is drained (the original version busy-polled forever after the
// stream ended, and called an undefined `sleep` helper).
const buffer: unknown[] = [];
let streamDone = false;

const processLoop = async () => {
  while (!streamDone || buffer.length > 0) {
    const event = buffer.shift();
    if (!event) {
      // Queue momentarily empty — back off briefly, then re-check.
      await new Promise<void>(resolve => setTimeout(resolve, 10));
      continue;
    }
    await slowProcess(event);
  }
};

const processing = processLoop(); // Run in background
for await (const event of session.send(prompt)) {
  buffer.push(event);
}
streamDone = true;
await processing; // Let the queue drain before moving on

2. Memory leaks with long streams

// Problem: Growing string in memory
// NOTE(review): this snippet also reads event.delta without checking
// event.type, unlike the other examples — confirm intended.
let fullText = "";
for await (const event of session.send(prompt)) {
  fullText += event.delta; // String grows forever
}

// Solution: Write to disk or limit buffer size
// NOTE(review): requires `import * as fs from "fs"` — not shown here.
const stream = fs.createWriteStream("output.txt");
for await (const event of session.send(prompt)) {
  if (event.type === "textDelta") {
    stream.write(event.delta);
  }
}
stream.end();

3. UI freezing

// Problem: Synchronous processing blocks UI
for await (const event of session.send(prompt)) {
  heavyComputation(event); // Blocks main thread
}

// Solution: move heavy work off the main thread (Web Worker) or yield
// to the event loop between chunks. Note: queueMicrotask does NOT help
// here — microtasks run to completion before the browser paints, so the
// main thread stays blocked. Use a macrotask or a worker instead.
for await (const event of session.send(prompt)) {
  setTimeout(() => heavyComputation(event), 0); // yields to the renderer
  // Or: postMessageToWorker(event);
}

Exercises

  1. Basic: Build a CLI that shows a spinner while waiting for turnStart, then streams output
  2. Intermediate: Create a React component with a “Stop” button that cancels streaming
  3. Advanced: Build a streaming markdown renderer that syntax-highlights code blocks as they appear

See exercises/ for starter code.