calude-code-06-新颖组件定义 Claude Code 的关键创新

老马啸西风2025/3/7大约 20 分钟

新颖组件：定义 Claude Code 的关键创新

以下是严格、完整、不做任何省略的中文翻译，保持原有结构、标题层级、代码块、表格与 Markdown 格式不变，仅对自然语言内容进行忠实翻译，代码保持原样。

新颖组件：定义 Claude Code 的关键创新

流式 JSON 解析器：处理 LLM 的“未完成思考”

当 LLM 以流式方式发送一次工具调用请求时，它并不会一次性发送完整的 JSON。相反，你可能会收到如下片段：

{"file_path": "/src/
{"file_path": "/src/main.
{"file_path": "/src/main.ts", "old_str
{"file_path": "/src/main.ts", "old_string": "console.log('hell

流式 JSON 解析器优雅地解决了这一问题：

class StreamingToolInputParser {
  private buffer: string = '';
  private state = {
    depth: 0,           // {} / [] 的嵌套层级
    inString: boolean,  // 当前是否位于字符串中？
    escape: boolean,    // 前一个字符是否是反斜杠？
    stringChar: '"' | "'" | null,  // 当前字符串使用的引号类型
  };

  addChunk(chunk: string): ParseResult {
    this.buffer += chunk;

    // 逐字符更新解析器状态
    for (let i = 0; i < chunk.length; i++) {
      const char = chunk[i];
      const prevChar = i > 0 ? chunk[i-1] : this.buffer[this.buffer.length - chunk.length - 1];

      // 处理转义序列
      if (this.escape) {
        this.escape = false;
        continue;
      }

      if (char === '\\' && this.state.inString) {
        this.escape = true;
        continue;
      }

      // 字符串边界检测
      if (!this.state.inString && (char === '"' || char === "'")) {
        this.state.inString = true;
        this.state.stringChar = char;
      } else if (this.state.inString && char === this.state.stringChar) {
        this.state.inString = false;
        this.state.stringChar = null;
      }

      // 在字符串之外跟踪嵌套深度
      if (!this.state.inString) {
        if (char === '{' || char === '[') {
          this.state.depth++;
        } else if (char === '}' || char === ']') {
          this.state.depth--;

          // 当回到 depth 0 时尝试解析
          if (this.state.depth === 0) {
            return this.tryParse();
          }
        }
      }
    }

    // 即便没有回到 depth 0，也可能已经完整（畸形 JSON）
    if (this.buffer.length > 10000) { // 安全上限
      return this.tryParseWithRecovery();
    }

    return { complete: false };
  }

  private tryParse(): ParseResult {
    try {
      const parsed = JSON.parse(this.buffer);
      return { complete: true, value: parsed };
    } catch (e) {
      return { complete: false, partial: this.buffer };
    }
  }

  private tryParseWithRecovery(): ParseResult {
    let attemptBuffer = this.buffer;

    // 修复策略 1：关闭未关闭的字符串
    if (this.state.inString && this.state.stringChar) {
      attemptBuffer += this.state.stringChar;

      // 尝试关闭未闭合的结构
      attemptBuffer += '}'.repeat(Math.max(0, this.state.depth));
      attemptBuffer += ']'.repeat(
        Math.max(0, (attemptBuffer.match(/\[/g) || []).length -
                    (attemptBuffer.match(/\]/g) || []).length)
      );
    }

    // 修复策略 2：基于结构分析自动补全
    const braceBalance = (attemptBuffer.match(/{/g) || []).length -
                        (attemptBuffer.match(/}/g) || []).length;
    const bracketBalance = (attemptBuffer.match(/\[/g) || []).length -
                          (attemptBuffer.match(/\]/g) || []).length;

    attemptBuffer += '}'.repeat(Math.max(0, braceBalance));
    attemptBuffer += ']'.repeat(Math.max(0, bracketBalance));

    try {
      const parsed = JSON.parse(attemptBuffer);
      return {
        complete: true,
        value: parsed,
        wasRepaired: true,
        repairs: {
          closedStrings: this.state.inString,
          addedBraces: braceBalance,
          addedBrackets: bracketBalance
        }
      };
    } catch (e) {
      // 修复策略 3：尽可能提取可用数据
      const partialResult = this.extractPartialData(this.buffer);
      return {
        complete: false,
        partial: partialResult,
        error: e.message
      };
    }
  }

  private extractPartialData(buffer: string): any {
    // 尝试提取完整的键值对
    const result: any = {};
    const keyValuePattern = /"(\w+)":\s*("([^"\\]*(\\.[^"\\]*)*)"|true|false|null|\d+)/g;

    let match;
    while ((match = keyValuePattern.exec(buffer)) !== null) {
      const [, key, value] = match;
      try {
        result[key] = JSON.parse(value);
      } catch {
        result[key] = value; // 如果解析失败则按字符串存储
      }
    }

    return Object.keys(result).length > 0 ? result : null;
  }
}

为什么这是新颖的：

传统 JSON 解析器无法处理不完整输入
该解析器支持渐进式解析，并能返回有意义的部分结果
多种修复策略可处理常见的 LLM 流式输出问题
支持响应式 UI，在工具输入流式到达时即时展示

性能特征：

输入大小	解析时间	内存	成功率
<1KB	<0.1ms	O(n)	100%
1–10KB	0.1–1ms	O(n)	99.9%
10–100KB	1–10ms	O(n)	99.5%
>100KB	10–50ms	O(n)	98%（启用修复）

`normalizeToSize` 算法：智能数据截断

在向 LLM 或遥测服务发送数据时，大小限制至关重要。normalizeToSize 算法可以在保留结构的前提下，智能地缩减对象体积：

class DataNormalizer {
  static normalizeToSize(
    obj: any,
    maxDepth: number = 3,
    maxSizeInBytes: number = 100_000
  ): any {
    // 首先尝试完整深度
    let normalized = this.normalize(obj, maxDepth);
    let size = this.estimateSize(normalized);

    // 逐步降低深度直到满足大小限制
    while (size > maxSizeInBytes && maxDepth > 0) {
      maxDepth--;
      normalized = this.normalize(obj, maxDepth);
      size = this.estimateSize(normalized);
    }

    return normalized;
  }

  private static normalize(
    obj: any,
    maxDepth: number,
    currentDepth: number = 0,
    visited = new WeakSet()
  ): any {
    // 处理基本类型
    if (obj === null) return '[null]';
    if (obj === undefined) return '[undefined]';
    if (typeof obj === 'number' && isNaN(obj)) return '[NaN]';
    if (typeof obj === 'bigint') return `[BigInt: ${obj}n]`;

    // 处理函数与 Symbol
    if (typeof obj === 'function') {
      return `[Function: ${obj.name || 'anonymous'}]`;
    }
    if (typeof obj === 'symbol') {
      return `[Symbol: ${obj.description || 'Symbol'}]`;
    }

    // 原始类型直接返回
    if (['string', 'number', 'boolean'].includes(typeof obj)) {
      return obj;
    }

    // 达到深度上限
    if (currentDepth >= maxDepth) {
      if (Array.isArray(obj)) return `[Array(${obj.length})]`;
      if (obj.constructor?.name) {
        return `[${obj.constructor.name}]`;
      }
      return '[Object]';
    }

    // 循环引用检测
    if (visited.has(obj)) {
      return '[Circular]';
    }
    visited.add(obj);

    // 已知类型的特殊处理
    if (this.isReactElement(obj)) {
      return `[React.${obj.type?.name || obj.type || 'Element'}]`;
    }

    if (this.isVueComponent(obj)) {
      return `[Vue.${obj.$options?.name || 'Component'}]`;
    }

    if (obj instanceof Error) {
      return {
        name: obj.name,
        message: obj.message,
        stack: this.truncateStack(obj.stack)
      };
    }

    if (obj instanceof Date) {
      return obj.toISOString();
    }

    if (obj instanceof RegExp) {
      return obj.toString();
    }

    // 处理 DOM 元素
    if (this.isDOMElement(obj)) {
      return `[${obj.tagName}${obj.id ? '#' + obj.id : ''}]`;
    }

    // 处理 toJSON 方法
    if (typeof obj.toJSON === 'function') {
      try {
        return this.normalize(
          obj.toJSON(),
          maxDepth,
          currentDepth,
          visited
        );
      } catch {
        return '[Object with toJSON error]';
      }
    }

    // 数组处理
    if (Array.isArray(obj)) {
      const result = [];
      const maxItems = 100; // 限制数组大小

      for (let i = 0; i < Math.min(obj.length, maxItems); i++) {
        result.push(
          this.normalize(obj[i], maxDepth, currentDepth + 1, visited)
        );
      }

      if (obj.length > maxItems) {
        result.push(`... ${obj.length - maxItems} more items`);
      }

      return result;
    }

    // 对象处理
    const result: any = {};
    const keys = Object.keys(obj);
    const maxProps = 50; // 限制属性数量

    // 尊重 Sentry 指令
    if (obj.__sentry_skip_normalization__) {
      return obj;
    }

    const effectiveMaxDepth =
      obj.__sentry_override_normalization_depth__ || maxDepth;

    for (let i = 0; i < Math.min(keys.length, maxProps); i++) {
      const key = keys[i];
      try {
        result[key] = this.normalize(
          obj[key],
          effectiveMaxDepth,
          currentDepth + 1,
          visited
        );
      } catch {
        result[key] = '[Error accessing property]';
      }
    }

    if (keys.length > maxProps) {
      result['...'] = `${keys.length - maxProps} more properties`;
    }

    return result;
  }

  private static estimateSize(obj: any): number {
    // 不做完整序列化的快速估算
    const sample = JSON.stringify(obj).substring(0, 1000);
    const avgCharSize = new Blob([sample]).size / sample.length;

    const fullLength = this.estimateJsonLength(obj);
    return Math.ceil(fullLength * avgCharSize);
  }

  private static estimateJsonLength(obj: any, visited = new WeakSet()): number {
    if (obj === null || obj === undefined) return 4; // "null"
    if (typeof obj === 'boolean') return obj ? 4 : 5; // "true" : "false"
    if (typeof obj === 'number') return String(obj).length;
    if (typeof obj === 'string') return obj.length + 2; // 引号

    if (visited.has(obj)) return 12; // "[Circular]"
    visited.add(obj);

    if (Array.isArray(obj)) {
      let length = 2; // []
      for (const item of obj) {
        length += this.estimateJsonLength(item, visited) + 1; // 逗号
      }
      return length;
    }

    if (typeof obj === 'object') {
      let length = 2; // {}
      for (const key in obj) {
        length += key.length + 3; // "key":
        length += this.estimateJsonLength(obj[key], visited) + 1; // 逗号
      }
      return length;
    }

    return 10; // 默认估算
  }
}

为什么这是新颖的：

基于真实字节大小的迭代式深度收缩
对特殊对象的类型感知序列化
尊重 React、Vue 等框架对象
使用 WeakSet 高效检测循环引用
在约束内尽可能保留信息

使用场景：

// LLM 上下文准备
const context = normalizeToSize(
  largeProjectState,
  10,     // 初始深度 10
  50_000  // 上下文 50KB 限制
);

// 遥测错误上报
const errorContext = normalizeToSize(
  applicationState,
  5,       // 合理深度
  10_000   // 错误报告 10KB
);