{
  "@context": {
    "@vocab": "http://schema.org/",
    "hasPart": {
      "@id": "http://schema.org/hasPart",
      "@type": "@id"
    },
    "mainEntity": {
      "@id": "http://schema.org/mainEntity",
      "@type": "@id"
    },
    "about": {
      "@id": "http://schema.org/about",
      "@type": "@id"
    },
    "author": {
      "@id": "http://schema.org/author",
      "@type": "@id"
    },
    "publisher": {
      "@id": "http://schema.org/publisher",
      "@type": "@id"
    },
    "Question": "http://schema.org/Question",
    "Answer": "http://schema.org/Answer",
    "DefinedTerm": "http://schema.org/DefinedTerm",
    "DefinedTermSet": "http://schema.org/DefinedTermSet",
    "HowTo": "http://schema.org/HowTo",
    "HowToStep": "http://schema.org/HowToStep",
    "CreativeWork": "http://schema.org/CreativeWork",
    "Person": "http://schema.org/Person",
    "Organization": "http://schema.org/Organization"
  },
  "@type": "CreativeWork",
  "name": "Externalization in LLM Agents: A Unified Review of Memory, Skills, Protocols and Harness Engineering",
  "author": [
    {
      "@type": "Person",
      "name": "Chenyu Zhou",
      "affiliation": "Shanghai Jiao Tong University"
    },
    {
      "@type": "Person",
      "name": "Huacan Chai",
      "affiliation": "Shanghai Jiao Tong University"
    },
    {
      "@type": "Person",
      "name": "Wenteng Chen",
      "affiliation": "Shanghai Jiao Tong University"
    },
    {
      "@type": "Person",
      "name": "Zihan Guo",
      "affiliation": ["Sun Yat-Sen University", "Shanghai Innovation Institute"]
    },
    {
      "@type": "Person",
      "name": "Rong Shan",
      "affiliation": "Shanghai Jiao Tong University"
    },
    {
      "@type": "Person",
      "name": "Yuanyi Song",
      "affiliation": "Shanghai Jiao Tong University"
    },
    {
      "@type": "Person",
      "name": "Tianyi Xu",
      "affiliation": "Shanghai Jiao Tong University"
    },
    {
      "@type": "Person",
      "name": "Yingxuan Yang",
      "affiliation": "Shanghai Jiao Tong University"
    },
    {
      "@type": "Person",
      "name": "Aofan Yu",
      "affiliation": "Shanghai Jiao Tong University"
    },
    {
      "@type": "Person",
      "name": "Weiming Zhang",
      "affiliation": "Shanghai Jiao Tong University"
    },
    {
      "@type": "Person",
      "name": "Congming Zheng",
      "affiliation": "Shanghai Jiao Tong University"
    },
    {
      "@type": "Person",
      "name": "Jiachen Zhu",
      "affiliation": "Shanghai Jiao Tong University"
    },
    {
      "@type": "Person",
      "name": "Zeyu Zheng",
      "affiliation": "Carnegie Mellon University"
    },
    {
      "@type": "Person",
      "name": "Zhuosheng Zhang",
      "affiliation": "Shanghai Jiao Tong University"
    },
    {
      "@type": "Person",
      "name": "Xingyu Lou",
      "affiliation": "OPPO"
    },
    {
      "@type": "Person",
      "name": "Changwang Zhang",
      "affiliation": "OPPO"
    },
    {
      "@type": "Person",
      "name": "Zhihui Fu",
      "affiliation": "OPPO"
    },
    {
      "@type": "Person",
      "name": "Jun Wang",
      "affiliation": "OPPO"
    },
    {
      "@type": "Person",
      "name": "Weiwen Liu",
      "affiliation": "Shanghai Jiao Tong University"
    },
    {
      "@type": "Person",
      "name": "Jianghao Lin",
      "affiliation": "Shanghai Jiao Tong University"
    },
    {
      "@type": "Person",
      "name": "Weinan Zhang",
      "affiliation": ["Shanghai Jiao Tong University", "Shanghai Innovation Institute"]
    }
  ],
  "datePublished": "2026-04-10",
  "abstract": "This paper reviews the shift in large language model (LLM) agents from internal model weight changes to externalizing cognitive burdens into memory, skills, protocols, and harness engineering. It argues that externalization transforms hard cognitive tasks into more reliably solvable forms by the model. Memory externalizes state across time, skills externalize procedural expertise, protocols externalize interaction structure, and harness engineering unifies these into governed execution. The paper traces the historical progression from weights to context to harness, analyzes the three externalization forms, and discusses their interactions and trade-offs. Emerging directions include self-evolving harnesses and shared infrastructure. The framework explains why agent progress depends on better external cognitive infrastructure alongside stronger models.",
  "articleBody": "LLM agents increasingly externalize cognitive burdens into memory, skills, protocols, and harness engineering to improve reliability and capability beyond model weight changes alone.",
  "hasPart": [
    {
      "@type": "DefinedTermSet",
      "name": "Defined Terms in LLM Agent Externalization",
      "description": "Key terms defining the externalization concepts in LLM agents.",
      "hasDefinedTerm": [
        {
          "@type": "DefinedTerm",
          "name": "Externalization",
          "description": "The progressive relocation of cognitive burdens from the model's internal computation into persistent, inspectable, and reusable external structures."
        },
        {
          "@type": "DefinedTerm",
          "name": "Memory Externalization",
          "description": "Externalizing state across time into persistent stores, enabling selective retrieval and continuity beyond ephemeral context windows."
        },
        {
          "@type": "DefinedTerm",
          "name": "Skill Externalization",
          "description": "Packaging procedural expertise into reusable artifacts that encode operational procedures, decision heuristics, and normative constraints."
        },
        {
          "@type": "DefinedTerm",
          "name": "Protocol Externalization",
          "description": "Defining explicit machine-readable contracts for interaction, including invocation grammar, lifecycle semantics, permissions, and discovery metadata."
        },
        {
          "@type": "DefinedTerm",
          "name": "Harness Engineering",
          "description": "The integrative runtime environment that coordinates memory, skills, and protocols with governance, control, and observability."
        },
        {
          "@type": "DefinedTerm",
          "name": "Working Context",
          "description": "Live intermediate state of the current task, such as open files, temporary variables, and execution checkpoints."
        },
        {
          "@type": "DefinedTerm",
          "name": "Episodic Experience",
          "description": "Records of prior runs including decision points, tool calls, failures, and outcomes."
        },
        {
          "@type": "DefinedTerm",
          "name": "Semantic Knowledge",
          "description": "Abstracted domain facts, heuristics, and stable world knowledge that outlive individual episodes."
        },
        {
          "@type": "DefinedTerm",
          "name": "Personalized Memory",
          "description": "Stable information about particular users or environments, such as preferences and recurring constraints."
        },
        {
          "@type": "DefinedTerm",
          "name": "Agent Loop",
          "description": "The perceive–retrieve–plan–act–observe cycle that governs agent execution."
        }
      ]
    },
    {
      "@type": "Question",
      "name": "What is the central thesis of the paper regarding LLM agent design?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "The central thesis is that externalization—the progressive relocation of cognitive burdens from the model's internal computation into persistent, inspectable, and reusable external structures—is the transition logic that unifies recent advances in memory, skills, protocols, and harness engineering for language agents."
      }
    },
    {
      "@type": "Question",
      "name": "What are the three main forms of externalization in LLM agents?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "Memory externalizes state across time, skills externalize procedural expertise, and protocols externalize interaction structure."
      }
    },
    {
      "@type": "Question",
      "name": "What role does the harness play in LLM agents?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "The harness serves as the unification layer that coordinates memory, skills, and protocols into a governed execution environment, providing orchestration logic, constraints, observability, and feedback loops."
      }
    },
    {
      "@type": "Question",
      "name": "How does memory externalization transform the cognitive task for the model?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "Memory externalization converts an internal recall problem into an external recognition-and-retrieval problem, allowing the model to recognize and use curated slices of history rather than regenerate knowledge from latent weights."
      }
    },
    {
      "@type": "Question",
      "name": "What are the four dimensions of externalized state in memory systems?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "Working context, episodic experience, semantic knowledge, and personalized memory."
      }
    },
    {
      "@type": "Question",
      "name": "What components constitute a skill in LLM agents?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "Operational procedures (task skeleton), decision heuristics (rules of thumb for branching), and normative constraints (conditions for acceptable execution)."
      }
    },
    {
      "@type": "Question",
      "name": "How are skills externalized and made operational in agent systems?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "Skills are specified via manifests or instruction files, discovered through registries or semantic retrieval, progressively disclosed in layers, bound at runtime to executable tools or protocols, and composed into higher-order capability packages."
      }
    },
    {
      "@type": "Question",
      "name": "Why are protocols important in LLM agents?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "Protocols externalize interaction burdens by providing explicit, machine-readable contracts for invocation grammar, lifecycle semantics, permissions, and discovery, making interactions governable, interoperable, and auditable."
      }
    },
    {
      "@type": "Question",
      "name": "What are the six analytical dimensions of harness design?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "Agent loop and control flow, sandboxing and execution isolation, human oversight and approval gates, observability and structured feedback, configuration and permissions, and context budget management."
      }
    },
    {
      "@type": "HowTo",
      "name": "How to externalize memory in LLM agents",
      "description": "Steps to externalize state across time for reliable long-horizon agent behavior.",
      "step": [
        {
          "@type": "HowToStep",
          "position": 1,
          "name": "Identify relevant state types",
          "text": "Distinguish working context, episodic experience, semantic knowledge, and personalized memory to externalize."
        },
        {
          "@type": "HowToStep",
          "position": 2,
          "name": "Choose memory architecture",
          "text": "Select among monolithic context, retrieval storage, hierarchical orchestration, or adaptive memory systems."
        },
        {
          "@type": "HowToStep",
          "position": 3,
          "name": "Integrate with harness",
          "text": "Ensure memory supports execution checkpoints, failure recording, and supplies task-relevant content to the agent core."
        }
      ]
    },
    {
      "@type": "HowTo",
      "name": "How to externalize skills in LLM agents",
      "description": "Steps to package procedural expertise into reusable, discoverable, and executable artifacts.",
      "step": [
        {
          "@type": "HowToStep",
          "position": 1,
          "name": "Specify skill artifacts",
          "text": "Create manifests or instruction files detailing capabilities, preconditions, constraints, and examples."
        },
        {
          "@type": "HowToStep",
          "position": 2,
          "name": "Implement discovery mechanisms",
          "text": "Use registries and semantic retrieval to enable selective skill loading."
        },
        {
          "@type": "HowToStep",
          "position": 3,
          "name": "Bind and compose skills",
          "text": "At runtime, bind skills to tools, APIs, or subagents and compose them into higher-level workflows."
        }
      ]
    },
    {
      "@type": "HowTo",
      "name": "How to externalize protocols in LLM agents",
      "description": "Steps to formalize interaction contracts for governable and interoperable agent communication.",
      "step": [
        {
          "@type": "HowToStep",
          "position": 1,
          "name": "Define invocation grammar",
          "text": "Specify argument names, types, ordering, and return structures in schemas."
        },
        {
          "@type": "HowToStep",
          "position": 2,
          "name": "Establish lifecycle semantics",
          "text": "Define state machines or event streams for multi-step interaction coordination."
        },
        {
          "@type": "HowToStep",
          "position": 3,
          "name": "Encode permissions and discovery",
          "text": "Implement inspectable rules for authorization and registries for capability discovery."
        }
      ]
    }
  ],
  "mainEntity": [
    {
      "@type": "Question",
      "name": "What is the role of externalization in the evolution of LLM agents?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "Externalization explains the shift from relying solely on model weights to incorporating memory, skills, protocols, and harness engineering, transforming cognitive burdens into forms the model can solve more reliably."
      }
    },
    {
      "@type": "Question",
      "name": "How do memory, skills, and protocols interact within the harness?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "Memory provides evidence for skill formation and protocol routing; skills convert stored experience into reusable procedures and invoke protocolized actions; protocols constrain execution and write normalized outcomes back into memory, forming a self-reinforcing cycle."
      }
    },
    {
      "@type": "Question",
      "name": "What are the future directions for externalization in LLM agents?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "Future directions include expanding the boundary between parametric and externalized capabilities, extending externalization to embodied agents, enabling self-evolving harnesses, managing costs and security risks, shifting from private scaffolding to shared infrastructure, and developing richer evaluation metrics."
      }
    }
  ],
  "publisher": {
    "@type": "Organization",
    "name": "arXiv",
    "url": "https://arxiv.org/abs/2604.08224"
  },
  "image": "page_1_image.png",
  "citation": "Zhou, C., Chai, H., Chen, W., Guo, Z., Shan, R., Song, Y., Xu, T., Yang, Y., Yu, A., Zhang, W., Zheng, C., Zhu, J., Zheng, Z., Zhang, Z., Lou, X., Zhang, C., Fu, Z., Wang, J., Liu, W., Lin, J., Zhang, W. (2026). Externalization in LLM Agents: A Unified Review of Memory, Skills, Protocols and Harness Engineering. arXiv preprint arXiv:2604.08224."
}