{
  "@context": {
    "@vocab": "http://schema.org/",
    "schema": "http://schema.org/"
  },
  "@type": "Article",
  "headline": "Agentic Memory: Learning Unified Long-Term and Short-Term Memory Management for Large Language Model Agents",
  "author": [
    {
      "@type": "Person",
      "name": "Yi Yu",
      "affiliation": {
        "@type": "Organization",
        "name": "School of Cyber Science and Engineering, Wuhan University"
      },
      "email": "yui1212@whu.edu.cn"
    },
    {
      "@type": "Person",
      "name": "Liuyi Yao",
      "affiliation": {
        "@type": "Organization",
        "name": "Alibaba Group"
      },
      "email": "yly287738@alibaba-inc.com"
    },
    {
      "@type": "Person",
      "name": "Yuexiang Xie",
      "affiliation": {
        "@type": "Organization",
        "name": "Alibaba Group"
      },
      "email": "yuexiang.xyx@alibaba-inc.com"
    },
    {
      "@type": "Person",
      "name": "Qingquan Tan",
      "affiliation": {
        "@type": "Organization",
        "name": "School of Cyber Science and Engineering, Wuhan University"
      },
      "email": "tanqingquan@whu.edu.cn"
    },
    {
      "@type": "Person",
      "name": "Jiaqi Feng",
      "affiliation": {
        "@type": "Organization",
        "name": "School of Cyber Science and Engineering, Wuhan University"
      },
      "email": "jiaqiFeng@whu.edu.cn"
    },
    {
      "@type": "Person",
      "name": "Yaliang Li",
      "affiliation": {
        "@type": "Organization",
        "name": "Alibaba Group"
      },
      "email": "yaliang.li@alibaba-inc.com"
    },
    {
      "@type": "Person",
      "name": "Libing Wu",
      "affiliation": {
        "@type": "Organization",
        "name": "School of Cyber Science and Engineering, Wuhan University"
      },
      "email": "wu@whu.edu.cn"
    }
  ],
  "publisher": {
    "@type": "Organization",
    "name": "Wuhan University and Alibaba Group",
    "location": "China"
  },
  "datePublished": "2026-04-30",
  "abstract": "Agentic Memory (AgeMem) is a unified framework integrating long-term memory (LTM) and short-term memory (STM) management directly into large language model (LLM) agents' policies. It enables autonomous memory operations as tool-based actions, trained via a three-stage progressive reinforcement learning strategy with step-wise Group Relative Policy Optimization (GRPO). AgeMem outperforms strong baselines on five long-horizon benchmarks, improving task performance, memory quality, and context efficiency.",
  "articleBody": "AgeMem unifies LTM and STM management in LLM agents via tool-based memory operations, trained end-to-end with a progressive RL strategy and step-wise GRPO, validated on diverse benchmarks.",
  "hasPart": [
    {
      "@type": "DefinedTermSet",
      "name": "Memory Management Tools",
      "description": "Tools exposed to the LLM agent for managing long-term and short-term memory.",
      "hasDefinedTerm": [
        {
          "@type": "DefinedTerm",
          "name": "ADD",
          "description": "Adds new knowledge to long-term memory store Mt."
        },
        {
          "@type": "DefinedTerm",
          "name": "UPDATE",
          "description": "Modifies existing entries in long-term memory Mt."
        },
        {
          "@type": "DefinedTerm",
          "name": "DELETE",
          "description": "Removes entries from long-term memory Mt."
        },
        {
          "@type": "DefinedTerm",
          "name": "RETRIEVE",
          "description": "Retrieves relevant entries from long-term memory Mt into short-term context Ct."
        },
        {
          "@type": "DefinedTerm",
          "name": "SUMMARY",
          "description": "Summarizes segments in short-term context Ct to reduce token usage while preserving key information."
        },
        {
          "@type": "DefinedTerm",
          "name": "FILTER",
          "description": "Filters out irrelevant or distracting segments from short-term context Ct based on semantic similarity."
        }
      ]
    },
    {
      "@type": "HowTo",
      "name": "Three-Stage Progressive Reinforcement Learning Strategy",
      "description": "Training strategy for AgeMem to progressively learn unified memory management.",
      "step": [
        {
          "@type": "HowToStep",
          "position": 1,
          "name": "Stage 1: Long-Term Memory Construction",
          "text": "Agent interacts with contextual information, learns to store salient knowledge into LTM using ADD, UPDATE, DELETE tools."
        },
        {
          "@type": "HowToStep",
          "position": 2,
          "name": "Stage 2: Short-Term Memory Control under Distractors",
          "text": "Agent manages STM by filtering and summarizing context to suppress irrelevant distractors, maintaining task-relevant information."
        },
        {
          "@type": "HowToStep",
          "position": 3,
          "name": "Stage 3: Integrated Reasoning and Memory Coordination",
          "text": "Agent retrieves from LTM, manages STM, and generates final answers, coordinating memory operations end-to-end."
        }
      ]
    },
    {
      "@type": "HowTo",
      "name": "Step-wise Group Relative Policy Optimization (GRPO)",
      "description": "Optimization method to propagate terminal rewards back to all intermediate memory decisions across stages.",
      "step": [
        {
          "@type": "HowToStep",
          "position": 1,
          "name": "Group Rollouts",
          "text": "Generate multiple rollouts per task to form a group for reward normalization."
        },
        {
          "@type": "HowToStep",
          "position": 2,
          "name": "Reward Normalization",
          "text": "Normalize terminal rewards within each group to zero mean and unit variance."
        },
        {
          "@type": "HowToStep",
          "position": 3,
          "name": "Advantage Broadcasting",
          "text": "Broadcast normalized advantages to all timesteps in the trajectory for consistent credit assignment."
        },
        {
          "@type": "HowToStep",
          "position": 4,
          "name": "Policy Update",
          "text": "Update policy parameters by maximizing expected advantage with KL divergence regularization."
        }
      ]
    },
    {
      "@type": "Question",
      "name": "What is the main limitation of existing LTM and STM memory management in LLM agents?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "Existing methods treat LTM and STM as separate components with heuristic or auxiliary controllers, limiting adaptability and end-to-end optimization."
      }
    },
    {
      "@type": "Question",
      "name": "How does AgeMem unify memory management?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "AgeMem integrates LTM and STM management directly into the agent's policy via tool-based memory operations, enabling autonomous decisions on storing, retrieving, updating, summarizing, and discarding information."
      }
    },
    {
      "@type": "Question",
      "name": "What are the six memory management tools in AgeMem?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "ADD, UPDATE, DELETE for LTM; RETRIEVE, SUMMARY, FILTER for STM."
      }
    },
    {
      "@type": "Question",
      "name": "What is the purpose of the three-stage progressive RL strategy?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "To progressively train the agent to first acquire LTM storage, then STM context management under distractors, and finally coordinate both for task execution."
      }
    },
    {
      "@type": "Question",
      "name": "How does the step-wise GRPO address sparse and discontinuous rewards?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "By normalizing terminal rewards across groups of rollouts and broadcasting advantages to all intermediate steps, enabling long-range credit assignment."
      }
    },
    {
      "@type": "Question",
      "name": "What datasets were used to evaluate AgeMem?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "ALFWorld, SciWorld, PDDL, BabyAI, and HotpotQA."
      }
    },
    {
      "@type": "Question",
      "name": "What metrics were used for evaluation?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "Success Rate, Progress Rate, LLM-as-a-Judge for task completion, and Memory Quality (MQ) for stored memory relevance."
      }
    },
    {
      "@type": "Question",
      "name": "How does AgeMem perform compared to baselines?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "AgeMem consistently outperforms strong baselines across multiple LLM backbones and datasets, improving task performance, memory quality, and context efficiency."
      }
    },
    {
      "@type": "Question",
      "name": "What are the main components of the reward function?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "Task completion reward, context management reward (compression, preventive, preservation), memory management reward (storage quality, maintenance, semantic relevance), and penalty terms."
      }
    },
    {
      "@type": "Question",
      "name": "What is the role of the FILTER tool in STM management?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "FILTER removes irrelevant or distracting messages from the short-term context based on semantic similarity thresholds, helping maintain focused context."
      }
    },
    {
      "@type": "Question",
      "name": "How does AgeMem handle memory updates and deletions?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "The agent learns to update existing memory entries when new information supersedes old knowledge and delete obsolete or incorrect memories to maintain memory quality."
      }
    },
    {
      "@type": "Question",
      "name": "What is the significance of the three-stage trajectory structure?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "It temporally separates information acquisition, interference with distractors, and task execution, enabling effective training of unified memory management."
      }
    },
    {
      "@type": "Question",
      "name": "How does AgeMem reduce context token usage?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "By learning to summarize and filter short-term context proactively, AgeMem reduces prompt token counts compared to retrieval-augmented generation (RAG) baselines."
      }
    },
    {
      "@type": "Question",
      "name": "What is the impact of reinforcement learning on AgeMem's tool usage?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "RL training increases the use of ADD and UPDATE operations for LTM and balances STM tool usage, notably increasing FILTER calls for proactive context control."
      }
    },
    {
      "@type": "Question",
      "name": "What are the main limitations of AgeMem?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "Fixed set of memory tools limits fine-grained control; evaluation is limited to controlled benchmarks rather than open-ended real-world scenarios; training relies on HotpotQA for curriculum."
      }
    },
    {
      "@type": "AudioObject",
      "name": "Agentic Memory Presentation Audio",
      "description": "Audio summary of the Agentic Memory framework and its evaluation results.",
      "thumbnailUrl": "https://example.com/audio-thumbnail.jpg",
      "uploadDate": "2026-05-01",
      "contentUrl": "https://example.com/agentic-memory-audio.mp3",
      "embedUrl": "https://example.com/embed/agentic-memory-audio"
    }
  ]
}