{
  "@context": {
    "@vocab": "http://schema.org/",
    "dcterms": "http://purl.org/dc/terms/",
    "schema": "http://schema.org/",
    "xsd": "http://www.w3.org/2001/XMLSchema#",
    "hasPart": {
      "@id": "schema:hasPart",
      "@type": "@id"
    },
    "about": {
      "@id": "schema:about",
      "@type": "@id"
    },
    "mainEntity": {
      "@id": "schema:mainEntity",
      "@type": "@id"
    },
    "author": {
      "@id": "schema:author",
      "@type": "@id"
    },
    "publisher": {
      "@id": "schema:publisher",
      "@type": "@id"
    },
    "question": {
      "@id": "schema:question",
      "@type": "@id"
    },
    "acceptedAnswer": {
      "@id": "schema:acceptedAnswer",
      "@type": "@id"
    },
    "termCode": {
      "@id": "schema:termCode",
      "@type": "xsd:string"
    },
    "position": {
      "@id": "schema:position",
      "@type": "xsd:integer"
    },
    "label": "schema:label",
    "description": "schema:description"
  },
  "@type": "ScholarlyArticle",
  "@id": "#article",
  "headline": "Identity as Attractor: Geometric Evidence for Persistent Agent Architecture in LLM Activation Space",
  "author": {
    "@type": "Person",
    "name": "Vladimir Vasilenko",
    "affiliation": {
      "@type": "Organization",
      "name": "Independent Researcher, Rapallo, Italy"
    },
    "email": "b102e@proton.me",
    "url": "https://github.com/b102e/yar-attractor-experiment"
  },
  "datePublished": "2026-04-13",
  "abstract": "Large language models have been shown to map semantically related prompts to similar internal representations at specific layers — a phenomenon interpretable as conceptual attractor dynamics. This study presents geometric evidence that identity documents of persistent cognitive agents induce attractor-like geometry in LLM activation space, supporting persistent agent architectures.",
  "keywords": [
    "persistent cognitive agents",
    "LLM activation space",
    "representational attractors",
    "identity documents",
    "mechanistic interpretability"
  ],
  "publisher": {
    "@type": "Organization",
    "name": "arXiv",
    "url": "https://arxiv.org/abs/2604.12016"
  },
  "articleBody": "This paper presents a controlled experiment on Llama 3.1 8B Instruct and Gemma 2 9B Instruct, showing that semantically equivalent paraphrases of a persistent cognitive agent's identity document cluster tightly in activation space, consistent with attractor-like geometry. The study includes ablation and replication experiments, and exploratory behavioral steering tests.",
  "hasPart": [
    {
      "@type": "DefinedTermSet",
      "@id": "#definedTerms",
      "name": "Defined Terms in the Article",
      "description": "Key terms defined or used with specific meaning in the article.",
      "hasDefinedTerm": [
        {
          "@type": "DefinedTerm",
          "termCode": "persistent cognitive agent",
          "name": "Persistent Cognitive Agent",
          "description": "AI systems designed to maintain memory, identity, and behavioral continuity across sessions."
        },
        {
          "@type": "DefinedTerm",
          "termCode": "cognitive_core",
          "name": "Cognitive Core",
          "description": "A structured identity document specifying an agent's identity, priorities, reasoning style, and memory architecture."
        },
        {
          "@type": "DefinedTerm",
          "termCode": "attractor-like geometry",
          "name": "Attractor-like Geometry",
          "description": "Representational clustering in activation space consistent with contractive dynamics but measured geometrically."
        },
        {
          "@type": "DefinedTerm",
          "termCode": "mean-pooled hidden states",
          "name": "Mean-pooled Hidden States",
          "description": "Aggregate hidden state vectors averaged over all tokens in a sequence."
        },
        {
          "@type": "DefinedTerm",
          "termCode": "semantic paraphrases",
          "name": "Semantic Paraphrases",
          "description": "Linguistically diverse rewrites of a document preserving full semantic content."
        },
        {
          "@type": "DefinedTerm",
          "termCode": "control agent prompts",
          "name": "Control Agent Prompts",
          "description": "Structurally matched documents describing semantically distant agents used as controls."
        },
        {
          "@type": "DefinedTerm",
          "termCode": "cosine distance",
          "name": "Cosine Distance",
          "description": "A measure of dissimilarity between two vectors in activation space."
        },
        {
          "@type": "DefinedTerm",
          "termCode": "semantic distillation",
          "name": "Semantic Distillation",
          "description": "A minimal summary capturing the semantic essence of a cognitive_core."
        },
        {
          "@type": "DefinedTerm",
          "termCode": "activation steering",
          "name": "Activation Steering",
          "description": "Manipulating model activations to steer behavior toward a target identity or persona."
        },
        {
          "@type": "DefinedTerm",
          "termCode": "Iterated Function System (IFS)",
          "name": "Iterated Function System (IFS)",
          "description": "A formalism describing transformer layers as contractive mappings toward concept-specific attractors."
        }
      ]
    },
    {
      "@type": "Question",
      "@id": "#q1",
      "name": "What is the main hypothesis tested in the paper?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "The primary hypothesis is that semantically equivalent paraphrases of a cognitive_core converge to a tighter cluster in hidden state space than structurally matched documents describing semantically distant agents, at intermediate and late transformer layers."
      }
    },
    {
      "@type": "Question",
      "@id": "#q2",
      "name": "Which models were used in the experiments?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "The experiments used Llama 3.1 8B Instruct and Gemma 2 9B Instruct models."
      }
    },
    {
      "@type": "Question",
      "@id": "#q3",
      "name": "How were hidden states extracted and analyzed?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "Mean-pooled hidden states were extracted at layers 8, 16, and 24 by averaging hidden states over all tokens in the input sequence."
      }
    },
    {
      "@type": "Question",
      "@id": "#q4",
      "name": "What statistical tests were used to validate the results?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "One-sided Welch's t-tests with Bonferroni correction, permutation tests, Mann-Whitney U tests, and bootstrap confidence intervals were used."
      }
    },
    {
      "@type": "Question",
      "@id": "#q5",
      "name": "What were the main findings regarding representational clustering?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "Paraphrases of the cognitive_core formed significantly tighter clusters than control prompts across all tested layers, with large effect sizes (Cohen's d > 1.88) and extremely significant p-values (p < 10^-27)."
      }
    },
    {
      "@type": "Question",
      "@id": "#q6",
      "name": "What did the ablation studies reveal about the role of semantic content and structure?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "Ablations showed that semantic content drives the primary effect, while structural markers contribute only a small fraction. Structural completeness is required to reach the full attractor region."
      }
    },
    {
      "@type": "Question",
      "@id": "#q7",
      "name": "How does the distilled cognitive_core compare to the full document in activation space?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "The distilled cognitive_core lies closer to the attractor centroid than random excerpts but remains more distant than the full document cluster, indicating a hierarchy: random excerpts > semantic distillation > full document."
      }
    },
    {
      "@type": "Question",
      "@id": "#q8",
      "name": "What behavioral evidence supports the geometric attractor interpretation?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "An exploratory steering experiment injecting a semantic steering vector into activations partially improved behavioral scores on memory continuity and agent-like responses, supporting a connection between representational geometry and behavior."
      }
    },
    {
      "@type": "Question",
      "@id": "#q9",
      "name": "Is paraphrase clustering specific to agent identity documents?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "Paraphrase clustering is a general property of LLMs for semantically coherent documents, but the cognitive_core clusters more tightly than simpler control agents, consistent with richer specification producing more specific representational fingerprints."
      }
    },
    {
      "@type": "Question",
      "@id": "#q10",
      "name": "What limitations does the study acknowledge?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "Limitations include small sample size, testing only two model families, potential residual structural confounds, reliance on mean pooling, and behavioral evaluation limited to keyword-based scoring on a small prompt set."
      }
    },
    {
      "@type": "HowTo",
      "@id": "#howto1",
      "name": "How to extract mean-pooled hidden states from LLM layers",
      "description": "Steps to extract and save mean-pooled hidden states for analysis.",
      "step": [
        {
          "@type": "HowToStep",
          "position": 1,
          "label": "Load the model",
          "description": "Load the LLM (e.g., Llama 3.1 8B Instruct) with output_hidden_states enabled and set random seed for reproducibility."
        },
        {
          "@type": "HowToStep",
          "position": 2,
          "label": "Tokenize input document",
          "description": "Tokenize the input text document to prepare for model input."
        },
        {
          "@type": "HowToStep",
          "position": 3,
          "label": "Perform forward pass",
          "description": "Run a single forward pass through the model to obtain hidden states at all layers."
        },
        {
          "@type": "HowToStep",
          "position": 4,
          "label": "Mean-pool hidden states",
          "description": "For each target layer (e.g., layers 8, 16, 24), average the hidden states across all tokens to get a single vector."
        },
        {
          "@type": "HowToStep",
          "position": 5,
          "label": "Save vectors",
          "description": "Save the mean-pooled vectors to disk as .npy files for later analysis."
        }
      ]
    },
    {
      "@type": "HowTo",
      "@id": "#howto2",
      "name": "How to compute cosine distances for representational clustering",
      "description": "Steps to compute within-group and between-group cosine distances for activation vectors.",
      "step": [
        {
          "@type": "HowToStep",
          "position": 1,
          "label": "Collect activation vectors",
          "description": "Gather mean-pooled hidden state vectors for all documents under each condition."
        },
        {
          "@type": "HowToStep",
          "position": 2,
          "label": "Compute within-group distances",
          "description": "Calculate all unique pairwise cosine distances among vectors within the combined original and paraphrase group (Condition A+B)."
        },
        {
          "@type": "HowToStep",
          "position": 3,
          "label": "Compute between-group distances",
          "description": "Calculate all pairwise cosine distances between vectors in the original+paraphrase group (A+B) and control group (C)."
        },
        {
          "@type": "HowToStep",
          "position": 4,
          "label": "Compute distances for distilled core",
          "description": "Calculate cosine distance from the distilled cognitive_core vector (Condition D) to the centroid of the A+B group."
        }
      ]
    },
    {
      "@type": "HowTo",
      "@id": "#howto3",
      "name": "How to perform statistical validation of clustering results",
      "description": "Steps to statistically test whether within-group distances are significantly smaller than between-group distances.",
      "step": [
        {
          "@type": "HowToStep",
          "position": 1,
          "label": "Apply Welch's t-test",
          "description": "Perform a one-sided Welch's t-test comparing within-group and between-group cosine distances, with Bonferroni correction for multiple layers."
        },
        {
          "@type": "HowToStep",
          "position": 2,
          "label": "Conduct permutation tests",
          "description": "Run permutation tests with a large number of permutations (e.g., 10,000) to validate significance without normality assumptions."
        },
        {
          "@type": "HowToStep",
          "position": 3,
          "label": "Use Mann-Whitney U test",
          "description": "Apply Mann-Whitney U tests as a non-parametric alternative to confirm results."
        },
        {
          "@type": "HowToStep",
          "position": 4,
          "label": "Compute effect sizes and confidence intervals",
          "description": "Calculate Cohen's d for effect size and bootstrap 95% confidence intervals for robustness."
        }
      ]
    }
  ]
}