@base <https://claude.com/blog/improving-skill-creator-test-measure-and-refine-agent-skills> .
@prefix schema: <https://schema.org/> .
@prefix owl:    <http://www.w3.org/2002/07/owl#> .
@prefix rdf:    <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs:   <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd:    <http://www.w3.org/2001/XMLSchema#> .

# ── Article ──────────────────────────────────────────────────────────────────

# The blog post itself. The empty IRI <> resolves against @base, which is the
# post's canonical URL, so it yields the same IRI as the spelled-out form.
<#article>
    a schema:BlogPosting, schema:TechArticle ;

    # Where the post lives and who published it.
    schema:url              <> ;
    schema:mainEntityOfPage <> ;
    schema:publisher        <#publisher> ;
    schema:datePublished    "2026-03-03"^^xsd:date ;
    schema:inLanguage       "en" ;
    # ISO 8601 duration literal (five minutes).
    schema:timeRequired     "PT5M" ;

    # Titles, summary and body text. name and headline carry the same string.
    schema:name     "Improving skill-creator: Test, measure, and refine Agent Skills"@en ;
    schema:headline "Improving skill-creator: Test, measure, and refine Agent Skills"@en ;
    schema:keywords "Agent Skills, Claude, AI, skill-creator, Evals, Benchmark Mode, Multi-Agent"@en ;
    schema:abstract "Skill-creator has been enhanced to help authors verify their skills work properly, identify regressions, and improve descriptions without requiring coding knowledge."@en ;
    schema:articleBody "Skill-creator has been enhanced to help authors verify their skills work properly, identify regressions, and improve descriptions without requiring coding knowledge. These updates are available in Claude.ai, Cowork, and as a plugin for Claude Code. The article covers two kinds of skills, testing with evals, benchmark mode, multi-agent support, and skill description optimization."@en ;

    # Subjects of the post and its sections, in reading order.
    schema:about
        <#agent-skills> ,
        <#skill-creator> ,
        <#claude-ai> ;
    schema:hasPart
        <#section-two-kinds> ,
        <#section-testing-evals> ,
        <#section-benchmark-mode> ,
        <#section-multi-agent> ,
        <#section-description-optimization> ,
        <#section-getting-started> ;

    # External pages linked from the post.
    schema:relatedLink
        <https://claude.ai> ,
        <https://claude.com/claude-code> .

# ── Publisher ─────────────────────────────────────────────────────────────────

# Publishing organization, referenced from <#article> via schema:publisher.
<#publisher> a schema:Organization ;
    schema:name    "Anthropic"@en ;
    schema:url     <https://www.anthropic.com> ;
    # NOTE(review): sameAs repeats the schema:url value, so it adds no external
    # identity link. Consider pointing it at an authority record instead.
    schema:sameAs  <https://www.anthropic.com> ;
    rdfs:label     "Anthropic"@en .

# ── About entities ────────────────────────────────────────────────────────────

# The "Agent Skills" concept; one of the three schema:about targets of <#article>.
# name and rdfs:label carry the same string.
<#agent-skills> a schema:DefinedTerm ;
    schema:name        "Agent Skills"@en ;
    schema:description "Reusable skill bundles that encode specialized techniques and workflows for AI agents."@en ;
    rdfs:label         "Agent Skills"@en .

# The skill-creator tool; one of the three schema:about targets of <#article>.
<#skill-creator> a schema:SoftwareApplication ;
    schema:name                "skill-creator"@en ;
    schema:description         """A tool that enables authors to create, test, evaluate, and refine Agent Skills without requiring coding knowledge."""@en ;
    schema:applicationCategory "AI Development Tool"@en ;
    # NOTE(review): this is the same URL given for <#claude-ai>; confirm whether
    # skill-creator has a more specific landing page.
    schema:url                 <https://claude.ai> ;
    rdfs:label                 "skill-creator"@en .

# Claude; one of the three schema:about targets of <#article>, linked to an
# external DBpedia identifier.
<#claude-ai> a schema:SoftwareApplication ;
    schema:name        "Claude"@en ;
    schema:description "AI assistant developed by Anthropic, available at claude.ai."@en ;
    schema:url         <https://claude.ai> ;
    # DBpedia resource IRIs are minted under http://, and owl:sameAs matches IRIs
    # exactly, so the https:// spelling would name a different resource.
    # NOTE(review): confirm this resource exists in the current DBpedia release.
    owl:sameAs         <http://dbpedia.org/resource/Claude_(language_model)> ;
    rdfs:label         "Claude"@en .

# ── Article Sections ──────────────────────────────────────────────────────────

# One schema:ArticleSection per entry in the schema:hasPart list of <#article>.
# Every section repeats its title in schema:name and rdfs:label.
#
# NOTE(review): the schema:hasPart targets below are typed schema:DefinedTerm in
# this file, whereas schema.org lists CreativeWork as the expected value of
# hasPart. schema:mentions or schema:about may fit better; confirm with
# downstream consumers before changing the predicate.

# Section introducing the two skill categories.
<#section-two-kinds> a schema:ArticleSection ;
    schema:name        "Two Kinds of Skills"@en ;
    schema:description """Identifies two distinct skill categories: capability uplift skills and encoded preference skills."""@en ;
    schema:hasPart     <#capability-uplift-skills>, <#encoded-preference-skills> ;
    rdfs:label         "Two Kinds of Skills"@en .

# Section on evals; links the two eval-purpose terms.
<#section-testing-evals> a schema:ArticleSection ;
    schema:name        "Testing Skills with Evals"@en ;
    schema:description """Explains how skill-creator enables authors to create evaluations that test whether Claude performs as expected, with the PDF skill as a worked example."""@en ;
    schema:hasPart     <#eval-regression-detection>, <#eval-supersession-detection> ;
    rdfs:label         "Testing Skills with Evals"@en .

# Section on benchmark mode; links the term of the same name.
<#section-benchmark-mode> a schema:ArticleSection ;
    schema:name        "Benchmark Mode"@en ;
    schema:description """Describes a standardized assessment tool that tracks eval pass rates, elapsed time, and token usage, with results storable locally or in CI systems."""@en ;
    schema:hasPart     <#benchmark-mode> ;
    rdfs:label         "Benchmark Mode"@en .

# Section on parallel and comparator agents.
<#section-multi-agent> a schema:ArticleSection ;
    schema:name        "Multi-Agent Support"@en ;
    schema:description """Covers independent agents running evaluations in parallel within clean contexts, and comparator agents enabling A/B testing between skill versions."""@en ;
    schema:hasPart     <#multi-agent-support>, <#comparator-agents> ;
    rdfs:label         "Multi-Agent Support"@en .

# Section on tuning skill descriptions for triggering accuracy.
<#section-description-optimization> a schema:ArticleSection ;
    schema:name        "Skill Description Optimization"@en ;
    schema:description """Covers the tool that analyzes skill descriptions against sample prompts to reduce false positives and false negatives in skill triggering."""@en ;
    schema:hasPart     <#description-optimization> ;
    rdfs:label         "Skill Description Optimization"@en .

# Closing section; the only section without schema:hasPart links.
<#section-getting-started> a schema:ArticleSection ;
    schema:name        "Getting Started"@en ;
    schema:description "Explains how users can access all updates through Claude.ai, Cowork, or Claude Code."@en ;
    rdfs:label         "Getting Started"@en .

# ── Defined Terms ─────────────────────────────────────────────────────────────

# Glossary of concepts referenced from the article sections.

# Term set for the two skill categories. Membership is stated in both
# directions: hasDefinedTerm here, inDefinedTermSet on each member.
<#skill-types> a schema:DefinedTermSet ;
    schema:name           "Skill Types"@en ;
    schema:description    "Classification of Agent Skills into capability uplift and encoded preference categories."@en ;
    schema:hasDefinedTerm <#capability-uplift-skills>, <#encoded-preference-skills> ;
    rdfs:label            "Skill Types"@en .

# Member of <#skill-types>.
<#capability-uplift-skills> a schema:DefinedTerm ;
    schema:name             "Capability Uplift Skills"@en ;
    schema:description      """Skills that enhance Claude's abilities beyond base model performance by encoding specialized techniques, such as document creation workflows."""@en ;
    schema:inDefinedTermSet <#skill-types> ;
    rdfs:label              "Capability Uplift Skills"@en .

# Member of <#skill-types>.
<#encoded-preference-skills> a schema:DefinedTerm ;
    schema:name             "Encoded Preference Skills"@en ;
    schema:description      """Skills that document workflows where Claude handles individual components, but the skill sequences them according to team processes. Examples include NDA review and weekly update generation."""@en ;
    schema:inDefinedTermSet <#skill-types> ;
    rdfs:label              "Encoded Preference Skills"@en .

# The remaining terms are standalone: none of the statements shown here attach
# them to a schema:DefinedTermSet.

# Referenced from <#section-benchmark-mode>.
<#benchmark-mode> a schema:DefinedTerm ;
    schema:name        "Benchmark Mode"@en ;
    schema:description """A standardized assessment tool in skill-creator that tracks eval pass rates, elapsed time, and token usage. Results can be stored locally, integrated into dashboards, or plugged into CI systems."""@en ;
    rdfs:label         "Benchmark Mode"@en .

# Referenced from <#section-multi-agent>.
<#multi-agent-support> a schema:DefinedTerm ;
    schema:name        "Multi-Agent Support"@en ;
    schema:description """A feature allowing independent agents to run evaluations in parallel within clean contexts, eliminating cross-contamination between test runs."""@en ;
    rdfs:label         "Multi-Agent Support"@en .

# Referenced from <#section-multi-agent>.
<#comparator-agents> a schema:DefinedTerm ;
    schema:name        "Comparator Agents"@en ;
    schema:description "Agents that enable A/B testing between different skill versions or between a skill and a baseline approach."@en ;
    rdfs:label         "Comparator Agents"@en .

# Referenced from <#section-description-optimization>.
<#description-optimization> a schema:DefinedTerm ;
    schema:name        "Skill Description Optimization"@en ;
    schema:description """A tool that analyzes skill descriptions against sample prompts and suggests refinements to reduce false positives and false negatives. Improved triggering on five of six public document-creation skills."""@en ;
    rdfs:label         "Skill Description Optimization"@en .

# Referenced from <#section-testing-evals>.
<#eval-regression-detection> a schema:DefinedTerm ;
    schema:name        "Regression Detection"@en ;
    schema:description "The use of evals to catch quality regressions when AI models evolve."@en ;
    rdfs:label         "Regression Detection"@en .

# Referenced from <#section-testing-evals>.
<#eval-supersession-detection> a schema:DefinedTerm ;
    schema:name        "Supersession Detection"@en ;
    schema:description "The use of evals to identify when base model capabilities have superseded skill techniques."@en ;
    rdfs:label         "Supersession Detection"@en .

# ── Q&A ───────────────────────────────────────────────────────────────────────

# Ten question/answer pairs (<#qaN> / <#qaN-answer>). Each schema:Question
# carries the same string in schema:name and schema:text, links its answer via
# schema:acceptedAnswer, and names the article section it covers via
# schema:about. Each schema:Answer holds only its text and a label.

# qa1: about <#section-two-kinds>.
<#qa1> a schema:Question ;
    schema:name           "What are the two kinds of Agent Skills?"@en ;
    schema:text           "What are the two kinds of Agent Skills?"@en ;
    schema:acceptedAnswer <#qa1-answer> ;
    schema:about          <#section-two-kinds> ;
    rdfs:label            "Q: Two Kinds of Agent Skills"@en .

<#qa1-answer> a schema:Answer ;
    schema:text  """Capability uplift skills enhance Claude's abilities beyond base model performance, while encoded preference skills document workflows where Claude handles individual components sequenced by team processes."""@en ;
    rdfs:label   "A: Two Kinds of Agent Skills"@en .

# qa2: about <#section-benchmark-mode>.
<#qa2> a schema:Question ;
    schema:name           "What is Benchmark Mode in skill-creator?"@en ;
    schema:text           "What is Benchmark Mode in skill-creator?"@en ;
    schema:acceptedAnswer <#qa2-answer> ;
    schema:about          <#section-benchmark-mode> ;
    rdfs:label            "Q: Benchmark Mode"@en .

<#qa2-answer> a schema:Answer ;
    schema:text  """Benchmark Mode is a standardized assessment tool that tracks eval pass rates, elapsed time, and token usage. Results can be stored locally, integrated into dashboards, or plugged into CI systems."""@en ;
    rdfs:label   "A: Benchmark Mode"@en .

# qa3: about <#section-testing-evals>.
<#qa3> a schema:Question ;
    schema:name           "What are the two critical functions that evals serve?"@en ;
    schema:text           "What are the two critical functions that evals serve?"@en ;
    schema:acceptedAnswer <#qa3-answer> ;
    schema:about          <#section-testing-evals> ;
    rdfs:label            "Q: Eval Functions"@en .

<#qa3-answer> a schema:Answer ;
    schema:text  "Evals catch quality regressions when models evolve, and identify when base model capabilities have superseded skill techniques."@en ;
    rdfs:label   "A: Eval Functions"@en .

# qa4: about <#section-multi-agent>.
<#qa4> a schema:Question ;
    schema:name           "How does Multi-Agent Support eliminate cross-contamination?"@en ;
    schema:text           "How does Multi-Agent Support eliminate cross-contamination?"@en ;
    schema:acceptedAnswer <#qa4-answer> ;
    schema:about          <#section-multi-agent> ;
    rdfs:label            "Q: Multi-Agent Cross-Contamination"@en .

<#qa4-answer> a schema:Answer ;
    schema:text  """Independent agents run evaluations in parallel within clean contexts, ensuring each evaluation is isolated and unaffected by other concurrent test runs."""@en ;
    rdfs:label   "A: Multi-Agent Cross-Contamination"@en .

# qa5: about <#section-description-optimization>.
<#qa5> a schema:Question ;
    schema:name           "What does Skill Description Optimization improve?"@en ;
    schema:text           "What does Skill Description Optimization improve?"@en ;
    schema:acceptedAnswer <#qa5-answer> ;
    schema:about          <#section-description-optimization> ;
    rdfs:label            "Q: Description Optimization Goal"@en .

<#qa5-answer> a schema:Answer ;
    schema:text  """It reduces both false positives and false negatives in skill triggering. Testing across document-creation skills improved triggering on five of six public skills."""@en ;
    rdfs:label   "A: Description Optimization Goal"@en .

# qa6: about <#section-getting-started>.
<#qa6> a schema:Question ;
    schema:name           "Where can users access skill-creator updates?"@en ;
    schema:text           "Where can users access skill-creator updates?"@en ;
    schema:acceptedAnswer <#qa6-answer> ;
    schema:about          <#section-getting-started> ;
    rdfs:label            "Q: Accessing skill-creator"@en .

<#qa6-answer> a schema:Answer ;
    schema:text  """Users can access all updates by asking Claude to use skill-creator on Claude.ai or Cowork. Claude Code users can install the plugin or download from the official repository."""@en ;
    rdfs:label   "A: Accessing skill-creator"@en .

# qa7: second question about <#section-testing-evals>.
<#qa7> a schema:Question ;
    schema:name           "Which example demonstrated the value of evals?"@en ;
    schema:text           "Which example demonstrated the value of evals?"@en ;
    schema:acceptedAnswer <#qa7-answer> ;
    schema:about          <#section-testing-evals> ;
    rdfs:label            "Q: Eval Worked Example"@en .

<#qa7-answer> a schema:Answer ;
    schema:text  """The PDF skill example showed how evals isolated failures with non-fillable forms, leading to coordinate-based positioning fixes that resolved the issue."""@en ;
    rdfs:label   "A: Eval Worked Example"@en .

# qa8: second question about <#section-multi-agent>.
<#qa8> a schema:Question ;
    schema:name           "What do comparator agents enable?"@en ;
    schema:text           "What do comparator agents enable?"@en ;
    schema:acceptedAnswer <#qa8-answer> ;
    schema:about          <#section-multi-agent> ;
    rdfs:label            "Q: Comparator Agents"@en .

<#qa8-answer> a schema:Answer ;
    schema:text  "Comparator agents enable A/B testing between different skill versions or between a skill and a baseline approach, supporting rigorous quality comparison."@en ;
    rdfs:label   "A: Comparator Agents"@en .

# qa9: second question about <#section-two-kinds>.
<#qa9> a schema:Question ;
    schema:name           "What is an example of a capability uplift skill?"@en ;
    schema:text           "What is an example of a capability uplift skill?"@en ;
    schema:acceptedAnswer <#qa9-answer> ;
    schema:about          <#section-two-kinds> ;
    rdfs:label            "Q: Capability Uplift Example"@en .

<#qa9-answer> a schema:Answer ;
    schema:text  "Document creation skills exemplify capability uplift by encoding specialized techniques that enhance output beyond what the base model produces unaided."@en ;
    rdfs:label   "A: Capability Uplift Example"@en .

# qa10: third question about <#section-two-kinds>.
<#qa10> a schema:Question ;
    schema:name           "What is an example of an encoded preference skill?"@en ;
    schema:text           "What is an example of an encoded preference skill?"@en ;
    schema:acceptedAnswer <#qa10-answer> ;
    schema:about          <#section-two-kinds> ;
    rdfs:label            "Q: Encoded Preference Example"@en .

<#qa10-answer> a schema:Answer ;
    schema:text  """NDA review workflows and weekly update generation are encoded preference skills: Claude handles each component, but the skill sequences them according to team-specific processes."""@en ;
    rdfs:label   "A: Encoded Preference Example"@en .