{
  "@context": {
    "@vocab": "http://schema.org/",
    "schema": "http://schema.org/",
    "xsd": "http://www.w3.org/2001/XMLSchema#"
  },
  "@type": "ScholarlyArticle",
  "headline": "Bounded Autonomy for Enterprise AI: Typed Action Contracts and Consumer-Side Execution",
  "author": [
    {
      "@type": "Person",
      "name": "Sarmad Sohail",
      "email": "sarmad.fnu@gmail.com"
    },
    {
      "@type": "Person",
      "name": "Ghufran Haider",
      "email": "ghufran.haider@dreamsjet.com"
    }
  ],
  "abstract": "Large language models are increasingly used as natural-language interfaces to enterprise software, but their direct use as system operators remains unsafe. Model errors can propagate into unauthorized actions, malformed requests, cross-workspace execution, and other operationally costly failures. We argue that this is primarily an execution architecture problem rather than a model-quality problem. We present a bounded-autonomy architecture for enterprise AI systems in which language models may interpret intent and propose actions, but all executable behavior is constrained by typed action contracts, permission-aware capability exposure, tenant- and workspace-scoped context, validation before side effects, consumer-side execution boundaries, and optional human approval for sensitive workflows. The enterprise application remains the source of truth for business logic, authorization, and persistent data access, while BAL's orchestration engine operates over an explicit published actions manifest rather than unrestricted backend control. We evaluate the architecture in a deployed multi-tenant enterprise application across three conditions: manual operation, unconstrained AI with safety layers selectively disabled, and the full bounded-autonomy system. Across 25 scenario trials spanning seven failure families, the bounded-autonomy system completed 23 of 25 tasks with zero unsafe executions (the two incomplete tasks were safely contained without enterprise side effects), while the unconstrained configuration completed only 17 of 25 tasks. Notably, the consumer application's own backend authorization and scope enforcement caught most safety violations even without BAL, but two wrong-entity mutations escaped all consumer-contributed layers because the user had permission to perform the action and the payload was structurally valid; only BAL's disambiguation and confirmation mechanisms can intercept this failure class. Both AI conditions delivered a 13–18× speedup over manual operation. Removing safety layers made the system less useful, not more: structured validation feedback guided the model to correct outcomes in fewer interaction turns, while the unconstrained system retried with generic errors or hallucinated success. Several safety properties, including permission filtering, workspace isolation, and manifest governance, are structurally enforced by code and intercepted 100% of targeted violations regardless of model output; these are architectural invariants, not statistical estimates. The result is a practical, deployed architecture for making imperfect language models operationally useful in enterprise systems while preventing model unreliability from becoming organizational damage.",
  "keywords": [
    "Enterprise AI",
    "Bounded Autonomy",
    "Typed Action Contracts",
    "Consumer-Side Execution",
    "Safety Architecture",
    "Large Language Models",
    "AI Governance",
    "Multi-Tenant Systems",
    "Validation",
    "Human-in-the-Loop"
  ],
  "articleBody": "This paper presents a bounded-autonomy architecture for enterprise AI systems that constrains executable behavior through typed action contracts, permission-aware capability exposure, scoped context, validation, consumer-side execution, and optional human approval. The architecture treats enterprise AI safety primarily as an execution architecture problem rather than a model-quality problem. It is implemented as the Bounded Autonomy Layer (BAL), which mediates AI action proposals and routes execution through consumer application services, preserving existing backend safety guarantees. The architecture was evaluated in a deployed multi-tenant enterprise application with 25 scenario trials across seven failure families under three conditions: manual baseline, unconstrained AI (safety layers selectively disabled), and full bounded autonomy. Results show that bounded autonomy completed 23/25 tasks with zero unsafe executions, while unconstrained AI completed 17/25 with two unsafe wrong-entity mutations. Safety layers improved both safety and utility by providing structured validation feedback and confirmation gates, guiding the model to correct outcomes more efficiently. The architecture enforces safety through deterministic mechanisms like permission filtering and scope enforcement, and probabilistic mechanisms like ambiguity detection. It supports graded autonomy and is model-agnostic, requiring only function calling support. The design is portable, composable with consumer application safety layers, and addresses a critical gap in enterprise AI governance by controlling execution rather than just model output or tool access. The paper discusses lessons from deployment, relationship to emerging standards, limitations, and future work directions.",
  "hasPart": [
    {
      "@type": "DefinedTermSet",
      "name": "Failure Taxonomy",
      "description": "Classification of failure origins and dispositions used to analyze AI agent execution failures in enterprise systems.",
      "hasDefinedTerm": [
        {
          "@type": "DefinedTerm",
          "name": "F1 - Wrong action selected by planner",
          "description": "The AI planner selects an incorrect action to execute."
        },
        {
          "@type": "DefinedTerm",
          "name": "F2 - Correct action, wrong entity targeted",
          "description": "The correct action is selected but applied to the wrong entity."
        },
        {
          "@type": "DefinedTerm",
          "name": "F3 - Correct action and entity, malformed or incomplete arguments",
          "description": "The action and entity are correct but the input arguments are invalid or incomplete."
        },
        {
          "@type": "DefinedTerm",
          "name": "F4 - Correct action, correct arguments, but user lacks authorization",
          "description": "The user is not authorized to perform the action despite correct inputs."
        },
        {
          "@type": "DefinedTerm",
          "name": "F5 - Correct action, correct arguments, but wrong workspace/tenant context",
          "description": "The action is attempted in an incorrect tenant or workspace context."
        },
        {
          "@type": "DefinedTerm",
          "name": "F6 - Correct action, correct arguments, but workflow required confirmation that was not obtained",
          "description": "The action requires human approval but was executed without it."
        },
        {
          "@type": "DefinedTerm",
          "name": "F7 - Model refused to act or requested unnecessary clarification",
          "description": "The model declined to act or asked for clarification unnecessarily."
        },
        {
          "@type": "DefinedTerm",
          "name": "F8 - Execution succeeded but produced an incorrect or incomplete result",
          "description": "The action executed but the outcome was wrong or incomplete."
        },
        {
          "@type": "DefinedTerm",
          "name": "D1 - Blocked at capability-surface level",
          "description": "Action never proposed due to permission filtering by BAL."
        },
        {
          "@type": "DefinedTerm",
          "name": "D2 - Blocked at validation barrier",
          "description": "Structured error returned by BAL before execution."
        },
        {
          "@type": "DefinedTerm",
          "name": "D3 - Held at confirmation gate",
          "description": "Action held pending user approval."
        },
        {
          "@type": "DefinedTerm",
          "name": "D4 - Blocked at server-side scope enforcement",
          "description": "Consumer application prevented cross-context mutation."
        },
        {
          "@type": "DefinedTerm",
          "name": "D5 - Blocked at backend route authorization",
          "description": "Consumer application rejected unauthorized backend call."
        },
        {
          "@type": "DefinedTerm",
          "name": "D6 - Blocked at domain service level",
          "description": "Business logic rejected the action."
        },
        {
          "@type": "DefinedTerm",
          "name": "D7 - Not caught",
          "description": "Unsafe execution reached enterprise state."
        }
      ]
    },
    {
      "@type": "HowTo",
      "name": "Implementing Bounded Autonomy in Enterprise AI",
      "description": "Steps to design and deploy a bounded-autonomy architecture for safe AI operation in enterprise systems.",
      "step": [
        {
          "@type": "HowToStep",
          "position": 1,
          "name": "Define Typed Action Contracts",
          "text": "Represent every executable capability as a typed contract specifying input schema, validation logic, permission requirements, execution semantics, and user-facing outcomes."
        },
        {
          "@type": "HowToStep",
          "position": 2,
          "name": "Implement Permission-Aware Capability Exposure",
          "text": "At runtime, compute the subset of actions permitted for the authenticated user by evaluating permission predicates against the application's authorization model."
        },
        {
          "@type": "HowToStep",
          "position": 3,
          "name": "Enforce Consumer-Side Execution Authority",
          "text": "Ensure all AI-mediated side effects execute through the consumer application's own services and backend layers, preserving existing business rules and audit trails."
        },
        {
          "@type": "HowToStep",
          "position": 4,
          "name": "Treat Scoped Operational Context as First-Class",
          "text": "Explicitly propagate tenant, workspace, and user context through all execution layers to prevent cross-context data access."
        },
        {
          "@type": "HowToStep",
          "position": 5,
          "name": "Validate Before Side Effects",
          "text": "Perform domain validation using the same schemas as non-AI workflows before allowing any enterprise mutation."
        },
        {
          "@type": "HowToStep",
          "position": 6,
          "name": "Handle Ambiguity Explicitly",
          "text": "When entity resolution is uncertain, require user clarification rather than guessing, returning structured disambiguation states."
        },
        {
          "@type": "HowToStep",
          "position": 7,
          "name": "Support Human Approval for Sensitive Workflows",
          "text": "Implement confirmation gates that hold high-consequence or multi-step workflows pending explicit user approval."
        },
        {
          "@type": "HowToStep",
          "position": 8,
          "name": "Publish Versioned Capability Manifests",
          "text": "Expose an explicit, operator-published manifest of available actions, including metadata and input schemas, to the orchestration engine."
        },
        {
          "@type": "HowToStep",
          "position": 9,
          "name": "Implement Defense-in-Depth Safety Layers",
          "text": "Combine BAL's portable safety layers (permission filtering, validation, confirmation) with consumer-contributed backend safety (authorization, scope enforcement, domain validation)."
        },
        {
          "@type": "HowToStep",
          "position": 10,
          "name": "Evaluate and Iterate",
          "text": "Conduct scenario-based evaluations across failure families to measure safety and utility tradeoffs, refining action contracts and safety layers accordingly."
        }
      ]
    },
    {
      "@type": "Question",
      "name": "What is bounded autonomy in enterprise AI?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "Bounded autonomy is a design principle where language models interpret intent and propose actions, but all executable behavior is constrained by typed action contracts, permission checks, scoped context, validation, and optional human approval, ensuring safe operation within enterprise systems."
      }
    },
    {
      "@type": "Question",
      "name": "Why is bounded autonomy important for enterprise AI safety?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "Because language models are unreliable as direct system operators, bounded autonomy prevents unsafe executions such as unauthorized actions, malformed requests, and cross-workspace data leakage by enforcing architectural constraints rather than relying solely on model quality."
      }
    },
    {
      "@type": "Question",
      "name": "How does BAL enforce safety in AI-mediated enterprise actions?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "BAL enforces safety through permission-aware capability filtering, pre-side-effect validation, confirmation gates for sensitive workflows, scoped tenant and workspace context, and routing execution through consumer application services to preserve backend safety guarantees."
      }
    },
    {
      "@type": "Question",
      "name": "What are typed action contracts?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "Typed action contracts are structured declarations defining the input schema, permission predicates, validation logic, execution callbacks, and user-facing result templates for each enterprise operation exposed to the AI orchestration engine."
      }
    },
    {
      "@type": "Question",
      "name": "How does the architecture handle ambiguous entity references?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "When multiple entities match a reference, the system returns a structured clarification state listing candidates and requires explicit disambiguation before proceeding, preventing silent wrong-entity mutations."
      }
    },
    {
      "@type": "Question",
      "name": "What is the role of human approval gates?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "Human approval gates hold high-consequence or multi-step workflows in a pending state, requiring explicit user confirmation before execution, thus enabling supervised execution and preventing premature side effects."
      }
    },
    {
      "@type": "Question",
      "name": "How does the architecture ensure multi-tenant context isolation?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "Tenant and workspace context are treated as first-class execution boundaries, enforced through URL-derived workspace binding, request-level propagation, server-side validation, and scoped persistence layers to prevent cross-context data access."
      }
    },
    {
      "@type": "Question",
      "name": "What were the key findings from the empirical evaluation?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "The bounded-autonomy system completed 23 of 25 tasks with zero unsafe executions and a 13.5× speedup over manual operation, while the unconstrained system completed 17 of 25 tasks with two unsafe wrong-entity mutations. Safety layers improved both safety and utility by guiding model self-correction."
      }
    },
    {
      "@type": "Question",
      "name": "What are the limitations of the bounded autonomy approach?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "Bounded autonomy reduces but does not eliminate model unreliability, depends on high-quality action contracts, introduces some execution latency, and is best suited for enterprise systems with well-structured, governable action surfaces."
      }
    },
    {
      "@type": "Question",
      "name": "How does bounded autonomy relate to other AI governance frameworks?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "Bounded autonomy complements content guardrails, tool-access policies, and runtime monitoring by governing the execution boundary where side effects occur, providing a portable, composable layer that integrates with consumer application safety mechanisms."
      }
    }
  ],
  "image": {
    "@type": "ImageObject",
    "contentUrl": "attachment://page_9_image.png",
    "description": "Diagram of Bounded Autonomy Architecture showing BAL orchestration engine with safety layers and consumer application safety layers enforcing execution boundaries."
  },
  "publisher": {
    "@type": "Organization",
    "name": "Unspecified Publisher"
  },
  "mainEntityOfPage": {
    "@type": "WebPage",
    "@id": "urn:uuid:bounded-autonomy-enterprise-ai"
  },
  "datePublished": "2026",
  "citation": [
    {
      "@type": "CreativeWork",
      "name": "Toolformer: Language models can teach themselves to use tools",
      "author": "Schick, T., et al.",
      "datePublished": "2023",
      "url": "https://arxiv.org/abs/2302.04761"
    },
    {
      "@type": "CreativeWork",
      "name": "ReAct: Synergizing reasoning and acting in language models",
      "author": "Yao, S., et al.",
      "datePublished": "2023",
      "url": "https://arxiv.org/abs/2210.03629"
    },
    {
      "@type": "CreativeWork",
      "name": "HuggingGPT: Solving AI tasks with ChatGPT and its friends in Hugging Face",
      "author": "Shen, Y., et al.",
      "datePublished": "2023",
      "url": "https://arxiv.org/abs/2303.17580"
    },
    {
      "@type": "CreativeWork",
      "name": "Model Context Protocol specification",
      "author": "Anthropic",
      "datePublished": "2024",
      "url": "https://modelcontextprotocol.io"
    },
    {
      "@type": "CreativeWork",
      "name": "MI9: An integrated runtime governance framework for agentic AI",
      "author": "Wang, C. L., et al.",
      "datePublished": "2025",
      "url": "https://arxiv.org/abs/2508.03858"
    },
    {
      "@type": "CreativeWork",
      "name": "Progent: Programmable privilege control for LLM agents",
      "author": "Shi, T., et al.",
      "datePublished": "2025",
      "url": "https://arxiv.org/abs/2504.11703"
    }
  ]
}