@base <https://www.allthingsdistributed.com/2026/04/s3-files-and-the-changing-face-of-s3.html> .
@prefix schema: <http://schema.org/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .

# Root node: the blog post itself, with its bibliographic metadata, topics,
# structural parts, outbound links, and the secondary nodes it mentions.
<#article> a schema:Article ;
  schema:headline "S3 Files and the changing face of S3"@en ;
  schema:name "S3 Files and the changing face of S3"@en ;
  # Typed as xsd:date so consumers get a date value rather than a plain string.
  # The full datatype IRI is used because this file declares no xsd: prefix.
  schema:datePublished "2026-04-07"^^<http://www.w3.org/2001/XMLSchema#date> ;
  schema:inLanguage "en" ;
  schema:url <https://www.allthingsdistributed.com/2026/04/s3-files-and-the-changing-face-of-s3.html> ;
  schema:publisher <#all-things-distributed> ;
  schema:author <#andy-warfield> ;
  schema:contributor <#werner-vogels> ;
  schema:image <https://www.allthingsdistributed.com/images/s3-files-and-the-changing-face-of-s3.jpg> ;
  schema:wordCount 6004 ;
  schema:abstract """The article explains how Amazon S3 is expanding from object storage into a broader family of data primitives, culminating in S3 Files, which lets existing S3 data be mounted and used through familiar filesystem interfaces."""@en ;
  # Primary topics of the article (products, technologies, and concepts).
  schema:about
    <#amazon-s3>,
    <#s3-files>,
    <#amazon-efs>,
    <#s3-tables>,
    <#s3-vectors>,
    <#apache-iceberg>,
    <#genomics-workloads>,
    <#agentic-development>,
    <#filesystem-object-friction> ;
  schema:articleSection
    "Part 1: The Changing Face of S3"@en,
    "Part 2: The Design of S3 Files"@en ;
  schema:articleBody """The post frames data access friction as a long-standing systems problem, illustrated by genomics workloads that needed filesystem tools even when data benefited from S3 storage. It argues that agentic development, analytics, media, machine learning, and scientific computing all increase the value of storage systems that decouple data from any one application interface. Against that backdrop, the article positions S3 Tables, S3 Vectors, and S3 Files as successive S3-native primitives for structured data, vector indexes, and filesystem-oriented access. The design discussion emphasizes that object and file semantics should not be forcibly merged into one lowest-common-denominator system. Instead, S3 Files uses an explicit boundary with staged synchronization back to S3, conflict handling, bidirectional sync, active-working-set eviction, and read-bypass optimizations for throughput-heavy reads."""@en ;
  # Ordered sections; each part node carries its own schema:position.
  schema:hasPart <#intro>, <#part-1>, <#part-2> ;
  schema:relatedLink
    <https://aws.amazon.com/s3/features/files/>,
    <https://gatk.broadinstitute.org/>,
    <https://github.com/WarfieldLab/bunnies>,
    <https://lizizhikevich.github.io/self-assembling-systems/2016/10/28/burst-parallelism-notes-on-mapreduce-and-serverless.html>,
    <https://commons.wikimedia.org/>,
    <https://www.linkedin.com/in/andywarfield/> ;
  # Secondary nodes: people, tools, defined terms, the how-to, FAQ entries,
  # the term set, and the lead image.
  schema:mentions
    <#loren-rieseberg>,
    <#js-legare>,
    <#gatk4>,
    <#bunnies>,
    <#read-bypass>,
    <#lost-found>,
    <#s3-files-mount-howto>,
    <#faq-1>,
    <#faq-2>,
    <#faq-3>,
    <#faq-4>,
    <#faq-5>,
    <#faq-6>,
    <#faq-7>,
    <#faq-8>,
    <#faq-9>,
    <#faq-10>,
    <#defined-terms>,
    <#article-image> .

# The three sections of the article, in reading order (schema:position 1-3).

# Section 1: the introductory note, attributed to Werner Vogels.
<#intro>
  a schema:WebPageElement ;
  schema:position 1 ;
  schema:name "Werner's introduction"@en ;
  schema:author <#werner-vogels> ;
  schema:about <#s3-files> ;
  schema:text """Werner Vogels introduces Andy Warfield's guest post as a discussion of data movement friction, the origin of S3 Files, and the practical lessons behind the product's design."""@en .

# Section 2: motivation and the earlier S3-native primitives.
<#part-1>
  a schema:WebPageElement ;
  schema:position 2 ;
  schema:name "Part 1: The Changing Face of S3"@en ;
  schema:about
    <#filesystem-object-friction>,
    <#genomics-workloads>,
    <#s3-tables>,
    <#s3-vectors>,
    <#s3-files> ;
  schema:text """This section traces the motivation for S3 Files from genomics research at the University of British Columbia through broader customer patterns in analytics, AI, media, and software development. It presents S3 Tables and S3 Vectors as earlier examples of turning common data-access patterns into first-class S3-native primitives."""@en .

# Section 3: the design of S3 Files.
<#part-2>
  a schema:WebPageElement ;
  schema:position 3 ;
  schema:name "Part 2: The Design of S3 Files"@en ;
  schema:about
    <#s3-files>,
    <#amazon-efs>,
    <#filesystem-object-friction> ;
  schema:text """This section explains why converging file and object semantics into one interface produced unacceptable compromises. It describes the final design as a staged system with bidirectional synchronization, conflict handling, working-set-based eviction, and high-throughput read bypass while preserving S3 as the source of truth."""@en .

# Lead image of the article; contentUrl is the same IRI the article node uses
# for schema:image.
<#article-image>
  a schema:ImageObject ;
  schema:isPartOf <#article> ;
  schema:about <#article>, <#s3-files> ;
  schema:name "Article lead image for S3 Files and the changing face of S3"@en ;
  schema:creditText "Photo credit: Ossewa"@en ;
  schema:contentUrl <https://www.allthingsdistributed.com/images/s3-files-and-the-changing-face-of-s3.jpg> .

# People credited on the article node.

# Referenced from the article via schema:author.
<#andy-warfield>
  a schema:Person ;
  schema:name "Andy Warfield"@en ;
  schema:description """Andy Warfield is presented as the principal author of the article and as a leader involved in designing S3 Files and broader S3 data abstractions."""@en ;
  schema:url <https://www.linkedin.com/in/andywarfield/> .

# Referenced from the article via schema:contributor and from the intro
# section via schema:author.
<#werner-vogels>
  a schema:Person ;
  schema:name "Werner Vogels"@en ;
  schema:description """Werner Vogels appears as the host of All Things Distributed and contributes the introductory note that frames the guest article for readers."""@en ;
  schema:url <https://www.allthingsdistributed.com/> .

# The blog that publishes the article (the article's schema:publisher).
<#all-things-distributed>
  a schema:Blog ;
  schema:name "All Things Distributed"@en ;
  schema:description """All Things Distributed is the publication venue for the article and serves as a channel for architecture and systems commentary associated with AWS leadership."""@en ;
  schema:url <https://www.allthingsdistributed.com/> ;
  schema:publisher <#amazon-web-services> .

# Organization node for AWS; used as the blog's publisher and as the brand of
# S3 Files.
<#amazon-web-services> a schema:Organization ;
  schema:name "Amazon Web Services"@en ;
  schema:url <https://aws.amazon.com/> ;
  # DBpedia resource IRIs use the http scheme; the https spelling is a
  # different IRI and would not match the DBpedia resource.
  owl:sameAs <http://dbpedia.org/resource/Amazon_Web_Services> ;
  schema:description """Amazon Web Services is the cloud platform context in which S3, EFS, S3 Tables, S3 Vectors, and S3 Files are discussed as interoperable data services."""@en .

# Amazon S3, one of the article's primary topics.
<#amazon-s3> a schema:SoftwareApplication ;
  schema:name "Amazon S3"@en ;
  schema:url <https://aws.amazon.com/s3/> ;
  # DBpedia resource IRIs use the http scheme; the https spelling is a
  # different IRI and would not match the DBpedia resource.
  owl:sameAs <http://dbpedia.org/resource/Amazon_S3> ;
  schema:applicationCategory "Cloud object storage"@en ;
  schema:description """Amazon S3 is described as durable object storage that increasingly exposes higher-level data primitives so customers can work with stored data through the interfaces that best fit each workload."""@en .

# Amazon Elastic File System; "Amazon EFS" is recorded as an alternate name.
<#amazon-efs>
  a schema:SoftwareApplication ;
  schema:name "Amazon Elastic File System"@en ;
  schema:alternateName "Amazon EFS"@en ;
  schema:applicationCategory "Managed network file system"@en ;
  schema:description """Amazon EFS supplies the filesystem side of S3 Files, allowing mounted access patterns while synchronization preserves S3 as the durable object-store source of truth."""@en ;
  schema:url <https://aws.amazon.com/efs/> .

# S3 Files, typed as both a software application and a product; the brand
# points at the AWS organization node.
<#s3-files>
  a schema:SoftwareApplication, schema:Product ;
  schema:name "S3 Files"@en ;
  schema:brand <#amazon-web-services> ;
  schema:applicationCategory "Cloud storage feature"@en ;
  schema:url <https://aws.amazon.com/s3/features/files/> ;
  schema:description """S3 Files is introduced as a new S3 capability that lets customers mount an S3 bucket or prefix inside EC2, containers, or Lambda and work with the data through filesystem APIs while changes propagate back to S3."""@en ;
  # One literal per feature; RDF object lists are unordered.
  schema:featureList
    "Mount S3 buckets or prefixes as a filesystem view"@en,
    "Bidirectional synchronization between file view and S3 objects"@en,
    "Conflict resolution that preserves S3 as source of truth"@en,
    "Lost+found handling for conflicting file-side changes"@en,
    "Working-set eviction for inactive file data after 30 days"@en,
    "Read-bypass optimization for high-throughput sequential reads"@en .

# Two further S3 features that the article lists among its topics.

<#s3-tables>
  a schema:SoftwareApplication ;
  schema:name "S3 Tables"@en ;
  schema:applicationCategory "Managed table primitive"@en ;
  schema:description """S3 Tables is described as a first-class table abstraction on S3, based on Apache Iceberg with additional guardrails, automatic compaction, and cross-region replication support."""@en ;
  schema:url <https://aws.amazon.com/s3/features/tables/> .

<#s3-vectors>
  a schema:SoftwareApplication ;
  schema:name "S3 Vectors"@en ;
  schema:applicationCategory "Managed vector index primitive"@en ;
  schema:description """S3 Vectors is presented as an S3-native primitive for elastic similarity-search indexes that preserves storage-oriented economics while exposing a simple always-available API endpoint."""@en ;
  schema:url <https://aws.amazon.com/s3/features/vectors/> .

# Apache Iceberg, listed among the article's topics.
<#apache-iceberg> a schema:SoftwareApplication ;
  schema:name "Apache Iceberg"@en ;
  schema:url <https://iceberg.apache.org/> ;
  # DBpedia resource IRIs use the http scheme; the https spelling is a
  # different IRI and would not match the DBpedia resource.
  owl:sameAs <http://dbpedia.org/resource/Apache_Iceberg> ;
  schema:description """Apache Iceberg is discussed as the open table format underpinning S3 Tables, especially for schema evolution, mutations, snapshots, and richer structured-data workflows."""@en .

# Concept-level topics of the article, modelled as defined terms. Each term
# links back to the term set that lists it via schema:inDefinedTermSet.

<#genomics-workloads> a schema:DefinedTerm ;
  schema:name "Genomics cloud workloads"@en ;
  schema:inDefinedTermSet <#defined-terms> ;
  schema:description """The article uses genomics analysis as the motivating example of bursty, parallel computation that benefits from cloud storage and compute but still depends heavily on filesystem-oriented tools."""@en .

<#agentic-development> a schema:DefinedTerm ;
  schema:name "Agentic development"@en ;
  schema:inDefinedTermSet <#defined-terms> ;
  schema:description """Agentic development is described as a force that lowers the cost of building applications and increases the importance of storage systems that let data remain reusable across fast-changing code and tools."""@en .

<#filesystem-object-friction> a schema:DefinedTerm ;
  schema:name "Filesystem-object friction"@en ;
  schema:inDefinedTermSet <#defined-terms> ;
  schema:description """Filesystem-object friction is the recurring mismatch between tools that expect local files and data that lives durably in object storage, often forcing copying, duplication, and fragile synchronization workflows."""@en .

# Term set collecting every schema:DefinedTerm node declared in this file.
<#defined-terms> a schema:DefinedTermSet ;
  schema:name "Defined terms for S3 Files and the changing face of S3"@en ;
  # schema:hasDefinedTerm is the DefinedTermSet -> DefinedTerm property.
  # schema:hasPart expects CreativeWork values, which DefinedTerm is not.
  schema:hasDefinedTerm
    <#genomics-workloads>,
    <#agentic-development>,
    <#filesystem-object-friction>,
    <#read-bypass>,
    <#lost-found>,
    <#stage-and-commit>,
    <#active-working-set> ;
  schema:isPartOf <#article> ;
  schema:about <#s3-files>, <#amazon-s3> .

# People mentioned by the article; both are affiliated with the UBC node.

<#loren-rieseberg>
  a schema:Person ;
  schema:name "Loren Rieseberg"@en ;
  schema:description """Loren Rieseberg is identified as a UBC botany professor whose sunflower genomics research helped motivate the storage usability problem discussed in the article."""@en ;
  schema:affiliation <#university-of-british-columbia> .

<#js-legare>
  a schema:Person ;
  schema:name "JS Legare"@en ;
  schema:description """JS Legare is described as a PhD graduate who helped bridge genomics workloads to cloud execution by building the 'bunnies' system."""@en ;
  schema:affiliation <#university-of-british-columbia> .

# Affiliation target for the two people mentioned by the article.
<#university-of-british-columbia> a schema:CollegeOrUniversity ;
  schema:name "University of British Columbia"@en ;
  schema:url <https://www.ubc.ca/> ;
  # DBpedia resource IRIs use the http scheme; the https spelling is a
  # different IRI and would not match the DBpedia resource.
  owl:sameAs <http://dbpedia.org/resource/University_of_British_Columbia> .

# Software tools mentioned by the article; each URL also appears in the
# article's schema:relatedLink list.

<#gatk4>
  a schema:SoftwareApplication ;
  schema:name "GATK4"@en ;
  schema:description """GATK4 is the genomics analysis toolkit referenced as an example of existing Linux-oriented tooling that assumes filesystem access patterns."""@en ;
  schema:url <https://gatk.broadinstitute.org/> .

<#bunnies>
  a schema:SoftwareApplication ;
  schema:name "bunnies"@en ;
  schema:description """Bunnies is the container-based system referenced in the article for running genomics analyses on S3-backed cloud infrastructure."""@en ;
  schema:url <https://github.com/WarfieldLab/bunnies> .

# Design-level defined terms for S3 Files. Each term links back to the term
# set that lists it via schema:inDefinedTermSet.

<#read-bypass> a schema:DefinedTerm ;
  schema:name "Read bypass"@en ;
  schema:inDefinedTermSet <#defined-terms> ;
  schema:description """Read bypass is an S3 Files optimization that reroutes high-throughput sequential reads away from traditional NFS access and toward parallel GET requests directly against S3."""@en .

<#lost-found> a schema:DefinedTerm ;
  schema:name "lost+found conflict handling"@en ;
  schema:inDefinedTermSet <#defined-terms> ;
  schema:description """When both the filesystem side and S3 modify the same data concurrently, the design keeps S3 authoritative and moves the conflicting filesystem version into a lost+found directory while emitting a CloudWatch signal."""@en .

<#stage-and-commit> a schema:DefinedTerm ;
  schema:name "Stage and commit boundary"@en ;
  schema:inDefinedTermSet <#defined-terms> ;
  schema:description """Stage and commit is the explicit synchronization boundary in S3 Files where file-oriented changes are aggregated before being written back to S3 as object updates."""@en .

<#active-working-set> a schema:DefinedTerm ;
  schema:name "Active working set"@en ;
  schema:inDefinedTermSet <#defined-terms> ;
  schema:description """Active working set refers to the recently used file data retained in the filesystem view, while older inactive data can be evicted without being deleted from S3."""@en .

# How-to summarizing the S3 Files workflow; its four steps are separate
# schema:HowToStep nodes, each carrying its own schema:position.
<#s3-files-mount-howto>
  a schema:HowTo ;
  schema:isPartOf <#article> ;
  schema:name "How to work with S3 data through S3 Files"@en ;
  schema:description """The article describes a workflow in which a builder mounts an S3 bucket or prefix, uses standard filesystem tools against that mounted view, and relies on S3 Files to synchronize durable object updates back to S3."""@en ;
  schema:about
    <#s3-files>,
    <#amazon-s3>,
    <#amazon-efs> ;
  schema:step
    <#step-1>,
    <#step-2>,
    <#step-3>,
    <#step-4> .

# Steps of <#s3-files-mount-howto>, ordered by schema:position.

<#step-1>
  a schema:HowToStep ;
  schema:isPartOf <#s3-files-mount-howto> ;
  schema:position 1 ;
  schema:name "Mount the bucket or prefix"@en ;
  schema:text "Mount an S3 bucket or prefix inside an EC2 instance, container, or Lambda environment so it appears as filesystem-accessible data."@en .

<#step-2>
  a schema:HowToStep ;
  schema:isPartOf <#s3-files-mount-howto> ;
  schema:position 2 ;
  schema:name "Use existing file-oriented tools"@en ;
  schema:text "Run existing analytics, training, build, or Unix-based tooling directly against the mounted filesystem view instead of copying objects onto local disks."@en .

<#step-3>
  a schema:HowToStep ;
  schema:isPartOf <#s3-files-mount-howto> ;
  schema:position 3 ;
  schema:name "Let synchronization propagate updates"@en ;
  schema:text "Allow S3 Files to aggregate file-side changes and propagate them back to S3 while also reflecting object-side changes into the filesystem view."@en .

<#step-4>
  a schema:HowToStep ;
  schema:isPartOf <#s3-files-mount-howto> ;
  schema:position 4 ;
  schema:name "Handle conflicts through the S3 authority model"@en ;
  schema:text "If concurrent changes conflict, treat S3 as the source of truth and use the lost+found outcome and metrics to inspect file-side divergence."@en .

# FAQ entries. Each schema:Question repeats its wording in schema:name and
# schema:text and links to exactly one schema:Answer via schema:acceptedAnswer.
# Questions and answers are all declared part of the article.

# FAQ 1
<#faq-1>
  a schema:Question ;
  schema:isPartOf <#article> ;
  schema:name "Why was S3 Files created?"@en ;
  schema:text "Why was S3 Files created?"@en ;
  schema:acceptedAnswer <#faq-1-answer> .

<#faq-1-answer>
  a schema:Answer ;
  schema:isPartOf <#article> ;
  schema:text "It was created to remove the repeated friction of copying data between S3 and filesystems for tools that fundamentally expect file-based access."@en .

# FAQ 2
<#faq-2>
  a schema:Question ;
  schema:isPartOf <#article> ;
  schema:name "What customer problem does the article emphasize?"@en ;
  schema:text "What customer problem does the article emphasize?"@en ;
  schema:acceptedAnswer <#faq-2-answer> .

<#faq-2-answer>
  a schema:Answer ;
  schema:isPartOf <#article> ;
  schema:text "The recurring problem is data friction: durable data may live in S3 while the tools customers rely on expect Linux filesystem semantics."@en .

# FAQ 3
<#faq-3>
  a schema:Question ;
  schema:isPartOf <#article> ;
  schema:name "How does the article connect agentic development to storage design?"@en ;
  schema:text "How does the article connect agentic development to storage design?"@en ;
  schema:acceptedAnswer <#faq-3-answer> .

<#faq-3-answer>
  a schema:Answer ;
  schema:isPartOf <#article> ;
  schema:text "It argues that faster, cheaper software creation makes durable reusable data more important, so storage should attach cleanly to many changing applications and tools."@en .

# FAQ 4
<#faq-4>
  a schema:Question ;
  schema:isPartOf <#article> ;
  schema:name "How do S3 Tables fit the article's argument?"@en ;
  schema:text "How do S3 Tables fit the article's argument?"@en ;
  schema:acceptedAnswer <#faq-4-answer> .

<#faq-4-answer>
  a schema:Answer ;
  schema:isPartOf <#article> ;
  schema:text "S3 Tables are presented as an earlier example of turning a common access pattern for structured data into a managed first-class S3 primitive."@en .

# FAQ 5
<#faq-5>
  a schema:Question ;
  schema:isPartOf <#article> ;
  schema:name "How do S3 Vectors fit the article's argument?"@en ;
  schema:text "How do S3 Vectors fit the article's argument?"@en ;
  schema:acceptedAnswer <#faq-5-answer> .

<#faq-5-answer>
  a schema:Answer ;
  schema:isPartOf <#article> ;
  schema:text "S3 Vectors extend the same pattern to similarity-search indexes, giving customers an elastic S3-native vector primitive instead of relying only on compute-heavy vector databases."@en .

# FAQ 6
<#faq-6>
  a schema:Question ;
  schema:isPartOf <#article> ;
  schema:name "Why did the team reject a fully unified file-object semantic model?"@en ;
  schema:text "Why did the team reject a fully unified file-object semantic model?"@en ;
  schema:acceptedAnswer <#faq-6-answer> .

<#faq-6-answer>
  a schema:Answer ;
  schema:isPartOf <#article> ;
  schema:text "They concluded that forcing both models into one system created a lowest-common-denominator design that would break expectations on both the file and object sides."@en .

# FAQ 7
<#faq-7>
  a schema:Question ;
  schema:isPartOf <#article> ;
  schema:name "What is the synchronization model described for S3 Files?"@en ;
  schema:text "What is the synchronization model described for S3 Files?"@en ;
  schema:acceptedAnswer <#faq-7-answer> .

<#faq-7-answer>
  a schema:Answer ;
  schema:isPartOf <#article> ;
  schema:text "File changes are aggregated and committed back to S3 on a staged basis, while object-side updates are also reflected back into the filesystem view."@en .

# FAQ 8
<#faq-8>
  a schema:Question ;
  schema:isPartOf <#article> ;
  schema:name "How are conflicts resolved?"@en ;
  schema:text "How are conflicts resolved?"@en ;
  schema:acceptedAnswer <#faq-8-answer> .

<#faq-8-answer>
  a schema:Answer ;
  schema:isPartOf <#article> ;
  schema:text "S3 is the source of truth, and conflicting filesystem-side versions are moved into lost+found with metrics emitted for visibility."@en .

# FAQ 9
<#faq-9>
  a schema:Question ;
  schema:isPartOf <#article> ;
  schema:name "What is read bypass?"@en ;
  schema:text "What is read bypass?"@en ;
  schema:acceptedAnswer <#faq-9-answer> .

<#faq-9-answer>
  a schema:Answer ;
  schema:isPartOf <#article> ;
  schema:text "Read bypass is an optimization that serves high-throughput sequential reads through parallel S3 GET paths rather than standard NFS data access."@en .

# FAQ 10
<#faq-10>
  a schema:Question ;
  schema:isPartOf <#article> ;
  schema:name "What larger claim does the article make about S3?"@en ;
  schema:text "What larger claim does the article make about S3?"@en ;
  schema:acceptedAnswer <#faq-10-answer> .

<#faq-10-answer>
  a schema:Answer ;
  schema:isPartOf <#article> ;
  schema:text "The article argues that S3 is evolving from an object store into a broader durable data platform with multiple first-class primitives for different ways of working with data."@en .
