{
  "@context": {
    "@vocab": "http://schema.org/",
    "ccs": "http://acm.org/ccs#",
    "ex": "http://example.org/vocab#"
  },
  "@type": "ScholarlyArticle",
  "headline": "HotComment: A Benchmark for Evaluating Popularity of Online Comments",
  "author": [
    {
      "@type": "Person",
      "name": "Yafeng Wu",
      "affiliation": {
        "@type": "CollegeOrUniversity",
        "name": "Huazhong University of Science and Technology",
        "address": {
          "@type": "PostalAddress",
          "addressLocality": "Wuhan",
          "addressCountry": "China"
        }
      }
    },
    {
      "@type": "Person",
      "name": "Yunyao Zhang",
      "affiliation": {
        "@type": "CollegeOrUniversity",
        "name": "Huazhong University of Science and Technology",
        "address": {
          "@type": "PostalAddress",
          "addressLocality": "Wuhan",
          "addressCountry": "China"
        }
      }
    },
    {
      "@type": "Person",
      "name": "Liliang Ye",
      "affiliation": {
        "@type": "CollegeOrUniversity",
        "name": "Huazhong University of Science and Technology",
        "address": {
          "@type": "PostalAddress",
          "addressLocality": "Wuhan",
          "addressCountry": "China"
        }
      }
    },
    {
      "@type": "Person",
      "name": "Guiyi Zeng",
      "affiliation": {
        "@type": "CollegeOrUniversity",
        "name": "Huazhong University of Science and Technology",
        "address": {
          "@type": "PostalAddress",
          "addressLocality": "Wuhan",
          "addressCountry": "China"
        }
      }
    },
    {
      "@type": "Person",
      "name": "Junqing Yu",
      "affiliation": {
        "@type": "CollegeOrUniversity",
        "name": "Huazhong University of Science and Technology",
        "address": {
          "@type": "PostalAddress",
          "addressLocality": "Wuhan",
          "addressCountry": "China"
        }
      }
    },
    {
      "@type": "Person",
      "name": "Chen Xu",
      "affiliation": {
        "@type": "CollegeOrUniversity",
        "name": "Beijing Institute of Computer Technology and Applications",
        "address": {
          "@type": "PostalAddress",
          "addressLocality": "Beijing",
          "addressCountry": "China"
        }
      }
    },
    {
      "@type": "Person",
      "name": "Zikai Song",
      "email": "skyesong@hust.edu.cn",
      "affiliation": {
        "@type": "CollegeOrUniversity",
        "name": "Huazhong University of Science and Technology",
        "address": {
          "@type": "PostalAddress",
          "addressLocality": "Wuhan",
          "addressCountry": "China"
        }
      }
    }
  ],
  "datePublished": "2026-04-28",
  "abstract": "Online comments play a crucial role in shaping public sentiment and opinion dynamics on social media. However, evaluating their popularity remains challenging, not only because it depends on linguistic quality, originality, and emotional resonance, but also because stylistic preferences vary widely across platforms and user groups, causing the same comment to resonate differently in different communities. In this work, we present HotComment, a multimodal benchmark integrating video and text modalities that comprehensively quantifies popularity from three enhanced aspects: (1) Content Quality, which evaluates semantic similarity with ground-truth human comments and extends quality assessment through four interpretable dimensions; (2) Popularity Prediction, based on trends from models trained on real-world interaction data; and (3) User Behavior Simulation, which models the distribution of platform users and approximates engagement scores through an agent-based framework. Furthermore, we propose StyleCmt, inspired by social ripple effects, where multiple stylistic dimensions align to amplify socially resonant expressions and suppress incongruent ones.",
  "keywords": [
    "Comment Generation",
    "Social Media Analysis",
    "Multimodal Dataset",
    "Large Language Models"
  ],
  "about": [
    {
      "@type": "DefinedTermSet",
      "name": "CCS Concepts",
      "hasDefinedTerm": [
        {
          "@type": "DefinedTerm",
          "name": "Human-centered computing → Social media",
          "inDefinedTermSet": "ccs"
        },
        {
          "@type": "DefinedTerm",
          "name": "Computing methodologies → Natural language generation",
          "inDefinedTermSet": "ccs"
        },
        {
          "@type": "DefinedTerm",
          "name": "Computing methodologies → Machine learning",
          "inDefinedTermSet": "ccs"
        },
        {
          "@type": "DefinedTerm",
          "name": "Computing methodologies → Modeling and simulation",
          "inDefinedTermSet": "ccs"
        }
      ]
    }
  ],
  "articleBody": "HotComment is a benchmark for evaluating online comment popularity integrating content quality, popularity prediction, and user behavior simulation.",
  "hasPart": [
    {
      "@type": "DefinedTermSet",
      "name": "Defined Terms",
      "description": "Key terms defined in the article related to comment evaluation and generation.",
      "hasDefinedTerm": [
        {
          "@type": "DefinedTerm",
          "name": "Content Quality",
          "description": "Evaluates semantic similarity with human comments and stylistic expressiveness across four dimensions: Linguistic Expression, Creative Imagination, Emotional Resonance, and Social and Cultural Influence."
        },
        {
          "@type": "DefinedTerm",
          "name": "Popularity Prediction",
          "description": "Models platform-level engagement preferences to estimate comment popularity using trained prediction models."
        },
        {
          "@type": "DefinedTerm",
          "name": "User Behavior Simulation",
          "description": "Agent-based simulation modeling heterogeneous audience responses to approximate user engagement."
        },
        {
          "@type": "DefinedTerm",
          "name": "StyleCmt",
          "description": "A framework inspired by wave interference modeling stylistic interactions to generate socially resonant comments."
        },
        {
          "@type": "DefinedTerm",
          "name": "Linguistic Expression",
          "description": "Stylistic dimension evaluating rhetorical and writing artistry such as humor, irony, metaphor, rhythm, and aesthetic fluency."
        },
        {
          "@type": "DefinedTerm",
          "name": "Creative Imagination",
          "description": "Stylistic dimension measuring originality and associative thinking in comments."
        },
        {
          "@type": "DefinedTerm",
          "name": "Emotional Resonance",
          "description": "Stylistic dimension examining emotional depth and attitudinal stance to evoke empathy and sentiment."
        },
        {
          "@type": "DefinedTerm",
          "name": "Social and Cultural Influence",
          "description": "Stylistic dimension assessing potential for social propagation via memes, cultural references, and intertextuality."
        },
        {
          "@type": "DefinedTerm",
          "name": "Exposure Specificity Index (ESI)",
          "description": "A measure representing baseline exclusivity of audience exposure in user behavior simulation."
        },
        {
          "@type": "DefinedTerm",
          "name": "Stylistic Resonance Score (SRS)",
          "description": "An averaged score derived from the four stylistic dimensions under Content Quality."
        }
      ]
    },
    {
      "@type": "Question",
      "name": "What is HotComment?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "HotComment is a large-scale multimodal benchmark for evaluating the popularity of online comments, integrating content quality, popularity prediction, and user behavior simulation."
      }
    },
    {
      "@type": "Question",
      "name": "What are the three main evaluation dimensions in HotComment?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "The three main dimensions are Content Quality, Popularity Prediction, and User Behavior Simulation."
      }
    },
    {
      "@type": "Question",
      "name": "How does HotComment evaluate Content Quality?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "Content Quality is evaluated by semantic similarity to human comments and stylistic expressiveness across four dimensions: Linguistic Expression, Creative Imagination, Emotional Resonance, and Social and Cultural Influence."
      }
    },
    {
      "@type": "Question",
      "name": "What is StyleCmt?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "StyleCmt is a wave-interference-inspired framework that models interactions among stylistic elements to generate comments aligned with audience preferences."
      }
    },
    {
      "@type": "Question",
      "name": "How does User Behavior Simulation work in HotComment?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "It uses agent-based hierarchical simulation to model audience composition and selective exposure, estimating engagement scores based on realistic user profiles and platform demographics."
      }
    },
    {
      "@type": "Question",
      "name": "What datasets are used in HotComment?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "HotComment dataset includes over 43,000 online articles and 34,000 videos with approximately 1.4 million content-comment pairs collected from platforms like NetEase News, Tencent News, and Bilibili."
      }
    },
    {
      "@type": "Question",
      "name": "How is popularity labeled in HotComment?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "Popular comments are those ranked in the top-15 by likes with thresholds based on relative or absolute like counts; non-popular comments have low likes and are selected from the same day."
      }
    },
    {
      "@type": "Question",
      "name": "What improvements does StyleCmt provide?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "StyleCmt consistently improves semantic similarity, stylistic quality, and engagement alignment across large language models and multimodal models, outperforming chain-of-thought and few-shot prompting."
      }
    },
    {
      "@type": "Question",
      "name": "What stylistic dimensions does StyleCmt enhance?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "StyleCmt enhances Linguistic Expression, Creative Imagination, Emotional Resonance, and Social and Cultural Influence in a balanced manner."
      }
    },
    {
      "@type": "Question",
      "name": "How does HotComment differ from existing comment evaluation methods?",
      "acceptedAnswer": {
        "@type": "Answer",
        "text": "Unlike existing methods focusing mainly on semantic similarity or isolated stylistic aspects, HotComment integrates multi-dimensional content quality, platform-specific popularity prediction, and user-centric behavior simulation for a comprehensive evaluation."
      }
    },
    {
      "@type": "HowTo",
      "name": "How to evaluate online comment popularity using HotComment",
      "description": "Steps to evaluate online comment popularity with the HotComment benchmark.",
      "step": [
        {
          "@type": "HowToStep",
          "position": 1,
          "name": "Assess Content Quality",
          "text": "Evaluate semantic similarity of generated comments against popular human comments and measure stylistic expressiveness across four dimensions."
        },
        {
          "@type": "HowToStep",
          "position": 2,
          "name": "Perform Popularity Prediction",
          "text": "Use platform-specific trained models to predict the likelihood of comment popularity based on real-world interaction data."
        },
        {
          "@type": "HowToStep",
          "position": 3,
          "name": "Conduct User Behavior Simulation",
          "text": "Simulate audience exposure and engagement using agent-based hierarchical models reflecting realistic user demographics and selective exposure."
        }
      ]
    },
    {
      "@type": "HowTo",
      "name": "How to generate socially resonant comments with StyleCmt",
      "description": "Steps to generate comments aligned with audience stylistic preferences using StyleCmt.",
      "step": [
        {
          "@type": "HowToStep",
          "position": 1,
          "name": "Construct Resonance Field",
          "text": "Retrieve similar hot comments and decompose their stylistic components across four dimensions."
        },
        {
          "@type": "HowToStep",
          "position": 2,
          "name": "Interference-Driven Planning",
          "text": "Aggregate and identify dominant stylistic patterns to form an interference blueprint."
        },
        {
          "@type": "HowToStep",
          "position": 3,
          "name": "Coherent Superposition and Emission",
          "text": "Generate multiple linguistic realizations under the stylistic configuration, select the most resonant comment, and refine it for output."
        }
      ]
    },
    {
      "@type": "HowTo",
      "name": "How to use HotComment dataset for training and evaluation",
      "description": "Guidelines for dataset partitioning and usage in HotComment.",
      "step": [
        {
          "@type": "HowToStep",
          "position": 1,
          "name": "Partition Dataset",
          "text": "Divide dataset into training, validation, and test sets with an 8:1:1 ratio, stratified by publication time and content category."
        },
        {
          "@type": "HowToStep",
          "position": 2,
          "name": "Train Popularity Prediction Model",
          "text": "Train platform-specific popularity prediction models on the training split using binary classification with cross-entropy and supervised contrastive loss."
        },
        {
          "@type": "HowToStep",
          "position": 3,
          "name": "Evaluate Models",
          "text": "Evaluate comment generation models on test set using content quality metrics, popularity prediction scores, and user behavior simulation engagement scores."
        }
      ]
    }
  ],
  "image": {
    "@type": "ImageObject",
    "contentUrl": "page1_image.png",
    "description": "Figure 1 on page 1 illustrating three types of comments for a video: real human-generated popular comment, StyleCmt-enhanced LLM-generated comment, and standard LLM-generated comment."
  },
  "publisher": {
    "@type": "Organization",
    "name": "arXiv",
    "url": "https://arxiv.org/abs/2604.25614"
  }
}