<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Perioper Med</journal-id><journal-id journal-id-type="publisher-id">periop</journal-id><journal-id journal-id-type="index">32</journal-id><journal-title>JMIR Perioperative Medicine</journal-title><abbrev-journal-title>JMIR Perioper Med</abbrev-journal-title><issn pub-type="epub">2561-9128</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v8i1e70047</article-id><article-id pub-id-type="doi">10.2196/70047</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Evaluating Large Language Models for Preoperative Patient Education in Superior Capsular Reconstruction: Comparative Study of Claude, GPT, and Gemini</article-title></title-group><contrib-group><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Liu</surname><given-names>Yukang</given-names></name><degrees>MBBS</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Li</surname><given-names>Hua</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Ouyang</surname><given-names>Jianfeng</given-names></name><degrees>PhD</degrees><xref ref-type="aff" 
rid="aff3">3</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Xue</surname><given-names>Zhaowen</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Wang</surname><given-names>Min</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff5">5</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>He</surname><given-names>Hebei</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Song</surname><given-names>Bin</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff6">6</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Zheng</surname><given-names>Xiaofei</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author" corresp="yes" equal-contrib="yes"><name name-style="western"><surname>Gan</surname><given-names>Wenyi</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib></contrib-group><aff id="aff1"><institution>The Second School of Clinical Medicine, Southern Medical University</institution><addr-line>Guangzhou</addr-line><country>China</country></aff><aff id="aff2"><institution>Department of Orthopedics, Beijing Jishuitan Hospital</institution><addr-line>Beijing</addr-line><country>China</country></aff><aff id="aff3"><institution>Zhuhai People's Hospital (The Affiliated Hospital of Beijing Institute of Technology, Zhuhai Clinical Medical College of Jinan University)</institution><addr-line>79 Kangning Road, Xiangzhou District</addr-line><addr-line>Zhuhai, Guangdong</addr-line><country>China</country></aff><aff id="aff4"><institution>Department 
of Sports Medicine, The First Affiliated Hospital of Jinan University</institution><addr-line>Guangzhou</addr-line><country>China</country></aff><aff id="aff5"><institution>Department of Orthopaedics, Guangzhou Red Cross Hospital of Jinan University</institution><addr-line>Guangzhou</addr-line><country>China</country></aff><aff id="aff6"><institution>Department of Joint Surgery and Sports Medicine, The Sixth Affiliated Hospital of Sun Yat-sen University</institution><addr-line>Guangzhou</addr-line><country>China</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Rohatgi</surname><given-names>Nidhi</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Johora Faria</surname><given-names>Fatema Tuj</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Ma</surname><given-names>Ming</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Wenyi Gan, PhD, Zhuhai People's Hospital (The Affiliated Hospital of Beijing Institute of Technology, Zhuhai Clinical Medical College of Jinan University), 79 Kangning Road, Xiangzhou District, Zhuhai, Guangdong, 519000, China, 86 13076855735; <email>494414224@qq.com</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>these authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>12</day><month>6</month><year>2025</year></pub-date><volume>8</volume><elocation-id>e70047</elocation-id><history><date date-type="received"><day>13</day><month>12</month><year>2024</year></date><date date-type="rev-recd"><day>04</day><month>04</month><year>2025</year></date><date date-type="accepted"><day>08</day><month>04</month><year>2025</year></date></history><copyright-statement>&#x00A9; Yukang Liu, Hua Li, Jianfeng Ouyang, Zhaowen Xue, Min Wang, Hebei 
He, Bin Song, Xiaofei Zheng, Wenyi Gan. Originally published in JMIR Perioperative Medicine (<ext-link ext-link-type="uri" xlink:href="http://periop.jmir.org">http://periop.jmir.org</ext-link>), 12.6.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Perioperative Medicine, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="http://periop.jmir.org">http://periop.jmir.org</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://periop.jmir.org/2025/1/e70047"/><abstract><sec><title>Background</title><p>Large language models (LLMs) are revolutionizing natural language processing, increasingly applied in clinical settings to enhance preoperative patient education.</p></sec><sec><title>Objective</title><p>This study aimed to evaluate the effectiveness and applicability of various LLMs in preoperative patient education by analyzing their responses to superior capsular reconstruction (SCR)&#x2013;related inquiries.</p></sec><sec sec-type="methods"><title>Methods</title><p>In total, 10 sports medicine clinical experts formulated 11 SCR issues and developed preoperative patient education strategies during a webinar, inputting 12 text commands into Claude-3-Opus (Anthropic), GPT-4-Turbo (OpenAI), and Gemini-1.5-Pro (Google DeepMind). 
A total of 3 experts assessed the language models&#x2019; responses for correctness, completeness, logic, potential harm, and overall satisfaction, while preoperative education documents were evaluated using DISCERN questionnaire and Patient Education Materials Assessment Tool instruments, and reviewed by 5 postoperative patients for readability and educational value; readability of all responses was also analyzed using the cntext package and py-readability-metrics.</p></sec><sec sec-type="results"><title>Results</title><p>Between July 1 and August 17, 2024, sports medicine experts and patients evaluated 33 responses and 3 preoperative patient education documents generated by 3 language models regarding SCR surgery. For the 11 query responses, clinicians rated Gemini significantly higher than Claude in all categories (<italic>P</italic>&#x003C;.05) and higher than GPT in completeness, risk avoidance, and overall rating (<italic>P</italic>&#x003C;.05). For the 3 educational documents, Gemini&#x2019;s Patient Education Materials Assessment Tool score significantly exceeded Claude&#x2019;s (<italic>P</italic>=.03), and patients rated Gemini&#x2019;s materials superior in all aspects, with significant differences in educational quality versus Claude (<italic>P</italic>=.02) and overall satisfaction versus both Claude (<italic>P</italic>&#x003C;.01) and GPT (<italic>P</italic>=.01). GPT had significantly higher readability than Claude on 3 R-based metrics (<italic>P</italic>&#x003C;.01). 
Interrater agreement was high among clinicians and fair among patients.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Claude-3-Opus, GPT-4-Turbo, and Gemini-1.5-Pro effectively generated readable presurgical education materials but lacked citations and failed to discuss alternative treatments or the risks of forgoing SCR surgery, highlighting the need for expert oversight when using these LLMs in patient education.</p></sec></abstract><kwd-group><kwd>superior capsular reconstruction</kwd><kwd>massive rotator cuff tear</kwd><kwd>large language models</kwd><kwd>preoperative patient education</kwd><kwd>informed consent process</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Large language models (LLMs) are extensive neural network models based on deep learning [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. These models learn the grammar, semantics, and contextual information of a language by training on vast amounts of textual data, enabling them to perform various natural language processing tasks [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. Due to the powerful text processing, text generation capabilities, and immense knowledge training of LLMs, researchers have begun to continually explore the potential of LLMs in clinical application scenarios, including professional licensing examinations in various countries and regions [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref5">5</xref>], answering public health questions [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>], analyzing radiological images [<xref ref-type="bibr" rid="ref8">8</xref>], disease screening [<xref ref-type="bibr" rid="ref9">9</xref>], disease diagnosis [<xref ref-type="bibr" rid="ref10">10</xref>], and discipline education [<xref ref-type="bibr" rid="ref11">11</xref>]. 
As the versions and functions of LLMs are constantly updated and upgraded, these models have a low usage threshold and are convenient to use. It is particularly important for professionals in various disciplines to assess the accuracy and completeness of LLMs in their respective fields. This assessment not only provides a strong basis for the application of LLMs in various disciplines but also identifies their shortcomings, serving as a warning for nonprofessional users [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>].</p><p>Superior capsular reconstruction (SCR) was initially proposed by Mihata et al [<xref ref-type="bibr" rid="ref12">12</xref>] in 2012 as a technique to restore the superior restraint of the humeral head passively, thereby restoring force couples and improving shoulder joint kinematics. Over the past decade, SCR has become one of the commonly used treatment methods for massive and irreparable rotator cuff tears among clinicians [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. However, the surgical techniques for SCR are highly variable [<xref ref-type="bibr" rid="ref15">15</xref>]. For example, contrary to the results of earlier studies, further research suggests using dermal allograft instead of fascia lata autograft, leading to a current lack of sufficiently effective long-term follow-up data with high levels of evidence [<xref ref-type="bibr" rid="ref16">16</xref>-<xref ref-type="bibr" rid="ref18">18</xref>]. Moreover, as SCR is a reconstructive surgery rather than a repair surgery [<xref ref-type="bibr" rid="ref15">15</xref>], it is challenging to provide patients with a standardized and effective explanation and communication during the preoperative informed consent process. 
An effective preoperative informed consent process is one of the essential steps in alleviating patients&#x2019; perioperative anxiety and improving treatment efficacy [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>].</p><p>Rational and effective preoperative patient education is one of the critical components in developing standardized diagnosis and treatment processes for clinical surgery departments [<xref ref-type="bibr" rid="ref21">21</xref>]. The main difficulty lies in the professional knowledge gap between medical staff and patients [<xref ref-type="bibr" rid="ref22">22</xref>]. Previous studies have shown that using multimedia as patient education materials can better help patients understand surgical procedures and alleviate perioperative anxiety [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>]. However, in most cases, doctors still primarily use verbal responses to address patients&#x2019; individualized questions [<xref ref-type="bibr" rid="ref25">25</xref>]. This is probably because preparing personalized educational materials and providing oral education require a significant investment of time and effort, leading to high time and economic costs. Furthermore, there is a vast difference in the sources of medical information accessed by doctors and patients [<xref ref-type="bibr" rid="ref26">26</xref>]. Doctors primarily obtain medical information from clinical guidelines, research literature, and textbooks, while patients often acquire medical information through simple search engines and social media software, which may contain false and overly embellished content [<xref ref-type="bibr" rid="ref26">26</xref>-<xref ref-type="bibr" rid="ref28">28</xref>]. 
Patients often lack the ability to think independently when faced with this information.</p><p>With the development of LLMs in recent years, researchers have discovered that the disciplinary knowledge possessed by these LLMs can pass professional examinations in multiple disciplines [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref29">29</xref>]. Their powerful text processing capabilities not only allow them to polish complex text content to enhance readability but also enable them to independently generate text content that is more comprehensive and empathetic compared to health care professionals [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref30">30</xref>]. The quality of their answers is also significantly better than the search results from search engines [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>]. Researchers have also pointed out that when using LLMs as patient education assistive tools, the primary task of doctors is to determine the accuracy of the information and make necessary clarifications [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref31">31</xref>]. Furthermore, researchers believe that LLMs can present information in a way that is understandable to most patients, making them a valuable supplement for orthopedic surgeons in obtaining informed consent and shared decision-making [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>].</p><p>This cross-sectional study aims to assess the capability and application potential of different LLMs in preoperative patient education by evaluating the responses of 3 LLMs&#x2014;GPT-4-Turbo, Claude-3-Opus, and Gemini-1.5-Pro&#x2014;to SCR-related patient inquiries. 
In addition, the study will evaluate patient education documents generated by the LLMs for the informed consent process, which will be jointly assessed by health care professionals and patients. We hypothesize that LLMs can generate readable patient education materials for SCR, but the accuracy, completeness, and patient-assessed readability of the content will require expert review before clinical application.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Design Overview</title><p>This cross-sectional analysis, conducted from July 1 to August 17, 2024, evaluated the quality of responses generated by different LLMs in the context of preoperative patient education for SCR. The study design assessed Claude-3-Opus, GPT-4-Turbo, and Gemini-1.5-Pro (accessed via Poe) on their ability to answer SCR-related patient questions and generate educational materials. The specific study flow is shown in <xref ref-type="fig" rid="figure1">Figure 1</xref>. All LLM prompts and responses, as well as expert and patient evaluations, were conducted in Chinese. Screenshots of Poe website operations are available in Mendeley (Mendeley Data, V1), with English translations generated by GPT-4-Turbo (via Poe) in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Flow diagram of the study process. 
LLM: large language model; SCR: superior capsular reconstruction.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="periop_v8i1e70047_fig01.png"/></fig></sec><sec id="s2-2"><title>Ethical Considerations</title><p>This study was approved by the Ethics Committee of our organization and was eligible for exemption from ethical review considering that this cross-sectional study involved no interventions or potential risks to patients.</p></sec><sec id="s2-3"><title>Questions and Prompts Development</title><p>The research team for this study consists of 12 members, including 10 experienced sports medicine clinicians and 2 doctoral students specializing in LLMs, who collaborated to create patient education materials about SCR. The clinicians include 3 senior-level experts (2 of whom are subject matter experts from external institutions), 2 associate senior-level experts, and 5 intermediate-level experts, with each clinician having at least 5 years of clinical experience.</p><p>The 2 doctoral students first collected a total of 100 questions by having each of the 10 clinical experts propose 10 questions daily that patients frequently asked about SCR, covering aspects like etiology, treatment principles, methods, complications, rehabilitation, and hospitalization costs. After removing duplicates and combining some of the questions, they included only the effective questions that all experts agreed were meaningful. This process resulted in the inclusion of 11 questions. Along with these questions, the doctoral students provided instructions (<xref ref-type="table" rid="table1">Table 1</xref>) requiring LLMs to draft a standardized preoperative informed consent patient education document. 
After the drafted prompts were reviewed and approved by the aforementioned 10 clinical experts, the doctoral students created standardized prompts for each question, consisting of a unified &#x201C;Background + Question&#x201D; format (<xref ref-type="table" rid="table1">Table 1</xref>). These standardized prompts were then used to generate a comprehensive patient education document addressing most concerns of SCR patients using LLMs.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Content and strategies for asking questions to large language models.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Subject</td><td align="left" valign="bottom">Theme</td><td align="left" valign="bottom">Content</td></tr></thead><tbody><tr><td align="left" valign="top">Background</td><td align="left" valign="top">Clinical case</td><td align="left" valign="top">The patient was diagnosed with a massive rotator cuff tear due to supraspinatus muscle injury. The doctor plans to perform a superior capsular reconstruction surgery on the shoulder joint.</td></tr><tr><td align="left" valign="top">Question 1</td><td align="left" valign="top">Muscle injury</td><td align="left" valign="top">The imaging report says that I have a supraspinatus muscle injury. 
What is the supraspinatus muscle, and what causes this type of injury?</td></tr><tr><td align="left" valign="top">Question 2</td><td align="left" valign="top">Surgical principles and indications</td><td align="left" valign="top">What is the reconstruction of the superior capsule of the shoulder joint, what is the therapeutic principle of the surgery, and what are the indications for the surgery?</td></tr><tr><td align="left" valign="top">Question 3</td><td align="left" valign="top">Graft materials</td><td align="left" valign="top">What are the commonly used graft materials in the reconstruction of the superior capsule of the shoulder joint, and what are the differences between these grafts?</td></tr><tr><td align="left" valign="top">Question 4</td><td align="left" valign="top">Surgical hardware</td><td align="left" valign="top">Besides grafts, does the reconstruction of the superior capsule of the shoulder joint require the use of screws, and do these screws need to be removed in a second surgery?</td></tr><tr><td align="left" valign="top">Question 5</td><td align="left" valign="top">Surgical complications</td><td align="left" valign="top">What are the surgical complications of superior capsule reconstruction of the shoulder joint?</td></tr><tr><td align="left" valign="top">Question 6</td><td align="left" valign="top">Recovery time</td><td align="left" valign="top">How long is the typical recovery time after superior capsule reconstruction surgery of the shoulder joint?</td></tr><tr><td align="left" valign="top">Question 7</td><td align="left" valign="top">Healing issues</td><td align="left" valign="top">What situations can lead to poor healing or failure of the superior capsule reconstruction surgery of the shoulder joint?</td></tr><tr><td align="left" valign="top">Question 8</td><td align="left" valign="top">Autograft risks</td><td align="left" valign="top">In superior capsule reconstruction surgery of the shoulder joint, if an autograft is chosen, what are the 
impacts and risks to the area from which the autologous tissue is harvested?</td></tr><tr><td align="left" valign="top">Question 9</td><td align="left" valign="top">Surgical costs</td><td align="left" valign="top">What are the chargeable items during the superior capsule reconstruction surgery of the shoulder joint, and what surgical consumables are needed?</td></tr><tr><td align="left" valign="top">Question 10</td><td align="left" valign="top">Graft longevity</td><td align="left" valign="top">If the superior capsule reconstruction surgery of the shoulder joint is successful, how long is the lifespan of the implanted graft, and what are the differences between different types of grafts?</td></tr><tr><td align="left" valign="top">Question 11</td><td align="left" valign="top">Anesthesia and hospitalization</td><td align="left" valign="top">What type of anesthesia is required for superior capsule reconstruction surgery, how long does the surgery take, and how long is the hospital stay required?</td></tr><tr><td align="left" valign="top">Document generation request</td><td align="left" valign="top">Education document</td><td align="left" valign="top">Please generate a comprehensive educational document about superior capsule reconstruction surgery of the shoulder joint. This document is to be provided to patients for reading during the preoperative informed consent process.</td></tr></tbody></table></table-wrap></sec><sec id="s2-4"><title>LLM Selection and Prompt Execution</title><p>Both ChatGPT 4 and Claude 3 are among the most popular language models today, with Gemini (formerly known as Bard) also gaining significant traction [<xref ref-type="bibr" rid="ref32">32</xref>]. Studies suggest potential discrepancies in the functionalities of GPT-4 models used on the OpenAI official website [<xref ref-type="bibr" rid="ref33">33</xref>]. 
To mitigate potential systematic errors arising from these discrepancies, we access Claude-3-Opus, GPT-4-Turbo, and Gemini-1.5-Pro through the Poe website. Poe, created by Quora, is a platform that aggregates multiple AI chatbots, enabling users to engage with different AI assistants within a single interface and compare their responses [<xref ref-type="bibr" rid="ref34">34</xref>].</p><p>To ensure that each interaction is independent and unbiased by previous exchanges, the doctoral students perform a &#x201C;clear context&#x201D; operation after each query. This approach ensures that each question and response are treated independently, preventing information carryover from previous interactions, and is informed by other research [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref11">11</xref>]. Since the purpose of our study was to evaluate the ability of pretrained LLMs to handle new tasks, we used LLMs in zero-shot mode. No role-setting instruction (eg, &#x201C;suppose you are a doctor&#x201D; or &#x201C;speak like a doctor&#x201D;) was given to the models before input. The input provided to the LLMs follows a &#x201C;background + question/request&#x201D; format (human message), and the output answers (assistant message) were then collected, ensuring clarity and relevance within each independent interaction.</p></sec><sec id="s2-5"><title>Evaluation of LLM Response Quality</title><p>This study evaluates the quality of patient informed consent documents generated by LLMs from 3 perspectives: physicians&#x2019; assessment, patients&#x2019; assessment, and readability analysis.</p><p>In total, 3 senior doctors evaluated the LLMs&#x2019; responses to the 11 SCR-related questions, assessing them for correctness, completeness, logic, and potential harm using a 5-point Likert scale [<xref ref-type="bibr" rid="ref35">35</xref>]. Physicians also provided an overall satisfaction score using a 10-point Likert scale. 
In addition, to evaluate the quality of health care information provided by each LLM, 2 validated instruments were also used to assess the generated documents: DISCERN (score ranging from 1=low to 5=high for overall information quality) and the Patient Education Materials Assessment Tool (PEMAT) for printable materials (scores of 0%&#x2010;100% for understandability) [<xref ref-type="bibr" rid="ref6">6</xref>]. The PEMAT was able to assess printable and audiovisual understandability, while the DISCERN instrument could review the quality of information for the consumer, particularly with a focus on treatment choices in health information.</p><p>In total, 5 patients who had undergone SCR surgery reviewed the LLM-generated patient education documents, rating their readability and educational value on a 5-point Likert scale and overall satisfaction on a 10-point Likert scale. This aimed to assess the documents&#x2019; clarity and educational value from nonprofessional readers&#x2019; perspectives.</p><p>Finally, a readability analysis of all LLMs&#x2019; responses was conducted using the cntext package [<xref ref-type="bibr" rid="ref36">36</xref>] in R (version 4.4.1), examining sentence structure and evaluating readability via 3 indices: readability 1 (average characters per clause), readability 2 (proportion of adverbs and conjunctions), and readability 3, based on the Fog Index and calculated as half the sum of readability 1 and readability 2. In addition, we applied the &#x201C;py-readability-metrics&#x201D; package, which includes metrics such as the Flesch Reading Ease Score, Flesch-Kincaid Grade Level, and Gunning Fog Index, to evaluate readability.</p></sec><sec id="s2-6"><title>Data Analysis</title><p>Statistical analysis was performed in SPSS (version 26.0; IBM Corp) using nonparametric tests because the data were not normally distributed (Kolmogorov-Smirnov test). 
Mann-Whitney <italic>U</italic> test compared scoring between groups, with significance at <italic>P</italic>&#x003C;.05. Interrater reliability, assessed using Fleiss kappa value, was interpreted as follows: poor agreement (&#x003C;0.01); slight agreement (0.01&#x2010;0.20); fair agreement (0.21&#x2010;0.40); moderate agreement (0.41&#x2010;0.60); substantial agreement (0.61&#x2010;0.80); almost perfect agreement (0.81&#x2010;1.00) [<xref ref-type="bibr" rid="ref7">7</xref>]. GraphPad Prism 8 generated bar charts for visualizing results.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Overview</title><p>Between July 1 and July 14, 2024, we sent invitations to sports medicine experts at various hospitals in the South China region for a webinar held on July 18. During this meeting, we discussed 11 key issues and formulated 12 strategies for sending inquiry requests as part of our study. From July 20 to August 1, 2024, we posed 11 surgery-related questions about SCR and requested the creation of preoperative patient education documents through the Poe website to 3 different LLMs: Claude-3-Opus, GPT-4-Turbo, and Gemini-1.5-Pro. These models collectively produced 33 responses and 3 preoperative patient education documents. From August 10 to August 17, 2024, three experienced sports medicine clinicians, who are not from the same institution, along with 5 patients who had undergone SCR surgery, evaluated the responses and documents provided by the LLMs.</p></sec><sec id="s3-2"><title>Evaluations From the Subjective Perspective of Doctors</title><p>In total, 3 professional sports medicine doctors first evaluated the responses of 3 different LLMs to 11 inquiries. The evaluations focused on accuracy, completeness, logicality, potential risk, and overall rating. 
The results showed that Gemini&#x2019;s responses were significantly superior to Claude&#x2019;s in all evaluated categories including accuracy (mean 5.00, SD 0.00 vs mean 4.48, SD 0.83; <italic>P</italic>&#x003C;.001), completeness (mean 4.88, SD 0.33 vs mean 4.39, SD 0.70; <italic>P</italic>=.001), logicality (mean 5.00, SD 0.00 vs mean 4.70, SD 0.59; <italic>P</italic>&#x003C;.01), potential risk (mean 5.00, SD 0.00 vs mean 4.73, SD 0.57; <italic>P</italic>&#x003C;.01), and overall rating (mean 9.88, SD 0.42 vs mean 9.03, SD 1.31; <italic>P</italic>=.001; <xref ref-type="fig" rid="figure2">Figures 2A and 2B</xref>). Compared to GPT, Gemini&#x2019;s responses were superior in all categories, with significant differences noted in completeness (mean 4.88, SD 0.33 vs mean 4.55, SD 0.67; <italic>P</italic>=.02), potential risk (mean 5.00, SD 0.00 vs mean 4.67, SD 0.82; <italic>P</italic>=.01), and overall rating (mean 9.88, SD 0.42 vs mean 9.24, SD 1.30; <italic>P</italic>=.01; <xref ref-type="fig" rid="figure2">Figures 2A and 2B</xref>). GPT&#x2019;s responses, when compared to Claude&#x2019;s, were superior in accuracy (<italic>P</italic>=.03), completeness (<italic>P</italic>=.34), logicality (<italic>P</italic>=.11), and overall rating (<italic>P</italic>=.42); however, Claude was rated higher in potential risk (<italic>P</italic>=.85; <xref ref-type="fig" rid="figure2">Figures 2A and 2B</xref>). Of these differences, only the difference in accuracy was statistically significant (<xref ref-type="fig" rid="figure2">Figures 2A and 2B</xref>).</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Quality evaluation results from doctors and patients for 11 questions generated by 3 large language models. (<bold>A-B</bold>) Evaluation from the doctor&#x2019;s perspective; (<bold>C-D</bold>) evaluation from the patient&#x2019;s perspective. n.s.: 
not significant; *<italic>P</italic>&#x003C;.05, **<italic>P</italic>&#x003C;.01, ***<italic>P</italic>&#x003C;.001.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="periop_v8i1e70047_fig02.png"/></fig><p>In terms of the PEMAT scores for the preoperative patient education materials generated by each LLM, Gemini scored higher than GPT (mean 1.00, SD 0.00 vs mean 0.91, SD 0.09; <italic>P</italic>=.12), and GPT scored higher than Claude (mean 0.91, SD 0.09 vs mean 0.79, SD 0.10; <italic>P</italic>=.18), with only the difference between Gemini and Claude (mean 1.00, SD 0.00 vs mean 0.79, SD 0.10; <italic>P</italic>=.03) being statistically significant (<xref ref-type="fig" rid="figure3">Figure 3</xref>). Regarding the DISCERN scores, Claude achieved the highest overall score, followed by Gemini and then GPT, though these differences were not statistically significant (<xref ref-type="table" rid="table2">Table 2</xref>). In the item of the DISCERN which represents overall satisfaction (the 16th question presented in <xref ref-type="table" rid="table2">Table 2</xref>), Gemini scored the highest, while GPT and Claude scored the same, with no statistical significance in the differences. The consistency among the 3 evaluators was high, with no instances of &#x201C;Poor agreement&#x201D; or &#x201C;Slight agreement&#x201D; in their assessments (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>).</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>PEMAT scoring percentage for the patient education document generated by three large language models. 
n.s.: not significant; *<italic>P</italic>&#x003C;.05, **<italic>P</italic>&#x003C;.01, ***<italic>P</italic>&#x003C;.001.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="periop_v8i1e70047_fig03.png"/></fig><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Quality grades for section 2 of the DISCERN Tool.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Section 2. How good is the quality of information on treatment choices?</td><td align="left" valign="bottom">Claude-3-Opus,<break/>Median (IQR)</td><td align="left" valign="bottom">GPT-4-Turbo,<break/>Median (IQR)</td><td align="left" valign="bottom">Gemini-1.5-Pro,<break/>Median (IQR)</td><td align="left" valign="bottom">Claude versus GPT, <italic>P</italic> value</td><td align="left" valign="bottom">Claude versus Gemini, <italic>P</italic> value</td><td align="left" valign="bottom">GPT versus Gemini, <italic>P</italic> value</td></tr></thead><tbody><tr><td align="left" valign="top">Does it describe how each treatment works?</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">5 (4-5)</td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td><td align="left" valign="top">.09</td><td align="left" valign="top">.09</td></tr><tr><td align="left" valign="top">Does it describe the benefits of each treatment?</td><td align="left" valign="top">4 (3-5)</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">1 (1-1)</td><td align="left" valign="top">.64</td><td align="left" valign="top">.04</td><td align="left" valign="top">.03</td></tr><tr><td align="left" valign="top">Does it describe the risks of each treatment?</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">3 (2-3)</td><td align="left" valign="top">5 (4-5)</td><td 
align="left" valign="top">.09</td><td align="left" valign="top">.09</td><td align="left" valign="top">.04</td></tr><tr><td align="left" valign="top">Does it describe what would happen if no treatment is used?</td><td align="left" valign="top">1 (1-1)</td><td align="left" valign="top">1 (1-1)</td><td align="left" valign="top">1 (1-1)</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top">Does it describe how the treatment choices affect overall quality of life?</td><td align="left" valign="top">1 (1-1)</td><td align="left" valign="top">1 (1-1)</td><td align="left" valign="top">1 (1-1)</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top">Is it clear that there may be more than one possible treatment choice?</td><td align="left" valign="top">1 (1-1)</td><td align="left" valign="top">1 (1-1)</td><td align="left" valign="top">1 (1-1)</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top">Does it provide support for shared decision-making?</td><td align="left" valign="top">3 (3-4)</td><td align="left" valign="top">3 (2-3)</td><td align="left" valign="top">3 (2-3)</td><td align="left" valign="top">.32</td><td align="left" valign="top">.20</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top">Based on the answers to all of the above questions, rate the overall quality of the publication as a source of information about treatment choices.</td><td align="left" valign="top">3 (3-4)</td><td align="left" valign="top">3 (3-4)</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">.46</td><td align="left" 
valign="top">.46</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>Not applicable.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-3"><title>Evaluations From the Subjective Perspective of Patients</title><p>In the ratings provided by 5 follow-up patients for the preoperative patient education materials generated by the LLMs, Gemini scored higher than GPT and Claude across all parameters, including readability, educational quality, and overall rating (<xref ref-type="fig" rid="figure2">Figures 2C and 2D</xref>). Among these, the difference in educational quality between Gemini and Claude (mean 4.00, SD 0.00 vs mean 3.60, SD 0.55; <italic>P</italic>=.02) was statistically significant (<xref ref-type="fig" rid="figure2">Figures 2C and 2D</xref>). Furthermore, Gemini&#x2019;s advantage in overall satisfaction when compared to both Claude (mean 8.80, SD 0.45 vs mean 6.80, SD 1.10; <italic>P</italic>&#x003C;.01) and GPT (mean 8.80, SD 0.45 vs mean 7.20, SD 0.84; <italic>P</italic>=.01) also showed statistical significance (<xref ref-type="fig" rid="figure2">Figures 2C and 2D</xref>). The consistency of all ratings given by the 5 follow-up patients was evaluated as &#x201C;Fair agreement&#x201D; (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>).</p></sec><sec id="s3-4"><title>Objective Evaluations of Readability</title><p>Based on the analysis methods of the cntext package, readability was assessed from 3 perspectives, namely readability 1, readability 2, and readability 3. 
Under these assessments, GPT&#x2019;s readability is higher than that of Gemini (readability 1: mean 36.38, SD 7.47 vs mean 31.39, SD 7.20, <italic>P</italic>=.18; readability 2: mean 2.09, SD 0.71 vs mean 1.55, SD 0.51, <italic>P</italic>=.09; readability 3: mean 19.24, SD 4.07 vs mean 16.47, SD 3.77, <italic>P</italic>=.17) and Claude (readability 1: mean 36.38, SD 7.47 vs mean 28.05, SD 6.43, <italic>P</italic>&#x003C;.01; readability 2: mean 2.09, SD 0.71 vs mean 1.21, SD 0.42, <italic>P</italic>&#x003C;.01; readability 3: mean 19.24, SD 4.07 vs mean 14.63, SD 3.40, <italic>P</italic>&#x003C;.01), with the difference between GPT and Claude being statistically significant (<xref ref-type="fig" rid="figure4">Figure 4</xref>). Although Gemini&#x2019;s readability is higher than Claude&#x2019;s, the difference is not statistically significant (<xref ref-type="fig" rid="figure4">Figure 4</xref>). However, when readability was assessed using py-readability metrics, there was no statistically significant difference among the 3 LLMs (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>).</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Comparison of the results of text readability analysis from three analytical perspectives using the cntext package in R software. n.s.: not significant; *<italic>P</italic>&#x003C;.05, **<italic>P</italic>&#x003C;.01, ***<italic>P</italic>&#x003C;.001.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="periop_v8i1e70047_fig04.png"/></fig></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>The main findings of our study are as follows: (1) the three LLMs (Claude-3-Opus, GPT-4-Turbo, and Gemini-1.5-Pro) demonstrated good overall potential for application in patient education for SCR surgery. 
They were able to generate answers to 11 SCR-related questions and create standardized preoperative informed consent patient education documents. (2) In the subjective evaluations by professional sports medicine clinicians and patients who had undergone SCR surgery, Gemini slightly outperformed GPT and Claude in multiple dimensions, including accuracy, completeness, logic, potential risks, and overall satisfaction. (3) In this study, the 3 LLMs did not proactively provide evidence sources when answering questions and generating patient education documents. If LLMs are to be used to assist with patient education in clinical applications, it may be necessary to specifically require LLMs to cite information sources to enable doctors and patients to judge the authority and reliability of the content. (4) Although Gemini performed best in the ratings for SCR patient education-related tasks, considering the complexity and potential risks of LLMs in medical applications, clinicians still need to carefully review and make necessary corrections to the content generated by LLMs to ensure the professionalism and reasonableness of patient education materials. LLMs should be positioned as assistive tools rather than decision-making entities in clinical applications.</p><p>LLMs have proven to be reliable sources of information for orthopedic surgery-related questions, creating patient education documents that enhance the understanding of diagnostic and therapeutic processes for nonprofessionals and improve the readability of educational materials [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref38">38</xref>]. However, evaluating the quality of responses from LLMs is not straightforward. 
Researchers assessed ChatGPT 3.5&#x2019;s medical knowledge by using clinical standards and licensing examination questions to evaluate its theoretical understanding and practical application [<xref ref-type="bibr" rid="ref39">39</xref>]. With the advent of ChatGPT 4.0 and the iterative upgrades of various LLMs from different companies, there has been a growing recognition and exploration of the expanded pretraining data and enhanced text processing capabilities of the latest LLM versions in different clinical scenarios [<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref41">41</xref>]. Scholars have realized that the quality of LLM responses is influenced by multiple factors, including the amount of information in the query [<xref ref-type="bibr" rid="ref42">42</xref>], the questioning strategy [<xref ref-type="bibr" rid="ref43">43</xref>], and many unpredictable elements [<xref ref-type="bibr" rid="ref44">44</xref>]. These unpredictable elements are evident when, under controlled conditions with all variables constant, the same question yields different answers and shows varying styles of text presentation. Consequently, while researchers have acknowledged the capabilities of LLMs in diagnosing, treating, and creating educational documents across disciplines, they continue to reject the idea of LLMs performing independent medical actions, affirming their role solely as an auxiliary tool in the hands of professionals [<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref46">46</xref>].</p><p>This study aims to assess the feasibility of using three popular LLMs as auxiliary tools for sports medicine physicians during the informed consent process for patients undergoing SCR. In this study, physicians use LLMs primarily to assess the accuracy and comprehensiveness of the information and to clarify content. 
Unlike previous studies that evaluated answer readability solely through software analysis of word and sentence structure [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref47">47</xref>], this study also included follow-up visits with SCR patients post surgery, where patients subjectively assessed the readability and educational significance of the information. Patient ratings primarily focused on the presurgical educational materials generated by LLMs, excluding the evaluation of 11 specific questions, as the answers to these questions required physician assessment of accuracy and comprehensiveness and clarification before clinical use. Without this step by physicians, patients, who are not medical professionals, might not be able to accurately assess the details of the questions. Although all 3 models performed satisfactorily in evaluating &#x201C;potential risks,&#x201D; this does not imply that patients can rely on LLMs as their sole source of medical advice. We believe that the SCR medical decision-making process, which does not involve extensive use of medications and auxiliary treatments pre- and post-surgery and follows a &#x201C;surgery-rehabilitation&#x201D; model, does not necessitate the phase-wise, continuous assessments and patient education required for conditions like cancer.</p><p>Despite the potential benefits of using LLMs in patient education, several ethical and privacy issues need to be addressed before their widespread application. The accuracy and reliability of the information generated by LLMs are critical, especially in sensitive medical contexts. To enhance their accuracy, strategies such as retrieving pertinent information from credible, external data sources before generating text can be incorporated into subsequent versions of LLMs. In addition, patient privacy is a fundamental concern when using LLMs in medical settings. 
LLMs may require access to patient data to generate personalized and relevant information. However, this access must be strictly regulated to prevent unauthorized use or disclosure of sensitive patient information.</p><p>In addition, our &#x201C;Prompt Execution&#x201D; phase revealed that without background information, LLMs occasionally misidentify SCR as a supraspinatus repair surgery under patch bridging, leading to content generation biases. We consider such biases to be system errors caused by human operational mistakes, which can be avoided by adjusting prompt strategies under the guidance of subject matter experts. Therefore, using LLMs for specialist information retrieval is not without its challenges, and we believe that merely relying on LLM-generated disclaimers like &#x201C;I am not a medical professional; if you feel unwell, please seek medical attention immediately&#x201D; at the end of responses is insufficient [<xref ref-type="bibr" rid="ref28">28</xref>]. The mitigation of these errors can be facilitated through the use of techniques such as fine-tuning and retrieval-augmented generation. Fine-tuning entails training the LLM on a smaller, highly specialized dataset that has been meticulously curated to capture the intricate details of the medical domain, and retrieval-augmented generation can address issues of hallucinations by first retrieving pertinent information from credible, external data sources before generating text. Incorporating these strategies into subsequent versions of LLMs has the potential to enhance their accuracy and reliability, particularly in sensitive applications such as patient education. A thorough examination would offer valuable insights into refining these models to deliver precise and trustworthy information within medical contexts.</p><p>Our study also reveals critical gaps in how LLMs are used in medical settings, particularly in presurgical patient education. 
LLMs often do not provide sources for their information, and their responses can include inaccuracies or fabricated sources, known as &#x201C;hallucinations&#x201D; [<xref ref-type="bibr" rid="ref48">48</xref>]. This issue is exacerbated when users do not specifically ask for sources, leading LLMs to sometimes provide outdated or irrelevant information [<xref ref-type="bibr" rid="ref48">48</xref>,<xref ref-type="bibr" rid="ref49">49</xref>]. Furthermore, the LLMs in the study failed to discuss alternative treatments, benefits, and risks associated with not undergoing specific surgeries like SCR. This omission is significant as discussing these elements is essential for informed medical decision-making and respects patient rights to understand all available options. Given these limitations, LLMs should not independently manage diagnosis or patient education. Instead, they should serve as supplementary tools, aiding health care professionals who can provide the necessary context, accuracy, and depth in patient interactions. This approach ensures that patient education remains thorough, accurate, and ethically conducted, aligning with medical standards and patient rights. This challenge can be tackled through the application of more advanced prompt engineering methodologies, the integration of contextual reasoning capabilities, and the implementation of step-by-step guidance mechanisms. By engaging in multiple iterative interactions with the model, it becomes possible to refine its responses and produce more comprehensive information, encompassing alternative treatment options, based on the specific inputs provided by the user. Such an approach would empower the LLM to deliver content that is more personalized, well-informed, and balanced. Moreover, the development of LLM-Agents offers a compelling solution to the limitations of LLMs in sensitive domains like medical decision-making. 
By integrating planning, memory, tool use, and agent or brain components, these agents can enhance their ability to provide accurate, verified information. This not only supports human expertise but also ensures that the information presented is transparent and evidence-backed. As research continues, the full potential of integrating citation capabilities within LLM-Agents should be explored to further improve their reliability and trustworthiness in high-stakes contexts.</p><p>With the evolution of internet technology, we have witnessed a transition from Web1.0 to Web2.0, and the ways we access information have dramatically changed&#x2014;from relying on traditional media to accessing massive amounts of information anytime and anywhere via the internet, social media, and personal media platforms [<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref51">51</xref>]. Particularly on social media and personal media platforms, we can find questions similar to our own and the corresponding responses [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref51">51</xref>]. However, the accuracy and comprehensiveness of information obtained in this manner can be uncertain [<xref ref-type="bibr" rid="ref51">51</xref>]. Online responses vary greatly in quality, lacking systematic organization and authority, and the response time and outcomes of further inquiries are unpredictable. Studies have shown that answers from ChatGPT 3.5 are more comprehensive and empathetic than those from certified physicians on Reddit forums; however, despite demonstrating high quality in assessing dementia care issues, they fall slightly short in predicting potential future problems [<xref ref-type="bibr" rid="ref52">52</xref>,<xref ref-type="bibr" rid="ref53">53</xref>]. 
When comparing responses from ChatGPT 4.0, 3.5, and those on Reddit, ChatGPT 4.0&#x2019;s responses significantly surpassed the others, reaching a new level of excellence [<xref ref-type="bibr" rid="ref54">54</xref>]. In responding to patient inquiries, LLMs also perform more accurately than Google searches and are easier to read [<xref ref-type="bibr" rid="ref27">27</xref>]. However, they also share a common drawback: the use of LLMs in medical consultations is best accompanied by professional medical personnel to &#x201C;clarify&#x201D; the responses [<xref ref-type="bibr" rid="ref31">31</xref>]. Therefore, LLMs are not suitable for independently handling any part of the diagnostic or treatment process within the medical system, but they are better suited as tools to enhance the efficiency of professional medical personnel or as mediums for personalized patient communication and education [<xref ref-type="bibr" rid="ref55">55</xref>,<xref ref-type="bibr" rid="ref56">56</xref>].</p><p>As technology continues to advance, hospitals are consistently innovating in all aspects of clinical diagnosis and treatment to enhance diagnostic accuracy, treatment outcomes, and patient satisfaction, representing an unstoppable trend in health care innovation [<xref ref-type="bibr" rid="ref57">57</xref>,<xref ref-type="bibr" rid="ref58">58</xref>]. However, balancing standardized processes with personalized patient needs often presents a challenge [<xref ref-type="bibr" rid="ref59">59</xref>]. LLMs present an opportunity to potentially maintain standardized quality in their responses while also accommodating personalized requests. LLMs, encompassing both free and paid versions, are generally accessible to the public as open platforms [<xref ref-type="bibr" rid="ref60">60</xref>]. 
Although current research does not support its use in guiding clinical decisions [<xref ref-type="bibr" rid="ref61">61</xref>], using ChatGPT in doctor-patient communication benefits both doctors and patients [<xref ref-type="bibr" rid="ref7">7</xref>]. Doctors can interpret and supplement ChatGPT&#x2019;s responses based on their clinical experience, offering more personalized consultations to patients [<xref ref-type="bibr" rid="ref31">31</xref>]. In addition, patients reduce their need to search for information on the internet, and their trust in physicians may be enhanced with the objective evidence provided by AI. Under the joint oversight of doctors and patients, the advantages of artificial intelligence can be fully used [<xref ref-type="bibr" rid="ref62">62</xref>]. Nevertheless, the widespread adoption and application of LLMs still face technical and policy limitations. Technical limitations include differences in handling inputs in various languages [<xref ref-type="bibr" rid="ref63">63</xref>], performance discrepancies between proprietary and open-source models [<xref ref-type="bibr" rid="ref64">64</xref>], and the occurrence of &#x201C;hallucinations&#x201D; when faced with biased questions [<xref ref-type="bibr" rid="ref65">65</xref>]. Since commonly used LLMs like GPT, Gemini, and Claude are proprietary, and these models are trained with significantly more data than open-source models, we can only continue to explore ways to avoid &#x201C;hallucinations&#x201D; instead of fixing the root cause of such issues [<xref ref-type="bibr" rid="ref66">66</xref>,<xref ref-type="bibr" rid="ref67">67</xref>]. In addition, policy restrictions cannot be ignored [<xref ref-type="bibr" rid="ref68">68</xref>]. 
Health systems and hospitals need to develop detailed policies to regulate the clinical auxiliary use of LLMs, including ensuring patient informed consent, standardized user training, and the preservation of usage records [<xref ref-type="bibr" rid="ref7">7</xref>]. Sound policies are essential to ensure the appropriate and efficient use of tools [<xref ref-type="bibr" rid="ref65">65</xref>,<xref ref-type="bibr" rid="ref68">68</xref>]. Through these measures, the safety of LLM applications in the medical field can be effectively enhanced, protecting patient rights while improving the efficiency and quality of doctor-patient communication [<xref ref-type="bibr" rid="ref47">47</xref>,<xref ref-type="bibr" rid="ref69">69</xref>].</p></sec><sec id="s4-2"><title>Limitations</title><p>This study has several limitations. First, both the linguistic input and the analyzed responses were in Chinese. On one hand, this choice was made to facilitate assessments by Chinese-speaking clinical experts and patients during follow-ups. On the other hand, input in different languages could introduce potential errors and biases. Second, this research only explores the feasibility of using LLMs to generate content related to SCR for patient education. The variability in surgical procedures and specialties could pose distinct challenges in patient education, which means the conclusions drawn from this study cannot be simply generalized to other disciplines. Finally, during the &#x201C;Prompts Development&#x201D; phase, it was found that without additional background information, SCRs are prone to be misidentified by LLMs as bridge suture repairs of the supraspinatus muscle. 
However, since all 3 models used were proprietary, we opted for a &#x201C;Background+ Question&#x201D; approach to mitigate this systematic error, without being able to investigate the reasons behind such occurrences.</p></sec><sec id="s4-3"><title>Conclusions</title><p>Claude-3-Opus, GPT-4-Turbo, and Gemini-1.5-Pro effectively addressed patient queries and generated readable presurgical education materials. However, they lacked citations and failed to explore alternative treatments, benefits, and potential risks of forgoing SCR surgery. While these LLMs can serve as valuable aids for physicians, they should not be used as standalone tools for patient education without expert oversight to ensure comprehensive and accurate information is provided.</p></sec></sec></body><back><ack><p>We would like to express our deepest gratitude to all the experts and patients who have contributed to this research.</p></ack><notes><sec><title>Data Availability</title><p>All data included in this study are available upon request by contact with the corresponding author.</p></sec></notes><fn-group><fn fn-type="other"><label>Author Note</label><p>The subjects of this study are LLMs (large language models). Besides being used as operational models, LLMs also serve as tools for translating Chinese content into English, as detailed in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. The specific types of models used, the websites they are accessed through, and their methods of use are all mentioned in the relevant sections. 
Beyond these functions, LLMs do not influence the generation of the article&#x2019;s content in any other way.</p></fn><fn fn-type="con"><p>Conceptualization: WY Gan, H Li, JF Ouyang</p><p>Methodology: WY Gan, H Li, JF Ouyang</p><p>Supervision: XF Zheng</p><p>Visualization: YK Liu</p><p>Writing&#x2014;original draft: WY Gan, H Li, JF Ouyang, YK Liu</p><p>Writing&#x2014;reviewing and editing: WY Gan, H Li, JF Ouyang, YK Liu, ZW Xue, M Wang, HB He, B Song, XF Zheng</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb2">PEMAT</term><def><p>Patient Education Materials Assessment Tool</p></def></def-item><def-item><term id="abb3">SCR</term><def><p>superior capsular reconstruction</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Flaharty</surname><given-names>KA</given-names> </name><name name-style="western"><surname>Hu</surname><given-names>P</given-names> </name><name name-style="western"><surname>Hanchard</surname><given-names>SL</given-names> </name><etal/></person-group><article-title>Evaluating large language models on medical, lay-language, and self-reported descriptions of genetic conditions</article-title><source>Am J Hum Genet</source><year>2024</year><month>09</month><day>5</day><volume>111</volume><issue>9</issue><fpage>1819</fpage><lpage>1833</lpage><pub-id pub-id-type="doi">10.1016/j.ajhg.2024.07.011</pub-id><pub-id pub-id-type="medline">39146935</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rengers</surname><given-names>TA</given-names> </name><name 
name-style="western"><surname>Thiels</surname><given-names>CA</given-names> </name><name name-style="western"><surname>Salehinejad</surname><given-names>H</given-names> </name></person-group><article-title>Academic Surgery in the Era of Large Language Models: A Review</article-title><source>JAMA Surg</source><year>2024</year><month>04</month><day>1</day><volume>159</volume><issue>4</issue><fpage>445</fpage><lpage>450</lpage><pub-id pub-id-type="doi">10.1001/jamasurg.2023.6496</pub-id><pub-id pub-id-type="medline">38353991</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chow</surname><given-names>R</given-names> </name><name name-style="western"><surname>Hasan</surname><given-names>S</given-names> </name><name name-style="western"><surname>Zheng</surname><given-names>A</given-names> </name><etal/></person-group><article-title>The Accuracy of Artificial Intelligence ChatGPT in Oncology Examination Questions</article-title><source>J Am Coll Radiol</source><year>2024</year><month>11</month><volume>21</volume><issue>11</issue><fpage>1800</fpage><lpage>1804</lpage><pub-id pub-id-type="doi">10.1016/j.jacr.2024.07.011</pub-id><pub-id pub-id-type="medline">39098369</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Eng</surname><given-names>E</given-names> </name><name name-style="western"><surname>Mowers</surname><given-names>C</given-names> </name><name name-style="western"><surname>Sachdev</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Chat Generative Pre-Trained Transformer (ChatGPT) &#x2013; 3.5 Responses Require Advanced Readability for the General Population and May Not Effectively Supplement Patient-Related Information Provided by the Treating Surgeon Regarding Common Questions About 
Rotator Cuff Repair</article-title><source>Arthroscopy: The Journal of Arthroscopic &#x0026; Related Surgery</source><year>2025</year><month>01</month><volume>41</volume><issue>1</issue><fpage>42</fpage><lpage>52</lpage><pub-id pub-id-type="doi">10.1016/j.arthro.2024.05.009</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mika</surname><given-names>AP</given-names> </name><name name-style="western"><surname>Martin</surname><given-names>JR</given-names> </name><name name-style="western"><surname>Engstrom</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Polkowski</surname><given-names>GG</given-names> </name><name name-style="western"><surname>Wilson</surname><given-names>JM</given-names> </name></person-group><article-title>Assessing ChatGPT Responses to Common Patient Questions Regarding Total Hip Arthroplasty</article-title><source>Journal of Bone and Joint Surgery</source><year>2023</year><volume>105</volume><issue>19</issue><fpage>1519</fpage><lpage>1526</lpage><pub-id pub-id-type="doi">10.2106/JBJS.23.00209</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pan</surname><given-names>A</given-names> </name><name name-style="western"><surname>Musheyev</surname><given-names>D</given-names> </name><name name-style="western"><surname>Bockelman</surname><given-names>D</given-names> </name><name name-style="western"><surname>Loeb</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kabarriti</surname><given-names>AE</given-names> </name></person-group><article-title>Assessment of Artificial Intelligence Chatbot Responses to Top Searched Queries About Cancer</article-title><source>JAMA 
Oncol</source><year>2023</year><month>10</month><day>1</day><volume>9</volume><issue>10</issue><fpage>1437</fpage><lpage>1440</lpage><pub-id pub-id-type="doi">10.1001/jamaoncol.2023.2947</pub-id><pub-id pub-id-type="medline">37615960</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Xue</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Gan</surname><given-names>W</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>H</given-names> </name><name name-style="western"><surname>She</surname><given-names>G</given-names> </name><name name-style="western"><surname>Zheng</surname><given-names>X</given-names> </name></person-group><article-title>Quality and Dependability of ChatGPT and DingXiangYuan Forums for Remote Orthopedic Consultations: Comparative Analysis</article-title><source>J Med Internet Res</source><year>2024</year><month>03</month><day>14</day><volume>26</volume><fpage>e50882</fpage><pub-id pub-id-type="doi">10.2196/50882</pub-id><pub-id pub-id-type="medline">38483451</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gertz</surname><given-names>RJ</given-names> </name><name name-style="western"><surname>Dratsch</surname><given-names>T</given-names> </name><name name-style="western"><surname>Bunck</surname><given-names>AC</given-names> </name><etal/></person-group><article-title>Potential of GPT-4 for detecting errors in radiology reports: Implications for reporting accuracy</article-title><source>Radiology</source><year>2024</year><month>04</month><volume>311</volume><issue>1</issue><fpage>e232714</fpage><pub-id pub-id-type="doi">10.1148/radiol.232714</pub-id><pub-id 
pub-id-type="medline">38625012</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Maida</surname><given-names>M</given-names> </name><name name-style="western"><surname>Ramai</surname><given-names>D</given-names> </name><name name-style="western"><surname>Mori</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>The role of generative language systems in increasing patient awareness of colon cancer screening</article-title><source>Endoscopy</source><year>2025</year><month>03</month><volume>57</volume><issue>3</issue><fpage>262</fpage><lpage>268</lpage><pub-id pub-id-type="doi">10.1055/a-2388-6084</pub-id><pub-id pub-id-type="medline">39142348</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ebel</surname><given-names>S</given-names> </name><name name-style="western"><surname>Ehrengut</surname><given-names>C</given-names> </name><name name-style="western"><surname>Denecke</surname><given-names>T</given-names> </name><name name-style="western"><surname>G&#x00F6;&#x00DF;mann</surname><given-names>H</given-names> </name><name name-style="western"><surname>Beeskow</surname><given-names>AB</given-names> </name></person-group><article-title>GPT-4o&#x2019;s competency in answering the simulated written European Board of Interventional Radiology exam compared to a medical student and experts in Germany and its ability to generate exam items on interventional radiology: a descriptive study</article-title><source>J Educ Eval Health Prof</source><year>2024</year><volume>21</volume><fpage>21</fpage><pub-id pub-id-type="doi">10.3352/jeehp.2024.21.21</pub-id><pub-id pub-id-type="medline">39161266</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gan</surname><given-names>W</given-names> </name><name name-style="western"><surname>Ouyang</surname><given-names>J</given-names> </name><name name-style="western"><surname>Li</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Integrating ChatGPT in orthopedic education for medical undergraduates: Randomized controlled trial</article-title><source>J Med Internet Res</source><year>2024</year><month>08</month><day>20</day><volume>26</volume><fpage>e57037</fpage><pub-id pub-id-type="doi">10.2196/57037</pub-id><pub-id pub-id-type="medline">39163598</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mihata</surname><given-names>T</given-names> </name><name name-style="western"><surname>McGarry</surname><given-names>MH</given-names> </name><name name-style="western"><surname>Pirolo</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Kinoshita</surname><given-names>M</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>TQ</given-names> </name></person-group><article-title>Superior capsule reconstruction to restore superior stability in irreparable rotator cuff tears: a biomechanical cadaveric study</article-title><source>Am J Sports Med</source><year>2012</year><month>10</month><volume>40</volume><issue>10</issue><fpage>2248</fpage><lpage>2255</lpage><pub-id pub-id-type="doi">10.1177/0363546512456195</pub-id><pub-id pub-id-type="medline">22886689</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Cline</surname><given-names>KE</given-names> </name><name name-style="western"><surname>Tibone</surname><given-names>JE</given-names> </name><name name-style="western"><surname>Ihn</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Superior Capsule Reconstruction Using Fascia Lata Allograft Compared With Double- and Single-Layer Dermal Allograft: A Biomechanical Study</article-title><source>Arthroscopy: The Journal of Arthroscopic &#x0026; Related Surgery</source><year>2021</year><month>04</month><volume>37</volume><issue>4</issue><fpage>1117</fpage><lpage>1125</lpage><pub-id pub-id-type="doi">10.1016/j.arthro.2020.11.054</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mihata</surname><given-names>T</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>TQ</given-names> </name><name name-style="western"><surname>Hasegawa</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Arthroscopic superior capsule reconstruction for irreparable rotator cuff tears: Comparison of clinical outcomes with and without subscapularis tear</article-title><source>Am J Sports Med</source><year>2020</year><month>12</month><volume>48</volume><issue>14</issue><fpage>3429</fpage><lpage>3438</lpage><pub-id pub-id-type="doi">10.1177/0363546520965993</pub-id><pub-id pub-id-type="medline">33104385</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Claro</surname><given-names>R</given-names> </name><name name-style="western"><surname>Fonte</surname><given-names>H</given-names> </name></person-group><article-title>Superior capsular reconstruction: current evidence and limits</article-title><source>EFORT Open 
Rev</source><year>2023</year><month>05</month><day>9</day><volume>8</volume><issue>5</issue><fpage>340</fpage><lpage>350</lpage><pub-id pub-id-type="doi">10.1530/EOR-23-0027</pub-id><pub-id pub-id-type="medline">37158430</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mihata</surname><given-names>T</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>TQ</given-names> </name><name name-style="western"><surname>Watanabe</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Clinical results of arthroscopic superior capsule reconstruction for irreparable rotator cuff tears</article-title><source>Arthroscopy: The Journal of Arthroscopic &#x0026; Related Surgery</source><year>2013</year><month>03</month><volume>29</volume><issue>3</issue><fpage>459</fpage><lpage>470</lpage><pub-id pub-id-type="doi">10.1016/j.arthro.2012.10.022</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hirahara</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Andersen</surname><given-names>WJ</given-names> </name><name name-style="western"><surname>Panero</surname><given-names>AJ</given-names> </name></person-group><article-title>Superior capsular reconstruction: Clinical outcomes after minimum 2-year follow-up</article-title><source>Am J Orthop (Belle Mead NJ)</source><year>2017</year><volume>46</volume><issue>6</issue><fpage>266</fpage><lpage>278</lpage><pub-id pub-id-type="medline">29309442</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Snyder</surname><given-names>SJ</given-names> </name><name 
name-style="western"><surname>Arnoczky</surname><given-names>SP</given-names> </name><name name-style="western"><surname>Bond</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Dopirak</surname><given-names>R</given-names> </name></person-group><article-title>Histologic evaluation of a biopsy specimen obtained 3 months after rotator cuff augmentation with GraftJacket Matrix</article-title><source>Arthroscopy: The Journal of Arthroscopic &#x0026; Related Surgery</source><year>2009</year><month>03</month><volume>25</volume><issue>3</issue><fpage>329</fpage><lpage>333</lpage><pub-id pub-id-type="doi">10.1016/j.arthro.2008.05.023</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Edwards</surname><given-names>PK</given-names> </name><name name-style="western"><surname>Mears</surname><given-names>SC</given-names> </name><name name-style="western"><surname>Lowry Barnes</surname><given-names>C</given-names> </name></person-group><article-title>Preoperative education for hip and knee replacement: Never stop learning</article-title><source>Curr Rev Musculoskelet Med</source><year>2017</year><month>09</month><volume>10</volume><issue>3</issue><fpage>356</fpage><lpage>364</lpage><pub-id pub-id-type="doi">10.1007/s12178-017-9417-4</pub-id><pub-id pub-id-type="medline">28647838</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Alattas</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Smith</surname><given-names>T</given-names> </name><name name-style="western"><surname>Bhatti</surname><given-names>M</given-names> </name><name name-style="western"><surname>Wilson-Nunn</surname><given-names>D</given-names> </name><name 
name-style="western"><surname>Donell</surname><given-names>S</given-names> </name></person-group><article-title>Greater pre-operative anxiety, pain and poorer function predict a worse outcome of a total knee arthroplasty</article-title><source>Knee Surg Sports Traumatol Arthrosc</source><year>2017</year><month>11</month><volume>25</volume><issue>11</issue><fpage>3403</fpage><lpage>3410</lpage><pub-id pub-id-type="doi">10.1007/s00167-016-4314-8</pub-id><pub-id pub-id-type="medline">27734110</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Krebs</surname><given-names>ED</given-names> </name><name name-style="western"><surname>Hoang</surname><given-names>SC</given-names> </name></person-group><article-title>Informed consent and shared decision making in the perioperative environment</article-title><source>Clin Colon Rectal Surg</source><year>2023</year><month>05</month><volume>36</volume><issue>03</issue><fpage>223</fpage><lpage>228</lpage><pub-id pub-id-type="doi">10.1055/s-0043-1761158</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Noble</surname><given-names>PC</given-names> </name><name name-style="western"><surname>Fuller-Lafreniere</surname><given-names>S</given-names> </name><name name-style="western"><surname>Meftah</surname><given-names>M</given-names> </name><name name-style="western"><surname>Dwyer</surname><given-names>MK</given-names> </name></person-group><article-title>Challenges in outcome measurement: Discrepancies between patient and provider definitions of success</article-title><source>Clin Orthop Relat Res</source><year>2013</year><volume>471</volume><issue>11</issue><fpage>3437</fpage><lpage>3445</lpage><pub-id pub-id-type="doi">10.1007/s11999-013-3198-x</pub-id></nlm-citation></ref><ref 
id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Villanueva</surname><given-names>C</given-names> </name><name name-style="western"><surname>Talwar</surname><given-names>A</given-names> </name><name name-style="western"><surname>Doyle</surname><given-names>M</given-names> </name></person-group><article-title>Improving informed consent in cardiac surgery by enhancing preoperative education</article-title><source>Patient Educ Couns</source><year>2018</year><month>12</month><volume>101</volume><issue>12</issue><fpage>2047</fpage><lpage>2053</lpage><pub-id pub-id-type="doi">10.1016/j.pec.2018.06.008</pub-id><pub-id pub-id-type="medline">29937111</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bollschweiler</surname><given-names>E</given-names> </name><name name-style="western"><surname>Apitzsch</surname><given-names>J</given-names> </name><name name-style="western"><surname>Obliers</surname><given-names>R</given-names> </name><etal/></person-group><article-title>Improving informed consent of surgical patients using a multimedia-based program? 
Results of a prospective randomized multicenter study of patients before cholecystectomy</article-title><source>Ann Surg</source><year>2008</year><month>08</month><volume>248</volume><issue>2</issue><fpage>205</fpage><lpage>211</lpage><pub-id pub-id-type="doi">10.1097/SLA.0b013e318180a3a7</pub-id><pub-id pub-id-type="medline">18650629</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sceats</surname><given-names>LA</given-names> </name><name name-style="western"><surname>Morris</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Narayan</surname><given-names>RR</given-names> </name><name name-style="western"><surname>Mezynski</surname><given-names>A</given-names> </name><name name-style="western"><surname>Woo</surname><given-names>RK</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>GP</given-names> </name></person-group><article-title>Lost in translation: Informed consent in the medical mission setting</article-title><source>Surgery</source><year>2019</year><month>02</month><volume>165</volume><issue>2</issue><fpage>438</fpage><lpage>443</lpage><pub-id pub-id-type="doi">10.1016/j.surg.2018.06.010</pub-id><pub-id pub-id-type="medline">30061041</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Neubauer</surname><given-names>PD</given-names> </name><name name-style="western"><surname>Tabaee</surname><given-names>A</given-names> </name><name name-style="western"><surname>Schwam</surname><given-names>ZG</given-names> </name><name name-style="western"><surname>Francis</surname><given-names>FK</given-names> </name><name name-style="western"><surname>Manes</surname><given-names>RP</given-names> </name></person-group><article-title>Patient knowledge and 
expectations in endoscopic sinus surgery</article-title><source>Int Forum Allergy Rhinol</source><year>2016</year><month>09</month><volume>6</volume><issue>9</issue><fpage>921</fpage><lpage>925</lpage><pub-id pub-id-type="doi">10.1002/alr.21763</pub-id><pub-id pub-id-type="medline">27028979</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hristidis</surname><given-names>V</given-names> </name><name name-style="western"><surname>Ruggiano</surname><given-names>N</given-names> </name><name name-style="western"><surname>Brown</surname><given-names>EL</given-names> </name><name name-style="western"><surname>Ganta</surname><given-names>SRR</given-names> </name><name name-style="western"><surname>Stewart</surname><given-names>S</given-names> </name></person-group><article-title>ChatGPT vs Google for queries related to dementia and other cognitive decline: Comparison of results</article-title><source>J Med Internet Res</source><year>2023</year><month>07</month><day>25</day><volume>25</volume><fpage>e48966</fpage><pub-id pub-id-type="doi">10.2196/48966</pub-id><pub-id pub-id-type="medline">37490317</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Oeding</surname><given-names>JF</given-names> </name><name name-style="western"><surname>Lu</surname><given-names>AZ</given-names> </name><name name-style="western"><surname>Mazzucco</surname><given-names>M</given-names> </name><etal/></person-group><article-title>ChatGPT-4 performs clinical information retrieval tasks using consistently more trustworthy resources than does Google Search for queries concerning the Latarjet procedure</article-title><source>Arthroscopy: The Journal of Arthroscopic &#x0026; Related 
Surgery</source><year>2025</year><month>03</month><volume>41</volume><issue>3</issue><fpage>588</fpage><lpage>597</lpage><pub-id pub-id-type="doi">10.1016/j.arthro.2024.05.025</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nicikowski</surname><given-names>J</given-names> </name><name name-style="western"><surname>Szczepa&#x0144;ski</surname><given-names>M</given-names> </name><name name-style="western"><surname>Miedziaszczyk</surname><given-names>M</given-names> </name><name name-style="western"><surname>Kudli&#x0144;ski</surname><given-names>B</given-names> </name></person-group><article-title>The potential of ChatGPT in medicine: an example analysis of nephrology specialty exams in Poland</article-title><source>Clin Kidney J</source><year>2024</year><month>08</month><volume>17</volume><issue>8</issue><fpage>sfae193</fpage><pub-id pub-id-type="doi">10.1093/ckj/sfae193</pub-id><pub-id pub-id-type="medline">39099569</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bernstein</surname><given-names>IA</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>YV</given-names> </name><name name-style="western"><surname>Govil</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Comparison of ophthalmologist and large language model chatbot responses to online patient eye care questions</article-title><source>JAMA Netw Open</source><year>2023</year><month>08</month><day>1</day><volume>6</volume><issue>8</issue><fpage>e2330320</fpage><pub-id pub-id-type="doi">10.1001/jamanetworkopen.2023.30320</pub-id><pub-id pub-id-type="medline">37606922</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>LT</given-names> </name><name name-style="western"><surname>Sinkler</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Adelstein</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Voos</surname><given-names>JE</given-names> </name><name name-style="western"><surname>Calcei</surname><given-names>JG</given-names> </name></person-group><article-title>ChatGPT responses to common questions about anterior cruciate ligament reconstruction are frequently satisfactory</article-title><source>Arthroscopy: The Journal of Arthroscopic &#x0026; Related Surgery</source><year>2024</year><month>07</month><volume>40</volume><issue>7</issue><fpage>2058</fpage><lpage>2066</lpage><pub-id pub-id-type="doi">10.1016/j.arthro.2023.12.009</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nwachukwu</surname><given-names>BU</given-names> </name><name name-style="western"><surname>Varady</surname><given-names>NH</given-names> </name><name name-style="western"><surname>Allen</surname><given-names>AA</given-names> </name><etal/></person-group><article-title>Currently available large language models do not provide musculoskeletal treatment recommendations that are concordant with evidence-based clinical practice guidelines</article-title><source>Arthroscopy: The Journal of Arthroscopic &#x0026; Related Surgery</source><year>2025</year><month>02</month><volume>41</volume><issue>2</issue><fpage>263</fpage><lpage>275</lpage><pub-id pub-id-type="doi">10.1016/j.arthro.2024.07.040</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>L</given-names> </name><name 
name-style="western"><surname>Zaharia</surname><given-names>M</given-names> </name><name name-style="western"><surname>Zou</surname><given-names>J</given-names> </name></person-group><article-title>How is ChatGPT&#x2019;s behavior changing over time?</article-title><access-date>2025-06-06</access-date><comment>Preprint posted online on Jul 1, 2023</comment><comment><ext-link ext-link-type="uri" xlink:href="https://ui.adsabs.harvard.edu/abs/2023arXiv230709009C">https://ui.adsabs.harvard.edu/abs/2023arXiv230709009C</ext-link></comment></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Menz</surname><given-names>BD</given-names> </name><name name-style="western"><surname>Kuderer</surname><given-names>NM</given-names> </name><name name-style="western"><surname>Bacchi</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Current safeguards, risk mitigation, and transparency measures of large language models against the generation of health disinformation: repeated cross sectional analysis</article-title><source>BMJ</source><year>2024</year><month>03</month><day>20</day><volume>384</volume><fpage>e078538</fpage><pub-id pub-id-type="doi">10.1136/bmj-2023-078538</pub-id><pub-id pub-id-type="medline">38508682</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yalamanchili</surname><given-names>A</given-names> </name><name name-style="western"><surname>Sengupta</surname><given-names>B</given-names> </name><name name-style="western"><surname>Song</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Quality of large language model responses to radiation oncology patient care questions</article-title><source>JAMA Netw 
Open</source><year>2024</year><month>04</month><day>1</day><volume>7</volume><issue>4</issue><fpage>e244630</fpage><pub-id pub-id-type="doi">10.1001/jamanetworkopen.2024.4630</pub-id><pub-id pub-id-type="medline">38564215</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>HQ</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Xue</surname><given-names>FW</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>ZY</given-names> </name></person-group><article-title>Annual report readability and trade credit financing: Evidence from China</article-title><source>Research in International Business and Finance</source><year>2024</year><month>04</month><volume>69</volume><fpage>102220</fpage><pub-id pub-id-type="doi">10.1016/j.ribaf.2024.102220</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Draschl</surname><given-names>A</given-names> </name><name name-style="western"><surname>Hauer</surname><given-names>G</given-names> </name><name name-style="western"><surname>Fischerauer</surname><given-names>SF</given-names> </name><etal/></person-group><article-title>Are ChatGPT&#x2019;s free-text responses on periprosthetic joint infections of the hip and knee reliable and useful?</article-title><source>J Clin Med</source><year>2023</year><month>10</month><day>20</day><volume>12</volume><issue>20</issue><fpage>6655</fpage><pub-id pub-id-type="doi">10.3390/jcm12206655</pub-id><pub-id pub-id-type="medline">37892793</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Kaarre</surname><given-names>J</given-names> </name><name name-style="western"><surname>Feldt</surname><given-names>R</given-names> </name><name name-style="western"><surname>Keeling</surname><given-names>LE</given-names> </name><etal/></person-group><article-title>Exploring the potential of ChatGPT as a supplementary tool for providing orthopaedic information</article-title><source>Knee Surg Sports Traumatol Arthrosc</source><year>2023</year><month>11</month><volume>31</volume><issue>11</issue><fpage>5190</fpage><lpage>5198</lpage><pub-id pub-id-type="doi">10.1007/s00167-023-07529-2</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sumbal</surname><given-names>A</given-names> </name><name name-style="western"><surname>Sumbal</surname><given-names>R</given-names> </name><name name-style="western"><surname>Amir</surname><given-names>A</given-names> </name></person-group><article-title>Can ChatGPT-3.5 pass a medical exam? 
A systematic review of ChatGPT&#x2019;s performance in academic testing</article-title><source>J Med Educ Curric Dev</source><year>2024</year><volume>11</volume><fpage>23821205241238641</fpage><pub-id pub-id-type="doi">10.1177/23821205241238641</pub-id><pub-id pub-id-type="medline">38487300</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Deng</surname><given-names>L</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>T</given-names> </name><etal/></person-group><article-title>Evaluation of large language models in breast cancer clinical scenarios: a comparative analysis based on ChatGPT-3.5, ChatGPT-4.0, and Claude2</article-title><source>Int J Surg</source><year>2024</year><month>04</month><volume>110</volume><issue>4</issue><fpage>1941</fpage><lpage>1950</lpage><pub-id pub-id-type="doi">10.1097/JS9.0000000000001066</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jarry Trujillo</surname><given-names>C</given-names> </name><name name-style="western"><surname>Vela Ulloa</surname><given-names>J</given-names> </name><name name-style="western"><surname>Escalona Vivas</surname><given-names>G</given-names> </name><etal/></person-group><article-title>Surgeons vs ChatGPT: Assessment and feedback performance based on real surgical scenarios</article-title><source>J Surg Educ</source><year>2024</year><month>07</month><volume>81</volume><issue>7</issue><fpage>960</fpage><lpage>966</lpage><pub-id pub-id-type="doi">10.1016/j.jsurg.2024.03.012</pub-id><pub-id pub-id-type="medline">38749814</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Zhu</surname><given-names>L</given-names> </name><name name-style="western"><surname>Mou</surname><given-names>W</given-names> </name><name name-style="western"><surname>Lai</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Step into the era of large multimodal models: a pilot study on ChatGPT-4V(ision)&#x2019;s ability to interpret radiological images</article-title><source>Int J Surg</source><year>2024</year><month>07</month><day>1</day><volume>110</volume><issue>7</issue><fpage>4096</fpage><lpage>4102</lpage><pub-id pub-id-type="doi">10.1097/JS9.0000000000001359</pub-id><pub-id pub-id-type="medline">38498394</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lim</surname><given-names>ZW</given-names> </name><name name-style="western"><surname>Pushpanathan</surname><given-names>K</given-names> </name><name name-style="western"><surname>Yew</surname><given-names>SME</given-names> </name><etal/></person-group><article-title>Benchmarking large language models&#x2019; performances for myopia care: a comparative analysis of ChatGPT-3.5, ChatGPT-4.0, and Google Bard</article-title><source>EBioMedicine</source><year>2023</year><month>09</month><volume>95</volume><fpage>104770</fpage><pub-id pub-id-type="doi">10.1016/j.ebiom.2023.104770</pub-id><pub-id pub-id-type="medline">37625267</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chervenak</surname><given-names>J</given-names> </name><name name-style="western"><surname>Lieman</surname><given-names>H</given-names> </name><name name-style="western"><surname>Blanco-Breindel</surname><given-names>M</given-names> </name><name 
name-style="western"><surname>Jindal</surname><given-names>S</given-names> </name></person-group><article-title>The promise and peril of using a large language model to obtain clinical information: ChatGPT performs strongly as a fertility counseling tool with limitations</article-title><source>Fertil Steril</source><year>2023</year><month>09</month><volume>120</volume><issue>3 Pt 2</issue><fpage>575</fpage><lpage>583</lpage><pub-id pub-id-type="doi">10.1016/j.fertnstert.2023.05.151</pub-id><pub-id pub-id-type="medline">37217092</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Thirunavukarasu</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>Ting</surname><given-names>DSJ</given-names> </name><name name-style="western"><surname>Elangovan</surname><given-names>K</given-names> </name><name name-style="western"><surname>Gutierrez</surname><given-names>L</given-names> </name><name name-style="western"><surname>Tan</surname><given-names>TF</given-names> </name><name name-style="western"><surname>Ting</surname><given-names>DSW</given-names> </name></person-group><article-title>Large language models in medicine</article-title><source>Nat Med</source><year>2023</year><month>08</month><volume>29</volume><issue>8</issue><fpage>1930</fpage><lpage>1940</lpage><pub-id pub-id-type="doi">10.1038/s41591-023-02448-8</pub-id><pub-id pub-id-type="medline">37460753</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tan</surname><given-names>S</given-names> </name><name name-style="western"><surname>Xin</surname><given-names>X</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>D</given-names> </name></person-group><article-title>ChatGPT in medicine: prospects and challenges: 
a review article</article-title><source>Int J Surg</source><year>2024</year><month>06</month><day>1</day><volume>110</volume><issue>6</issue><fpage>3701</fpage><lpage>3706</lpage><pub-id pub-id-type="doi">10.1097/JS9.0000000000001312</pub-id><pub-id pub-id-type="medline">38502861</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Haver</surname><given-names>HL</given-names> </name><name name-style="western"><surname>Gupta</surname><given-names>AK</given-names> </name><name name-style="western"><surname>Ambinder</surname><given-names>EB</given-names> </name><etal/></person-group><article-title>Evaluating the use of ChatGPT to accurately simplify patient-centered information about breast cancer prevention and screening</article-title><source>Radiol Imaging Cancer</source><year>2024</year><month>03</month><volume>6</volume><issue>2</issue><fpage>e230086</fpage><pub-id pub-id-type="doi">10.1148/rycan.230086</pub-id><pub-id pub-id-type="medline">38305716</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chelli</surname><given-names>M</given-names> </name><name name-style="western"><surname>Descamps</surname><given-names>J</given-names> </name><name name-style="western"><surname>Lavou&#x00E9;</surname><given-names>V</given-names> </name><etal/></person-group><article-title>Hallucination rates and reference accuracy of ChatGPT and Bard for systematic reviews: Comparative analysis</article-title><source>J Med Internet Res</source><year>2024</year><month>05</month><day>22</day><volume>26</volume><fpage>e53164</fpage><pub-id pub-id-type="doi">10.2196/53164</pub-id><pub-id pub-id-type="medline">38776130</pub-id></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Burnette</surname><given-names>H</given-names> </name><name name-style="western"><surname>Pabani</surname><given-names>A</given-names> </name><name name-style="western"><surname>von Itzstein</surname><given-names>MS</given-names> </name><etal/></person-group><article-title>Use of artificial intelligence chatbots in clinical management of immune-related adverse events</article-title><source>J Immunother Cancer</source><year>2024</year><month>05</month><day>30</day><volume>12</volume><issue>5</issue><fpage>e008599</fpage><pub-id pub-id-type="doi">10.1136/jitc-2023-008599</pub-id><pub-id pub-id-type="medline">38816231</pub-id></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Terrasse</surname><given-names>M</given-names> </name><name name-style="western"><surname>Gorin</surname><given-names>M</given-names> </name><name name-style="western"><surname>Sisti</surname><given-names>D</given-names> </name></person-group><article-title>Social media, e-health, and medical ethics</article-title><source>Hastings Cent Rep</source><year>2019</year><month>01</month><volume>49</volume><issue>1</issue><fpage>24</fpage><lpage>33</lpage><pub-id pub-id-type="doi">10.1002/hast.975</pub-id><pub-id pub-id-type="medline">30790306</pub-id></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ho</surname><given-names>A</given-names> </name><name name-style="western"><surname>McGrath</surname><given-names>C</given-names> </name><name name-style="western"><surname>Mattheos</surname><given-names>N</given-names> </name></person-group><article-title>Social media patient testimonials in implant dentistry: information or misinformation?</article-title><source>Clin Oral Implants 
Res</source><year>2017</year><month>07</month><volume>28</volume><issue>7</issue><fpage>791</fpage><lpage>800</lpage><pub-id pub-id-type="doi">10.1111/clr.12883</pub-id><pub-id pub-id-type="medline">27279455</pub-id></nlm-citation></ref><ref id="ref52"><label>52</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ayers</surname><given-names>JW</given-names> </name><name name-style="western"><surname>Poliak</surname><given-names>A</given-names> </name><name name-style="western"><surname>Dredze</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Comparing physician and artificial intelligence chatbot responses to patient questions posted to a public social media forum</article-title><source>JAMA Intern Med</source><year>2023</year><month>06</month><day>1</day><volume>183</volume><issue>6</issue><fpage>589</fpage><lpage>596</lpage><pub-id pub-id-type="doi">10.1001/jamainternmed.2023.1838</pub-id><pub-id pub-id-type="medline">37115527</pub-id></nlm-citation></ref><ref id="ref53"><label>53</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Aguirre</surname><given-names>A</given-names> </name><name name-style="western"><surname>Hilsabeck</surname><given-names>R</given-names> </name><name name-style="western"><surname>Smith</surname><given-names>T</given-names> </name><etal/></person-group><article-title>Assessing the quality of ChatGPT responses to dementia caregivers&#x2019; questions: Qualitative analysis</article-title><source>JMIR Aging</source><year>2024</year><month>05</month><day>6</day><volume>7</volume><fpage>e53019</fpage><pub-id pub-id-type="doi">10.2196/53019</pub-id><pub-id pub-id-type="medline">38722219</pub-id></nlm-citation></ref><ref id="ref54"><label>54</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Girton</surname><given-names>MR</given-names> </name><name name-style="western"><surname>Greene</surname><given-names>DN</given-names> </name><name name-style="western"><surname>Messerlian</surname><given-names>G</given-names> </name><name name-style="western"><surname>Keren</surname><given-names>DF</given-names> </name><name name-style="western"><surname>Yu</surname><given-names>M</given-names> </name></person-group><article-title>ChatGPT vs medical professional: Analyzing responses to laboratory medicine questions on social media</article-title><source>Clin Chem</source><year>2024</year><month>09</month><day>3</day><volume>70</volume><issue>9</issue><fpage>1122</fpage><lpage>1139</lpage><pub-id pub-id-type="doi">10.1093/clinchem/hvae093</pub-id><pub-id pub-id-type="medline">39013110</pub-id></nlm-citation></ref><ref id="ref55"><label>55</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>La Bella</surname><given-names>S</given-names> </name><name name-style="western"><surname>Attanasi</surname><given-names>M</given-names> </name><name name-style="western"><surname>Porreca</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Reliability of a generative artificial intelligence tool for pediatric familial Mediterranean fever: insights from a multicentre expert survey</article-title><source>Pediatr Rheumatol Online J</source><year>2024</year><month>08</month><day>23</day><volume>22</volume><issue>1</issue><fpage>78</fpage><pub-id pub-id-type="doi">10.1186/s12969-024-01011-0</pub-id><pub-id pub-id-type="medline">39180115</pub-id></nlm-citation></ref><ref id="ref56"><label>56</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cavnar Helvaci</surname><given-names>B</given-names> </name><name name-style="western"><surname>Hepsen</surname><given-names>S</given-names> 
</name><name name-style="western"><surname>Candemir</surname><given-names>B</given-names> </name><etal/></person-group><article-title>Assessing the accuracy and reliability of ChatGPT&#x2019;s medical responses about thyroid cancer</article-title><source>Int J Med Inform</source><year>2024</year><month>11</month><volume>191</volume><issue>105593</issue><fpage>105593</fpage><pub-id pub-id-type="doi">10.1016/j.ijmedinf.2024.105593</pub-id><pub-id pub-id-type="medline">39151245</pub-id></nlm-citation></ref><ref id="ref57"><label>57</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pallett</surname><given-names>AC</given-names> </name><name name-style="western"><surname>Nguyen</surname><given-names>BT</given-names> </name><name name-style="western"><surname>Klein</surname><given-names>NM</given-names> </name><name name-style="western"><surname>Phippen</surname><given-names>N</given-names> </name><name name-style="western"><surname>Miller</surname><given-names>CR</given-names> </name><name name-style="western"><surname>Barnett</surname><given-names>JC</given-names> </name></person-group><article-title>A randomized controlled trial to determine whether a video presentation improves informed consent for hysterectomy</article-title><source>Am J Obstet Gynecol</source><year>2018</year><month>09</month><volume>219</volume><issue>3</issue><fpage>277</fpage><pub-id pub-id-type="doi">10.1016/j.ajog.2018.06.016</pub-id><pub-id pub-id-type="medline">29959929</pub-id></nlm-citation></ref><ref id="ref58"><label>58</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>MH</given-names> </name><name name-style="western"><surname>Haq</surname><given-names>ZU</given-names> </name><name name-style="western"><surname>Braithwaite</surname><given-names>EM</given-names> </name><name 
name-style="western"><surname>Simon</surname><given-names>NC</given-names> </name><name name-style="western"><surname>Riaz</surname><given-names>KM</given-names> </name></person-group><article-title>A randomized, controlled trial of video supplementation on the cataract surgery informed consent process</article-title><source>Graefes Arch Clin Exp Ophthalmol</source><year>2019</year><month>08</month><volume>257</volume><issue>8</issue><fpage>1719</fpage><lpage>1728</lpage><pub-id pub-id-type="doi">10.1007/s00417-019-04372-5</pub-id><pub-id pub-id-type="medline">31144057</pub-id></nlm-citation></ref><ref id="ref59"><label>59</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>McCollough</surname><given-names>CH</given-names> </name></person-group><article-title>Standardization versus individualization: how each contributes to managing dose in computed tomography</article-title><source>Health Phys</source><year>2013</year><month>11</month><volume>105</volume><issue>5</issue><fpage>445</fpage><lpage>453</lpage><pub-id pub-id-type="doi">10.1097/HP.0b013e31829db936</pub-id><pub-id pub-id-type="medline">24077044</pub-id></nlm-citation></ref><ref id="ref60"><label>60</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Vaid</surname><given-names>A</given-names> </name><name name-style="western"><surname>Duong</surname><given-names>SQ</given-names> </name><name name-style="western"><surname>Lampert</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Local large language models for privacy-preserving accelerated review of historic echocardiogram reports</article-title><source>J Am Med Inform Assoc</source><year>2024</year><month>09</month><day>1</day><volume>31</volume><issue>9</issue><fpage>2097</fpage><lpage>2102</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocae085</pub-id><pub-id 
pub-id-type="medline">38687616</pub-id></nlm-citation></ref><ref id="ref61"><label>61</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Balla</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Tirunagari</surname><given-names>S</given-names> </name><name name-style="western"><surname>Windridge</surname><given-names>D</given-names> </name></person-group><article-title>Machine learning in pediatrics: Evaluating challenges, opportunities, and explainability</article-title><source>Indian Pediatr</source><year>2023</year><month>05</month><day>14</day><pub-id pub-id-type="medline">37179470</pub-id></nlm-citation></ref><ref id="ref62"><label>62</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yeo</surname><given-names>YH</given-names> </name><name name-style="western"><surname>Samaan</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Ng</surname><given-names>WH</given-names> </name><etal/></person-group><article-title>Assessing the performance of ChatGPT in answering questions regarding cirrhosis and hepatocellular carcinoma</article-title><source>Clin Mol Hepatol</source><year>2023</year><month>07</month><volume>29</volume><issue>3</issue><fpage>721</fpage><lpage>732</lpage><pub-id pub-id-type="doi">10.3350/cmh.2023.0089</pub-id><pub-id pub-id-type="medline">36946005</pub-id></nlm-citation></ref><ref id="ref63"><label>63</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shao</surname><given-names>CY</given-names> </name><name name-style="western"><surname>Li</surname><given-names>H</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>XL</given-names> </name><etal/></person-group><article-title>Appropriateness and comprehensiveness of using ChatGPT for perioperative 
patient education in thoracic surgery in different language contexts: Survey study</article-title><source>Interact J Med Res</source><year>2023</year><month>08</month><day>14</day><volume>12</volume><fpage>e46900</fpage><pub-id pub-id-type="doi">10.2196/46900</pub-id><pub-id pub-id-type="medline">37578819</pub-id></nlm-citation></ref><ref id="ref64"><label>64</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sandmann</surname><given-names>S</given-names> </name><name name-style="western"><surname>Riepenhausen</surname><given-names>S</given-names> </name><name name-style="western"><surname>Plagwitz</surname><given-names>L</given-names> </name><name name-style="western"><surname>Varghese</surname><given-names>J</given-names> </name></person-group><article-title>Systematic analysis of ChatGPT, Google search and Llama 2 for clinical decision support tasks</article-title><source>Nat Commun</source><year>2024</year><month>03</month><day>6</day><volume>15</volume><issue>1</issue><fpage>2050</fpage><pub-id pub-id-type="doi">10.1038/s41467-024-46411-8</pub-id><pub-id pub-id-type="medline">38448475</pub-id></nlm-citation></ref><ref id="ref65"><label>65</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rao</surname><given-names>A</given-names> </name><name name-style="western"><surname>Pang</surname><given-names>M</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Assessing the utility of ChatGPT throughout the entire clinical workflow: Development and usability study</article-title><source>J Med Internet Res</source><year>2023</year><month>08</month><day>22</day><volume>25</volume><fpage>e48659</fpage><pub-id pub-id-type="doi">10.2196/48659</pub-id><pub-id pub-id-type="medline">37606976</pub-id></nlm-citation></ref><ref 
id="ref66"><label>66</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Masters</surname><given-names>K</given-names> </name></person-group><article-title>Medical Teacher&#x2019;s first ChatGPT&#x2019;s referencing hallucinations: Lessons for editors, reviewers, and teachers</article-title><source>Med Teach</source><year>2023</year><month>07</month><day>3</day><volume>45</volume><issue>7</issue><fpage>673</fpage><lpage>675</lpage><pub-id pub-id-type="doi">10.1080/0142159X.2023.2208731</pub-id></nlm-citation></ref><ref id="ref67"><label>67</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hatem</surname><given-names>R</given-names> </name><name name-style="western"><surname>Simmons</surname><given-names>B</given-names> </name><name name-style="western"><surname>Thornton</surname><given-names>JE</given-names> </name></person-group><article-title>A call to address AI &#x201C;Hallucinations&#x201D; and how healthcare professionals can mitigate their risks</article-title><source>Cureus</source><year>2023</year><month>09</month><volume>15</volume><issue>9</issue><fpage>e44720</fpage><pub-id pub-id-type="doi">10.7759/cureus.44720</pub-id></nlm-citation></ref><ref id="ref68"><label>68</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bukar</surname><given-names>UA</given-names> </name><name name-style="western"><surname>Sayeed</surname><given-names>MS</given-names> </name><name name-style="western"><surname>Razak</surname><given-names>SFA</given-names> </name><name name-style="western"><surname>Yogarayan</surname><given-names>S</given-names> </name><name name-style="western"><surname>Amodu</surname><given-names>OA</given-names> </name></person-group><article-title>An integrative decision-making framework to guide policies on regulating ChatGPT 
usage</article-title><source>PeerJ Comput Sci</source><year>2024</year><volume>10</volume><fpage>e1845</fpage><pub-id pub-id-type="doi">10.7717/peerj-cs.1845</pub-id><pub-id pub-id-type="medline">38440047</pub-id></nlm-citation></ref><ref id="ref69"><label>69</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Platt</surname><given-names>J</given-names> </name><name name-style="western"><surname>Nong</surname><given-names>P</given-names> </name><name name-style="western"><surname>Smiddy</surname><given-names>R</given-names> </name><etal/></person-group><article-title>Public comfort with the use of ChatGPT and expectations for healthcare</article-title><source>J Am Med Inform Assoc</source><year>2024</year><month>09</month><day>1</day><volume>31</volume><issue>9</issue><fpage>1976</fpage><lpage>1982</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocae164</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>All Questions and Answers for Claude-3-Opus, GPT-4-Turbo, and Gemini-1.5-Pro (Use GPT-4-Turbo for Chinese to English translation).</p><media xlink:href="periop_v8i1e70047_app1.docx" xlink:title="DOCX File, 71 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Table S1: Consistent evaluation of Fleiss kappa among raters.</p><media xlink:href="periop_v8i1e70047_app2.docx" xlink:title="DOCX File, 14 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>Comparison of readability by py-readability-metrics.</p><media xlink:href="periop_v8i1e70047_app3.docx" xlink:title="DOCX File, 19 KB"/></supplementary-material></app-group></back></article>