<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Perioper Med</journal-id><journal-id journal-id-type="publisher-id">periop</journal-id><journal-id journal-id-type="index">32</journal-id><journal-title>JMIR Perioperative Medicine</journal-title><abbrev-journal-title>JMIR Perioper Med</abbrev-journal-title><issn pub-type="epub">2561-9128</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v9i1e81374</article-id><article-id pub-id-type="doi">10.2196/81374</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Physician Perspectives on ChatGPT-4o as a Patient Resource for Abdominal Cancer Surgeries: Cross-Sectional Survey</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Lindsay</surname><given-names>Christina V</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Shenoy</surname><given-names>Devika A</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Martin</surname><given-names>Allison N</given-names></name><degrees>MPH, MD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Clipper</surname><given-names>Christie L</given-names></name><degrees>DHA</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Shah</surname><given-names>Kevin N</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Lidsky</surname><given-names>Michael E</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Nussbaum</surname><given-names>Daniel P</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Snyderman</surname><given-names>Ralph</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff4">4</xref></contrib></contrib-group><aff id="aff1"><institution>School of Medicine, Duke University</institution><addr-line>8 Searle Center Dr</addr-line><addr-line>Durham</addr-line><addr-line>NC</addr-line><country>United States</country></aff><aff id="aff2"><institution>Center for Personalized Health Care, Duke University</institution><addr-line>Durham</addr-line><addr-line>NC</addr-line><country>United States</country></aff><aff id="aff3"><institution>Department of Surgery, Duke University</institution><addr-line>Durham</addr-line><addr-line>NC</addr-line><country>United States</country></aff><aff id="aff4"><institution>Department of Medicine, Duke University</institution><addr-line>Durham</addr-line><addr-line>NC</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Shiffermiller</surname><given-names>Jason</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name 
name-style="western"><surname>Richlitzki</surname><given-names>Cedric</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Roland</surname><given-names>Abi</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Matsuda</surname><given-names>Shinichi</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Christina V Lindsay, BS, School of Medicine, Duke University, 8 Searle Center Dr, Durham, NC, 27710, United States, 1 305-439-7523; <email>christina.lindsay@duke.edu</email></corresp></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>4</day><month>5</month><year>2026</year></pub-date><volume>9</volume><elocation-id>e81374</elocation-id><history><date date-type="received"><day>27</day><month>07</month><year>2025</year></date><date date-type="rev-recd"><day>08</day><month>03</month><year>2026</year></date><date date-type="accepted"><day>12</day><month>03</month><year>2026</year></date></history><copyright-statement>&#x00A9; Christina V Lindsay, Devika A Shenoy, Allison N Martin, Christie L Clipper, Kevin N Shah, Michael E Lidsky, Daniel P Nussbaum, Ralph Snyderman. Originally published in JMIR Perioperative Medicine (<ext-link ext-link-type="uri" xlink:href="http://periop.jmir.org">http://periop.jmir.org</ext-link>), 4.5.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Perioperative Medicine, is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="http://periop.jmir.org">http://periop.jmir.org</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://periop.jmir.org/2026/1/e81374"/><abstract><sec><title>Background</title><p>Artificial intelligence (AI) models are being increasingly integrated into clinical care. Moreover, the availability of publicly accessible AI resources makes them attractive to patients seeking clinical information. Little is known regarding the use of large language models as patient resources for navigating major cancer diagnoses.</p></sec><sec><title>Objective</title><p>This study aimed to evaluate the content, readability, and safety of ChatGPT (OpenAI; GPT-4o)-generated responses to common perioperative queries about hepatic, pancreatic, and colon cancers.</p></sec><sec sec-type="methods"><title>Methods</title><p>A 28-question survey was developed based on frequently asked surgical questions for select malignancies. Surgical oncologists rated ChatGPT-4o-generated responses on a 5-point Likert scale for accuracy, quality, and tangibility. Readability was assessed using the Flesch-Kincaid Reading Grade Level (FKRGL) and Flesch Reading Ease (FRE). Respondents provided free-text comments and reported their comfort with patients using ChatGPT. Survey completion implied consent.</p></sec><sec sec-type="results"><title>Results</title><p>A total of 7 attending surgical oncologists with a median of 7 (IQR 4-13) years in practice completed the survey. Responses received mean scores of 3.5/5 (SD 0.28) for quality, 3.6/5 (SD 0.34) for accuracy, and 3.6/5 (SD 0.29) for tangibility. The responses had a median FKRGL score of 14.6 (IQR 13.3-15.6) and FRE score of 29.4 (IQR 20.5-36.3). 
On a post hoc analysis for select questions, the median FKRGL was 15.6 (IQR 14.4-16.7), decreasing to 7.1 (IQR 6.1-8.3) and 14.5 (IQR 13.2-15.4) with prompting and rephrasing, and the median FRE was 18.1 (IQR 14.6-24.7), increasing to 73.8 (IQR 66.6-79.3) and 32.0 (IQR 27.0-37.7) with prompting and rephrasing. Numerous inaccuracies and content gaps were reported, and approximately 43% (3/7) of providers did not report feeling &#x201C;comfortable&#x201D; in having patients consult publicly available AI for medical information.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>This study provides cautionary, yet optimistic, findings regarding the value of publicly accessible ChatGPT as a patient resource for abdominal malignancies. Providers should be prepared to effectively counsel patients to identify their educational attainment level when using ChatGPT to mitigate readability challenges.</p></sec></abstract><kwd-group><kwd>patient education</kwd><kwd>health literacy</kwd><kwd>generative artificial intelligence</kwd><kwd>surgical oncology</kwd><kwd>perioperative care</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>In recent years, artificial intelligence (AI) has promised to reshape medicine. Chatbots such as ChatGPT (OpenAI) [<xref ref-type="bibr" rid="ref1">1</xref>], DeepAI, and Google Gemini use large language models (LLMs), a popular form of AI. These models are trained upon large datasets to generate answers [<xref ref-type="bibr" rid="ref2">2</xref>]. Recent LLM improvement in reasoning has been noted to reflect human-level cognition [<xref ref-type="bibr" rid="ref3">3</xref>]. Furthermore, studies have examined LLM function in the health care sector. 
LLMs have been found to pass United States Medical Licensing Examinations [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>] and medical subspecialty exams [<xref ref-type="bibr" rid="ref6">6</xref>-<xref ref-type="bibr" rid="ref8">8</xref>] and to provide successful clinical reasoning and diagnoses [<xref ref-type="bibr" rid="ref9">9</xref>]. Moreover, ChatGPT has the potential to supersede other search engines in answering patient health&#x2013;related questions by providing more comprehensive and specific answers [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>].</p><p>Although AI has been found to augment medical practice, its use as a resource by patients is not well understood. Patients have long reported turning to the internet for clinical advice [<xref ref-type="bibr" rid="ref12">12</xref>]. Studies evaluating responses from common search engines to frequently asked general surgery questions have typically found the quality to range from fair to good but found that the readability level often exceeded the recommended level for the general population [<xref ref-type="bibr" rid="ref13">13</xref>]. More recently, patients have turned to ChatGPT for clinical questions; a study conducted in Australia found that approximately 9.9% of Australian adults asked ChatGPT medical questions within the first half of 2024 [<xref ref-type="bibr" rid="ref14">14</xref>]. Following the rapid rise of publicly accessible proprietary LLM chatbots and the lack of peer-reviewed output within these learning models, recent work across specialties, including oncology, gastroenterology, otolaryngology, and surgery, has sought to evaluate LLM-generated responses to questions commonly asked by patients [<xref ref-type="bibr" rid="ref15">15</xref>-<xref ref-type="bibr" rid="ref21">21</xref>]. 
The reported overall quality of generated responses varies across fields and is impacted by the type of LLM used [<xref ref-type="bibr" rid="ref20">20</xref>]. Furthermore, prior research has suggested that, as with &#x201C;Dr Google&#x201D; and other popular search engines [<xref ref-type="bibr" rid="ref13">13</xref>], the readability of LLM-generated responses may serve as a key limitation of using LLMs such as ChatGPT as a patient resource [<xref ref-type="bibr" rid="ref22">22</xref>]. Additionally, ChatGPT answers are limited in consistency [<xref ref-type="bibr" rid="ref23">23</xref>], generating similar but nonidentical responses.</p><p>Gastrointestinal malignancies, including pancreatic, colorectal, hepatic, stomach, and esophageal malignancies, account for over one-quarter of cancer incidence globally and are steadily increasing. By 2040, the global number of gastrointestinal cancer deaths is projected to increase by over 70% to 5.6 million [<xref ref-type="bibr" rid="ref24">24</xref>]. Given the significant disease burden of gastrointestinal malignancies and related therapies, it is essential to properly evaluate pertinent patient resources to better inform patients, many of whom will be accessing these resources independently. To date, few studies have examined the use of publicly accessible proprietary LLMs as a perioperative resource for patients with abdominal malignancies. The aim of this study was to evaluate the content and readability of LLM-generated responses to common patient queries for hepatic, pancreatic, and colon cancers.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Ethical Considerations</title><p>This study was submitted to the Duke University Institutional Review Board for review and was determined to be exempt (Pro00116649). This study involved surgeon-participants who evaluated GPT-generated responses to frequently asked questions. No patient data were used. 
To maintain participant confidentiality, all data were analyzed in aggregate. All GPT inquiries and survey questions were asked in English. Consent from participants was implied through voluntary completion of the survey. No compensation was provided to participants.</p></sec><sec id="s2-2"><title>Question Development</title><p>Preliminary questions were developed by CVL and DAS. Questions for this study were developed by sourcing frequently asked questions about colon, liver, and pancreatic cancers from 7 hospital patient information and nonprofit cancer foundation websites [<xref ref-type="bibr" rid="ref25">25</xref>-<xref ref-type="bibr" rid="ref31">31</xref>]. This methodology was used in an earlier study examining LLMs as a tool for patient education in lung cancer surgery [<xref ref-type="bibr" rid="ref32">32</xref>]. Questions on general disease information, including signs and symptoms, staging and treatment options, surgical eligibility, and operative risks, were formulated for colon, hepatic, and pancreatic cancers using identical language for each condition. Standardized language was used to determine the suitability of LLMs for delivering useful abdominal cancer education as applied to each of the conditions. Additional questions were created to address common patient concerns related to postoperative recovery and potential adverse outcomes following abdominal cancer surgery.</p></sec><sec id="s2-3"><title>Question Piloting</title><p>The preliminary survey questions were initially evaluated for relevance and alignment with patient phrasing through subjective assessment by 2 general surgery residents. Residents were prompted to assess the frequency with which the proposed questions were encountered in practice to evaluate the survey questions based on clinical relevance. 
Residents were also prompted to evaluate the survey questions based on alignment with patient phrasing, to suggest phrasing revisions for items that received a Likert score &#x2264;3 on a 5-point scale, and to propose additional relevant questions not addressed by the survey. At this stage, 8 questions were removed and 4 were added per resident feedback. Additional questions were adjusted accordingly. After piloting scenarios with residents, all questions were run sequentially through a publicly available, proprietary version of ChatGPT (GPT-4o; released on May 13, 2024) [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref33">33</xref>], on March 9, 2025, in Durham, United States. ChatGPT-4o is based on a proprietary GPT-4-class pretrained base LLM that has been instruction-tuned for conversational use. No additional model fine-tuning or retraining was performed through this study. ChatGPT was prompted to answer in paragraph form without additional contextual information. As in prior studies, a new chat entry was posed for each question [<xref ref-type="bibr" rid="ref32">32</xref>].</p><p>A Qualtrics survey was formulated with the final 28 questions and LLM responses. This survey was piloted by our surgeon expert, ANM, who provided final revisions for question phrasing. <xref ref-type="table" rid="table1">Table 1</xref> lists the finalized questions prompted into ChatGPT. 
Revised questions were newly run through ChatGPT, and the Qualtrics survey was adjusted accordingly.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>GPT queries: abdominal cancer frequently asked questions and common postoperative complications.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Domain</td><td align="left" valign="bottom">Question</td></tr></thead><tbody><tr><td align="left" valign="top">Signs and symptoms</td><td align="left" valign="top">Q1: What are the signs and symptoms of pancreatic cancer?<break/>Q2: What are the signs and symptoms of colon cancer?<break/>Q3: What are the signs and symptoms of liver cancer?</td></tr><tr><td align="left" valign="top">Stages and treatment</td><td align="left" valign="top">Q4: What are the different stages and treatments for pancreatic cancer?<break/>Q5: What are the different stages and treatments for colon cancer?<break/>Q6: What are the different stages and treatments for liver cancer?</td></tr><tr><td align="left" valign="top">Surgery eligibility</td><td align="left" valign="top">Q7: Who is appropriate for surgery for pancreatic cancer?<break/>Q8: Who is appropriate for surgery for colon cancer?<break/>Q9: Who is appropriate for surgery for liver cancer?</td></tr><tr><td align="left" valign="top">Surgery risks</td><td align="left" valign="top">Q13: What are the risks of surgery to remove my pancreatic cancer?<break/>Q14: What are the risks of surgery to remove my colon cancer?<break/>Q15: What are the risks of surgery to remove my liver cancer?</td></tr><tr><td align="left" valign="top">General postoperative recovery</td><td align="left" valign="top">Q10: How long is the recovery from pancreatic cancer surgery?<break/>Q11: How long is the recovery from colon cancer surgery?<break/>Q12: How long is the recovery from liver cancer surgery?<break/>Q16: Will I need an ostomy after surgery to remove my colon cancer?<break/>Q17: How 
long will I be in the hospital after surgery for cancer in my belly?<break/>Q18: How long after surgery for cancer in my belly can I exercise?<break/>Q19: How long will it take to recover from surgery for cancer in my belly?<break/>Q20: Should I stay close to the hospital in a hotel or Airbnb after I&#x2019;m discharged from surgery for cancer in my belly?<break/>Q21: How long after surgery for cancer in my belly can I do chores around the house?</td></tr><tr><td align="left" valign="top">Adverse outcomes</td><td align="left" valign="top">Q22: I just had surgery for cancer in my belly, and my incision is painful. What do I do?<break/>Q23: I just had surgery for cancer in my belly, and I am still in some pain. Is there anything else I can take for the pain?<break/>Q24: I just had surgery for cancer in my belly, and my incision is starting to hurt more and looks slightly open. What do I do?<break/>Q25: I just had surgery for cancer in my belly, and the incision is warm to the touch and draining a yellowish fluid. What do I do?<break/>Q26: I just had surgery for cancer in my belly. It hurts when I breathe, and I have a new cough. What do I do?<break/>Q27: I just had surgery for cancer in my belly. It now burns when I pee. What do I do?<break/>Q28: I am about to have surgery for cancer in my belly. How can I prevent an infection after?</td></tr></tbody></table></table-wrap></sec><sec id="s2-4"><title>Outcomes or Data Collection and Variables</title><p>An anonymous survey was disseminated to surgical faculty at a single institution using Qualtrics, a secure, web-based survey platform. Eligible participants were board-certified surgeons who had completed fellowship training in surgical oncology or colorectal surgery and were actively practicing at the time of the study. Surgeons were identified through publicly available web-based colorectal surgery and surgical oncology faculty rosters and were invited to participate via an email containing the anonymous Qualtrics link. 
The Qualtrics platform is commonly used in academic research, as it permits investigators to design surveys, test them for accessibility and functionality, distribute them electronically as a web link or QR code, and export result reports. The final list of questions and ChatGPT-4o&#x2013;generated responses graded by surgeons is included in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. On the Qualtrics platform, prior to initiating the survey, surgeons were instructed to grade responses for accuracy, quality, and tangibility on a 5-point Likert scale (1=&#x201C;poor,&#x201D; 5=&#x201C;excellent&#x201D;). The survey defined accuracy as the medical or social correctness of a response, quality as the extent to which a response is well-written and comprehensive, and tangibility as the degree to which the response provides actionable guidance.</p><p>After evaluating ChatGPT responses, surgeon respondents were prompted to self-report demographics, including age, sex, years as a practicing surgeon post training, frequency of AI use in practice, and prior experience with AI. Furthermore, a free-response section permitted surgeons to share thoughts or concerns. Respondents were assessed for comfort with patient-AI use through the question, &#x201C;If a patient informed you they are using publicly available AI (eg, ChatGPT) for health information, how comfortable would you be with encouraging them to use AI following this survey?&#x201D; Available answer choices included &#x201C;very uncomfortable,&#x201D; &#x201C;uncomfortable,&#x201D; &#x201C;neither comfortable nor uncomfortable,&#x201D; &#x201C;comfortable,&#x201D; and &#x201C;very comfortable.&#x201D;</p><p>LLM-generated responses were separately graded for readability using the Flesch-Kincaid Reading Grade Level (FKRGL) and Flesch Reading Ease (FRE) formulas through the Readability Statistics tool in Microsoft Word Version 16.105.2 [<xref ref-type="bibr" rid="ref34">34</xref>]. 
FRE and FKRGL formulas calculate readability based on the average sentence and word length of a text. The FKRGL scale assesses the approximate grade level of a text, with an FKRGL score of 5 corresponding to a US 5th-grade reading level. The FRE scale measures readability from 0, unreadable text, to 100, very easily readable text. Both scales were selected as they are validated tools for grading text readability, and they are commonly used by professionals to evaluate the readability of patient-directed health care information [<xref ref-type="bibr" rid="ref34">34</xref>].</p><p>A post hoc analysis was performed to assess FKRGL, FRE scores, and content similarity for GPT responses under 3 prompting conditions: version 1 represented the response to the original question provided for reference; version 2 consisted of responses to the original question preceded by a prompt to &#x201C;Answer at a 5th-grade level&#x201D;; and version 3 comprised responses to questions that were reworded to a 5th-grade reading level by ChatGPT-4o prior to response generation. Four questions were selected for post hoc analysis to provide a focused analysis of the effect of question phrasing and prompting on readability. Questions were selected based on having the highest original FKRGL score within 4 different domains and to ensure representation of each malignancy type. Content similarity was graded by 2 independent graders, CVL and DAS, using a 5-point Likert scale (1=not similar, 5=very similar).</p></sec><sec id="s2-5"><title>Statistical Analysis</title><p>Respondent answers were collected and analyzed in aggregate. Descriptive statistics for categorical variables were reported as frequencies with percentages; continuous variables were reported as mean with SD or median with IQR, where appropriate. 
Analyses were performed using Microsoft Excel Version 16.95.4; formulas used included =MEDIAN() for median, =AVERAGE() for mean, =STDEV() for SD, and =QUARTILE.INC() to derive the IQR.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Overview</title><p>Of the 12 eligible surgeons contacted, 7 responded, resulting in a survey response rate of 58.3%. All respondents were academic surgeons at a single institution. The median reported respondent categorical age range was 35&#x2010;44 years. Most survey respondents were male (4/7, 57.1%). Respondents had practiced surgery for a median of 7 (IQR 4&#x2010;13) years post training. When assessed for frequency of AI use, 1 respondent reported using AI &#x201C;daily,&#x201D; 2 reported using AI &#x201C;weekly,&#x201D; 3 reported using AI &#x201C;monthly,&#x201D; and 1 reported using AI &#x201C;almost never.&#x201D;</p></sec><sec id="s3-2"><title>Quality of LLM Responses</title><p>When asked to evaluate the quality of responses, experts consistently rated answers between &#x201C;good&#x201D; and &#x201C;very good&#x201D; to &#x201C;excellent,&#x201D; with an aggregate mean response rating of 3.54 (SD 0.28). Across all domains (<xref ref-type="table" rid="table2">Table 2</xref>), questions related to staging and treatment performed worst, receiving an average rating of 3.33 (SD 0.30), while questions about adverse outcomes tended to perform best, receiving an average rating of 3.73 (SD 0.27). <xref ref-type="table" rid="table2">Table 2</xref> indicates the median and IQR of respondent grades for each question, with quality scores ranging from 3.00 to 4.50 and IQR ranging from 2.50-3.50 to 3.25-5.00. 
The question indicating postoperative urinary tract infection (UTI) received the highest median quality score of 4.50 (IQR 3.25&#x2010;5.00), between &#x201C;very good&#x201D; and &#x201C;excellent.&#x201D;</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Quality, accuracy, and tangibility scores for GPT-generated responses<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup>.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Domain and question ID</td><td align="left" valign="bottom">Quality score, median (IQR)</td><td align="left" valign="bottom">Accuracy score, median (IQR)</td><td align="left" valign="bottom">Tangibility score, median (IQR)</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="4">Signs and symptoms</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Q1</td><td align="left" valign="top">4 (3.5&#x2010;4)</td><td align="left" valign="top">4 (4&#x2010;4.5)</td><td align="left" valign="top">4 (4&#x2010;4.5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Q2</td><td align="left" valign="top">3 (3-4)</td><td align="left" valign="top">4 (3.5&#x2010;4.5)</td><td align="left" valign="top">4 (3-4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Q3</td><td align="left" valign="top">3 (2.5&#x2010;4)</td><td align="left" valign="top">3 (2.5&#x2010;4)</td><td align="left" valign="top">3 (3&#x2010;3.5)</td></tr><tr><td align="left" valign="top" colspan="4">Stages and treatment</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Q4</td><td align="left" valign="top">3 (2.5&#x2010;4.5)</td><td align="left" valign="top">4 (2.5&#x2010;4.5)</td><td align="left" 
valign="top">4 (2.5&#x2010;4.5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Q5</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">4 (3.5&#x2010;4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Q6</td><td align="left" valign="top">3 (2.5&#x2010;3.5)</td><td align="left" valign="top">3 (2.5&#x2010;3.5)</td><td align="left" valign="top">3 (2.5&#x2010;3.5)</td></tr><tr><td align="left" valign="top" colspan="4">Surgery eligibility</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Q7</td><td align="left" valign="top">4 (2.5&#x2010;4)</td><td align="left" valign="top">4 (2.5&#x2010;4)</td><td align="left" valign="top">4 (3.5&#x2010;4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Q8</td><td align="left" valign="top">4 (3.5&#x2010;4)</td><td align="left" valign="top">4 (3.5&#x2010;4)</td><td align="left" valign="top">4 (3.5&#x2010;4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Q9</td><td align="left" valign="top">4 (4-4)</td><td align="left" valign="top">4 (3.5&#x2010;4)</td><td align="left" valign="top">4 (4-4)</td></tr><tr><td align="left" valign="top" colspan="4">General postoperative recovery</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Q10</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">4 (2.5&#x2010;4)</td><td align="left" valign="top">4 (3-4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Q11</td><td 
align="left" valign="top">3 (2.5&#x2010;4)</td><td align="left" valign="top">3 (2.5&#x2010;3.5)</td><td align="left" valign="top">3 (2.5&#x2010;3.5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Q12</td><td align="left" valign="top">4 (2.5&#x2010;4)</td><td align="left" valign="top">3 (2.5&#x2010;3.5)</td><td align="left" valign="top">3 (2.5&#x2010;4)</td></tr><tr><td align="left" valign="top" colspan="4">Surgery risks</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Q13</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">3 (3-4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Q14</td><td align="left" valign="top">4 (3.5&#x2010;4)</td><td align="left" valign="top">4 (3.5&#x2010;4)</td><td align="left" valign="top">4 (3.5&#x2010;4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Q15</td><td align="left" valign="top">3 (3&#x2010;3.5)</td><td align="left" valign="top">3 (3&#x2010;3.5)</td><td align="left" valign="top">3 (3&#x2010;3.5)</td></tr><tr><td align="left" valign="top" colspan="4">General postoperative recovery</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Q16</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">4 (3.5&#x2010;4)</td><td align="left" valign="top">4 (3.5&#x2010;4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Q17</td><td align="left" valign="top">3 (2.5&#x2010;4)</td><td align="left" valign="top">4 (3.5&#x2010;4)</td><td align="left" valign="top">4 (2.5&#x2010;4)</td></tr><tr><td 
align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Q18</td><td align="left" valign="top">4 (3.5&#x2010;4.5)</td><td align="left" valign="top">4 (3.5&#x2010;4.5)</td><td align="left" valign="top">4 (3.5&#x2010;4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Q19</td><td align="left" valign="top">4 (3.5&#x2010;4)</td><td align="left" valign="top">4 (3.5&#x2010;4)</td><td align="left" valign="top">4 (3-4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Q20</td><td align="left" valign="top">4 (3.5&#x2010;4)</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">4 (3-4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Q21</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">4 (3&#x2010;4.5)</td></tr><tr><td align="left" valign="top" colspan="4">Adverse outcomes</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Q22</td><td align="left" valign="top">3.5 (3-4)</td><td align="left" valign="top">3.5 (3-4)</td><td align="left" valign="top">3.5 (3-4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Q23</td><td align="left" valign="top">3.5 (3-4)</td><td align="left" valign="top">4 (3.25&#x2010;4)</td><td align="left" valign="top">4 (3.25&#x2010;4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Q24</td><td align="left" valign="top">4 (3.25&#x2010;4.75)</td><td align="left" valign="top">4.5 (3.25&#x2010;5)</td><td align="left" valign="top">4.5 
(3.25&#x2010;5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Q25</td><td align="left" valign="top">4 (3.25&#x2010;4)</td><td align="left" valign="top">4 (3.25&#x2010;4)</td><td align="left" valign="top">4 (3.25&#x2010;4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Q26</td><td align="left" valign="top">4 (3.25&#x2010;4.75)</td><td align="left" valign="top">4.5 (3.25&#x2010;5)</td><td align="left" valign="top">4.5 (3.25&#x2010;5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Q27</td><td align="left" valign="top">4.5 (3.25&#x2010;5)</td><td align="left" valign="top">4.5 (3.25&#x2010;5)</td><td align="left" valign="top">4.5 (3.25&#x2010;5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Q28</td><td align="left" valign="top">4 (4-4)</td><td align="left" valign="top">4 (3.25&#x2010;4.75)</td><td align="left" valign="top">4.5 (3.25&#x2010;5)</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>Accuracy is defined as how &#x201C;medically or socially accurate&#x201D; a response is; quality as how &#x201C;well-written and comprehensive&#x201D; a response is; and tangibility as how &#x201C;actionable&#x201D; a response is.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-3"><title>Accuracy of LLM Responses</title><p>Similar to quality, when asked to evaluate the accuracy of responses, experts generally rated responses between &#x201C;good&#x201D; and &#x201C;very good&#x201D; to &#x201C;excellent,&#x201D; with an aggregate mean response rating of 3.57 (SD 0.34). Across all domains, questions related to staging and treatment performed the worst, receiving an average rating of 3.29 (SD 0.38). 
Conversely, questions about adverse outcomes consistently performed best, receiving an average rating of 3.83 (SD 0.24). Median response accuracy ratings ranged from 3.00 to 4.50 with IQRs ranging from 2.50-3.50 to 3.25-5.00. Questions regarding postoperative wound dehiscence (Q24), pulmonary embolism (Q26), and UTI management (Q27) received the highest median accuracy grading of 4.50 (IQR 3.25-5.00).</p></sec><sec id="s3-4"><title>Tangibility of LLM Responses</title><p>When asked to evaluate the tangibility, or how &#x201C;actionable&#x201D; a response was, experts likewise consistently rated responses between &#x201C;good&#x201D; and &#x201C;very good&#x201D; to &#x201C;excellent,&#x201D; with an aggregate mean response rating of 3.62 (SD 0.29). Across all domains, questions pertaining to staging and treatment performed the worst, receiving the lowest mean tangibility score of 3.47 (SD 0.30), while questions about adverse outcomes performed best, receiving an average rating of 3.86 (SD 0.28). The median response ratings ranged from 3.00 to 4.50 with IQR scores ranging from 2.50-3.50 to 3.25-5.00. Questions regarding postoperative wound dehiscence (Q24), pulmonary embolism (Q26), UTI management (Q27), and infection prevention (Q28) received the highest median tangibility grading of 4.50 (IQR 3.25-5.00).</p></sec><sec id="s3-5"><title>Readability of LLM Responses</title><p>When assessing readability (<xref ref-type="table" rid="table3">Table 3</xref>), ChatGPT-4o&#x2013;generated responses read at an average FKRGL of 14.51 (SD 1.86), requiring some level of college education for adequate comprehension. Response FKRGL scores ranged from 10.8 to 18.1. A question regarding wound dehiscence received the lowest grade score, 10.8, while a question regarding pancreatic cancer surgery candidacy received the highest score, 18.1. 
The mean FRE score of ChatGPT-4o&#x2013;generated responses was 28.8 (SD 9.87), corresponding to a college graduate reading level and indicating low readability. Response FRE scores ranged from 11.7 to 48.0. A question regarding colon cancer surgery recovery had the worst readability, with an FRE score of 11.7. As with FKRGL grading, a question regarding wound dehiscence had the highest ease of readability with an FRE score of 48.0.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Readability of GPT-generated responses. Readability is represented as FKRGL<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup> and FRE<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup> scores.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">ID and question</td><td align="left" valign="bottom">FKRGL score (US-grade reading level)</td><td align="left" valign="bottom">FRE score</td><td align="left" valign="bottom">Estimated FRE US-grade level [<xref ref-type="bibr" rid="ref34">34</xref>]</td></tr></thead><tbody><tr><td align="left" valign="top">Q1: What are the signs and symptoms of pancreatic cancer?</td><td align="left" valign="top">12.9</td><td align="left" valign="top">36.8</td><td align="left" valign="top">13&#x2010;16</td></tr><tr><td align="left" valign="top">Q2: What are the signs and symptoms of colon cancer?</td><td align="left" valign="top">12.2</td><td align="left" valign="top">41.3</td><td align="left" valign="top">13&#x2010;16</td></tr><tr><td align="left" valign="top">Q3: What are the signs and symptoms of liver cancer?</td><td align="left" valign="top">11.8</td><td align="left" valign="top">41.2</td><td align="left" valign="top">13&#x2010;16</td></tr><tr><td align="left" valign="top">Q4: What are the different stages and treatments for pancreatic cancer?</td><td align="left" valign="top">13.4</td><td align="left" valign="top">28.6</td><td align="left" valign="top">College 
graduate</td></tr><tr><td align="left" valign="top">Q5: What are the different stages and treatments for colon cancer?</td><td align="left" valign="top">15.1</td><td align="left" valign="top">16.4</td><td align="left" valign="top">College graduate</td></tr><tr><td align="left" valign="top">Q6: What are the different stages and treatments for liver cancer?</td><td align="left" valign="top">14.5</td><td align="left" valign="top">17.8</td><td align="left" valign="top">College graduate</td></tr><tr><td align="left" valign="top">Q7: Who is appropriate for surgery for pancreatic cancer?</td><td align="left" valign="top">18.1</td><td align="left" valign="top">20.6</td><td align="left" valign="top">College graduate</td></tr><tr><td align="left" valign="top">Q8: Who is appropriate for surgery for colon cancer?</td><td align="left" valign="top">18.0</td><td align="left" valign="top">17.4</td><td align="left" valign="top">College graduate</td></tr><tr><td align="left" valign="top">Q9: Who is appropriate for surgery for liver cancer?</td><td align="left" valign="top">16.6</td><td align="left" valign="top">21.5</td><td align="left" valign="top">College graduate</td></tr><tr><td align="left" valign="top">Q10: How long is the recovery from pancreatic cancer surgery?</td><td align="left" valign="top">16.1</td><td align="left" valign="top">20.0</td><td align="left" valign="top">College graduate</td></tr><tr><td align="left" valign="top">Q11: How long is the recovery from colon cancer surgery?</td><td align="left" valign="top">16.2</td><td align="left" valign="top">11.7</td><td align="left" valign="top">College graduate</td></tr><tr><td align="left" valign="top">Q12: How long is the recovery from liver cancer surgery?</td><td align="left" valign="top">14.5</td><td align="left" valign="top">17.4</td><td align="left" valign="top">College graduate</td></tr><tr><td align="left" valign="top">Q13: 
What are the risks of surgery to remove my pancreatic cancer?</td><td align="left" valign="top">14.8</td><td align="left" valign="top">20.7</td><td align="left" valign="top">College graduate</td></tr><tr><td align="left" valign="top">Q14: What are the risks of surgery to remove my colon cancer?</td><td align="left" valign="top">14.2</td><td align="left" valign="top">22.8</td><td align="left" valign="top">College graduate</td></tr><tr><td align="left" valign="top">Q15: What are the risks of surgery to remove my liver cancer?</td><td align="left" valign="top">14.9</td><td align="left" valign="top">15.6</td><td align="left" valign="top">College graduate</td></tr><tr><td align="left" valign="top">Q16: Will I need an ostomy after surgery to remove my colon cancer?</td><td align="left" valign="top">14.2</td><td align="left" valign="top">32.9</td><td align="left" valign="top">13&#x2010;16</td></tr><tr><td align="left" valign="top">Q17: How long will I be in the hospital after surgery for cancer in my belly?</td><td align="left" valign="top">14.7</td><td align="left" valign="top">36.2</td><td align="left" valign="top">13&#x2010;16</td></tr><tr><td align="left" valign="top">Q18: How long after surgery for cancer in my belly can I exercise?</td><td align="left" valign="top">15.4</td><td align="left" valign="top">26.5</td><td align="left" valign="top">College graduate</td></tr><tr><td align="left" valign="top">Q19: How long will it take to recover from surgery for cancer in my belly?</td><td align="left" valign="top">14.1</td><td align="left" valign="top">31.8</td><td align="left" valign="top">13&#x2010;16</td></tr><tr><td align="left" valign="top">Q20: Should I stay close to the hospital in a hotel or Airbnb after I&#x2019;m discharged from surgery for cancer in my belly?</td><td align="left" valign="top">17.0</td><td align="left" valign="top">32.7</td><td align="left" valign="top">13&#x2010;16</td></tr><tr><td align="left" valign="top">Q21: How long after surgery for cancer 
in my belly can I do chores around the house?</td><td align="left" valign="top">15.3</td><td align="left" valign="top">33.9</td><td align="left" valign="top">13&#x2010;16</td></tr><tr><td align="left" valign="top">Q22: I just had surgery for cancer in my belly, and my incision is painful. What do I do?</td><td align="left" valign="top">12.1</td><td align="left" valign="top">43.6</td><td align="left" valign="top">13&#x2010;16</td></tr><tr><td align="left" valign="top">Q23: I just had surgery for cancer in my belly, and I am still in some pain. Is there anything else I can take for the pain?</td><td align="left" valign="top">12.7</td><td align="left" valign="top">36.6</td><td align="left" valign="top">13&#x2010;16</td></tr><tr><td align="left" valign="top">Q24: I just had surgery for cancer in my belly, and my incision is starting to hurt more and looks slightly open. What do I do?</td><td align="left" valign="top">10.8</td><td align="left" valign="top">48.0</td><td align="left" valign="top">13&#x2010;16</td></tr><tr><td align="left" valign="top">Q25: I just had surgery for cancer in my belly, and the incision is warm to the touch and draining a yellowish fluid. What do I do?</td><td align="left" valign="top">13.6</td><td align="left" valign="top">40.0</td><td align="left" valign="top">13&#x2010;16</td></tr><tr><td align="left" valign="top">Q26: I just had surgery for cancer in my belly. It hurts when I breathe, and I have a new cough. What do I do?</td><td align="left" valign="top">16.2</td><td align="left" valign="top">27.4</td><td align="left" valign="top">College graduate</td></tr><tr><td align="left" valign="top">Q27: I just had surgery for cancer in my belly. It now burns when I pee. What do I do?</td><td align="left" valign="top">15.0</td><td align="left" valign="top">30.1</td><td align="left" valign="top">13&#x2010;16</td></tr><tr><td align="left" valign="top">Q28: I am about to have surgery for cancer in my belly. 
How can I prevent an infection after?</td><td align="left" valign="top">12.0</td><td align="left" valign="top">36.1</td><td align="left" valign="top">13&#x2010;16</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>FKRGL: Flesch-Kincaid Reading Grade Level.</p></fn><fn id="table3fn2"><p><sup>b</sup>FRE: Flesch Reading Ease.</p></fn></table-wrap-foot></table-wrap><p>Four questions were selected for post hoc analysis shown in <xref ref-type="table" rid="table4">Table 4</xref> (Q1, Q7, Q11, and Q15). These questions had an original median FKRGL score of 15.6 (IQR 14.4-16.7; range: 12.9&#x2010;18.1) and median FRE score of 18.1 (IQR 14.6-24.7; range: 11.7&#x2010;36.8). When GPT-4o was queried to respond to select questions to the level of a 5th-grade reader, the median FKRGL score decreased to 7.1 (IQR 6.1-8.3; range: 5.9&#x2010;9.0) and the median FRE score increased to 73.8 (IQR 66.6-79.3; range: 60.1&#x2010;80.9). Two independent graders (CVL and DAS) rated these responses with a mean content similarity of 3.88 (SD 0.25) in comparison to the original responses. Responses to questions that were rephrased by GPT, with prompting to query at a 5th-grade reading level, resulted in a median FKRGL score of 14.5 (IQR 13.2-15.4; range: 11.6&#x2010;15.8), a median FRE score of 32.0 (IQR 27.0-37.7; range: 21.4&#x2010;45.0), and a mean content similarity of 4.63 (SD 0.25) to original responses. 
The raters had identical scores for 50% (4/8) of responses, with the other 4 responses differing by 1 on the 5-point Likert scale.</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>FKRGL<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup> and FRE<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup> scores for select questions (V1), questions prompted to respond at the 5th-grade level (V2), and questions rephrased by GPT to be asked at the 5th-grade level (V3).</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Question and versions</td><td align="left" valign="bottom">FKRGL score</td><td align="left" valign="bottom">FRE score</td><td align="left" valign="bottom">Content similarity, mean (SD)</td></tr></thead><tbody><tr><td align="left" valign="top">Q1</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>V1: What are the signs and symptoms of pancreatic cancer?</td><td align="left" valign="top">12.9</td><td align="left" valign="top">36.8</td><td align="left" valign="top">Reference</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>V2: Answer at a 5th-grade level: What are the signs and symptoms of pancreatic cancer?</td><td align="left" valign="top">6.1</td><td align="left" valign="top">80.9</td><td align="left" valign="top">4.0 (0)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>V3: What are the warning signs of pancreatic cancer and how might someone feel if they have it?</td><td align="left" valign="top">11.6</td><td align="left" valign="top">45.0</td><td align="left" valign="top">4.5 (0.71)</td></tr><tr><td align="left" valign="top">Q7</td><td 
align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>V1: Who is appropriate for surgery for pancreatic cancer?</td><td align="left" valign="top">18.1</td><td align="left" valign="top">20.6</td><td align="left" valign="top">Reference</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>V2: Answer at a 5th-grade level: Who is appropriate for surgery for pancreatic cancer?</td><td align="left" valign="top">9.0</td><td align="left" valign="top">60.1</td><td align="left" valign="top">4.0 (0)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>V3: Who can have surgery to treat pancreatic cancer?</td><td align="left" valign="top">15.3</td><td align="left" valign="top">28.8</td><td align="left" valign="top">5.0 (0)</td></tr><tr><td align="left" valign="top">Q11</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>V1: How long is the recovery from colon cancer surgery?</td><td align="left" valign="top">16.2</td><td align="left" valign="top">11.7</td><td align="left" valign="top">Reference</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>V2: Answer at a 5th-grade level: How long is the recovery from colon cancer surgery?</td><td align="left" valign="top">5.9</td><td align="left" valign="top">78.8</td><td align="left" valign="top">3.5 (0.71)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>V3: How long does it take to feel better after colon cancer 
surgery?</td><td align="left" valign="top">15.8</td><td align="left" valign="top">21.4</td><td align="left" valign="top">4.5 (0.71)</td></tr><tr><td align="left" valign="top">Q15</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>V1: What are the risks of surgery to remove my liver cancer?</td><td align="left" valign="top">14.9</td><td align="left" valign="top">15.6</td><td align="left" valign="top">Reference</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>V2: Answer at a 5th-grade level: What are the risks of surgery to remove my liver cancer?</td><td align="left" valign="top">8.1</td><td align="left" valign="top">68.8</td><td align="left" valign="top">4.0 (0)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>V3: What could go wrong if I have surgery to take out my liver cancer?</td><td align="left" valign="top">13.7</td><td align="left" valign="top">35.2</td><td align="left" valign="top">4.5 (0.71)</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>FKRGL: Flesch-Kincaid Reading Grade Level.</p></fn><fn id="table4fn2"><p><sup>b</sup>FRE: Flesch Reading Ease.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-6"><title>Qualitative Feedback</title><p>Numerous inaccuracies within GPT-generated responses were detected by a surgeon-expert concerning general disease information and postoperative recovery. The following feedback has been modified for clarity but maintains the original intent. In Q2 (signs and symptoms of colon cancer), rectal bleeding was mistakenly described as a systemic symptom, while it is a local symptom that may lead to secondary systemic symptoms, including fatigue due to anemia. 
For Q10 (pancreatic cancer surgery recovery), &#x201C;light activities,&#x201D; which are often defined as walking or activities of daily living in the surgical setting, are resumed while the patient is still admitted, rather than the 6 to 12 weeks following discharge stated in the response. Likewise, for Q11 (colon cancer surgery recovery), certain &#x201C;light activities&#x201D; could be resumed sooner. For Q12 (liver cancer surgery recovery), the mention of major hepatectomy as treatment was notably absent.</p><p>Regarding quality, numerous content gaps were noted. For Q3 (signs and symptoms of liver cancer), the response described chronic liver disease symptoms; these are common for patients with primary liver cancers but less frequent in the setting of secondary liver cancers (ie, colorectal cancer with liver metastases). Regarding Q4 (stages and treatments of pancreatic cancer), genetic testing should be included when discussing targeted therapies. For Q7 (pancreatic cancer surgery eligibility), discussion of the biology of resectability, which is accounted for by tumor markers such as CA 19&#x2010;9, was notably absent. Regarding Q20 (staying near the hospital following discharge), while listed, it is not emphasized that staying nearby is unnecessary unless the patient lives far away. Furthermore, the response could be enhanced by including discussion of local housing options with case management or a social worker. 
For Q25 (postoperative infection), concern for dehiscence is not explicitly stated, and the volume of drainage should be addressed sooner, as high volume may indicate dehiscence.</p></sec><sec id="s3-7"><title>Provider Recommendations for GPT as a Patient Resource</title><p>When assessed on their comfort level with patients using publicly available AI for health information, 57.1% (4/7) of providers reported being &#x201C;comfortable,&#x201D; 14.3% (1/7) reported being &#x201C;neither comfortable nor uncomfortable,&#x201D; 14.3% (1/7) reported being &#x201C;uncomfortable,&#x201D; and 14.3% (1/7) reported being &#x201C;very uncomfortable.&#x201D; Regarding provider discomfort, when asked for questions or concerns pertaining to the study, 1 respondent reported &#x201C;The answers should be designed for a lower health literacy level.&#x201D; Another physician expressed concern over direct patient use of ChatGPT, primarily citing lack of supervision and noting &#x201C;health is not something you want to leave up to a robot. There will always be intricacies that cannot be understood by AI.&#x201D;</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>This study is among the first to evaluate ChatGPT-4o as a patient information resource for individuals preparing for or recovering from surgery for abdominal malignancies [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref35">35</xref>]. As patient self-use of LLMs for medical information is increasing [<xref ref-type="bibr" rid="ref14">14</xref>], it is essential to assess the content quality, safety, and comprehensibility of GPT-generated responses. Through gaining a deeper understanding of the strengths and weaknesses present within LLMs, providers may help patients be aware of such options and help them navigate the use of these sources. 
The current study&#x2019;s results indicate that ChatGPT-4o may serve as a useful patient information resource, with most responses rated from &#x201C;good&#x201D; to &#x201C;very good to excellent&#x201D; in quality, accuracy, and tangibility. Notably, the lowest rated responses received a median score of 3.0, corresponding to a &#x201C;good&#x201D; rating, whereas the highest rated responses received a score of 4.50, corresponding to a rating between &#x201C;very good&#x201D; and &#x201C;excellent.&#x201D; However, there is still room for improvement in generated responses prior to the endorsement of ChatGPT as a &#x201C;gold-standard&#x201D; patient resource. While most providers were &#x201C;comfortable&#x201D; having patients use publicly available AI for health information, 42.9% (3/7) of providers did not report feeling &#x201C;comfortable&#x201D; having patients use publicly available AI for health information, with 2 reporting being &#x201C;uncomfortable&#x201D; or &#x201C;very uncomfortable.&#x201D; Physicians cited concerns regarding patient use of ChatGPT, noting poor response comprehensibility and lack of supervision, factors likely contributing to their lack of comfort in patient use of ChatGPT. Moreover, this study raises concerns about the comprehensibility of the generated responses, as elevated FKRGL scores indicate that many require a postsecondary reading level for adequate understanding. Physicians should be aware that, in the context of patient use of LLMs for medical information, patients would benefit from instructions for use and monitoring for potential ChatGPT-derived misconceptions.</p><p>Information is scarce regarding the safety and accuracy of ChatGPT-generated responses in the perioperative setting for abdominal malignancies. Given the complex biological mechanisms and therapeutic management of gastrointestinal malignancies, it is critical to evaluate the quality of ChatGPT-generated content. 
The presented data suggest that, although ChatGPT responses averaged as &#x201C;good&#x201D; or &#x201C;very good,&#x201D; scores were highly question- and domain-dependent. Given its high overall ratings, ChatGPT may serve as an advantageous tool for patients to develop a baseline knowledge of their disease prior to clinical encounters. However, numerous inaccuracies and content gaps were identified within responses. This is congruent with past work assessing ChatGPT&#x2019;s use for thoracic surgery, where most responses likewise ranged from &#x201C;good&#x201D; to &#x201C;very good,&#x201D; minor inaccuracies were identified in each answer, and certain domains performed better than others [<xref ref-type="bibr" rid="ref32">32</xref>]. Regarding abdominal malignancies, questions concerning staging and treatment received the lowest mean accuracy and quality scores. As such, providers should be encouraged to assess potential disease misconceptions that patients using ChatGPT may have and ensure they distribute comprehensive general disease information. Interestingly, ChatGPT-4o excelled in answering questions pertaining to adverse outcomes following surgery. As such, ChatGPT may help guide patients seeking proper management for postoperative complications.</p><p>The present study suggests that patient information regarding abdominal malignancies presented by ChatGPT-4o may produce material that is poorly comprehensible for many of the intended population due to requirements of high health literacy and education level. One surgeon expressed that responses should be written for a lower health literacy level. This is consistent with the findings of a high grade level requirement for adequate comprehensibility (FKRGL score), averaging a grade level of 14.5. An FKRGL of 14.5 represents a reading level requiring some level of college education. 
Current recommendations suggest that patient resources should be tailored to a 5th-grade reading level for accessibility [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>].</p><p>Readability as a limitation of ChatGPT has been previously reported in the literature [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref22">22</xref>]. Past work regarding cervical spine surgery likewise noted high FKRGL scores to limit ChatGPT-3.5&#x2019;s use as a patient resource. After prompting ChatGPT to provide answers at a 6th-grade reading level, answers decreased from a grade level of 13.5 to 11.2, though remaining persistently elevated. Notably, the present study used similar techniques that successfully produced responses at a lower reading level with ChatGPT-4o. For select questions, the median FKRGL score prior to rephrasing or prompting was 15.6 (IQR 14.4-16.7). Remarkable improvement was noted upon prompting GPT to respond to the level of a 5th-grade reader, decreasing the median FKRGL score, or US grade level, to 7.1 (IQR 6.1-8.3). This work suggests that improvements within ChatGPT-4o may allow for more comprehensible responses, given appropriate prompting. Notably, there was a less meaningful drop in median FKRGL score, from 15.6 (IQR 14.4-16.7) to 14.5 (IQR 13.2-15.4), when ChatGPT was used to <italic>rephrase</italic> questions to be asked at the reading level of a 5th grader. Prior to modification, the median FRE score for select questions was 18.1 (IQR 14.6-24.7). Consistent with FKRGL trends, prompting questions resulted in a more substantial increase in FRE score (median FRE 73.8, IQR 66.6-79.3), indicating markedly improved readability, compared with rephrasing (median FRE 32, IQR 27.0-37.7). This suggests that explicitly requesting ChatGPT to produce responses at a lower level may be more effective in improving readability than adjusting question phrasing. 
Although content similarity to the original responses was higher with rephrasing than with prompting (4.63/5 versus 3.88/5), most key concepts were retained within both groups. As such, prompting ChatGPT to answer at a lower grade level may improve readability without significantly sacrificing content. Therefore, providers should be encouraged to assess patient use of LLMs for medical questions and provide patients with a menu for how to prompt ChatGPT to answer at an appropriate grade level if relevant.</p><p>While comprehensibility without prompting for a specific educational level can be a limitation for the intended patient population, ChatGPT may serve as a useful tool for providers and trainees. Past work in public health has found AI chatbots to be a useful educational tool for medical students in answering complex medical questions [<xref ref-type="bibr" rid="ref38">38</xref>]. Within the present study, questions 1 to 9 pertain to &#x201C;signs and symptoms,&#x201D; &#x201C;stages and treatment,&#x201D; and &#x201C;surgery eligibility&#x201D;; these questions may be asked by clinicians or learners. While patient readability was limited by a high mean grade level required, the ratings typically ranged from &#x201C;good&#x201D; to &#x201C;very good&#x201D; in quality, accuracy, and tangibility. This suggests that ChatGPT-4o can serve as a useful resource for physicians and medical trainees, given their higher health literacy than the general population. To further evaluate the use and comprehensibility of ChatGPT as a patient resource, future investigations should involve patient perspectives.</p><p>This study has several limitations. First, the small sample size (n=7) of surgical oncologists grading the responses substantially limits statistical power and the reliability of the findings. The single-institutional nature of the study further limits generalizability, as physician responses may reflect regional practice patterns and institutional biases. 
Future validation should evaluate larger, multi-institutional cohorts to confirm reproducibility and evaluate external validity. Second, the survey incorporated subjective assessments, which may limit reproducibility, as concepts graded, such as &#x201C;quality,&#x201D; &#x201C;accuracy,&#x201D; and &#x201C;tangibility,&#x201D; are abstract. To enhance reproducibility, standardized definitions of these domains were included on each page of the survey. Third, questions may not be fully representative of patient language. Although questions were obtained from hospital websites and piloted with residents to improve alignment with patient phrasing, they may not encompass the full spectrum of patient inquiries nor the variability of patients&#x2019; health literacy. As only 28 questions were assessed across 3 malignancies, the nature of the questions is limited in scope and may not represent all questions patients may ask pertaining to their diagnosed malignancy. Moreover, questions are broad, pertaining to &#x201C;colon,&#x201D; &#x201C;pancreas,&#x201D; &#x201C;liver,&#x201D; or &#x201C;belly&#x201D; cancers, without specifying types and stages.</p></sec><sec id="s4-2"><title>Conclusions</title><p>This preliminary study indicates that, while publicly accessible ChatGPT may serve as a useful patient resource, its use as an unsupervised source of information for patients with abdominal malignancies has distinct limitations. Providers should be aware that many of their patients are accessing ChatGPT and recognize that developing an understanding of its strengths and limitations can help them guide their patients to enable its best use. Inaccuracies, gaps in information, and poor readability were identified in ChatGPT-generated content, suggesting patients may benefit from physician guidance.
Providers should be prepared to properly support their patients reporting ChatGPT use by counseling them on techniques such as prompting questions to tailor responses to their educational level. The data herein indicate that this is critical for the interpretation of the information by patients, as without this guidance, the answers are directed to an educational level of college or above.</p></sec></sec></body><back><ack><p>During the preparation of this work, the authors used ChatGPT-4o as specified above to collect the information required for data analysis (artificial intelligence&#x2013;generated responses). No generative artificial intelligence was used in the writing process.</p></ack><notes><sec><title>Funding</title><p>The authors declared no financial support was received for this work.</p></sec><sec><title>Data Availability</title><p>To preserve respondent confidentiality, the dataset used in this study is not publicly available. A limited version of the dataset used in this study can be obtained from the first author upon request.</p></sec></notes><fn-group><fn fn-type="con"><p>Conceptualization: CVL, DAS, ANM, CLC, KNS, MEL, DPN, RS</p><p>Data curation: CVL, DAS, ANM, CLC, KNS, MEL, DPN, RS</p><p>Formal analysis: CVL, DAS, RS</p><p>Methodology: CVL, DAS, RS</p><p>Supervision: ANM, CLC, KNS, MEL, DPN, RS</p><p>Writing &#x2013; original draft: CVL, RS</p><p>Writing &#x2013; review &#x0026; editing: CVL, DAS, ANM, CLC, KNS, MEL, DPN, RS</p></fn><fn fn-type="conflict"><p>RS serves on the Board of Directors of DNAnexus, Heartland Whole Health Institute, and ZealCare, Inc, where he is also the cofounder; the Board of Trustees of American Medical Program, Tel Aviv University; and the Scientific Advisory Board of OrthoBioTherapeutics Inc.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">FKRGL</term><def><p>Flesch-Kincaid Reading Grade
Level</p></def></def-item><def-item><term id="abb3">FRE</term><def><p>Flesch Reading Ease</p></def></def-item><def-item><term id="abb4">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb5">UTI</term><def><p>urinary tract infection</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="web"><source>ChatGPT</source><access-date>2025-03-09</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://chatgpt.com">https://chatgpt.com</ext-link></comment></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Guinness</surname><given-names>H</given-names> </name></person-group><article-title>How does ChatGPT work?</article-title><source>Zapier</source><access-date>2025-04-15</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://zapier.com/blog/how-does-chatgpt-work/">https://zapier.com/blog/how-does-chatgpt-work/</ext-link></comment></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Bubeck</surname><given-names>S</given-names> </name><name name-style="western"><surname>Chandrasekaran</surname><given-names>V</given-names> </name><name name-style="western"><surname>Eldan</surname><given-names>R</given-names> </name><name name-style="western"><surname>Gehrke</surname><given-names>J</given-names> </name><name name-style="western"><surname>Horvitz</surname><given-names>E</given-names> </name><name name-style="western"><surname>Kamar</surname><given-names>E</given-names> </name><etal/></person-group><article-title>Sparks of artificial general intelligence: early experiments with GPT-4</article-title><source>arXiv</source><comment>Preprint posted online on Mar 22, 2023</comment><pub-id
pub-id-type="doi">10.48550/arXiv.2303.12712</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kung</surname><given-names>TH</given-names> </name><name name-style="western"><surname>Cheatham</surname><given-names>M</given-names> </name><name name-style="western"><surname>Medenilla</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Performance of ChatGPT on USMLE: potential for AI-assisted medical education using large language models</article-title><source>PLOS Digit Health</source><year>2023</year><month>02</month><volume>2</volume><issue>2</issue><fpage>e0000198</fpage><pub-id pub-id-type="doi">10.1371/journal.pdig.0000198</pub-id><pub-id pub-id-type="medline">36812645</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gilson</surname><given-names>A</given-names> </name><name name-style="western"><surname>Safranek</surname><given-names>CW</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>T</given-names> </name><etal/></person-group><article-title>How does ChatGPT perform on the United States Medical Licensing Examination (USMLE)? 
The implications of large language models for medical education and knowledge assessment</article-title><source>JMIR Med Educ</source><year>2023</year><month>02</month><day>8</day><volume>9</volume><fpage>e45312</fpage><pub-id pub-id-type="doi">10.2196/45312</pub-id><pub-id pub-id-type="medline">36753318</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gupta</surname><given-names>R</given-names> </name><name name-style="western"><surname>Herzog</surname><given-names>I</given-names> </name><name name-style="western"><surname>Park</surname><given-names>JB</given-names> </name><etal/></person-group><article-title>Performance of ChatGPT on the plastic surgery inservice training examination</article-title><source>Aesthet Surg J</source><year>2023</year><month>11</month><day>16</day><volume>43</volume><issue>12</issue><fpage>NP1078</fpage><lpage>NP1082</lpage><pub-id pub-id-type="doi">10.1093/asj/sjad128</pub-id><pub-id pub-id-type="medline">37128784</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hoch</surname><given-names>CC</given-names> </name><name name-style="western"><surname>Wollenberg</surname><given-names>B</given-names> </name><name name-style="western"><surname>L&#x00FC;ers</surname><given-names>JC</given-names> </name><etal/></person-group><article-title>ChatGPT&#x2019;s quiz skills in different otolaryngology subspecialties: an analysis of 2576 single-choice and multiple-choice board certification preparation questions</article-title><source>Eur Arch Otorhinolaryngol</source><year>2023</year><month>09</month><volume>280</volume><issue>9</issue><fpage>4271</fpage><lpage>4278</lpage><pub-id pub-id-type="doi">10.1007/s00405-023-08051-4</pub-id><pub-id 
pub-id-type="medline">37285018</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mihalache</surname><given-names>A</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>RS</given-names> </name><name name-style="western"><surname>Popovic</surname><given-names>MM</given-names> </name><name name-style="western"><surname>Muni</surname><given-names>RH</given-names> </name></person-group><article-title>Performance of an upgraded artificial intelligence chatbot for ophthalmic knowledge assessment</article-title><source>JAMA Ophthalmol</source><year>2023</year><month>08</month><day>1</day><volume>141</volume><issue>8</issue><fpage>798</fpage><lpage>800</lpage><pub-id pub-id-type="doi">10.1001/jamaophthalmol.2023.2754</pub-id><pub-id pub-id-type="medline">37440220</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cabral</surname><given-names>S</given-names> </name><name name-style="western"><surname>Restrepo</surname><given-names>D</given-names> </name><name name-style="western"><surname>Kanjee</surname><given-names>Z</given-names> </name><etal/></person-group><article-title>Clinical reasoning of a generative artificial intelligence model compared with physicians</article-title><source>JAMA Intern Med</source><year>2024</year><month>05</month><day>1</day><volume>184</volume><issue>5</issue><fpage>581</fpage><lpage>583</lpage><pub-id pub-id-type="doi">10.1001/jamainternmed.2024.0295</pub-id><pub-id pub-id-type="medline">38557971</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tan</surname><given-names>S</given-names> </name><name 
name-style="western"><surname>Xin</surname><given-names>X</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>D</given-names> </name></person-group><article-title>ChatGPT in medicine: prospects and challenges: a review article</article-title><source>Int J Surg</source><year>2024</year><month>06</month><day>1</day><volume>110</volume><issue>6</issue><fpage>3701</fpage><lpage>3706</lpage><pub-id pub-id-type="doi">10.1097/JS9.0000000000001312</pub-id><pub-id pub-id-type="medline">38502861</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>HY</given-names> </name><name name-style="western"><surname>Alessandri Bonetti</surname><given-names>M</given-names> </name><name name-style="western"><surname>De Lorenzi</surname><given-names>F</given-names> </name><name name-style="western"><surname>Gimbel</surname><given-names>ML</given-names> </name><name name-style="western"><surname>Nguyen</surname><given-names>VT</given-names> </name><name name-style="western"><surname>Egro</surname><given-names>FM</given-names> </name></person-group><article-title>Consulting the digital doctor: Google versus ChatGPT as sources of information on breast implant-associated anaplastic large cell lymphoma and breast implant illness</article-title><source>Aesthetic Plast Surg</source><year>2024</year><month>02</month><volume>48</volume><issue>4</issue><fpage>590</fpage><lpage>607</lpage><pub-id pub-id-type="doi">10.1007/s00266-023-03713-4</pub-id><pub-id pub-id-type="medline">37903939</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bergmo</surname><given-names>TS</given-names> </name><name name-style="western"><surname>Sandsdalen</surname><given-names>V</given-names> </name><name 
name-style="western"><surname>Manskow</surname><given-names>US</given-names> </name><name name-style="western"><surname>Sm&#x00E5;brekke</surname><given-names>L</given-names> </name><name name-style="western"><surname>Waaseth</surname><given-names>M</given-names> </name></person-group><article-title>Internet use for obtaining medicine information: cross-sectional survey</article-title><source>JMIR Form Res</source><year>2023</year><month>02</month><day>2</day><volume>7</volume><fpage>e40466</fpage><pub-id pub-id-type="doi">10.2196/40466</pub-id><pub-id pub-id-type="medline">36729577</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ramli</surname><given-names>R</given-names> </name><name name-style="western"><surname>Jambor</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Kong</surname><given-names>CY</given-names> </name></person-group><article-title>Dr Google - assessing the reliability and readability of information on general surgical procedures found via search engines</article-title><source>ANZ J Surg</source><year>2023</year><month>03</month><volume>93</volume><issue>3</issue><fpage>590</fpage><lpage>596</lpage><pub-id pub-id-type="doi">10.1111/ans.18289</pub-id><pub-id pub-id-type="medline">36716246</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ayre</surname><given-names>J</given-names> </name><name name-style="western"><surname>Cvejic</surname><given-names>E</given-names> </name><name name-style="western"><surname>McCaffery</surname><given-names>KJ</given-names> </name></person-group><article-title>Use of ChatGPT to obtain health information in Australia, 2024: insights from a nationally representative survey</article-title><source>Med J 
Aust</source><year>2025</year><month>03</month><day>3</day><volume>222</volume><issue>4</issue><fpage>210</fpage><lpage>212</lpage><pub-id pub-id-type="doi">10.5694/mja2.52598</pub-id><pub-id pub-id-type="medline">39901778</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shen</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Perez-Heydrich</surname><given-names>CA</given-names> </name><name name-style="western"><surname>Xie</surname><given-names>DX</given-names> </name><name name-style="western"><surname>Nellis</surname><given-names>JC</given-names> </name></person-group><article-title>ChatGPT vs. web search for patient questions: what does ChatGPT do better?</article-title><source>Eur Arch Otorhinolaryngol</source><year>2024</year><month>06</month><volume>281</volume><issue>6</issue><fpage>3219</fpage><lpage>3225</lpage><pub-id pub-id-type="doi">10.1007/s00405-024-08524-0</pub-id><pub-id pub-id-type="medline">38416195</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Artioli</surname><given-names>E</given-names> </name><name name-style="western"><surname>Veronesi</surname><given-names>F</given-names> </name><name name-style="western"><surname>Mazzotti</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Assessing ChatGPT responses to common patient questions regarding total ankle arthroplasty</article-title><source>J Exp Orthop</source><year>2024</year><volume>12</volume><issue>1</issue><fpage>e70138</fpage><pub-id pub-id-type="doi">10.1002/jeo2.70138</pub-id><pub-id pub-id-type="medline">39741912</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Gajjar</surname><given-names>AA</given-names> </name><name name-style="western"><surname>Kumar</surname><given-names>RP</given-names> </name><name name-style="western"><surname>Paliwoda</surname><given-names>ED</given-names> </name><etal/></person-group><article-title>Usefulness and accuracy of artificial intelligence chatbot responses to patient questions for neurosurgical procedures</article-title><source>Neurosurgery</source><year>2024</year><month>02</month><day>14</day><volume>95</volume><issue>1</issue><pub-id pub-id-type="doi">10.1227/neu.0000000000002856</pub-id><pub-id pub-id-type="medline">38353558</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Samaan</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Yeo</surname><given-names>YH</given-names> </name><name name-style="western"><surname>Rajeev</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Assessing the accuracy of responses by the language model ChatGPT to questions regarding bariatric surgery</article-title><source>Obes Surg</source><year>2023</year><month>06</month><volume>33</volume><issue>6</issue><fpage>1790</fpage><lpage>1796</lpage><pub-id pub-id-type="doi">10.1007/s11695-023-06603-5</pub-id><pub-id pub-id-type="medline">37106269</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yeo</surname><given-names>YH</given-names> </name><name name-style="western"><surname>Samaan</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Ng</surname><given-names>WH</given-names> </name><etal/></person-group><article-title>Assessing the performance of ChatGPT in answering questions regarding cirrhosis and hepatocellular 
carcinoma</article-title><source>Clin Mol Hepatol</source><year>2023</year><month>07</month><volume>29</volume><issue>3</issue><fpage>721</fpage><lpage>732</lpage><pub-id pub-id-type="doi">10.3350/cmh.2023.0089</pub-id><pub-id pub-id-type="medline">36946005</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rydzewski</surname><given-names>NR</given-names> </name><name name-style="western"><surname>Dinakaran</surname><given-names>D</given-names> </name><name name-style="western"><surname>Zhao</surname><given-names>SG</given-names> </name><etal/></person-group><article-title>Comparative evaluation of LLMs in clinical oncology</article-title><source>NEJM AI</source><year>2024</year><month>05</month><volume>1</volume><issue>5</issue><pub-id pub-id-type="doi">10.1056/aioa2300151</pub-id><pub-id pub-id-type="medline">39131700</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lee</surname><given-names>TC</given-names> </name><name name-style="western"><surname>Staller</surname><given-names>K</given-names> </name><name name-style="western"><surname>Botoman</surname><given-names>V</given-names> </name><name name-style="western"><surname>Pathipati</surname><given-names>MP</given-names> </name><name name-style="western"><surname>Varma</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kuo</surname><given-names>B</given-names> </name></person-group><article-title>ChatGPT answers common patient questions about colonoscopy</article-title><source>Gastroenterology</source><year>2023</year><month>08</month><volume>165</volume><issue>2</issue><fpage>509</fpage><lpage>511.e7</lpage><pub-id pub-id-type="doi">10.1053/j.gastro.2023.04.033</pub-id><pub-id 
pub-id-type="medline">37150470</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Subramanian</surname><given-names>T</given-names> </name><name name-style="western"><surname>Araghi</surname><given-names>K</given-names> </name><name name-style="western"><surname>Amen</surname><given-names>TB</given-names> </name><etal/></person-group><article-title>Chat generative pretraining transformer answers patient-focused questions in cervical spine surgery</article-title><source>Clin Spine Surg</source><year>2024</year><month>07</month><day>1</day><volume>37</volume><issue>6</issue><fpage>E278</fpage><lpage>E281</lpage><pub-id pub-id-type="doi">10.1097/BSD.0000000000001600</pub-id><pub-id pub-id-type="medline">38531823</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Al-Dujaili</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Omari</surname><given-names>S</given-names> </name><name name-style="western"><surname>Pillai</surname><given-names>J</given-names> </name><name name-style="western"><surname>Al Faraj</surname><given-names>A</given-names> </name></person-group><article-title>Assessing the accuracy and consistency of ChatGPT in clinical pharmacy management: a preliminary analysis with clinical pharmacy experts worldwide</article-title><source>Res Social Adm Pharm</source><year>2023</year><month>12</month><volume>19</volume><issue>12</issue><fpage>1590</fpage><lpage>1594</lpage><pub-id pub-id-type="doi">10.1016/j.sapharm.2023.08.012</pub-id><pub-id pub-id-type="medline">37696742</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Arnold</surname><given-names>M</given-names> </name><name name-style="western"><surname>Abnet</surname><given-names>CC</given-names> </name><name name-style="western"><surname>Neale</surname><given-names>RE</given-names> </name><etal/></person-group><article-title>Global burden of 5 major types of gastrointestinal cancer</article-title><source>Gastroenterology</source><year>2020</year><month>07</month><volume>159</volume><issue>1</issue><fpage>335</fpage><lpage>349.e15</lpage><pub-id pub-id-type="doi">10.1053/j.gastro.2020.02.068</pub-id><pub-id pub-id-type="medline">32247694</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="web"><article-title>Pancreatic cancer questions to ask the healthcare team</article-title><source>Pancreatic Cancer Action Network</source><year>2025</year><access-date>2025-01-15</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://pancan.org/facing-pancreatic-cancer/diagnosis/choosing-your-healthcare-team/questions-to-ask-your-healthcare-team">https://pancan.org/facing-pancreatic-cancer/diagnosis/choosing-your-healthcare-team/questions-to-ask-your-healthcare-team</ext-link></comment></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="web"><article-title>Colon cancer FAQs</article-title><source>Moffitt Cancer Center</source><year>2025</year><access-date>2025-01-15</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.moffitt.org/cancers/colon-cancer/faqs">https://www.moffitt.org/cancers/colon-cancer/faqs</ext-link></comment></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="web"><article-title>Frequently asked questions</article-title><source>Mount Sinai Tisch Cancer Center</source><year>2025</year><access-date>2025-01-15</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://www.mountsinai.org/care/cancer/services/colon/faqs">https://www.mountsinai.org/care/cancer/services/colon/faqs</ext-link></comment></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="web"><article-title>Liver cancer &#x2013; frequently asked questions</article-title><source>Pelican Cancer Foundation</source><year>2025</year><access-date>2025-01-15</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.pelicancancer.org/patient-resources/liver-cancer/frequently-asked-questions">https://www.pelicancancer.org/patient-resources/liver-cancer/frequently-asked-questions</ext-link></comment></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="web"><article-title>Questions to ask about pancreatic cancer</article-title><source>American Cancer Society</source><year>2024</year><access-date>2025-01-15</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cancer.org/cancer/types/pancreatic-cancer/detection-diagnosis-staging/talking-with-doctor.html">https://www.cancer.org/cancer/types/pancreatic-cancer/detection-diagnosis-staging/talking-with-doctor.html</ext-link></comment></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="web"><article-title>Questions to ask about liver cancer</article-title><source>American Cancer Society</source><year>2025</year><access-date>2025-01-15</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cancer.org/cancer/types/liver-cancer/detection-diagnosis-staging/talking-with-doctor.html">https://www.cancer.org/cancer/types/liver-cancer/detection-diagnosis-staging/talking-with-doctor.html</ext-link></comment></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="web"><article-title>Frequently asked questions</article-title><source>Hirshberg Foundation for Pancreatic Cancer 
Research</source><year>2025</year><access-date>2025-01-15</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://pancreatic.org/pancreatic-cancer/faqs">https://pancreatic.org/pancreatic-cancer/faqs</ext-link></comment></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ferrari-Light</surname><given-names>D</given-names> </name><name name-style="western"><surname>Merritt</surname><given-names>RE</given-names> </name><name name-style="western"><surname>D&#x2019;Souza</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Evaluating ChatGPT as a patient resource for frequently asked questions about lung cancer surgery-a pilot study</article-title><source>J Thorac Cardiovasc Surg</source><year>2025</year><month>04</month><volume>169</volume><issue>4</issue><fpage>1174</fpage><lpage>1180</lpage><pub-id pub-id-type="doi">10.1016/j.jtcvs.2024.09.030</pub-id><pub-id pub-id-type="medline">39326732</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="web"><article-title>ChatGPT &#x2014; release notes</article-title><source>OpenAI</source><year>2026</year><access-date>2026-03-01</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://help.openai.com/en/articles/6825453-chatgpt-release-notes">https://help.openai.com/en/articles/6825453-chatgpt-release-notes</ext-link></comment></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jindal</surname><given-names>P</given-names> </name><name name-style="western"><surname>MacDermid</surname><given-names>JC</given-names> </name></person-group><article-title>Assessing reading levels of health information: uses and limitations of Flesch
formula</article-title><source>Educ Health (Abingdon)</source><year>2017</year><volume>30</volume><issue>1</issue><fpage>84</fpage><lpage>88</lpage><pub-id pub-id-type="doi">10.4103/1357-6283.210517</pub-id><pub-id pub-id-type="medline">28707643</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Munir</surname><given-names>MM</given-names> </name><name name-style="western"><surname>Endo</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Ejaz</surname><given-names>A</given-names> </name><name name-style="western"><surname>Dillhoff</surname><given-names>M</given-names> </name><name name-style="western"><surname>Cloyd</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Pawlik</surname><given-names>TM</given-names> </name></person-group><article-title>Online artificial intelligence platforms and their applicability to gastrointestinal surgical operations</article-title><source>J Gastrointest Surg</source><year>2024</year><month>01</month><volume>28</volume><issue>1</issue><fpage>64</fpage><lpage>69</lpage><pub-id pub-id-type="doi">10.1016/j.gassur.2023.11.019</pub-id><pub-id pub-id-type="medline">38353076</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="report"><article-title>AHRQ health literacy universal precautions toolkit</article-title><year>2024</year><access-date>2025-04-20</access-date><publisher-name>Agency for Healthcare Research and Quality (AHRQ)</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://www.ahrq.gov/sites/default/files/wysiwyg/health-literacy/3rd-edition-toolkit/health-literacy-toolkit-third-edition.pdf">https://www.ahrq.gov/sites/default/files/wysiwyg/health-literacy/3rd-edition-toolkit/health-literacy-toolkit-third-edition.pdf</ext-link></comment></nlm-citation></ref><ref 
id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Stossel</surname><given-names>LM</given-names> </name><name name-style="western"><surname>Segar</surname><given-names>N</given-names> </name><name name-style="western"><surname>Gliatto</surname><given-names>P</given-names> </name><name name-style="western"><surname>Fallar</surname><given-names>R</given-names> </name><name name-style="western"><surname>Karani</surname><given-names>R</given-names> </name></person-group><article-title>Readability of patient education materials available at the point of care</article-title><source>J Gen Intern Med</source><year>2012</year><month>09</month><volume>27</volume><issue>9</issue><fpage>1165</fpage><lpage>1170</lpage><pub-id pub-id-type="doi">10.1007/s11606-012-2046-0</pub-id><pub-id pub-id-type="medline">22528620</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Baglivo</surname><given-names>F</given-names> </name><name name-style="western"><surname>De Angelis</surname><given-names>L</given-names> </name><name name-style="western"><surname>Casigliani</surname><given-names>V</given-names> </name><name name-style="western"><surname>Arzilli</surname><given-names>G</given-names> </name><name name-style="western"><surname>Privitera</surname><given-names>GP</given-names> </name><name name-style="western"><surname>Rizzo</surname><given-names>C</given-names> </name></person-group><article-title>Exploring the possible use of AI chatbots in public health education: feasibility study</article-title><source>JMIR Med Educ</source><year>2023</year><month>11</month><day>1</day><volume>9</volume><fpage>e51421</fpage><pub-id pub-id-type="doi">10.2196/51421</pub-id><pub-id pub-id-type="medline">37910155</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material 
id="app1"><label>Multimedia Appendix 1</label><p>Distributed survey with GPT-generated responses.</p><media xlink:href="periop_v9i1e81374_app1.docx" xlink:title="DOCX File, 31 KB"/></supplementary-material></app-group></back></article>