Files
LLM_Engineering_OLD/community-contributions/protocol_summarizer_webapp/app.py
albertoclemente 99d1d2b4f5 Fix: Convert protocol_summarizer_webapp from submodule to regular files
- Remove protocol_summarizer_webapp submodule reference
- Add all webapp files as regular files to enable proper PR creation
- Includes Streamlit app, documentation, and configuration files
2025-07-03 17:23:00 +02:00

122 lines
5.1 KiB
Python

import os
from dotenv import load_dotenv
import streamlit as st
import requests
from openai import OpenAI
# Load environment variables from a local .env file (if any) before the
# rest of the script runs.
load_dotenv()

# Page header and a one-paragraph description of the workflow.
st.title("Protocol Summarizer")
st.markdown(
    "\n"
    "Search for clinical trials by keyword, select a study, and generate a protocol summary using an LLM.\n"
)

# Search inputs live inside a form so the new values are only picked up once
# the user submits (button click or Enter), not on every keystroke.
with st.form("search_form"):
    query = st.text_input(label="Enter a disease, study title, or keyword:")
    max_results = st.slider(
        "Number of results",
        min_value=1,
        max_value=20,
        value=5,
    )
    submitted = st.form_submit_button(label="Search")
@st.cache_data(show_spinner=False)
def search_clinical_trials(query, max_results=5):
    """Search the ClinicalTrials.gov v2 API and return the matching studies.

    Args:
        query: Free-text search term (disease, study title, keyword).
        max_results: Page size requested from the API.

    Returns:
        A list of ``{'nct': ..., 'title': ...}`` dicts, one per study returned.
        The list is empty when ``query`` is empty or the API answers with a
        non-200 status.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    if not query:
        return []

    # Pass the query through ``params`` so requests percent-encodes it;
    # interpolating raw user text into the URL breaks on '&', '#', '+', etc.
    resp = requests.get(
        "https://clinicaltrials.gov/api/v2/studies",
        params={"query.term": query, "pageSize": max_results, "format": "json"},
        timeout=15,  # seconds; without it a stalled connection blocks the app forever
    )
    if resp.status_code != 200:
        return []

    studies = []
    for study in resp.json().get('studies', []):
        ident = study.get('protocolSection', {}).get('identificationModule', {})
        studies.append({
            'nct': ident.get('nctId', 'N/A'),
            # Not every record has an official title; fall back to the brief
            # title before giving up with 'N/A'.
            'title': ident.get('officialTitle') or ident.get('briefTitle') or 'N/A',
        })
    return studies
# Seconds to wait for ClinicalTrials.gov before giving up on a request.
_HTTP_TIMEOUT = 15


def _extract_brief(study_json):
    """Return the study's brief summary, else its detailed description, else ''."""
    desc_mod = study_json.get('protocolSection', {}).get('descriptionModule', {})
    return desc_mod.get('briefSummary') or desc_mod.get('detailedDescription') or ""


def _scrape_brief_from_html(html, max_chars=1000):
    """Return up to ``max_chars`` of tag-stripped text following 'Brief Summary:'.

    Returns an empty string when the marker is not present in ``html``.
    """
    import re

    marker = "Brief Summary:"
    pos = html.find(marker)
    if pos == -1:
        return ""
    # Start exactly after the marker. A hard-coded offset one larger than the
    # marker length would swallow the next character, and if that is the '<'
    # of a tag the tag-stripping regex below no longer matches it.
    start = pos + len(marker)
    excerpt = html[start:start + max_chars]
    excerpt = re.sub(r'<[^<]+?>', ' ', excerpt)  # drop HTML tags
    excerpt = re.sub(r'\s+', ' ', excerpt)       # collapse whitespace runs
    return excerpt.strip()


def user_prompt_for_protocol_brief(brief_text):
    """Build the user message asking the LLM to extract protocol details."""
    return (
        "Extract the following details from the clinical trial brief summary in markdown format with clear section headings (e.g., ## Study Design, ## Population, etc.):\n"
        "- Study design\n"
        "- Population\n"
        "- Interventions\n"
        "- Primary and secondary endpoints\n"
        "- Study duration\n\n"
        f"Brief summary text:\n{brief_text}"
    )


# Only hit the API once the user has actually entered a search term.
results = search_clinical_trials(query, max_results) if query else []

if results:
    st.subheader("Search Results")
    for i, study in enumerate(results, start=1):
        st.markdown(f"**{i}. {study['title']}** (NCT: {study['nct']})")

    # The list above is numbered from 1, so the picker is 1-based as well.
    selected = st.number_input(
        "Select study number to summarize",
        min_value=1,
        max_value=len(results),
        value=1,
    )
    selected_study = results[selected - 1]
    st.markdown(f"### Selected Study\n**{selected_study['title']}** (NCT: {selected_study['nct']})")

    if st.button("Summarize Protocol"):
        nct_id = selected_study['nct']
        url = f"https://clinicaltrials.gov/api/v2/studies/{nct_id}?format=json"

        brief = ""
        fetch_error = None
        with st.spinner("Fetching study details..."):
            try:
                resp = requests.get(url, timeout=_HTTP_TIMEOUT)
                if resp.status_code == 200:
                    try:
                        brief = _extract_brief(resp.json())
                    except (ValueError, AttributeError) as e:
                        # ValueError: body is not valid JSON;
                        # AttributeError: JSON is not shaped like an object.
                        st.error(f"Error parsing study data: {e}")
                else:
                    # The API did not answer with 200: fall back to scraping
                    # the summary out of the study's HTML page.
                    st.warning(f"API returned status code {resp.status_code}. Trying alternative method...")
                    html_url = f"https://clinicaltrials.gov/ct2/show/{nct_id}"
                    html_resp = requests.get(html_url, timeout=_HTTP_TIMEOUT)
                    brief = _scrape_brief_from_html(html_resp.text)
            except requests.RequestException as e:
                # Connection failure or timeout on either request.
                fetch_error = e

        if fetch_error is not None:
            st.error(f"Could not fetch study details: {fetch_error}")
            st.stop()
        if not brief:
            st.error("No brief summary or detailed description found for this study.")
            st.stop()

        # We have text to summarize: hand it to the LLM.
        system_prompt = "You are a clinical research assistant. Extract and list the requested protocol details in markdown format with clear section headings."
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt_for_protocol_brief(brief)},
        ]
        with st.spinner("Summarizing with LLM..."):
            try:
                # Build the client inside the try so a configuration problem
                # (e.g. no API key available) is reported like any other LLM
                # failure instead of surfacing as a raw traceback.
                client = OpenAI()
                response = client.chat.completions.create(
                    model="gpt-4o-mini",
                    messages=messages,
                )
                summary = response.choices[0].message.content
                st.markdown(summary)
            except Exception as e:
                st.error(f"LLM call failed: {e}")
elif query:
    st.info("No results found. Try a different keyword.")