From 3c6f4263adcd15bd6ac8471e6470a5aff314c280 Mon Sep 17 00:00:00 2001 From: Feyisa-Diba Date: Sat, 16 Aug 2025 00:30:26 -0400 Subject: [PATCH] Add thesis summarizer notebook using direct PDF link --- .../day-1-thesis_pdf_summarizer.ipynb | 305 ++++++++++++++++++ 1 file changed, 305 insertions(+) create mode 100644 week1/community-contributions/day-1-thesis_pdf_summarizer.ipynb diff --git a/week1/community-contributions/day-1-thesis_pdf_summarizer.ipynb b/week1/community-contributions/day-1-thesis_pdf_summarizer.ipynb new file mode 100644 index 0000000..e18c68f --- /dev/null +++ b/week1/community-contributions/day-1-thesis_pdf_summarizer.ipynb @@ -0,0 +1,305 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "581151c0-941e-47b3-a3e0-2da65ba70087", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import requests\n", + "from dotenv import load_dotenv\n", + "from IPython.display import Markdown, display\n", + "from openai import OpenAI" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "47353a41-4b47-499e-9460-fd645345f591", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "API key found and looks good so far\n" + ] + } + ], + "source": [ + "load_dotenv()\n", + "api_key = os.getenv('OPENAI_API_KEY')\n", + "# Report whether the key loaded from the environment looks usable\n", + "if api_key and api_key.startswith(\"sk-proj-\"):\n", + "    print(\"API key found and looks good so far\")\n", + "elif api_key:\n", + "    print(\"API key is found but is not in the proper format\")\n", + "else:\n", + "    print('No API key was found')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "dbfbb29a-3452-45a0-b9b3-4e329ac776fb", + "metadata": {}, + "outputs": [], + "source": [ + "openai = OpenAI()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "88ffe256-e46a-45e8-a616-0ac574aa7085", + "metadata": {}, + "outputs": [], + "source": [ + "system_prompt = \"\"\"You are a research summarizer specialized in 
wireless communication systems and propagation modeling. Your task is to summarize a research thesis in no more than 1000 words. The summary must be clear, structured, and written in markdown format.\n", + "\n", + "The summary should include the following sections:\n", + "\n", + "1. **Title and Authors** – Provide the full title of the thesis and author name(s).\n", + "2. **Objective / Research Problem** – Clearly state the core research goal or question addressed in the thesis.\n", + "3. **Scientific and Regional Background** – Explain the technical context of radio wave propagation, and why studying it in the Horn of Africa region is important.\n", + "4. **Methodology** – Summarize the modeling techniques, data sources, simulation tools, frequency bands (e.g., microwave, millimeter), and measurement or evaluation methods used.\n", + "5. **Key Findings** – Highlight the quantitative and qualitative results, including differences between precipitation and clear-air conditions, and observed trends across geographic locations.\n", + "6. **Conclusion** – Describe the primary outcomes and how they advance understanding in wireless communications.\n", + "7. **Limitations** – Point out any constraints (e.g., lack of in-situ measurement, simulation assumptions).\n", + "8. **Future Work** – Suggest next steps for improving or extending this research.\n", + "9. **Real-World Applications** – Discuss how the models or findings could improve wireless network planning, 5G deployment, or link budgeting in East Africa and similar regions.\n", + "\n", + "Use academic language but keep it concise, clear, and structured for a technical reader. 
Output in markdown format only.\n", + "\"\"\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "5f3f7b1a-865f-44cc-854d-9e9e7771eb82", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: ipywidgets in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (8.1.7)\n", + "Collecting pdfplumber\n", + " Downloading pdfplumber-0.11.7-py3-none-any.whl.metadata (42 kB)\n", + "Requirement already satisfied: comm>=0.1.3 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipywidgets) (0.2.3)\n", + "Requirement already satisfied: ipython>=6.1.0 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipywidgets) (9.4.0)\n", + "Requirement already satisfied: traitlets>=4.3.1 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipywidgets) (5.14.3)\n", + "Requirement already satisfied: widgetsnbextension~=4.0.14 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipywidgets) (4.0.14)\n", + "Requirement already satisfied: jupyterlab_widgets~=3.0.15 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipywidgets) (3.0.15)\n", + "Collecting pdfminer.six==20250506 (from pdfplumber)\n", + " Downloading pdfminer_six-20250506-py3-none-any.whl.metadata (4.2 kB)\n", + "Requirement already satisfied: Pillow>=9.1 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from pdfplumber) (11.3.0)\n", + "Collecting pypdfium2>=4.18.0 (from pdfplumber)\n", + " Downloading pypdfium2-4.30.0-py3-none-win_amd64.whl.metadata (48 kB)\n", + "Requirement already satisfied: charset-normalizer>=2.0.0 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from pdfminer.six==20250506->pdfplumber) (3.4.3)\n", + "Requirement already satisfied: cryptography>=36.0.0 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from pdfminer.six==20250506->pdfplumber) (45.0.6)\n", + "Requirement already 
satisfied: cffi>=1.14 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from cryptography>=36.0.0->pdfminer.six==20250506->pdfplumber) (1.17.1)\n", + "Requirement already satisfied: pycparser in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from cffi>=1.14->cryptography>=36.0.0->pdfminer.six==20250506->pdfplumber) (2.22)\n", + "Requirement already satisfied: colorama in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.4.6)\n", + "Requirement already satisfied: decorator in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (5.2.1)\n", + "Requirement already satisfied: ipython-pygments-lexers in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (1.1.1)\n", + "Requirement already satisfied: jedi>=0.16 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.19.2)\n", + "Requirement already satisfied: matplotlib-inline in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.1.7)\n", + "Requirement already satisfied: prompt_toolkit<3.1.0,>=3.0.41 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (3.0.51)\n", + "Requirement already satisfied: pygments>=2.4.0 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (2.19.2)\n", + "Requirement already satisfied: stack_data in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.6.3)\n", + "Requirement already satisfied: typing_extensions>=4.6 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (4.14.1)\n", + "Requirement already satisfied: wcwidth in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from prompt_toolkit<3.1.0,>=3.0.41->ipython>=6.1.0->ipywidgets) (0.2.13)\n", + "Requirement 
already satisfied: parso<0.9.0,>=0.8.4 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets) (0.8.4)\n", + "Requirement already satisfied: executing>=1.2.0 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from stack_data->ipython>=6.1.0->ipywidgets) (2.2.0)\n", + "Requirement already satisfied: asttokens>=2.1.0 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from stack_data->ipython>=6.1.0->ipywidgets) (3.0.0)\n", + "Requirement already satisfied: pure_eval in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from stack_data->ipython>=6.1.0->ipywidgets) (0.2.3)\n", + "Downloading pdfplumber-0.11.7-py3-none-any.whl (60 kB)\n", + "Downloading pdfminer_six-20250506-py3-none-any.whl (5.6 MB)\n", + " ---------------------------------------- 0.0/5.6 MB ? eta -:--:--\n", + " --------------------------------------- 5.5/5.6 MB 30.7 MB/s eta 0:00:01\n", + " ---------------------------------------- 5.6/5.6 MB 22.9 MB/s 0:00:00\n", + "Downloading pypdfium2-4.30.0-py3-none-win_amd64.whl (2.9 MB)\n", + " ---------------------------------------- 0.0/2.9 MB ? 
eta -:--:--\n", + " ---------------------------------------- 2.9/2.9 MB 28.0 MB/s 0:00:00\n", + "Installing collected packages: pypdfium2, pdfminer.six, pdfplumber\n", + "\n", + " ---------------------------------------- 0/3 [pypdfium2]\n", + " ---------------------------------------- 0/3 [pypdfium2]\n", + " ------------- -------------------------- 1/3 [pdfminer.six]\n", + " ------------- -------------------------- 1/3 [pdfminer.six]\n", + " ------------- -------------------------- 1/3 [pdfminer.six]\n", + " ------------- -------------------------- 1/3 [pdfminer.six]\n", + " ------------- -------------------------- 1/3 [pdfminer.six]\n", + " ------------- -------------------------- 1/3 [pdfminer.six]\n", + " -------------------------- ------------- 2/3 [pdfplumber]\n", + " ---------------------------------------- 3/3 [pdfplumber]\n", + "\n", + "Successfully installed pdfminer.six-20250506 pdfplumber-0.11.7 pypdfium2-4.30.0\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install ipywidgets pdfplumber" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "06dcfc1d-b106-4b9a-9346-6dd6af4a4015", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "UNIVERSITY OF KWAZULU-NATAL\n", + "Radio Wave Propagation Modeling under\n", + "Precipitation and Clear-air at Microwave\n", + "and Millimetric Bands over Wireless Links\n", + "in the Horn of Africa\n", + "Feyisa Debo Diba\n", + "February, 2017\n", + "Supervisor: Professor Thomas J. Afullo\n", + "Co-supervisor: Dr. 
Akintunde Ayodeji Alonge\n", + "Radio Wave Propagation Modeling under\n", + "Precipitation and Clear-air at Microwave\n", + "and Millimetric Bands over Wireless Links\n", + "in the Horn of Africa\n", + "Feyisa Debo Diba\n", + "In fulfillment of the Degree of Doctor of Philosophy in\n", + "Electronic Engineering, College of Agriculture, Engineering\n", + "and Science, University of KwaZulu-Natal, Durban\n", + "February, 2017\n", + "Supervisor:\n", + "As the candidate’s Supervisor, I agree/do not agree to the submission of this thesis\n", + "Professor T.J. Afullo ———————————-\n", + "Date—————————————————\n", + "Co-Supervisor:\n", + "Dr. Akintunde Ayodeji Alonge\n", + "As the candidate’s Co.Supervisor, I agree to the submission of this thesis\n", + "Dr. A. A. Alonge ———————————-\n", + "Date—————————————————\n", + "ii\n", + "DECLARATION 1 - PLAGIARISM\n", + "I, Feyisa Debo Diba\n" + ] + } + ], + "source": [ + "# Download the thesis PDF and join the text of every page that has any\n", + "from io import BytesIO  # wraps the downloaded bytes as a file object\n", + "import pdfplumber  # imported here: it is not in the notebook's import cell\n", + "\n", + "pdf_url = (\n", + "    \"https://researchspace.ukzn.ac.za/server/api/core/bitstreams/\"\n", + "    \"29218203-bfc8-4fcb-bc63-9afba3341910/content\"\n", + ")\n", + "response = requests.get(pdf_url, timeout=60)  # fail instead of hanging forever\n", + "if response.status_code != 200:\n", + "    raise Exception(f\"Failed to download PDF (Status code: {response.status_code})\")\n", + "with pdfplumber.open(BytesIO(response.content)) as pdf:\n", + "    thesis_text = \"\\n\".join(filter(None, (page.extract_text() for page in pdf.pages)))\n", + "\n", + "print(thesis_text[:1000])  # optional preview of the first 1000 characters\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "84c544db-64a0-4181-beb0-1cc72bc88466", + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "# Summary of the Research Thesis\n", + "\n", + "## 1. 
Title and Authors\n", + "**Title:** Radio Wave Propagation Modeling under Precipitation and Clear-air at Microwave and Millimetric Bands over Wireless Links in the Horn of Africa \n", + "**Author:** Feyisa Debo Diba \n", + "**Supervisors:** Professor Thomas J. Afullo, Dr. Akintunde Ayodeji Alonge \n", + "\n", + "## 2. Objective / Research Problem\n", + "The thesis investigates radio wave propagation modeling in clear air and precipitation conditions over wireless communication systems in the Horn of Africa, specifically Ethiopia. The research aims to address the attenuation problem caused by precipitation for systems operating at higher frequency bands.\n", + "\n", + "## 3. Scientific and Regional Background\n", + "The congestion of lower operating frequency bands has led to the rapid growth of utilizing higher frequency spectrum for wireless communication systems. However, the Horn of Africa, particularly Ethiopia, lacks comprehensive studies on propagation modeling under different atmospheric conditions. This research provides valuable insights for the region, contributing to the efficient operation of wireless networks.\n", + "\n", + "## 4. Methodology\n", + "The research uses three years of atmospheric data (temperature, pressure, relative humidity) from the National Meteorological Agency of Ethiopia and clear air signal measurements over terrestrial Line-of-Sight (LOS) links from EthioTelecom. Rainfall data from a Davis Vantage weather station installed at Jimma University, Ethiopia, are also used. The study applies the ITU-R model for refractivity gradient prediction and the Rice-Holmberg (R-H) model for one-minute rain rate distribution. A semi-Markovian model is used for rainfall event characterization and generation.\n", + "\n", + "## 5. Key Findings\n", + "The research derived radio climatological parameters for different rain and clear air fade models. 
It also proposed rainfall rate conversion factors for Ethiopian sites and developed rainfall rate and fade margin contour maps for Ethiopia. The study found that the sojourn time of spikes in every rain regime is appropriately described by Erlang-k distribution. The number of spikes of generated rainfall events and the corresponding sojourn times follow the power-law relationship.\n", + "\n", + "## 6. Conclusion\n", + "The research provides a comprehensive analysis of radio wave propagation under different atmospheric conditions in Ethiopia. The findings contribute to the understanding of the impact of atmospheric conditions on wireless communication systems operating at higher frequency bands.\n", + "\n", + "## 7. Limitations\n", + "The research is limited by the availability and quality of atmospheric and signal level data. The simulation models also have inherent assumptions that may affect the accuracy of the results.\n", + "\n", + "## 8. Future Work\n", + "Future research could focus on refining the models used in this study by incorporating more data and improving the simulation techniques. Studies could also be extended to other regions in the Horn of Africa.\n", + "\n", + "## 9. Real-World Applications\n", + "The findings of this research can improve wireless network planning and 5G deployment in East Africa. The models developed can also be used in link budgeting, which is crucial for the design and operation of wireless communication systems." 
+ ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Summarize the thesis with the OpenAI chat API (first 10000 characters only)\n", + "messages = [\n", + "    {\"role\": \"system\", \"content\": system_prompt},\n", + "    {\"role\": \"user\", \"content\": f\"Here is the thesis text (truncated):\\n\\n{thesis_text[:10000]}\"}\n", + "]\n", + "\n", + "response = openai.chat.completions.create(\n", + "    model=\"gpt-4\",\n", + "    messages=messages,\n", + "    temperature=0.3\n", + ")\n", + "# message.content is Optional in the SDK; fall back to \"\" so .strip() cannot fail\n", + "summary = (response.choices[0].message.content or \"\").strip()\n", + "display(Markdown(summary))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e1cdf9ec-5efb-4d4b-8de2-83648865f092", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}