274 lines
8.5 KiB
Plaintext
274 lines
8.5 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "a15135e6-3ba5-44ae-b14b-dc67674a5ca3",
|
|
"metadata": {
|
|
"editable": true,
|
|
"slideshow": {
|
|
"slide_type": ""
|
|
},
|
|
"tags": []
|
|
},
|
|
"source": [
|
|
"# Resarch Paper Summarizer by Name"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "a50f02ea-0f04-4f68-ae66-d1369780065e",
|
|
"metadata": {
|
|
"editable": true,
|
|
"slideshow": {
|
|
"slide_type": ""
|
|
},
|
|
"tags": []
|
|
},
|
|
"source": [
|
|
"### Imports"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "ea6e09ac-adee-4bb8-b3bd-4f6411495751",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"## If dependencies do not exist please install them\n",
|
|
"# !pip install python-dotenv openai arxiv"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "e5301f2b-3037-4a85-b7cd-5e6bd700418a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import arxiv\n",
|
|
"import os\n",
|
|
"from openai import OpenAI\n",
|
|
"from dotenv import load_dotenv\n",
|
|
"from IPython.display import Markdown, display"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "ac45a1f4-0005-4e0a-be90-741182c1db9f",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Load Open AI Key"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "381bef97-6bb7-4bdc-a71d-2ea65c8f6964",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"load_dotenv()\n",
|
|
"api_key = os.getenv(\"OPENAI_API_KEY\")\n",
|
|
"\n",
|
|
"if not api_key:\n",
|
|
" print(\"❌ No OpenAI API key found in .env file.\")\n",
|
|
"else:\n",
|
|
" print(\"✅ API key loaded successfully.\")\n",
|
|
"\n",
|
|
"# ✅ Initialize OpenAI\n",
|
|
"openai = OpenAI(api_key=api_key)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "00817dbe-209e-418c-bb46-7b6b866fdff4",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Main Class MLResearchFetcher"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "7355ba4c-ef61-4934-bb79-4d80b4473e52",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"class MLResearchFetcher:\n",
|
|
" def __init__(self, system_prompt, query=\"machine learning\", max_results=5):\n",
|
|
" self.query = query\n",
|
|
" self.max_results = max_results\n",
|
|
" self.system_prompt = system_prompt\n",
|
|
"\n",
|
|
" def fetch_papers(self):\n",
|
|
" search = arxiv.Search(\n",
|
|
" query=f'ti:\"{self.query}\"',\n",
|
|
" max_results=self.max_results,\n",
|
|
" sort_by=arxiv.SortCriterion.SubmittedDate,\n",
|
|
" sort_order=arxiv.SortOrder.Descending,\n",
|
|
" )\n",
|
|
" return list(search.results())\n",
|
|
"\n",
|
|
" def summarize_abstract(self, abstract, system_prompt):\n",
|
|
" try:\n",
|
|
" completion = openai.chat.completions.create(\n",
|
|
" model=\"gpt-4o-mini\",\n",
|
|
" messages=[\n",
|
|
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
|
" {\"role\": \"user\", \"content\": abstract}\n",
|
|
" ]\n",
|
|
" )\n",
|
|
" return completion.choices[0].message.content.strip()\n",
|
|
" except Exception as e:\n",
|
|
" return f\"❌ Error during summarization: {e}\"\n",
|
|
"\n",
|
|
" def display_results(self):\n",
|
|
" papers = self.fetch_papers()\n",
|
|
" for paper in papers:\n",
|
|
" display(Markdown(f\"### 📄 [{paper.title}]({paper.entry_id})\"))\n",
|
|
" display(Markdown(f\"**Authors:** {', '.join(author.name for author in paper.authors)}\"))\n",
|
|
" display(Markdown(f\"**Published:** {paper.published.date()}\"))\n",
|
|
" display(Markdown(f\"**Abstract:** {paper.summary.strip()}\"))\n",
|
|
" summary = self.summarize_abstract(paper.summary, self.system_prompt)\n",
|
|
" display(Markdown(f\"**🔍 Summary:** {summary}\"))\n",
|
|
" display(Markdown(\"---\"))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "304857ba-e832-42a3-8219-ec9202e41509",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Helper Functions"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "1be2a2da-135b-4aec-b200-dc364d319ac4",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"system_prompt = \"You are an expert research paper summarizer and AI research assistant. \\\n",
|
|
"When provided with the URL or content of a research paper in the field of machine learning, artificial intelligence, or data science, perform the following: \\\n",
|
|
"1. **Extract and present** the following details in a clear, structured Markdown format: \\\n",
|
|
" - Title and Author(s) \\\n",
|
|
" - Year of Publication \\\n",
|
|
" - Objective or Aim of the Research (Why the study was conducted) \\\n",
|
|
" - Background or Introduction (What foundational knowledge or motivation led to this work) \\\n",
|
|
" - Type of Research (e.g., empirical study, theoretical analysis, experimental benchmark) \\\n",
|
|
" - Methods or Methodology (How the research was conducted: dataset, models, techniques used) \\\n",
|
|
" - Results and Key Findings (What was discovered or proven) \\\n",
|
|
" - Conclusion (Summary of insights, limitations, and proposed future work) \\\n",
|
|
"\\\n",
|
|
"2. **Evaluate** the impact and relevance of the paper: \\\n",
|
|
" - Assess the significance of the research to the broader ML/AI community \\\n",
|
|
" - Note any novelty, performance improvements, or theoretical breakthroughs \\\n",
|
|
" - Comment on the potential applications or industry relevance \\\n",
|
|
"\\\n",
|
|
"3. **Suggest new research directions**: \\\n",
|
|
" - Identify gaps, limitations, or unexplored ideas in the paper \\\n",
|
|
" - Propose at least one new research idea or follow-up paper that builds upon this work \\\n",
|
|
"\\\n",
|
|
"Respond in a clean, professional Markdown format suitable for researchers or students reviewing the literature.\"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "f8b68134-c265-4272-87c4-e16fc205e7c4",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def print_papers(papers):\n",
|
|
" for paper in papers:\n",
|
|
" title = paper.title\n",
|
|
" authors = \", \".join(author.name for author in paper.authors)\n",
|
|
" published = paper.published.strftime('%Y-%m-%d')\n",
|
|
" abstract = paper.summary.strip()\n",
|
|
" link = paper.entry_id\n",
|
|
" pdf_link = [l.href for l in paper.links if l.title == 'pdf']\n",
|
|
" categories = \", \".join(paper.categories)\n",
|
|
"\n",
|
|
" print(f\"\\n📄 Title: {title}\")\n",
|
|
" print(f\"👥 Authors: {authors}\")\n",
|
|
" print(f\"📅 Published: {published}\")\n",
|
|
" print(f\"🏷️ Categories: {categories}\")\n",
|
|
" print(f\"🔗 Link: {link}\")\n",
|
|
" if pdf_link:\n",
|
|
" print(f\"📄 PDF: {pdf_link[0]}\")\n",
|
|
" print(f\"\\n📝 Abstract:\\n{abstract}\")\n",
|
|
" print(\"-\" * 80)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "9e688bbd-d3dd-4f2b-a7c3-d6e550ec9667",
|
|
"metadata": {},
|
|
"source": [
|
|
"#### Get the papers given the name of the paper"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "6dcf9639-d6b5-4194-b6a2-5260329fcbe7",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"fetcher = MLResearchFetcher(system_prompt, query=\"QWEN2 TECHNICAL REPORT\", max_results=3)\n",
|
|
"papers = fetcher.fetch_papers()\n",
|
|
"print_papers(papers)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "a04e219b-389f-4e0a-9645-662d966d4055",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Call the model and get the results"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "297e915b-078a-49c7-836f-3c4ddf8e17dc",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"fetcher.display_results()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "2344499c-3b39-4497-a0bf-1cff83117fdc",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.11.12"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|