import pymupdf  # PyMuPDF


def extract_text(pdf_path, *, verbose: bool = True) -> str:
    """Extract the text of every page of a PDF and return it as one string.

    The document is opened with PyMuPDF, the text of each page is read in
    page order, and a newline is appended after every page before the pieces
    are concatenated. The document is closed when the function exits, whether
    it returns normally or raises.

    When ``verbose`` is true (the default, matching the historical behavior)
    the document title, the document author, and the full extracted text are
    printed to standard output.

    Parameters:
        pdf_path (str): The file path to the PDF document.
        verbose (bool): Keyword-only. Print the metadata and the extracted
            text to stdout. Pass ``False`` to use the function silently.

    Returns:
        str: The text of all pages, each page followed by a newline.
    """
    # Using the document as a context manager guarantees it is closed even
    # if reading the metadata or a page raises part-way through.
    with pymupdf.open(pdf_path) as doc:
        if verbose:
            # PyMuPDF documents `metadata` as possibly None (e.g. encrypted
            # files that still need a password), and individual keys may be
            # absent, so fall back instead of raising TypeError/KeyError.
            metadata = doc.metadata or {}
            print(f"Document title: {metadata.get('title')}")
            print(f"Document author: {metadata.get('author')}")

        # Join once instead of repeated `+=`, which is quadratic in the
        # number of pages.
        all_text = "".join(page.get_text() + "\n" for page in doc)

    if verbose:
        print("\nText from all pages:")
        print(all_text)

    return all_text
dotenv import load_dotenv\n", + "import pdf_extractor\n", + "import os\n", + "from ollama import Client" + ], + "outputs": [], + "execution_count": 20 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-20T08:01:32.070132Z", + "start_time": "2025-10-20T08:01:32.064843Z" + } + }, + "cell_type": "code", + "source": [ + "load_dotenv(override=True)\n", + "api_key = os.environ.get('OLLAMA_API_KEY')\n", + "\n", + "if not api_key:\n", + " print(\"No API key found\")\n", + "else:\n", + " print(\"API key found\")\n" + ], + "id": "7c1e78571e54895f", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "API key found\n" + ] + } + ], + "execution_count": 21 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-20T08:01:33.313806Z", + "start_time": "2025-10-20T08:01:33.305667Z" + } + }, + "cell_type": "code", + "source": [ + "client = Client(\n", + " host=\"https://ollama.com\",\n", + " headers={'Authorization': 'Bearer ' + os.environ.get('OLLAMA_API_KEY')}\n", + ")" + ], + "id": "4be731227f848288", + "outputs": [], + "execution_count": 22 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-20T08:01:35.004035Z", + "start_time": "2025-10-20T08:01:34.990890Z" + } + }, + "cell_type": "code", + "source": "pdf_content = pdf_extractor.extract_text(\"sample.pdf\")", + "id": "912aacb46475d2ab", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Document title: \n", + "Document author: \n", + "\n", + "Text from all pages:\n", + "The Mountain Guardian\n", + "High above the clouds, where the wind howled through jagged peaks and snow kissed the stone,\n", + "there lived a man whose name few remembered. The villagers below called him Kaelen the Silent, a\n", + "ghost among the mountains, a legend whispered around fires. 
For decades, no one had seen him\n", + "descend, yet strange lights often danced in the night sky above the cliffs - lights that bent and\n", + "shimmered like the northern auroras, though no aurora ever touched those skies.\n", + "Kaelen had not always been alone. Once, he was a warrior - the greatest of his kind. Born with an\n", + "unnatural power that hummed beneath his skin, he could command the very essence of the world:\n", + "stones shifted at his will, rivers bent their flow, and storms obeyed his call. The elders had declared\n", + "him chosen, a guardian meant to protect the realm. But power was a double-edged blade, and when\n", + "war came, it cut too deep.\n", + "In the final battle of the Age of Blades, Kaelen's strength saved thousands - and doomed just as\n", + "many. In a moment of desperation, he unleashed his full might upon the invading armies, shattering\n", + "the ground and swallowing them whole. The land itself screamed under the force. Cities crumbled,\n", + "forests burned, and the blood of both friend and foe stained the soil. The war ended that day, but the\n", + "cost was too high. Wracked with guilt, Kaelen vanished into the mountains, vowing never again to\n", + "wield his gift.\n", + "Years passed. Seasons turned. Legends grew. The world moved on, forgetting the man who once\n", + "shaped its fate. But Kaelen did not forget. Each dawn, he stood at the edge of the cliff and watched\n", + "the valley below - the rivers he had diverted, the scars he had carved into the land. He lived simply:\n", + "gathering herbs, carving wooden charms, speaking to no one but the wind. Yet the power still\n", + "thrummed beneath his skin, restless and waiting.\n", + "One winter, a storm unlike any other swept through the mountains. Villages were buried beneath\n", + "snow, and beasts from the frozen north roamed far beyond their borders. 
Among them came a\n", + "darkness more terrible than any blizzard: an ancient force, long sealed away, had awakened. Its\n", + "shadow crept across the land, devouring light and life alike. And with it came a name Kaelen\n", + "thought he would never hear again - the Order of the Dawn, the same elders who had once called\n", + "him guardian.\n", + "They came to his mountain, desperate and broken. \"The world needs you,\" they said. \"Only you can\n", + "stop this.\"\n", + "\n", + "Kaelen turned away. \"The world needs peace,\" he whispered. \"And I am no bringer of peace.\"\n", + "But the cries of the valley reached him - the weeping of children, the howls of the dying, the\n", + "whispers of a world on the brink. The guilt he had carried for decades began to shift, transforming\n", + "into something else: resolve. Perhaps his power was never meant to destroy or to save. Perhaps it\n", + "was meant to balance - to stand between chaos and order.\n", + "At dawn, Kaelen descended the mountain for the first time in forty years. His footsteps shook the\n", + "ground. The wind followed in his wake. The villagers stared in awe as the man from legend walked\n", + "among them, cloak billowing like a storm cloud.\n", + "The darkness waited beyond the valley, patient and hungry. Kaelen felt its presence - ancient,\n", + "powerful, and mocking. But he did not falter. This time, he would not wield his gift as a weapon of\n", + "wrath. 
This time, he would master it.\n", + "And as the first clash of power shook the heavens, the world realized that the guardian had returned\n", + "- not as a destroyer, not as a savior, but as a man who understood that true strength lies not in\n", + "isolation, but in purpose.\n", + "\n", + "\n" + ] + } + ], + "execution_count": 23 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-20T08:01:36.914001Z", + "start_time": "2025-10-20T08:01:36.911275Z" + } + }, + "cell_type": "code", + "source": [ + "system_prompt = \"\"\"You are a snarky assistant that analyzes the contents of a pdf,\n", + "and provides a short, snarky, humorous summary, ignoring text that might be navigation related.\n", + "Respond in markdown. Do not wrap the markdown in a code block - respond just with the markdown.\"\"\"\n", + "\n", + "user_prompt = \"\"\"\n", + " Here are the contents of a pdf.\n", + " Provide a short summary of this pdf.\n", + "\"\"\"\n", + "\n", + "user_prompt += pdf_content\n" + ], + "id": "a665eb55a5cce433", + "outputs": [], + "execution_count": 24 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-20T08:01:38.255895Z", + "start_time": "2025-10-20T08:01:38.253714Z" + } + }, + "cell_type": "code", + "source": [ + "messages = [\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": user_prompt}\n", + "]" + ], + "id": "9cf97ff1a01c4a0b", + "outputs": [], + "execution_count": 25 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-20T08:05:57.525835Z", + "start_time": "2025-10-20T08:05:57.522774Z" + } + }, + "cell_type": "code", + "source": "response = client.chat('gpt-oss:120b-cloud', messages=messages, stream=True)", + "id": "3c08773150a59b12", + "outputs": [], + "execution_count": 41 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-20T08:06:02.788455Z", + "start_time": "2025-10-20T08:05:59.261571Z" + } + }, + "cell_type": "code", + "source": [ + "from IPython.display 
import display, Markdown\n", + "\n", + "output = \"\"\n", + "for part in response:\n", + " content = part['message']['content']\n", + " output += content\n", + " # print(content, end='', flush=True)\n", + "\n", + "display(Markdown(output))\n" + ], + "id": "13553a2bef707111", + "outputs": [ + { + "data": { + "text/plain": [ + "" + ], + "text/markdown": "## TL;DR: The “Mountain Guardian” is basically **Brooding Goliath #12** \n\n- **Kaelen the Silent**: Once a god‑level warrior who could bend rocks, rivers, and storms to his whims. Think “Avatar” meets “Grumpy Old Man”. \n- **War trauma**: He demolished an entire invading army, erased whole cities, and then got a massive case of *oops‑I‑did‑that* guilt, so he retreated to his alpine Airbnb for 40 years. \n- **Mountaintop hermit life**: Collects herbs, carves wooden charms, and talks to the wind—basically a D&D NPC with an overpowered “power‑under‑the‑skin” passive. \n- **Plot twist**: A cosmic snow‑storm + ancient evil + the Order of the Dawn (the same folks who called him “guardian”) knock on his door. “World needs you!” they cry. \n- **Kaelen’s epiphany**: “Peace = staying on my mountain” → “Maybe I can actually *use* my powers without blowing everything up.” \n- **Climactic comeback**: He finally descends, shakes the valley (literally), and fights the darkness—not as a smiting juggernaut, but as a reluctantly responsible adult with a purpose. \n\n**Bottom line:** A brooding, guilt‑ridden superhero finally decides to get off his rock and do his job. The moral? Even the biggest hermits can’t ignore the world forever—especially when it’s on fire. 
🌋🗻✨" + }, + "metadata": {}, + "output_type": "display_data", + "jetTransient": { + "display_id": null + } + } + ], + "execution_count": 42 + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/week1/community-contributions/_mansoor/Week1Day1/pdf_summarizer/sample.pdf b/week1/community-contributions/_mansoor/Week1Day1/pdf_summarizer/sample.pdf new file mode 100644 index 0000000..8013094 Binary files /dev/null and b/week1/community-contributions/_mansoor/Week1Day1/pdf_summarizer/sample.pdf differ