From 5e7d2d5a7d39bc98c3bc18c60ab1fc0191a39e69 Mon Sep 17 00:00:00 2001 From: Peter Wolf Date: Sat, 10 May 2025 17:13:08 +0200 Subject: [PATCH] Add datasheet comparator notebook from loupdethies --- .../wk1-day1-datasheet_comparator.ipynb | 302 ++++++++++++++++++ 1 file changed, 302 insertions(+) create mode 100644 week1/community-contributions/wk1-day1-datasheet_comparator.ipynb diff --git a/week1/community-contributions/wk1-day1-datasheet_comparator.ipynb b/week1/community-contributions/wk1-day1-datasheet_comparator.ipynb new file mode 100644 index 0000000..a7e79d2 --- /dev/null +++ b/week1/community-contributions/wk1-day1-datasheet_comparator.ipynb @@ -0,0 +1,302 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "52629582-ec22-447a-ae09-cba16a46976d", + "metadata": {}, + "source": [ + "# Datasheet Comparator - MVP" + ] + }, + { + "cell_type": "markdown", + "id": "40de9dc5-0387-4950-8f8f-4805b46187c3", + "metadata": {}, + "source": [ + "This notebook is part of a project that compares technical specifications from two electronic component datasheets.\n", + "\n", + "Initially, the PDFs are provided as local files, but future versions will allow users to:\n", + "- Select datasheets interactively from within the notebook\n", + "- Search and retrieve part information from distributor APIs (e.g. Mouser, Digi-Key)\n", + "- Use AI to extract, analyze, and summarize key specifications and differences\n", + "\n", + "The goal is to support engineers in identifying part changes, upgrades, or replacements efficiently." + ] + }, + { + "cell_type": "markdown", + "id": "b51c91b6-953b-479c-acc5-ab2a189fabba", + "metadata": {}, + "source": [ + "# 📌 Section A: Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "553666d5-af7e-46f0-b945-0d48c32bfbbf", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import requests\n", + "from dotenv import load_dotenv\n", + "from bs4 import BeautifulSoup\n", + "from IPython.display import Markdown, display\n", + "from openai import OpenAI\n", + "import fitz # PyMuPDF for PDF parsing\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "id": "a19c077a-36e3-4ff2-bee7-85f23e90b89a", + "metadata": {}, + "source": [ + "# Load OpenAI API key from environment variable (recommended)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6435f1c7-f161-4cad-b68a-05080304ff22", + "metadata": {}, + "outputs": [], + "source": [ + "load_dotenv(override=True)\n", + "api_key = os.getenv(\"OPENAI_API_KEY\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3722da9c-e1e9-4838-8ab9-04e45e52d8f0", + "metadata": {}, + "outputs": [], + "source": [ + "openai = OpenAI()" + ] + }, + { + "cell_type": "markdown", + "id": "34916ec4-643c-4b76-8e21-13c3364782fa", + "metadata": {}, + "source": [ + "# Define paths to datasheets\n", + "💬 **Note:** These example datasheet paths will later be replaced by a user-driven file selection dialog within the Jupyter notebook; optionally, this section could be extended to fetch component data directly from distributor websites." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "42621aa4-7094-4209-95ba-ecf03ba609fb", + "metadata": {}, + "outputs": [], + "source": [ + "pdf_path_1 = \"./datasheets/part_old.pdf\"\n", + "pdf_path_2 = \"./datasheets/part_new.pdf\"" + ] + }, + { + "cell_type": "markdown", + "id": "8f09e201-ab22-4b9d-a9a3-b12cc671a68a", + "metadata": {}, + "source": [ + "# 📌 Section B: Extract text from datasheets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ff36d62e-efb6-4d08-a1d5-ceb470917103", + "metadata": {}, + "outputs": [], + "source": [ + "def extract_text_from_pdf(path):\n", + " text = \"\"\n", + " with fitz.open(path) as doc:\n", + " for page in doc:\n", + " text += page.get_text()\n", + " return text" + ] + }, + { + "cell_type": "markdown", + "id": "0da6bc72-93e2-4229-885b-7020f3920855", + "metadata": {}, + "source": [ + "# 📌 Section C: Use ChatGPT to summarize and compare" + ] + }, + { + "cell_type": "markdown", + "id": "4e8de5f9-c2b6-4d6f-9cde-c1275ec0be83", + "metadata": {}, + "source": [ + "## Section C.1: Define system_prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30dc6c3a-d7a1-4837-9d57-00c4b2d63092", + "metadata": {}, + "outputs": [], + "source": [ + "system_prompt = \"You are a technical assistant helping to compare electronic component datasheets.\"" + ] + }, + { + "cell_type": "markdown", + "id": "5bf19f66-89f2-4fbf-b5d6-ff1f8e06ba6d", + "metadata": {}, + "source": [ + "## Section C.2: Define user_prompt, summerize and compare" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4ff4e362-11d4-4737-a10e-1953ac0eac55", + "metadata": {}, + "outputs": [], + "source": [ + "def summarize_datasheet(text, part_name, system_prompt):\n", + " user_prompt = f\"\"\"\n", + " Summarize the most important technical characteristics of the electronic component '{part_name}' based on this datasheet text:\n", + " ---\n", + " {text}\n", + " ---\n", + " Give a structured list of properties like voltage, current, dimensions, operating temperature, etc.\n", + " \"\"\"\n", + " response = openai.chat.completions.create(\n", + " model=\"gpt-4o-mini\",\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": user_prompt}\n", + " ]\n", + " )\n", + " return response.choices[0].message.content\n", + " \n", + "def compare_parts(text1, text2, system_prompt):\n", + " user_prompt = f\"\"\"\n", + " Compare the following two summaries of electronic components and evaluate whether the second part is a valid replacement for the first one.\n", + " Identify any differences in electrical specs, mechanical dimensions, and compliance with medical device requirements.\n", + " Suggest what changes would be required to use the second part in place of the first (e.g., schematic/layout changes).\n", + " \n", + " Old Part Summary:\n", + " {text1}\n", + "\n", + " New Part Summary:\n", + " {text2}\n", + "\n", + " Provide a table of differences and a short final recommendation.\n", + " \"\"\"\n", + " response = openai.chat.completions.create(\n", + " model=\"gpt-4o-mini\",\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": user_prompt}\n", + " ]\n", + " )\n", + " return response.choices[0].message.content" + ] + }, + { + "cell_type": "markdown", + "id": "92524623-b1f9-4b55-9056-d02c41457df4", + "metadata": {}, + "source": [ + "# 📌 Section D: Put it all together and print it nicely." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ebd172eb-a8fb-4308-95c7-fee8f3f250ae", + "metadata": {}, + "outputs": [], + "source": [ + "def display_summary_and_compare(part1, part2, system_prompt):\n", + " content1 = extract_text_from_pdf(part1)\n", + " content2 = extract_text_from_pdf(part2)\n", + " summary1 = summarize_datasheet(content1, \"Old Part\", system_prompt)\n", + " summary2 = summarize_datasheet(content2, \"New Part\", system_prompt)\n", + " compare = compare_parts(summary1, summary2, system_prompt)\n", + " report = summary1 + summary2 + compare\n", + " display(Markdown(report))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ab2f1cfb-7e7b-429d-9f53-68524f93afbf", + "metadata": {}, + "outputs": [], + "source": [ + "display_summary_and_compare(pdf_path_1, pdf_path_2, system_prompt)" + ] + }, + { + "cell_type": "markdown", + "id": "09c76689-db27-4fa4-9fb2-4ac1d4d111fb", + "metadata": {}, + "source": [ + "# 📌 Section E: Next Steps (to be developed)" + ] + }, + { + "cell_type": "markdown", + "id": "8ade0b16-52a6-4af4-a1ae-d0a505bf87a0", + "metadata": {}, + "source": [ + "# - Parse key properties into structured tables (e.g., using regex or ChatGPT)" + ] + }, + { + "cell_type": "markdown", + "id": "2a7f6e50-1490-47ef-b911-278981636528", + "metadata": {}, + "source": [ + "# - Automatically download datasheets from distributor websites" + ] + }, + { + "cell_type": "markdown", + "id": "740bdc6d-48e4-4c7f-b7e9-4bb0d86b653f", + "metadata": {}, + "source": [ + "# - Search for compatible parts via web APIs" + ] + }, + { + "cell_type": "markdown", + "id": "adda4dda-8bed-423b-a9c2-87f988ffa391", + "metadata": {}, + "source": [ + "# - Export results to Excel or Markdown" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python (datasheet_env)", + "language": "python", + "name": "datasheet_env" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}