Update legal_qna_with_rag_on_bare_acts.ipynb

Author: Nik
Date: 2025-10-26 20:57:48 +05:30
parent fd7d9da852
commit d3318988a9


@@ -39,15 +39,19 @@
" MODEL_REGISTRY[label] = {\"client\": client, \"model\": model_id}\n", " MODEL_REGISTRY[label] = {\"client\": client, \"model\": model_id}\n",
"\n", "\n",
"# OpenAI\n", "# OpenAI\n",
"_register(\"OpenAI • GPT-5\", openai_client, \"gpt-5\")\n",
"_register(\"OpenAI • GPT-5 Nano\", openai_client, \"gpt-5-nano\")\n",
"_register(\"OpenAI • GPT-4o-mini\", openai_client, \"gpt-4o-mini\")\n", "_register(\"OpenAI • GPT-4o-mini\", openai_client, \"gpt-4o-mini\")\n",
"\n", "\n",
"# Gemini\n", "# Gemini (Google)\n",
"_register(\"Gemini • 2.5 Flash\", gemini_client, \"gemini-2.5-flash\")\n",
"_register(\"Gemini • 2.5 Pro\", gemini_client, \"gemini-2.5-pro\")\n", "_register(\"Gemini • 2.5 Pro\", gemini_client, \"gemini-2.5-pro\")\n",
"_register(\"Gemini • 2.5 Flash\", gemini_client, \"gemini-2.5-flash\")\n",
"\n", "\n",
"# Groq\n", "# Groq\n",
"_register(\"Groq • Llama 3.3 70B\", groq_client, \"llama-3.3-70b-versatile\")\n",
"_register(\"Groq • Llama 3.1 8B\", groq_client, \"llama-3.1-8b-instant\")\n", "_register(\"Groq • Llama 3.1 8B\", groq_client, \"llama-3.1-8b-instant\")\n",
"_register(\"Groq • Llama 3.3 70B\", groq_client, \"llama-3.3-70b-versatile\")\n",
"_register(\"Groq • GPT-OSS 20B\", groq_client, \"openai/gpt-oss-20b\")\n",
"_register(\"Groq • GPT-OSS 120B\", groq_client, \"openai/gpt-oss-120b\")\n",
"\n", "\n",
"AVAILABLE_MODELS = list(MODEL_REGISTRY.keys())\n", "AVAILABLE_MODELS = list(MODEL_REGISTRY.keys())\n",
"DEFAULT_MODEL = AVAILABLE_MODELS[0] if AVAILABLE_MODELS else \"OpenAI • GPT-4o-mini\"\n", "DEFAULT_MODEL = AVAILABLE_MODELS[0] if AVAILABLE_MODELS else \"OpenAI • GPT-4o-mini\"\n",
@@ -189,7 +193,8 @@
" SYSTEM = (\n", " SYSTEM = (\n",
" \"You are a precise legal assistant for Indian Bare Acts. \"\n", " \"You are a precise legal assistant for Indian Bare Acts. \"\n",
" \"Answer ONLY from the provided context. If the answer is not in context, say you don't know. \"\n", " \"Answer ONLY from the provided context. If the answer is not in context, say you don't know. \"\n",
" \"Cite the sources by file name (e.g., ipc, coi, bns) in brackets.\"\n", " \"Cite sources inline in square brackets as [file #chunk] (e.g., [bns #12]). \"\n",
" \"Prefer exact quotes for critical provisions/sections.\"\n",
" )\n", " )\n",
"\n", "\n",
" @staticmethod\n", " @staticmethod\n",
@@ -197,10 +202,19 @@
" ctx = \"\\n\\n---\\n\\n\".join(\n", " ctx = \"\\n\\n---\\n\\n\".join(\n",
" f\"[{c['meta']['source']} #{c['meta']['chunk_id']}]\\n{c['text']}\" for c in contexts\n", " f\"[{c['meta']['source']} #{c['meta']['chunk_id']}]\\n{c['text']}\" for c in contexts\n",
" )\n", " )\n",
" return f\"Question:\\n{query}\\n\\nContext:\\n{ctx}\\n\\nInstructions:\\n- Keep answers concise.\\n- Quote key lines when useful.\\n- Add [source] inline.\"\n", " return (\n",
" f\"Question:\\n{query}\\n\\n\"\n",
" f\"Context (do not use outside this):\\n{ctx}\\n\\n\"\n",
" \"Instructions:\\n- Keep answers concise and faithful to the text.\\n\"\n",
" \"- Use [file #chunk] inline where relevant.\"\n",
" )\n",
"\n",
"def _snippet(txt: str, n: int = 220) -> str:\n",
" s = \" \".join(txt.strip().split())\n",
" return (s[:n] + \"…\") if len(s) > n else s\n",
"\n", "\n",
"class RagQAService:\n", "class RagQAService:\n",
" \"\"\"Coordinates retrieval + generation.\"\"\"\n", " \"\"\"Coordinates retrieval + generation, and returns a rich reference block.\"\"\"\n",
" def __init__(self, index: BareActsIndex, llm: MultiLLM):\n", " def __init__(self, index: BareActsIndex, llm: MultiLLM):\n",
" self.index = index\n", " self.index = index\n",
" self.llm = llm\n", " self.llm = llm\n",
@@ -211,9 +225,12 @@
" user = self.builder.build_user(question, ctx)\n", " user = self.builder.build_user(question, ctx)\n",
" reply = self.llm.complete(model_label=model_label, system=self.builder.SYSTEM, user=user)\n", " reply = self.llm.complete(model_label=model_label, system=self.builder.SYSTEM, user=user)\n",
"\n", "\n",
" # Append sources deterministically (post-processing for transparency)\n", " # Rich references: file, chunk index, snippet\n",
" sources = \", \".join(sorted({c[\"meta\"][\"source\"] for c in ctx}))\n", " references = \"\\n\".join(\n",
" return f\"{reply}\\n\\n— Sources: {sources}\"\n" " f\"- [{c['meta']['source']} #{c['meta']['chunk_id']}] {_snippet(c['text'])}\"\n",
" for c in ctx\n",
" )\n",
" return f\"{reply}\\n\\n**References**\\n{references}\"\n"
] ]
}, },
{ {
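For orientation, here is the shape of the reference block the new post-processing emits, using a hypothetical chunk dict whose layout is inferred from the f-string above (real chunks come from `BareActsIndex` retrieval; `_snippet` is the helper sketched earlier):

    # Hypothetical ctx item; only the dict layout is taken from the diff.
    ctx = [{
        "text": "103. (1) Whoever commits murder shall be punished with death or ...",
        "meta": {"source": "bns", "chunk_id": 12},
    }]
    references = "\n".join(
        f"- [{c['meta']['source']} #{c['meta']['chunk_id']}] {_snippet(c['text'])}"
        for c in ctx
    )
    print(references)
    # -> - [bns #12] 103. (1) Whoever commits murder shall be punished with death or ...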
@@ -244,25 +261,50 @@
" except Exception as e:\n", " except Exception as e:\n",
" return f\"⚠️ {e}\"\n", " return f\"⚠️ {e}\"\n",
"\n", "\n",
"DEFAULT_QUESTION = \"Which Section is for Murder in BNS and whats the Punishment for murder ?\"\n",
"\n",
"with gr.Blocks(title=\"Legal QnA • Bare Acts (RAG + Multi-LLM)\") as app:\n", "with gr.Blocks(title=\"Legal QnA • Bare Acts (RAG + Multi-LLM)\") as app:\n",
" gr.Markdown(\"### 🧑‍⚖️ Legal Q&A on Bare Acts (RAG) — Multi-Provider LLM\")\n", " gr.Markdown(\"### 🧑‍⚖️ Legal Q&A on Bare Acts (RAG) — Multi-Provider LLM\")\n",
" with gr.Row():\n", " with gr.Row():\n",
" model_dd = gr.Dropdown(choices=AVAILABLE_MODELS or [\"OpenAI • GPT-4o-mini\"],\n", " model_dd = gr.Dropdown(\n",
" choices=AVAILABLE_MODELS or [\"OpenAI • GPT-4o-mini\"],\n",
" value=DEFAULT_MODEL if AVAILABLE_MODELS else None,\n", " value=DEFAULT_MODEL if AVAILABLE_MODELS else None,\n",
" label=\"Model\")\n", " label=\"Model\"\n",
" )\n",
" topk = gr.Slider(2, 12, value=6, step=1, label=\"Top-K context\")\n", " topk = gr.Slider(2, 12, value=6, step=1, label=\"Top-K context\")\n",
"\n", "\n",
" chat = gr.ChatInterface(fn=chat_fn,\n", " chat = gr.ChatInterface(\n",
" fn=chat_fn,\n",
" type=\"messages\",\n", " type=\"messages\",\n",
" additional_inputs=[model_dd, topk])\n", " additional_inputs=[model_dd, topk],\n",
" textbox=gr.Textbox(\n",
" value=DEFAULT_QUESTION,\n",
" label=\"Ask a legal question\",\n",
" placeholder=\"Type your question about BNS/IPC/Constitution…\"\n",
" ),\n",
" )\n",
"\n", "\n",
"app.launch(inbrowser=True)\n" "app.launch(inbrowser=True)\n"
] ]
} }
], ],
"metadata": { "metadata": {
"kernelspec": {
"display_name": "llm-engineering",
"language": "python",
"name": "python3"
},
"language_info": { "language_info": {
"name": "python" "codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.10"
} }
}, },
"nbformat": 4, "nbformat": 4,