Update legal_qna_with_rag_on_bare_acts.ipynb

Author: Nik
Date: 2025-10-26 20:57:48 +05:30
parent fd7d9da852
commit d3318988a9


@@ -39,15 +39,19 @@
" MODEL_REGISTRY[label] = {\"client\": client, \"model\": model_id}\n", " MODEL_REGISTRY[label] = {\"client\": client, \"model\": model_id}\n",
"\n", "\n",
"# OpenAI\n", "# OpenAI\n",
"_register(\"OpenAI • GPT-5\", openai_client, \"gpt-5\")\n",
"_register(\"OpenAI • GPT-5 Nano\", openai_client, \"gpt-5-nano\")\n",
"_register(\"OpenAI • GPT-4o-mini\", openai_client, \"gpt-4o-mini\")\n", "_register(\"OpenAI • GPT-4o-mini\", openai_client, \"gpt-4o-mini\")\n",
"\n", "\n",
"# Gemini\n", "# Gemini (Google)\n",
"_register(\"Gemini • 2.5 Flash\", gemini_client, \"gemini-2.5-flash\")\n",
"_register(\"Gemini • 2.5 Pro\", gemini_client, \"gemini-2.5-pro\")\n", "_register(\"Gemini • 2.5 Pro\", gemini_client, \"gemini-2.5-pro\")\n",
"_register(\"Gemini • 2.5 Flash\", gemini_client, \"gemini-2.5-flash\")\n",
"\n", "\n",
"# Groq\n", "# Groq\n",
"_register(\"Groq • Llama 3.3 70B\", groq_client, \"llama-3.3-70b-versatile\")\n",
"_register(\"Groq • Llama 3.1 8B\", groq_client, \"llama-3.1-8b-instant\")\n", "_register(\"Groq • Llama 3.1 8B\", groq_client, \"llama-3.1-8b-instant\")\n",
"_register(\"Groq • Llama 3.3 70B\", groq_client, \"llama-3.3-70b-versatile\")\n",
"_register(\"Groq • GPT-OSS 20B\", groq_client, \"openai/gpt-oss-20b\")\n",
"_register(\"Groq • GPT-OSS 120B\", groq_client, \"openai/gpt-oss-120b\")\n",
"\n", "\n",
"AVAILABLE_MODELS = list(MODEL_REGISTRY.keys())\n", "AVAILABLE_MODELS = list(MODEL_REGISTRY.keys())\n",
"DEFAULT_MODEL = AVAILABLE_MODELS[0] if AVAILABLE_MODELS else \"OpenAI • GPT-4o-mini\"\n", "DEFAULT_MODEL = AVAILABLE_MODELS[0] if AVAILABLE_MODELS else \"OpenAI • GPT-4o-mini\"\n",
@@ -189,7 +193,8 @@
" SYSTEM = (\n", " SYSTEM = (\n",
" \"You are a precise legal assistant for Indian Bare Acts. \"\n", " \"You are a precise legal assistant for Indian Bare Acts. \"\n",
" \"Answer ONLY from the provided context. If the answer is not in context, say you don't know. \"\n", " \"Answer ONLY from the provided context. If the answer is not in context, say you don't know. \"\n",
" \"Cite the sources by file name (e.g., ipc, coi, bns) in brackets.\"\n", " \"Cite sources inline in square brackets as [file #chunk] (e.g., [bns #12]). \"\n",
" \"Prefer exact quotes for critical provisions/sections.\"\n",
" )\n", " )\n",
"\n", "\n",
" @staticmethod\n", " @staticmethod\n",
@@ -197,10 +202,19 @@
" ctx = \"\\n\\n---\\n\\n\".join(\n", " ctx = \"\\n\\n---\\n\\n\".join(\n",
" f\"[{c['meta']['source']} #{c['meta']['chunk_id']}]\\n{c['text']}\" for c in contexts\n", " f\"[{c['meta']['source']} #{c['meta']['chunk_id']}]\\n{c['text']}\" for c in contexts\n",
" )\n", " )\n",
" return f\"Question:\\n{query}\\n\\nContext:\\n{ctx}\\n\\nInstructions:\\n- Keep answers concise.\\n- Quote key lines when useful.\\n- Add [source] inline.\"\n", " return (\n",
" f\"Question:\\n{query}\\n\\n\"\n",
" f\"Context (do not use outside this):\\n{ctx}\\n\\n\"\n",
" \"Instructions:\\n- Keep answers concise and faithful to the text.\\n\"\n",
" \"- Use [file #chunk] inline where relevant.\"\n",
" )\n",
"\n",
"def _snippet(txt: str, n: int = 220) -> str:\n",
" s = \" \".join(txt.strip().split())\n",
" return (s[:n] + \"…\") if len(s) > n else s\n",
"\n", "\n",
"class RagQAService:\n", "class RagQAService:\n",
" \"\"\"Coordinates retrieval + generation.\"\"\"\n", " \"\"\"Coordinates retrieval + generation, and returns a rich reference block.\"\"\"\n",
" def __init__(self, index: BareActsIndex, llm: MultiLLM):\n", " def __init__(self, index: BareActsIndex, llm: MultiLLM):\n",
" self.index = index\n", " self.index = index\n",
" self.llm = llm\n", " self.llm = llm\n",
@@ -211,9 +225,12 @@
" user = self.builder.build_user(question, ctx)\n", " user = self.builder.build_user(question, ctx)\n",
" reply = self.llm.complete(model_label=model_label, system=self.builder.SYSTEM, user=user)\n", " reply = self.llm.complete(model_label=model_label, system=self.builder.SYSTEM, user=user)\n",
"\n", "\n",
" # Append sources deterministically (post-processing for transparency)\n", " # Rich references: file, chunk index, snippet\n",
" sources = \", \".join(sorted({c[\"meta\"][\"source\"] for c in ctx}))\n", " references = \"\\n\".join(\n",
" return f\"{reply}\\n\\n— Sources: {sources}\"\n" " f\"- [{c['meta']['source']} #{c['meta']['chunk_id']}] {_snippet(c['text'])}\"\n",
" for c in ctx\n",
" )\n",
" return f\"{reply}\\n\\n**References**\\n{references}\"\n"
] ]
}, },
{ {
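For orientation, here is the shape of the reference block the new post-processing emits, using a hypothetical chunk dict whose layout is inferred from the f-string above (real chunks come from `BareActsIndex` retrieval; `_snippet` is the helper sketched earlier):

    # Hypothetical ctx item; only the dict layout is taken from the diff.
    ctx = [{
        "text": "103. (1) Whoever commits murder shall be punished with death or ...",
        "meta": {"source": "bns", "chunk_id": 12},
    }]
    references = "\n".join(
        f"- [{c['meta']['source']} #{c['meta']['chunk_id']}] {_snippet(c['text'])}"
        for c in ctx
    )
    print(references)
    # -> - [bns #12] 103. (1) Whoever commits murder shall be punished with death or ...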
@@ -244,25 +261,50 @@
" except Exception as e:\n", " except Exception as e:\n",
" return f\"⚠️ {e}\"\n", " return f\"⚠️ {e}\"\n",
"\n", "\n",
"DEFAULT_QUESTION = \"Which Section is for Murder in BNS and whats the Punishment for murder ?\"\n",
"\n",
"with gr.Blocks(title=\"Legal QnA • Bare Acts (RAG + Multi-LLM)\") as app:\n", "with gr.Blocks(title=\"Legal QnA • Bare Acts (RAG + Multi-LLM)\") as app:\n",
" gr.Markdown(\"### 🧑‍⚖️ Legal Q&A on Bare Acts (RAG) — Multi-Provider LLM\")\n", " gr.Markdown(\"### 🧑‍⚖️ Legal Q&A on Bare Acts (RAG) — Multi-Provider LLM\")\n",
" with gr.Row():\n", " with gr.Row():\n",
" model_dd = gr.Dropdown(choices=AVAILABLE_MODELS or [\"OpenAI • GPT-4o-mini\"],\n", " model_dd = gr.Dropdown(\n",
" choices=AVAILABLE_MODELS or [\"OpenAI • GPT-4o-mini\"],\n",
" value=DEFAULT_MODEL if AVAILABLE_MODELS else None,\n", " value=DEFAULT_MODEL if AVAILABLE_MODELS else None,\n",
" label=\"Model\")\n", " label=\"Model\"\n",
" )\n",
" topk = gr.Slider(2, 12, value=6, step=1, label=\"Top-K context\")\n", " topk = gr.Slider(2, 12, value=6, step=1, label=\"Top-K context\")\n",
"\n", "\n",
" chat = gr.ChatInterface(fn=chat_fn,\n", " chat = gr.ChatInterface(\n",
" fn=chat_fn,\n",
" type=\"messages\",\n", " type=\"messages\",\n",
" additional_inputs=[model_dd, topk])\n", " additional_inputs=[model_dd, topk],\n",
" textbox=gr.Textbox(\n",
" value=DEFAULT_QUESTION,\n",
" label=\"Ask a legal question\",\n",
" placeholder=\"Type your question about BNS/IPC/Constitution…\"\n",
" ),\n",
" )\n",
"\n", "\n",
"app.launch(inbrowser=True)\n" "app.launch(inbrowser=True)\n"
] ]
} }
], ],
"metadata": { "metadata": {
"kernelspec": {
"display_name": "llm-engineering",
"language": "python",
"name": "python3"
},
"language_info": { "language_info": {
"name": "python" "codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.10"
} }
}, },
"nbformat": 4, "nbformat": 4,