From d3318988a962b8ef32dd4ab3ebce5dc712a1612f Mon Sep 17 00:00:00 2001
From: Nik
Date: Sun, 26 Oct 2025 20:57:48 +0530
Subject: [PATCH] Update legal_qna_with_rag_on_bare_acts.ipynb

---
 .../legal_qna_with_rag_on_bare_acts.ipynb | 80 ++++++++++++++-----
 1 file changed, 61 insertions(+), 19 deletions(-)

diff --git a/week5/community-contributions/legal_qna_with_rag_on_bare_acts/legal_qna_with_rag_on_bare_acts.ipynb b/week5/community-contributions/legal_qna_with_rag_on_bare_acts/legal_qna_with_rag_on_bare_acts.ipynb
index c64b90a..313ea6f 100644
--- a/week5/community-contributions/legal_qna_with_rag_on_bare_acts/legal_qna_with_rag_on_bare_acts.ipynb
+++ b/week5/community-contributions/legal_qna_with_rag_on_bare_acts/legal_qna_with_rag_on_bare_acts.ipynb
@@ -39,15 +39,19 @@
     "    MODEL_REGISTRY[label] = {\"client\": client, \"model\": model_id}\n",
     "\n",
     "# OpenAI\n",
-    "_register(\"OpenAI • GPT-4o-mini\", openai_client, \"gpt-4o-mini\")\n",
+    "_register(\"OpenAI • GPT-5\", openai_client, \"gpt-5\")\n",
+    "_register(\"OpenAI • GPT-5 Nano\", openai_client, \"gpt-5-nano\")\n",
+    "_register(\"OpenAI • GPT-4o-mini\", openai_client, \"gpt-4o-mini\")\n",
     "\n",
-    "# Gemini\n",
-    "_register(\"Gemini • 2.5 Flash\", gemini_client, \"gemini-2.5-flash\")\n",
-    "_register(\"Gemini • 2.5 Pro\", gemini_client, \"gemini-2.5-pro\")\n",
+    "# Gemini (Google)\n",
+    "_register(\"Gemini • 2.5 Pro\", gemini_client, \"gemini-2.5-pro\")\n",
+    "_register(\"Gemini • 2.5 Flash\", gemini_client, \"gemini-2.5-flash\")\n",
     "\n",
     "# Groq\n",
-    "_register(\"Groq • Llama 3.3 70B\", groq_client, \"llama-3.3-70b-versatile\")\n",
-    "_register(\"Groq • Llama 3.1 8B\", groq_client, \"llama-3.1-8b-instant\")\n",
+    "_register(\"Groq • Llama 3.1 8B\", groq_client, \"llama-3.1-8b-instant\")\n",
+    "_register(\"Groq • Llama 3.3 70B\", groq_client, \"llama-3.3-70b-versatile\")\n",
+    "_register(\"Groq • GPT-OSS 20B\", groq_client, \"openai/gpt-oss-20b\")\n",
+    "_register(\"Groq • GPT-OSS 120B\", groq_client, \"openai/gpt-oss-120b\")\n",
     "\n",
     "AVAILABLE_MODELS = list(MODEL_REGISTRY.keys())\n",
     "DEFAULT_MODEL = AVAILABLE_MODELS[0] if AVAILABLE_MODELS else \"OpenAI • GPT-4o-mini\"\n",
@@ -189,7 +193,8 @@
     "    SYSTEM = (\n",
     "        \"You are a precise legal assistant for Indian Bare Acts. \"\n",
     "        \"Answer ONLY from the provided context. If the answer is not in context, say you don't know. \"\n",
-    "        \"Cite the sources by file name (e.g., ipc, coi, bns) in brackets.\"\n",
+    "        \"Cite sources inline in square brackets as [file #chunk] (e.g., [bns #12]). \"\n",
+    "        \"Prefer exact quotes for critical provisions/sections.\"\n",
     "    )\n",
     "\n",
     "    @staticmethod\n",
@@ -197,10 +202,19 @@
     "        ctx = \"\\n\\n---\\n\\n\".join(\n",
     "            f\"[{c['meta']['source']} #{c['meta']['chunk_id']}]\\n{c['text']}\" for c in contexts\n",
     "        )\n",
-    "        return f\"Question:\\n{query}\\n\\nContext:\\n{ctx}\\n\\nInstructions:\\n- Keep answers concise.\\n- Quote key lines when useful.\\n- Add [source] inline.\"\n",
+    "        return (\n",
+    "            f\"Question:\\n{query}\\n\\n\"\n",
+    "            f\"Context (answer ONLY from this):\\n{ctx}\\n\\n\"\n",
+    "            \"Instructions:\\n- Keep answers concise and faithful to the text.\\n\"\n",
+    "            \"- Use [file #chunk] inline where relevant.\"\n",
+    "        )\n",
+    "\n",
+    "def _snippet(txt: str, n: int = 220) -> str:\n",
+    "    s = \" \".join(txt.strip().split())\n",
+    "    return (s[:n] + \"…\") if len(s) > n else s\n",
     "\n",
     "class RagQAService:\n",
-    "    \"\"\"Coordinates retrieval + generation.\"\"\"\n",
+    "    \"\"\"Coordinates retrieval + generation and returns a rich reference block.\"\"\"\n",
     "    def __init__(self, index: BareActsIndex, llm: MultiLLM):\n",
     "        self.index = index\n",
     "        self.llm = llm\n",
@@ -211,9 +225,12 @@
     "        user = self.builder.build_user(question, ctx)\n",
     "        reply = self.llm.complete(model_label=model_label, system=self.builder.SYSTEM, user=user)\n",
     "\n",
-    "        # Append sources deterministically (post-processing for transparency)\n",
-    "        sources = \", \".join(sorted({c[\"meta\"][\"source\"] for c in ctx}))\n",
-    "        return f\"{reply}\\n\\n— Sources: {sources}\"\n"
+    "        # Rich references: file, chunk index, snippet\n",
+    "        references = \"\\n\".join(\n",
+    "            f\"- [{c['meta']['source']} #{c['meta']['chunk_id']}] {_snippet(c['text'])}\"\n",
+    "            for c in ctx\n",
+    "        )\n",
+    "        return f\"{reply}\\n\\n**References**\\n{references}\"\n"
    ]
   },
   {
@@ -244,25 +261,50 @@
     "    except Exception as e:\n",
     "        return f\"⚠️ {e}\"\n",
     "\n",
+    "DEFAULT_QUESTION = \"Which section of the BNS covers murder, and what is the punishment for it?\"\n",
+    "\n",
     "with gr.Blocks(title=\"Legal QnA • Bare Acts (RAG + Multi-LLM)\") as app:\n",
     "    gr.Markdown(\"### 🧑‍⚖️ Legal Q&A on Bare Acts (RAG) — Multi-Provider LLM\")\n",
     "    with gr.Row():\n",
-    "        model_dd = gr.Dropdown(choices=AVAILABLE_MODELS or [\"OpenAI • GPT-4o-mini\"],\n",
-    "                               value=DEFAULT_MODEL if AVAILABLE_MODELS else None,\n",
-    "                               label=\"Model\")\n",
+    "        model_dd = gr.Dropdown(\n",
+    "            choices=AVAILABLE_MODELS or [\"OpenAI • GPT-4o-mini\"],\n",
+    "            value=DEFAULT_MODEL if AVAILABLE_MODELS else None,\n",
+    "            label=\"Model\"\n",
+    "        )\n",
     "        topk = gr.Slider(2, 12, value=6, step=1, label=\"Top-K context\")\n",
     "\n",
-    "    chat = gr.ChatInterface(fn=chat_fn,\n",
-    "                            type=\"messages\",\n",
-    "                            additional_inputs=[model_dd, topk])\n",
+    "    chat = gr.ChatInterface(\n",
+    "        fn=chat_fn,\n",
+    "        type=\"messages\",\n",
+    "        additional_inputs=[model_dd, topk],\n",
+    "        textbox=gr.Textbox(\n",
+    "            value=DEFAULT_QUESTION,\n",
+    "            label=\"Ask a legal question\",\n",
+    "            placeholder=\"Type your question about BNS/IPC/Constitution…\"\n",
+    "        ),\n",
+    "    )\n",
     "\n",
     "app.launch(inbrowser=True)\n"
    ]
   }
  ],
  "metadata": {
+  "kernelspec": {
+   "display_name": "llm-engineering",
+   "language": "python",
+   "name": "python3"
+  },
   "language_info": {
-   "name": "python"
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.10"
   }
  },
  "nbformat": 4,