Added Perl to Python Code Converter
This commit is contained in:
394
week4/community-contributions/day4 -Perl to Python.ipynb
Normal file
394
week4/community-contributions/day4 -Perl to Python.ipynb
Normal file
@@ -0,0 +1,394 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "de352746-564c-4b33-b1ad-0b449988c448",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Perl to Python Code Generator\n",
|
||||
"\n",
|
||||
"The requirement: use a Frontier model to generate high performance Python code from Perl code\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "e610bf56-a46e-4aff-8de1-ab49d62b1ad3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import io\n",
|
||||
"import sys\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from openai import OpenAI\n",
|
||||
"import google.generativeai\n",
|
||||
"import anthropic\n",
|
||||
"from IPython.display import Markdown, display, update_display\n",
|
||||
"import gradio as gr\n",
|
||||
"import subprocess\n",
|
||||
"import requests\n",
|
||||
"import json\n",
|
||||
"#for Hugging face end points\n",
|
||||
"from huggingface_hub import login, InferenceClient\n",
|
||||
"from transformers import AutoTokenizer"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "4f672e1c-87e9-4865-b760-370fa605e614",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# environment\n",
|
||||
"\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')\n",
|
||||
"os.environ['ANTHROPIC_API_KEY'] = os.getenv('ANTHROPIC_API_KEY', 'your-key-if-not-using-env')\n",
|
||||
"os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN', 'your-key-if-not-using-env')\n",
|
||||
"##for connecting to HF End point\n",
|
||||
"hf_token = os.environ['HF_TOKEN']\n",
|
||||
"login(hf_token, add_to_git_credential=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "8aa149ed-9298-4d69-8fe2-8f5de0f667da",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# initialize\n",
|
||||
"# NOTE - option to use ultra-low cost models by uncommenting last 2 lines\n",
|
||||
"\n",
|
||||
"openai = OpenAI()\n",
|
||||
"claude = anthropic.Anthropic()\n",
|
||||
"OPENAI_MODEL = \"gpt-4o\"\n",
|
||||
"CLAUDE_MODEL = \"claude-3-5-sonnet-20240620\"\n",
|
||||
"\n",
|
||||
"# Want to keep costs ultra-low? Uncomment these lines:\n",
|
||||
"OPENAI_MODEL = \"gpt-4o-mini\"\n",
|
||||
"CLAUDE_MODEL = \"claude-3-haiku-20240307\"\n",
|
||||
"\n",
|
||||
"#To access open source models from Hugging face end points\n",
|
||||
"code_qwen = \"Qwen/CodeQwen1.5-7B-Chat\"\n",
|
||||
"code_gemma = \"google/codegemma-7b-it\"\n",
|
||||
"CODE_QWEN_URL = \"https://h1vdol7jxhje3mpn.us-east-1.aws.endpoints.huggingface.cloud\"\n",
|
||||
"CODE_GEMMA_URL = \"https://c5hggiyqachmgnqg.us-east-1.aws.endpoints.huggingface.cloud\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"id": "6896636f-923e-4a2c-9d6c-fac07828a201",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"system_message = \"You are an assistant that reimplements Perl scripts code into a high performance Python for a Windows 11 PC. \"\n",
|
||||
"system_message += \"Respond only with Python code; use comments sparingly and do not provide any explanation other than occasional # comments. \"\n",
|
||||
"system_message += \"The Python response needs to produce an identical output in the fastest possible time.\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "8e7b3546-57aa-4c29-bc5d-f211970d04eb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def user_prompt_for(perl):\n",
|
||||
" user_prompt = \"Rewrite this Perl scripts code in C++ with the fastest possible implementation that produces identical output in the least time. \"\n",
|
||||
" user_prompt += \"Respond only with Python code; do not explain your work other than a few comments. \"\n",
|
||||
" user_prompt += \"Pay attention to number types to ensure no int overflows. Remember to #include all necessary python libraries as needed,\\\n",
|
||||
" such as requests, os, json etc.\\n\\n\"\n",
|
||||
" user_prompt += perl\n",
|
||||
" return user_prompt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "c6190659-f54c-4951-bef4-4960f8e51cc4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def messages_for(perl):\n",
|
||||
" return [\n",
|
||||
" {\"role\": \"system\", \"content\": system_message},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt_for(perl)}\n",
|
||||
" ]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "71e1ba8c-5b05-4726-a9f3-8d8c6257350b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# write to a file called script.py\n",
|
||||
"\n",
|
||||
"def write_output(python):\n",
|
||||
" code = python.replace(\"```python\",\"\").replace(\"```\",\"\")\n",
|
||||
" output_file = \"script.py\"\n",
|
||||
" with open(output_file, \"w\") as f:\n",
|
||||
" f.write(code)\n",
|
||||
" return output_file"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "0be9f47d-5213-4700-b0e2-d444c7c738c0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def stream_gpt(perl): \n",
|
||||
" stream = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages_for(perl), stream=True)\n",
|
||||
" reply = \"\"\n",
|
||||
" for chunk in stream:\n",
|
||||
" fragment = chunk.choices[0].delta.content or \"\"\n",
|
||||
" reply += fragment\n",
|
||||
" cleaned_reply = reply.replace('```python\\n','').replace('```','')\n",
|
||||
" yield cleaned_reply, None\n",
|
||||
" yield cleaned_reply, write_output(cleaned_reply)\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "8669f56b-8314-4582-a167-78842caea131",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def stream_claude(perl):\n",
|
||||
" result = claude.messages.stream(\n",
|
||||
" model=CLAUDE_MODEL,\n",
|
||||
" max_tokens=2000,\n",
|
||||
" system=system_message,\n",
|
||||
" messages=[{\"role\": \"user\", \"content\": user_prompt_for(perl)}],\n",
|
||||
" )\n",
|
||||
" reply = \"\"\n",
|
||||
" with result as stream:\n",
|
||||
" for text in stream.text_stream:\n",
|
||||
" reply += text\n",
|
||||
" cleaned_reply = reply.replace('```python\\n','').replace('```','')\n",
|
||||
" yield cleaned_reply, None\n",
|
||||
" yield cleaned_reply, write_output(cleaned_reply)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "5b166afe-741a-4711-bc38-626de3538ea2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def stream_code_qwen(python):\n",
|
||||
" tokenizer = AutoTokenizer.from_pretrained(code_qwen)\n",
|
||||
" messages = messages_for(python)\n",
|
||||
" text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)\n",
|
||||
" client = InferenceClient(CODE_QWEN_URL, token=hf_token)\n",
|
||||
" stream = client.text_generation(text, stream=True, details=True, max_new_tokens=3000)\n",
|
||||
" result = \"\"\n",
|
||||
" for r in stream:\n",
|
||||
" result += r.token.text\n",
|
||||
" cleaned_reply = result.replace('```python\\n','').replace('```','')\n",
|
||||
" yield cleaned_reply, None\n",
|
||||
" yield cleaned_reply, write_output(cleaned_reply) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "2f1ae8f5-16c8-40a0-aa18-63b617df078d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def generate(perl_script, model):\n",
|
||||
" if model==\"GPT\":\n",
|
||||
" for result, file in stream_gpt(perl_script):\n",
|
||||
" yield result, file\n",
|
||||
" yield result, file\n",
|
||||
" elif model==\"Claude\":\n",
|
||||
" for result, file in stream_claude(perl_script):\n",
|
||||
" yield result, file\n",
|
||||
" yield result, file\n",
|
||||
" elif model==\"CodeQwen\":\n",
|
||||
" for result, file in stream_code_qwen(perl_script):\n",
|
||||
" yield result, file\n",
|
||||
" yield result, file\n",
|
||||
" else:\n",
|
||||
" raise ValueError(\"Unknown model\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "aa8e9a1c-9509-4056-bd0b-2578f3cc3335",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def execute_perl(perl_code):\n",
|
||||
"\n",
|
||||
" import subprocess\n",
|
||||
" #print(perl_file)\n",
|
||||
" perl_path = r\"E:\\Softwares\\Perl\\perl\\bin\\perl.exe\"\n",
|
||||
" # Run Perl script from Jupyter Lab\n",
|
||||
" result = subprocess.run([perl_path, '-e', perl_code], capture_output=True, text=True)\n",
|
||||
"\n",
|
||||
" # Return the output of the Perl script\n",
|
||||
" return result.stdout\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "01e9d980-8830-4421-8753-a065dcbea1ed",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def execute_python(code):\n",
|
||||
" try:\n",
|
||||
" output = io.StringIO()\n",
|
||||
" sys.stdout = output\n",
|
||||
" exec(code)\n",
|
||||
" finally:\n",
|
||||
" sys.stdout = sys.__stdout__\n",
|
||||
" return output.getvalue()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "ed4e0aff-bfde-440e-8e6b-eb3c7143837e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"css = \"\"\"\n",
|
||||
".perl {background-color: #093645;}\n",
|
||||
".python {background-color: #0948;}\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"force_dark_mode = \"\"\"\n",
|
||||
"function refresh() {\n",
|
||||
" const url = new URL(window.location);\n",
|
||||
" if (url.searchParams.get('__theme') !== 'dark') {\n",
|
||||
" url.searchParams.set('__theme', 'dark');\n",
|
||||
" window.location.href = url.href;\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"id": "caaee54d-79db-4db3-87df-2e7d2eba197c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div><iframe src=\"http://127.0.0.1:7861/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": []
|
||||
},
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"with gr.Blocks(css=css, js=force_dark_mode) as ui:\n",
|
||||
"\n",
|
||||
" gr.HTML(\"<h2 style='text-align: center; color: white;'> PERL to Python Code Generator</h2>\")\n",
|
||||
" with gr.Row(scale=0, equal_height=True):\n",
|
||||
" model = gr.Dropdown([\"GPT\", \"Claude\", \"CodeQwen\"], label=\"Select model\", value=\"GPT\")\n",
|
||||
" perl_file = gr.File(label=\"Upload Perl Script:\")\n",
|
||||
" convert = gr.Button(\"Convert to Python\")\n",
|
||||
" file_output = gr.File(label=\"Download Python script\", visible=False)\n",
|
||||
" with gr.Row():\n",
|
||||
" perl_script = gr.Textbox(label=\"Perl Script:\")\n",
|
||||
" python_script = gr.Textbox(label=\"Converted Python Script:\") \n",
|
||||
" with gr.Row():\n",
|
||||
" perl_run = gr.Button(\"Run PERL\")\n",
|
||||
" python_run = gr.Button(\"Run Python\")\n",
|
||||
" with gr.Row():\n",
|
||||
" perl_out = gr.TextArea(label=\"PERL result:\", elem_classes=[\"perl\"])\n",
|
||||
" python_out = gr.TextArea(label=\"Python result:\", elem_classes=[\"python\"])\n",
|
||||
" with gr.Row(): \n",
|
||||
" clear_button = gr.Button(\"Clear\")\n",
|
||||
" \n",
|
||||
" def extract_perl_code(file):\n",
|
||||
" if file is None:\n",
|
||||
" return \"No file uploaded.\", None \n",
|
||||
" with open(file.name, \"r\", encoding=\"utf-8\") as f:\n",
|
||||
" perl_code = f.read()\n",
|
||||
" return perl_code\n",
|
||||
"\n",
|
||||
" convert.click(extract_perl_code, inputs=[perl_file], outputs=[perl_script]).then(\n",
|
||||
" generate, inputs=[perl_script, model], outputs=[python_script, file_output]).then(\n",
|
||||
" lambda file_output: gr.update(visible=True), inputs=[file_output], outputs=[file_output]\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" perl_run.click(execute_perl, inputs=[perl_script], outputs=[perl_out])\n",
|
||||
" python_run.click(execute_python, inputs=[python_script], outputs=[python_out]) \n",
|
||||
"\n",
|
||||
" def clear_all():\n",
|
||||
" return None, \"\", \"\", gr.update(visible=False), \"\", \"\"\n",
|
||||
"\n",
|
||||
" clear_button.click(\n",
|
||||
" clear_all,\n",
|
||||
" outputs=[perl_file, perl_script, python_script, file_output, perl_out, python_out]\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"ui.launch(inbrowser=True)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user